In [None]:
import os
import sys
import glob
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from datetime import datetime
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm, tqdm_notebook

from lyft_dataset_sdk.lyftdataset import LyftDataset
from lyft_dataset_sdk.utils.data_classes import LidarPointCloud, Box, Quaternion
from lyft_dataset_sdk.utils.geometry_utils import view_points, transform_matrix

from config import config as cfg

In [None]:
# Read the raw training table from <input_dir>/train/train.csv.
train_csv_path = os.path.join(cfg.input_dir, 'train/train.csv')
train = pd.read_csv(train_csv_path)

In [None]:
# Column layout of the per-object table: two identifier columns, the box
# centre, the box size, the heading angle and finally the class label.
object_columns = (
    ['sample_id', 'object_id']
    + ['center_x', 'center_y', 'center_z']
    + ['width', 'length', 'height']
    + ['yaw', 'class_name']
)
# Accumulator for one row per object.
objects = list()

In [None]:
# Flatten every sample's annotation string into one row per object.
#
# The accumulator is reset here so that re-running this cell rebuilds the
# table from scratch instead of appending a second copy of every object to
# rows left over from a previous run.
objects = []
# Each object is described by 8 whitespace-separated tokens:
# x, y, z, width, length, height, yaw, class_name.
fields_per_object = 8
for sample_id, ps in tqdm(train.values):
    # An empty annotation field is read by pandas as NaN (a float), which
    # has no .split(); such samples contribute no objects.
    if not isinstance(ps, str):
        continue
    object_params = ps.split()
    # Trailing tokens that do not form a complete group of 8 are ignored.
    for i in range(len(object_params) // fields_per_object):
        start = i * fields_per_object
        # Row layout matches object_columns: sample id, per-sample object
        # index, then the 8 raw (still string-typed) tokens.
        objects.append([sample_id, i, *object_params[start:start + fields_per_object]])
train_objects = pd.DataFrame(
    objects,
    columns=object_columns
)

In [None]:
# Convert the object index and the box geometry columns to float32; the
# remaining columns (sample_id, class_name) are left untouched.
numerical_cols = [
    'object_id',
    'center_x', 'center_y', 'center_z',
    'width', 'length', 'height',
    'yaw',
]
train_objects[numerical_cols] = train_objects[numerical_cols].values.astype(np.float32)

In [None]:
# Total number of object rows in the flattened table.
train_objects.shape[0]

In [None]:
# Preview the first five object rows.
train_objects.head(n=5)

In [None]:
# Horizontal bar chart of the number of annotated objects per class.
# The classes "motorcycle", "emergency_vehicle" and "animal" are excluded.
fig, ax = plt.subplots(figsize=(10, 10))
sns.countplot(
    y="class_name",
    data=train_objects.query(
        'class_name != "motorcycle" and class_name != "emergency_vehicle" and class_name != "animal"'
    ),
    palette=['navy', 'darkblue', 'blue', 'dodgerblue', 'skyblue', 'lightblue'],
    ax=ax,  # draw on the axes created above rather than on the implicit current axes
)
ax.set_title('Object Frequencies', fontsize=16)
ax.tick_params(axis='y', labelsize=14)
ax.set_xlabel("Count", fontsize=15)
ax.set_ylabel("Class Name", fontsize=15)
# plt.show() takes no figure/artist argument; the previous code passed the
# title Text object positionally, which only the inline backend tolerates.
plt.show()

In [None]:
# Box plot of the object width distribution for each class.
# The classes "motorcycle", "emergency_vehicle" and "animal" are excluded.
fig, ax = plt.subplots(figsize=(15, 15))

sns.boxplot(
    x="class_name",
    y="width",
    data=train_objects.query(
        'class_name != "motorcycle" and class_name != "emergency_vehicle" and class_name != "animal"'
    ),
    palette='YlGn',
    ax=ax,
)
ax.set_title('width (for different objects)', fontsize=16)

ax.tick_params(axis='both', labelsize=14)
ax.set_xlabel("Class Name", fontsize=15)
ax.set_ylabel("width", fontsize=15)
# plt.show() takes no figure/artist argument; the previous code passed the
# title Text object positionally, which only the inline backend tolerates.
plt.show()

In [None]:
# Mean of the 'length' column for each class_name.
l = train_objects.groupby("class_name")["length"].agg("mean")
l

In [None]:
# Mean of the 'width' column for each class_name.
w = train_objects.groupby("class_name")["width"].agg("mean")
w

In [None]:
# Mean of the 'height' column for each class_name.
h = train_objects.groupby("class_name")["height"].agg("mean")
h

In [None]:
# Wrap the per-class mean length Series in a single-column DataFrame.
df_l = l.to_frame()
df_l

In [None]:
# Wrap the per-class mean width Series in a single-column DataFrame.
df_w = w.to_frame()
df_w

In [None]:
# Wrap the per-class mean height Series in a single-column DataFrame.
df_h = h.to_frame()
df_h

In [None]:
# Place the three single-column frames side by side, aligned on their index.
mean_dimension_frames = [df_l, df_w, df_h]
df = pd.concat(mean_dimension_frames, axis="columns")
df

In [None]:
# Write the combined table to <work_dir>/data/mean_length_width_height.csv.
# DataFrame.to_csv does not create missing parent directories, so make sure
# the target folder exists before writing.
output_path = os.path.join(cfg.work_dir, "data/mean_length_width_height.csv")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path)