In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
import plotly.express as px

from src.features import load_trips

# Activate seaborn's default theme (called with no overrides).
sns.set_theme()


In [None]:
# Load the trips through the project helper and time the call with %time.
# NOTE(review): the list presumably names routes/directions and '2s' a
# 2-second sampling interval — confirm against src.features.load_trips.
%time tmp = load_trips(['M3_VH', 'M3_HH', 'CPH1_HH', 'CPH1_VH', 'CPH6_VH', 'CPH6_HH'], trips='all', sampling='2s')
# `df` is bound to the same object as `tmp`; no copy is made here.
df = tmp

In [None]:
# Flag samples where 97.5% of the recorded velocity still exceeds the limit.
df['is_speeding'] = df['velocity'].mul(0.975).gt(df['speed_limit'])

In [None]:
# Per-(route, trip, pass) mean of every column.
# NOTE(review): on pandas >= 2.0 a non-numeric column here raises TypeError;
# pass numeric_only=True if the frame carries any — confirm against the data.
df.groupby(by=['route', 'trip', 'pass']).mean()

In [None]:
# Per-(route, trip, pass) standard deviation of every column.
# NOTE(review): on pandas >= 2.0 a non-numeric column here raises TypeError;
# pass numeric_only=True if the frame carries any — confirm against the data.
df.groupby(by=['route', 'trip', 'pass']).std()

In [None]:
def remove_outliers(df, columns, lower=0.01, upper=0.99):
    """Drop rows whose value falls outside a quantile band, column by column.

    The columns are processed in the given order and each column's quantiles
    are computed on the rows that survived the previous columns, so the
    result depends on the order of ``columns``. Rows with a missing value in
    any of the listed columns are dropped, because the range comparison is
    false for NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    columns : iterable of str
        Names of the columns to trim.
    lower, upper : float, default 0.01 and 0.99
        Lower and upper quantiles (between 0 and 1) that bound the values
        kept; both bounds are inclusive.

    Returns
    -------
    pandas.DataFrame
        An independent copy holding only the retained rows, with the original
        index labels preserved.

    Raises
    ------
    ValueError
        If the bounds do not satisfy ``0 <= lower <= upper <= 1``.
    """
    if not 0.0 <= lower <= upper <= 1.0:
        raise ValueError(
            f"quantile bounds must satisfy 0 <= lower <= upper <= 1, "
            f"got lower={lower!r}, upper={upper!r}"
        )

    trimmed = df
    for col in columns:
        q_low = trimmed[col].quantile(lower)
        q_hi = trimmed[col].quantile(upper)
        trimmed = trimmed[trimmed[col].between(q_low, q_hi)]

    # Return a real copy so callers can add columns to the result without
    # pandas warning about writing to a slice of the original frame.
    return trimmed.copy()

In [None]:
# Columns trimmed by remove_outliers, in processing order (the order matters
# because each column is filtered on the rows left by the previous ones).
outlier_columns = [
    'acceleration_z',
    'acceleration_y',
    'acceleration_x',
    'steering_position',
    'steering_acceleration',
    'traction_instant_consumption',
    'yaw_rate',
    'velocity',
]

# `df` is rebound to the trimmed frame, so re-running this cell trims again.
df = remove_outliers(df=df, columns=outlier_columns)

In [None]:
# Distribution of steering position in the trimmed frame.
px.histogram(data_frame=df, x=['steering_position'])

In [None]:
# Distribution of steering acceleration in the trimmed frame.
px.histogram(data_frame=df, x=['steering_acceleration'])

In [None]:
# Distribution of yaw rate in the trimmed frame.
px.histogram(data_frame=df, x=['yaw_rate'])

In [None]:
# Overlaid distributions of the three acceleration columns (wide-form input).
px.histogram(
    data_frame=df,
    x=['acceleration_x', 'acceleration_y', 'acceleration_z'],
    nbins=100,
)

In [None]:
# Distribution of velocity in the trimmed frame.
px.histogram(data_frame=df, x='velocity')

In [None]:
# Distribution of instantaneous traction consumption in the trimmed frame.
px.histogram(data_frame=df, x='traction_instant_consumption')

In [None]:
# Show the column labels currently present in `df`.
df.columns

In [None]:
# Integer (0/1) version of the boolean `is_speeding` flag.
df['is_speeding_int'] = df['is_speeding'].astype(int)

In [None]:
# Earlier, wider selection kept for reference:
# features = ['acceleration_x', 'steering_position', 'steering_acceleration', 'yaw_rate', 'traction_instant_consumption', 'velocity',
#             'driver_safety_belt_reminder', 'is_speeding_int']

# Columns fed to the scaler and the embedding.
features = [
    'acceleration_x',
    'steering_acceleration',
    'yaw_rate',
    'velocity',
    'traction_instant_consumption',
]

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardise the selected features before previewing them: this cell reads
# `df_scaled`, so it has to be computed here for the notebook to run top to
# bottom on a fresh kernel.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df[features])

# Wrap the scaled array in a frame that keeps the original index and column names.
pd.DataFrame(df_scaled, index=df.index, columns=features)

In [None]:
# Fit a standard scaler on the selected feature columns and keep both the
# fitted scaler and the scaled values.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(X=df[features])

In [None]:
import umap

In [None]:
fit = umap.UMAP(
    n_neighbors=20,
    n_components=2,
    init='spectral',
    spread=1.0,
    min_dist=0.25,
    a=None,
    b=None
)

%time u = fit.fit_transform(df_scaled)

In [None]:
# 3-D scatter of the embedding; reads columns 0, 1 and 2 of `u`.
px.scatter_3d(data_frame=u, x=0, y=1, z=2)

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Three-cluster k-means. The seed makes the label assignment repeatable for a
# given input, and n_init is pinned so the number of restarts does not depend
# on the installed scikit-learn version's default.
clusterer = KMeans(n_clusters=3, n_init=10, random_state=42)

In [None]:
# Fit k-means on the embedding and keep the per-sample labels.
clusters = clusterer.fit(u).labels_
# Peek at the first five labels.
clusters[:5]

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Embedding coordinates (integer column labels 0..k-1) plus the cluster label.
# The label is stored as a string rather than stacked into the float array,
# so it stays a category and plotly colours it discretely instead of drawing
# a continuous colour bar over 0.0/1.0/2.0.
df_cluster = pd.DataFrame(u).assign(cluster=clusters.astype(str))

In [None]:
# 3-D scatter of the embedding coloured by cluster label; reads columns 0, 1 and 2.
px.scatter_3d(data_frame=df_cluster, x=0, y=1, z=2, color='cluster')

In [None]:
# Attach the cluster labels to the feature frame.
# NOTE(review): assumes `clusters` has one entry per row of `df`, in row order
# (it is derived from df[features] via scaling and embedding) — confirm.
df['cluster'] = clusters

In [None]:
# Steering position distribution, split by cluster.
px.histogram(data_frame=df, x=['steering_position'], color='cluster')

In [None]:
# Steering acceleration distribution, split by cluster.
px.histogram(data_frame=df, x=['steering_acceleration'], color='cluster')

In [None]:
# Yaw rate distribution, split by cluster (300 bins).
px.histogram(data_frame=df, x=['yaw_rate'], nbins=300, color='cluster')

In [None]:
# The three acceleration columns are passed in wide form. Because `color` is
# taken by the cluster label, the columns would otherwise be pooled into a
# single distribution per cluster; faceting on plotly's generated "variable"
# column keeps one panel per acceleration axis.
px.histogram(
    df,
    x=['acceleration_x', 'acceleration_y', 'acceleration_z'],
    nbins=300,
    color='cluster',
    facet_col='variable',
)

In [None]:
# Velocity distribution, split by cluster.
px.histogram(data_frame=df, x='velocity', color='cluster')

In [None]:
# Instantaneous traction consumption distribution, split by cluster.
px.histogram(data_frame=df, x='traction_instant_consumption', color='cluster')

In [None]:
# Count of speeding vs. non-speeding samples, split by cluster.
px.histogram(data_frame=df, x='is_speeding', color='cluster')

In [110]:
# Velocity distribution, split by cluster.
# NOTE(review): same call as an earlier cell in this notebook; consider removing one.
px.histogram(data_frame=df, x='velocity', color='cluster')

In [111]:
# Instantaneous traction consumption distribution, split by cluster.
# NOTE(review): same call as an earlier cell in this notebook; consider removing one.
px.histogram(data_frame=df, x='traction_instant_consumption', color='cluster')

In [112]:
# Count of speeding vs. non-speeding samples, split by cluster.
# NOTE(review): same call as an earlier cell in this notebook; consider removing one.
px.histogram(data_frame=df, x='is_speeding', color='cluster')