In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import plot,iplot
from scipy.stats import norm, kurtosis
import os
from scipy.signal import butter, lfilter, freqz
from scipy import signal
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from joblib import dump, load
import pandas as pd
from processing_funcs import *

In [38]:
df = read_measurement("../data/raw_data_train/rsq_q1/*", 0.02)

In [39]:
df = df[df["measurementID"] == 0]

In [40]:
def kurtosis_time(x):

    return kurtosis(x, fisher=True)

def rms_100(x):

    return np.sqrt(np.mean(x**2))

def crest(x):

    return max(abs(x))/np.sqrt(np.mean(x**2))

def create_aggregated(df):
    """Create a aggregated dataframe in time domain"""
    signals = ['x_lin_acc', 'y_lin_acc', "z_lin_acc", 
               'x_acc', 'y_acc', 'z_acc',
               'x_gyro', 'y_gyro', 'z_gyro']

    agg_df = df.groupby(["id_gps"]).agg({x: ["sum", "mean", "mad",
                                                "median", "min", "max",
                                                "std", "var", "sem",
                                                "skew", "quantile",
                                                kurtosis_time, rms_100,
                                                crest] for x in signals})

    return agg_df

In [41]:
# filtered_df = filter_acc(df)
# new_df = df.drop(filtered_df.columns, axis=1)
# df = pd.concat([new_df.reset_index(),filtered_df.reset_index()], axis=1)

In [42]:
feature_df = create_aggregated(df).reset_index()

In [43]:
scaler = StandardScaler()
scaled = scaler.fit_transform(feature_df.drop("id_gps", axis=1)) 


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



In [44]:
model = load("../models/rfc_v10.joblib")

In [45]:
preds = model.predict(scaled)

In [46]:
result = pd.concat([feature_df,pd.DataFrame(preds, columns=["label"])],axis = 1)

In [47]:
result = result[[('id_gps', ''),"label"]]
result.columns = ["id_gps", "label"]


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray



In [48]:
final_result = df.merge(result, on="id_gps", how = "outer")

###  Testing the merge

If the groupped result dataframe id_gps is the same as the result then the merge is correct.

In [49]:
num = (result["id_gps"] == final_result.groupby("id_gps").count().index).sum()

In [50]:
final_result  = final_result.groupby("id_gps").mean()

In [51]:
fig = px.scatter_mapbox(final_result, 
                        lat="lat", 
                        lon="lon",
                        zoom = 12, 
                        height=500, 
                        color = "label", 
                        color_continuous_scale=["green","blue","red"])
fig.update_layout(mapbox_style="carto-positron")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.layout.coloraxis.showscale = False
fig

In [33]:
real = [1 for  i in range(num)]

In [34]:
final_result["label"].values

array([0, 0, 1, 1, 2, 1, 1, 1, 1, 0, 1, 0, 1, 2, 1, 0, 1, 1, 0, 1, 2, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 2, 1, 1, 1, 0, 0, 0, 2, 0, 0, 1,
       2, 1, 0, 1, 1, 2, 2, 2, 2, 0, 1, 1, 0, 1, 2, 2, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 2, 0, 0, 0, 0, 1, 0, 2, 2, 1, 1, 0, 0, 1, 0, 0, 0, 2, 2,
       1, 2, 0, 1, 1, 1, 1, 1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1, 1, 0, 2, 0,
       1, 0, 1, 0, 1, 0, 2, 0, 2, 0, 2, 0, 0, 0, 1, 0, 1, 0, 2, 2, 2, 2,
       1, 2, 1, 1, 1, 1, 2, 2, 2, 0, 2, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 2, 2, 1, 0, 0, 2, 1, 0, 0, 2, 0, 0, 2, 0, 1, 1, 2, 2,
       0], dtype=int64)

In [35]:
from sklearn.metrics import accuracy_score

accuracy_score(real, final_result["label"].values)

0.3446327683615819

In [36]:
len(final_result)

177

In [37]:
Counter(final_result["label"]).most_common()

[(0, 74), (1, 61), (2, 42)]