In [37]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import plot,iplot
from scipy.stats import norm, kurtosis
import os
from scipy.signal import butter, lfilter, freqz
from scipy import signal
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from joblib import dump, load
import pandas as pd
from processing_funcs import *

In [125]:
# Load the raw measurements matched by the glob pattern into a single dataframe.
# `read_measurement` comes from the star-import of `processing_funcs`.
# NOTE(review): the second argument (0.01) is presumably the sampling period in
# seconds -- the `time` column displayed later advances in 0.01 steps -- TODO confirm.
df = read_measurement("../data/raw_data_train/rsq_q3/*", 0.01)

In [126]:
def butter_lowpass(cutoff, fs, order=5):
    """Design a digital Butterworth low-pass filter.

    Parameters
    ----------
    cutoff : float
        Cutoff frequency, expressed in the same units as ``fs``.
    fs : float
        Sampling frequency of the signal to be filtered.
    order : int, optional
        Order of the filter (default 5).

    Returns
    -------
    tuple
        The ``(b, a)`` coefficient arrays produced by ``scipy.signal.butter``.
    """
    # butter() takes the cutoff as a fraction of the Nyquist frequency (fs / 2).
    nyquist = 0.5 * fs
    numerator, denominator = butter(order, cutoff / nyquist, btype='low', analog=False)
    return numerator, denominator

def butter_lowpass_filter(data, cutoff, fs, order=5):
    """Apply a Butterworth low-pass filter to ``data``.

    The coefficients come from ``butter_lowpass(cutoff, fs, order)`` and are
    applied with ``scipy.signal.lfilter``.

    Parameters
    ----------
    data : array-like
        Signal samples to filter.
    cutoff : float
        Cutoff frequency, in the same units as ``fs``.
    fs : float
        Sampling frequency of ``data``.
    order : int, optional
        Filter order (default 5).

    Returns
    -------
    The filtered signal as returned by ``lfilter``.
    """
    coefficients = butter_lowpass(cutoff, fs, order=order)
    return lfilter(*coefficients, data)


In [127]:
def filter_acc(df, cutoff=10, fs=50, order=2):
    """Low-pass filter the accelerometer columns of ``df``.

    Each of the columns ``x_lin_acc``, ``y_lin_acc``, ``x_acc``, ``y_acc`` and
    ``z_acc`` is passed through ``butter_lowpass_filter``.

    Parameters
    ----------
    df : pandas.DataFrame
        Measurements; must contain the five columns listed above.
    cutoff : float, optional
        Cutoff frequency handed to the filter (default 10).
    fs : float, optional
        Sampling frequency handed to the filter (default 50).
        NOTE(review): the data shown in this notebook appears to be sampled
        every 0.01 s -- confirm that the default matches the real rate.
    order : int, optional
        Filter order (default 2).

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the filtered columns, sharing ``df``'s index.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from ``df``.
    """
    # NOTE(review): 'z_lin_acc' is not filtered here -- confirm this is intended.
    signals = ['x_lin_acc', 'y_lin_acc', 'x_acc', 'y_acc', 'z_acc']

    # Filter the columns of the `df` argument itself. The previous version read
    # from undefined globals (`experiment_df`, `list_signals`) and concatenated
    # inside the loop, so it could not run.
    filtered = {
        name: butter_lowpass_filter(df[name], cutoff=cutoff, fs=fs, order=order)
        for name in signals
    }

    # Build the frame once; keeping df's index lets callers align it with `df`.
    return pd.DataFrame(filtered, columns=signals, index=df.index)

In [128]:
def filter_gyro(df, cutoff=10, fs=50, order=2):
    """Low-pass filter the gyroscope columns of ``df``.

    Each of the columns ``x_gyro``, ``y_gyro`` and ``z_gyro`` is passed through
    ``butter_lowpass_filter``.

    Parameters
    ----------
    df : pandas.DataFrame
        Measurements; must contain the three columns listed above.
    cutoff : float, optional
        Cutoff frequency handed to the filter (default 10).
    fs : float, optional
        Sampling frequency handed to the filter (default 50).
        NOTE(review): the data shown in this notebook appears to be sampled
        every 0.01 s -- confirm that the default matches the real rate.
    order : int, optional
        Filter order (default 2).

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the filtered columns, sharing ``df``'s index.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from ``df``.
    """
    signals = ['x_gyro', 'y_gyro', 'z_gyro']

    # Filter the columns of the `df` argument itself. The previous version read
    # from undefined globals (`experiment_df`, `list_signals`) and concatenated
    # inside the loop, so it could not run.
    filtered = {
        name: butter_lowpass_filter(df[name], cutoff=cutoff, fs=fs, order=order)
        for name in signals
    }

    # Build the frame once; keeping df's index lets callers align it with `df`.
    return pd.DataFrame(filtered, columns=signals, index=df.index)

In [129]:
def kurtosis_time(x):
    """Return the Fisher (excess) kurtosis of ``x`` via ``scipy.stats.kurtosis``."""
    excess_kurtosis = kurtosis(x, fisher=True)
    return excess_kurtosis

def rms_100(x):
    """Return the root mean square of ``x``: ``sqrt(mean(x**2))``."""
    mean_square = np.mean(x ** 2)
    return np.sqrt(mean_square)

def crest(x):
    """Return the crest factor of ``x``: peak absolute value divided by RMS."""
    peak = max(abs(x))
    root_mean_square = np.sqrt(np.mean(x ** 2))
    return peak / root_mean_square

def create_aggregated(df):
    """Create an aggregated time-domain feature dataframe.

    Rows of ``df`` are grouped by ``id_gps`` and, for each of the eight sensor
    columns listed in ``signals``, fourteen statistics are computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``id_gps`` and the eight columns listed in ``signals``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``id_gps``, with two-level columns ``(signal, statistic)``.
    """
    signals = ['x_lin_acc', 'y_lin_acc',
               'x_acc', 'y_acc', 'z_acc',
               'x_gyro', 'y_gyro', 'z_gyro']

    def mad(x):
        """Mean absolute deviation around the mean."""
        # The "mad" string aggregator was removed in pandas 2.0. This callable
        # is deliberately named `mad` so the resulting column label (taken from
        # the function name) is the same as before.
        return (x - x.mean()).abs().mean()

    # The order of the statistics fixes the order of the feature columns that
    # are later passed to the scaler and model, so it must not be changed.
    # NOTE(review): "quantile" with its default q=0.5 duplicates "median".
    statistics = ["sum", "mean", mad,
                  "median", "min", "max",
                  "std", "var", "sem",
                  "skew", "quantile",
                  kurtosis_time, rms_100,
                  crest]

    agg_df = df.groupby(["id_gps"]).agg({x: list(statistics) for x in signals})

    return agg_df

In [130]:
# Build the per-`id_gps` feature table; reset_index() turns the `id_gps` group
# key back into a regular column so it can be carried alongside the predictions.
feature_df = create_aggregated(df).reset_index()

In [131]:
# NOTE(review): this fits a brand-new MinMaxScaler on the data being predicted.
# If the model loaded below was trained on features scaled with a different
# (training-time) scaler, that fitted scaler should be loaded and only
# `.transform` applied here -- confirm against the training notebook.
scaler = MinMaxScaler()
# Passing `level=0` drops the ("id_gps", "") column from the two-level columns
# without triggering the "dropping on a non-lexsorted multi-index without a
# level parameter" PerformanceWarning.
scaled = scaler.fit_transform(feature_df.drop("id_gps", axis=1, level=0))


dropping on a non-lexsorted multi-index without a level parameter may impact performance.



In [132]:
# Load the previously saved model from disk.
# joblib files are pickle-based: only load model files from a trusted source.
model = load("../models/rfc_v01.joblib")

In [133]:
# Predict one label per row of `scaled`, i.e. one per `id_gps` group.
preds = model.predict(scaled)

In [134]:
# Put the predictions next to the features as a "label" column. Both frames
# carry a default integer index (feature_df was reset_index()'ed), so the
# column-wise concat lines up rows by position.
result = pd.concat([feature_df,pd.DataFrame(preds, columns=["label"])],axis = 1)

In [135]:
# Keep only the group id and its predicted label, then flatten the column names.
# NOTE(review): this cell overwrites `result`, so it is not re-runnable on its
# own -- after the rename the ('id_gps', '') column no longer exists; re-run the
# concat cell above first.
result = result[[('id_gps', ''),"label"]]
result.columns = ["id_gps", "label"]


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray



In [136]:
# Display the raw measurements for inspection.
df

Unnamed: 0,time,lat,lon,height,velocity,direction,h_accuracy,v_accuracy,id_gps,x_lin_acc,y_lin_acc,z_lin_acc,x_gyro,y_gyro,z_gyro,x_acc,y_acc,z_acc
15,0.15,47.686343,17.679912,113.113219,-1.000000,-1.000000,65.000000,65.000000,2,-0.165025,-0.008977,-0.074449,-0.001077,0.001447,-0.000870,-0.167951,-0.224832,9.733359
16,0.16,47.686343,17.679912,113.113219,-1.000000,-1.000000,65.000000,65.000000,2,-0.072877,0.049781,-0.117403,-0.002516,0.002074,0.000089,-0.238454,-0.154928,9.708062
17,0.17,47.686343,17.679912,113.113219,-1.000000,-1.000000,65.000000,65.000000,2,0.188222,-0.006156,-0.123387,-0.001361,0.004807,0.000367,0.019010,-0.164658,9.678573
18,0.18,47.686343,17.679912,113.113219,-1.000000,-1.000000,65.000000,65.000000,2,0.063026,-0.038822,-0.088505,-0.001860,0.002107,0.000095,0.079634,-0.227826,9.702973
19,0.19,47.686343,17.679912,113.113219,-1.000000,-1.000000,65.000000,65.000000,2,-0.148809,0.014897,-0.073083,-0.000987,-0.002325,-0.000523,-0.178728,-0.203128,9.738149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12875,128.75,47.694844,17.675054,117.642292,11.712904,85.817919,6.000906,6.000906,129,0.048561,-0.268939,0.157282,0.027666,-0.029211,-0.012400,-0.074246,-0.426763,9.939780
12876,128.76,47.694844,17.675054,117.642292,11.712904,85.817919,6.000906,6.000906,129,-0.078647,-0.060692,0.127630,0.035673,-0.008810,-0.015961,-0.224533,-0.329914,9.992920
12877,128.77,47.694844,17.675054,117.642292,11.712904,85.817919,6.000906,6.000906,129,0.192671,0.036597,-0.431795,0.041637,0.014038,-0.008203,-0.070952,-0.128583,9.599238
12878,128.78,47.694844,17.675054,117.642292,11.712904,85.817919,6.000906,6.000906,129,0.597249,0.024606,-0.744228,0.061041,0.035687,0.000275,0.276774,-0.104632,9.185050


In [137]:
# Attach the per-`id_gps` predicted label to every raw measurement row.
# `validate="many_to_one"` makes pandas raise a MergeError if `result` ever
# contains a duplicated `id_gps`, which would otherwise silently multiply rows.
final_result = df.merge(result, on="id_gps", how="outer", validate="many_to_one")

###  Testing the merge

If the `id_gps` values of the grouped merged dataframe are the same as the `id_gps` values in `result`, then the merge is correct.

In [138]:
# Count the non-null values in every column for each `id_gps` group of the merged frame.
final_result.groupby("id_gps").count()

Unnamed: 0_level_0,time,lat,lon,height,velocity,direction,h_accuracy,v_accuracy,x_lin_acc,y_lin_acc,z_lin_acc,x_gyro,y_gyro,z_gyro,x_acc,y_acc,z_acc,label
id_gps,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25
3,160,160,160,160,160,160,160,160,160,160,160,160,160,160,160,160,160,160
4,151,151,151,151,151,151,151,151,151,151,151,151,151,151,151,151,151,151
5,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100
6,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100
126,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100
127,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100
128,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100


In [139]:
# Number of positions at which `result["id_gps"]` equals the `id_gps` index of
# the grouped merged frame; compare it with len(result) to confirm every id matches.
(result["id_gps"] == final_result.groupby("id_gps").count().index).sum()

128

In [140]:
# Collapse the merged frame to one row per `id_gps` by averaging every column;
# the plot below reads `lat`, `lon` and `label` from this averaged frame.
final_result  = final_result.groupby("id_gps").mean()

In [141]:
# Draw one marker per `id_gps` at its averaged position, coloured by its label.
fig = px.scatter_mapbox(
    final_result,
    lat="lat",
    lon="lon",
    color="label",
    color_continuous_scale=["green", "blue", "red"],
    zoom=12,
    height=500,
)
# Base map style, no margins around the map, and no colour bar.
fig.update_layout(
    mapbox_style="carto-positron",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    coloraxis_showscale=False,
)
fig