# Sliding time window
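The sliding time window implemented here moves over the sensor recording one sample at a time and summarises every window of `ws` consecutive samples with its mean, median, standard deviation, maximum, minimum and tendency. Each window is labelled with the most frequent `eventClass` value it contains.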

### Imports

In [186]:
import numpy as np
import pandas as pd
from scipy import stats as st
import plotly.graph_objects as go
from tqdm import tqdm

### Load data

In [187]:
# Two separate recordings of the same manoeuvre: one is used for training,
# the other is held out for testing.
df_train = pd.read_csv('./filtered_data/smartphone_02-Feb-2022-20-19_cambio de línea agresivo izquierda_Data-Mv2L_EBJR_gDU3LpwVs.csv')
df_test = pd.read_csv('./filtered_data/smartphone_02-Feb-2022-20-22_cambio de línea agresivo izquierda_Data-Mv2MN6xv2W3Rv1Rmb_Z.csv')


# Class balance of the training recording (eventClass: 1 = near crash, 0 = no event)
print("Train data size: ", df_train.shape)
print("Near crash event data size: ", df_train["eventClass"][df_train["eventClass"] == 1].shape)
print("No near crash event data size: ", df_train["eventClass"][df_train["eventClass"] == 0].shape)


# Class balance of the test recording (this header previously said "Train")
print("\nTest data size: ", df_test.shape)
print("Near crash event data size: ", df_test["eventClass"][df_test["eventClass"] == 1].shape)
print("No near crash event data size: ", df_test["eventClass"][df_test["eventClass"] == 0].shape)

df_train.head()

Train data size:  (380, 18)
Near crash event data size:  (46,)
No near crash event data size:  (334,)

Train data size:  (478, 18)
Near crash event data size:  (48,)
No near crash event data size:  (430,)


Unnamed: 0,id,accX,accY,accZ,eventClass,idTrip,idVehicle,latitude,longitude,magX,magY,magZ,route,speed,timestamp,velAngX,velAngY,velAngZ
0,37294,0.006871,-0.243843,9.71314,0,17,Nissan March (Xiaomi Redmi Note 9S),0.833505,-77.650076,19.675431,-33.610343,53.879037,Cambio de línea agresivo izquierda,23.534069,2022-02-02 20:19:34.318000-05:00,-0.000499,-0.000741,0.000182
1,37295,0.021488,-0.234203,9.711581,0,17,Nissan March (Xiaomi Redmi Note 9S),0.833505,-77.650076,19.556573,-33.602171,53.855977,Cambio de línea agresivo izquierda,23.534069,2022-02-02 20:19:34.368000-05:00,-0.000521,-0.000248,5.2e-05
2,37296,0.006405,-0.249547,9.707207,0,17,Nissan March (Xiaomi Redmi Note 9S),0.833505,-77.650076,19.474418,-33.539403,53.851095,Cambio de línea agresivo izquierda,23.534069,2022-02-02 20:19:34.418000-05:00,-7.9e-05,1.8e-05,-0.000379
3,37297,0.015948,-0.244837,9.711803,0,17,Nissan March (Xiaomi Redmi Note 9S),0.833505,-77.650076,19.25109,-33.497147,53.804195,Cambio de línea agresivo izquierda,23.534069,2022-02-02 20:19:34.468000-05:00,-0.000387,-0.000648,-0.00069
4,37298,0.004772,-0.24007,9.715476,0,17,Nissan March (Xiaomi Redmi Note 9S),0.833505,-77.650076,19.173706,-33.474026,53.780318,Cambio de línea agresivo izquierda,23.534069,2022-02-02 20:19:34.518000-05:00,-3.4e-05,8.7e-05,-0.000573


### Select the interest variables

In [188]:
# Columns handed to the windowing step, converted from pandas to NumPy.
# "eventClass" is kept as the last column because it carries the label.
# The CSV also contains speed, accY, magX/magY/magZ, velAngX/velAngY and
# timestamp; they are not used in this experiment.
select_v = ["accX", "velAngZ", "eventClass"]

variables_train = df_train.loc[:, select_v].to_numpy()
print("Variables Train shape", variables_train.shape)

variables_test = df_test.loc[:, select_v].to_numpy()
print("Variables Test shape", variables_test.shape)

Variables Train shape (380, 3)
Variables Test shape (478, 3)


### Make a sliding window

The sliding window has this shape:

$$
\begin{aligned}
rows &= m - ws + 1 \\
columns &= d \\
pages &= ws
\end{aligned}
$$

![](https://es.mathworks.com/help/matlab/math/nddemo_02_es.gif)

Where:
- ***m***: the data size (number of samples/rows),
- ***ws***: the window size (number of consecutive samples per window),
- ***d***: the number of columns in the data (the measured variables plus the event label).


#### Naive alternative

In [189]:
# Select axis 0 to slide over all rows of the data
def sliding_windows(variables, ws=20, verbose=True):
    """Build per-window statistical features and labels from a 2-D recording.

    A window of ``ws`` consecutive rows is slid over ``variables`` one row at
    a time. Every column except the last is treated as a measured variable;
    the last column is treated as the event class.

    Parameters
    ----------
    variables : array-like, shape (m, d + 1)
        ``d`` measured variables followed by one event-class column.
    ws : int, default 20
        Window size, in rows.
    verbose : bool, default True
        When True, print the diagnostic samples and the full feature/label
        arrays (the original behaviour). Pass False to run silently.

    Returns
    -------
    input_features : ndarray, shape (m - ws + 1, 6 * d)
        Column-wise concatenation of mean, median, standard deviation,
        maximum, minimum and tendency of every measured variable.
    labels : ndarray, shape (m - ws + 1,)
        Most frequent event class inside each window (``scipy.stats.mode``).

    Raises
    ------
    ValueError
        If ``variables`` is not 2-D with at least two columns, or if ``ws``
        is smaller than 1 or larger than the number of rows.
    """
    variables = np.asarray(variables)
    if variables.ndim != 2 or variables.shape[1] < 2:
        raise ValueError(
            "variables must be 2-D with at least one measured column plus the event column"
        )
    if ws < 1 or ws > variables.shape[0]:
        raise ValueError(
            "window size ws={} must be between 1 and the number of rows ({})"
            .format(ws, variables.shape[0])
        )

    # The window axis is appended last: shape (m - ws + 1, d + 1, ws).
    # Slice with [::step] here to use a window step other than 1.
    sld_window = np.lib.stride_tricks.sliding_window_view(variables, ws, axis=0)

    if verbose:
        # Sample of sliding window
        print("Sample of sliding window")
        print("sliding window shape: ",sld_window.shape)
        print(sld_window[0:2,:,:])

    # Split measured variables from the event column; d is derived from the
    # input instead of being hard-coded.
    d = variables.shape[1] - 1
    measured_variables = sld_window[:, :d, :]
    event_variables = sld_window[:, d, :]

    # Mean, median, std, max and min value for measured variables
    mean = measured_variables.mean(axis=2)
    median = np.median(measured_variables, axis=2)
    std = measured_variables.std(axis=2)
    max_val = measured_variables.max(axis=2)
    min_val = measured_variables.min(axis=2)

    # Tendency: each window mean divided by the previous window mean. The first
    # window is divided by itself, and a zero divisor is replaced by 1 so the
    # division never produces inf/nan from a zero mean.
    divider = np.concatenate((mean[:1], mean[:-1]), axis=0)
    tendency = mean/np.where(divider == 0, 1, divider)
    tendency = np.nan_to_num(tendency, nan=0, posinf=60, neginf=0)

    # Event class for each window: the most frequent value inside the window
    event_mode = st.mode(event_variables, axis=1)[0]

    if verbose:
        print("\nSample of sliding window process")
        print("Processing slide window shape: " +
              "mean={}, median={}, std={}, max_val={}, min_val={}, tendency={}, event_mode={}"
              .format(mean.shape, median.shape, std.shape, max_val.shape, min_val.shape, tendency.shape, event_mode.shape))
        print("Mean:\n {} \nMedian:\n {} \nStandard deviation:\n {} \nMax_val:\n {} \nMin_val\n {} \ntendency:\n {} \nEvent class Mode:\n {}"
              .format(mean[41:44,:], median[0:2,:], std[0:2,:], max_val[0:2,:], min_val[0:2,:], tendency[41:44], event_mode[0:2]))

    # Concatenate processing data
    input_features = np.concatenate((mean, median, std, max_val, min_val, tendency), axis=1)
    # st.mode returns (rows,) or (rows, 1) depending on the SciPy version; flatten both.
    labels = event_mode.reshape(-1)
    if verbose:
        print(input_features)
        print(labels)

    return (input_features, labels)


In [190]:
# Turn each recording into per-window features (X) and per-window labels (y).
X_train, y_train = sliding_windows(variables_train)
X_test, y_test = sliding_windows(variables_test)

Sample of sliding window
sliding window shape:  (361, 3, 20)
[[[ 6.87071848e-03  2.14882876e-02  6.40542128e-03  1.59477141e-02
    4.77156290e-03  9.66452388e-03  2.13948053e-02  8.65991297e-03
    3.08399689e-02  6.09524351e-03  2.45098531e-02  1.75197473e-02
    6.28788939e-03  3.14781521e-02  1.75089602e-02  1.10388460e-02
    2.67462771e-02  3.23035057e-03  2.52273486e-02  1.57740870e-02]
  [ 1.81710340e-04  5.23058009e-05 -3.79458691e-04 -6.89833466e-04
   -5.73485384e-04 -2.47615585e-04 -1.62907631e-04 -3.37971539e-04
   -8.78534672e-05 -1.21378521e-04 -1.30093137e-04  1.63266335e-04
   -5.61015885e-05 -1.13124830e-04  1.67677134e-04  2.40669748e-04
    6.25605097e-05 -8.22794019e-05 -1.19929579e-04 -4.25341272e-04]
  [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
    0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00

#### Input features

In [191]:
# First rows of the training feature matrix
print("Input features shape:", X_train.shape)
print(X_train[0:5])

# First rows of the test feature matrix (label previously said "train")
print("Test features shape:", X_test.shape)
print(X_test[0:5])

# Window counts per class in the training labels (second label previously said "test")
print("Near crash event train data size: ", np.where(y_train == 1)[0].shape)
print("No near crash event train data size: ", np.where(y_train == 0)[0].shape)

# Window counts per class in the test labels
print("Near crash event test data size: ", np.where(y_test == 1)[0].shape)
print("No near crash event test data size: ", np.where(y_test == 0)[0].shape)

Input features shape: (361, 12)
[[ 1.55729835e-02 -1.32959211e-04  1.58609005e-02 -1.16527205e-04
   8.85623955e-03  2.45663237e-04  3.14781521e-02  2.40669748e-04
   3.23035057e-03 -6.89833466e-04  1.00000000e+00  1.00000000e+00]
 [ 1.52603102e-02 -1.47572961e-04  1.58609005e-02 -1.16527205e-04
   9.25918131e-03  2.34970410e-04  3.14781521e-02  2.40669748e-04
   6.17252174e-04 -6.89833466e-04  9.79922067e-01  1.10991152e+00]
 [ 1.46114725e-02 -1.51625278e-04  1.34064665e-02 -1.16527205e-04
   9.25469406e-03  2.32170527e-04  3.14781521e-02  2.40669748e-04
   6.17252174e-04 -6.89833466e-04  9.57482012e-01  1.02745976e+00]
 [ 1.39943868e-02 -1.23171730e-04  1.34064665e-02 -1.11844741e-04
   1.01494895e-02  2.37318982e-04  3.14781521e-02  2.40669748e-04
  -5.93629285e-03 -6.89833466e-04  9.57767042e-01  8.12342979e-01]
 [ 1.38233525e-02 -1.15778112e-04  1.17829368e-02 -1.11844741e-04
   1.01439522e-02  2.21313317e-04  3.14781521e-02  2.40669748e-04
  -5.93629285e-03 -5.73485384e-04  9.877

#### Machine Learning 

In [195]:
from sklearn import svm
from sklearn.model_selection import train_test_split

# RBF-kernel support vector classifier trained on the training windows.
# NOTE(review): the features are passed in unscaled and the classes are
# imbalanced, so plain accuracy may mostly reflect the majority class - confirm.
clf = svm.SVC(kernel='rbf', C=1, random_state=0).fit(X_train, y_train)

# Score of the fitted model on the windows of the test recording
print(clf.score(X_test, y_test))

# Per-window predictions, shown as the cell output.
# To count the predicted events instead: np.where(clf.predict(X_test) == 1)[0].shape
clf.predict(X_test)

0.9281045751633987


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1.

In [193]:
# Tendency (disabled experiment).
# Everything below is wrapped in a string literal, so none of it is executed;
# the cell only evaluates the string and shows it as its output.
# NOTE(review): the wrapped code refers to `measured_variables` and `ws`, which
# are local to sliding_windows() - it would need them in scope to run.
'''
def trend(arr):
    """This function calculates the trend with the unique loopback option for this type of calculation

    This tendency is not make for frame form, for this reason the equation is this: register(i)/last_register
    where "i" is the position of a register

    Note: I calculate the first tendency how last_register/last_register = 1, 
    this data appear in all first values of all rows in the slide window
    """
    t = [arr[-i]/arr[-1] for i in range(1, len(arr)+1)]
    #mean[::-1]/mean[-1]
    #for i in range(1,len(array)+1):
    #    t.append(array[-i]/array[-1])
    return np.array(t)

variables_trend = np.apply_along_axis(trend, axis=2, arr=measured_variables)
variables_trend = variables_trend.reshape(variables_trend.shape[0], variables_trend.shape[1]*ws)   # Son 20 para la speed y 20 para la accY
print("\nSample of sliding window process for tendency")
print("Tendency slide window shape: ", variables_trend.shape)
print("Tendency:\n {}".format(variables_trend[0:2,:]))
'''

'\ndef trend(arr):\n    """This function calculates the trend with the unique loopback option for this type of calculation\n\n    This tendency is not make for frame form, for this reason the equation is this: register(i)/last_register\n    where "i" is the position of a register\n\n    Note: I calculate the first tendency how last_register/last_register = 1, \n    this data appear in all first values of all rows in the slide window\n    """\n    t = [arr[-i]/arr[-1] for i in range(1, len(arr)+1)]\n    #mean[::-1]/mean[-1]\n    #for i in range(1,len(array)+1):\n    #    t.append(array[-i]/array[-1])\n    return np.array(t)\n\nvariables_trend = np.apply_along_axis(trend, axis=2, arr=measured_variables)\nvariables_trend = variables_trend.reshape(variables_trend.shape[0], variables_trend.shape[1]*ws)   # Son 20 para la speed y 20 para la accY\nprint("\nSample of sliding window process for tendency")\nprint("Tendency slide window shape: ", variables_trend.shape)\nprint("Tendency:\n {}".for

In [194]:
# Tendency via look-back means (disabled experiment).
# Everything below is wrapped in a string literal, so none of it is executed;
# the cell only evaluates the string and shows it as its output.
# NOTE(review): the wrapped code refers to `measured_variables`, which is local
# to sliding_windows() - it would need it in scope to run.
'''
def means_loopback(array):
    """This functions calculate the means with loop back option
    """
    means = []
    for i in range(1,len(array)+1):
        means.append(array[-i:].mean())
    return np.array(means)

print("data to get loop back mean: \n", measured_variables[0:2,:,:])
# mean loopback test
mean_lb = np.apply_along_axis(means_loopback, 2, measured_variables[0:2,:,:])
print("Mean loopback: \n",mean_lb)
'''

'\ndef means_loopback(array):\n    """This functions calculate the means with loop back option\n    """\n    means = []\n    for i in range(1,len(array)+1):\n        means.append(array[-i:].mean())\n    return np.array(means)\n\nprint("data to get loop back mean: \n", measured_variables[0:2,:,:])\n# mean loopback test\nmean_lb = np.apply_along_axis(means_loopback, 2, measured_variables[0:2,:,:])\nprint("Mean loopback: \n",mean_lb)\n'