<font size="+0.5">Load libraries and data.</font>

In [None]:
# Silence scikit-learn's ConvergenceWarning only; every other warning category is still reported.
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)

In [None]:
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

from scipy.signal import savgol_filter
from sklearn.utils import shuffle

In [None]:
# Read the raw accelerometer recordings for the training and validation splits.
train_path = 'data/train_accelerometer.csv'
val_path = 'data/val_accelerometer.csv'
train, val = pd.read_csv(train_path), pd.read_csv(val_path)

# <center>Data filtering</center>

### <center>Accelerometer data filtering</center>

<font size="+0.5">Compare the initial and filtered curves and find the <i><b>window length</b></i> for which the filtered curve describes the data best.</font>

In [None]:
# Settings for the window-length sweep: the polynomial order stays fixed
# while the window length runs over 11, 21, ..., 141.
polyorder = 3
window_lengths = np.arange(start=11, stop=151, step=10)

In [None]:
# Independent (deep) copy of the training frame used for the filter experiments,
# so that adding columns to it does not touch `train`.
data_to_transform = train.copy(deep=True)

In [None]:
# Sweep the Savitzky-Golay window length (polynomial order fixed at `polyorder`).
# For every candidate, overlay the raw and the filtered signal of the first
# 500 samples, one panel per accelerometer axis, so the smoothing can be
# judged visually.
for window_length in window_lengths:

    fig, ax = plt.subplots(1, 3, figsize=(10, 5))

    for panel, axis_name in zip(ax, ('x', 'y', 'z')):
        raw_column = f'{axis_name}_accelerometer'
        filtered_column = f'{raw_column}_fil'

        # The filter runs over the whole column; only the plot is limited
        # to the first 500 samples.
        data_to_transform[filtered_column] = savgol_filter(
            data_to_transform[raw_column].values, window_length, polyorder
        )

        panel.plot(data_to_transform[raw_column].values[:500],
                   label=f'{axis_name} accelerometer')
        panel.plot(data_to_transform[filtered_column].values[:500],
                   label=f'{axis_name} accelerometer filtered')
        # Legend on every panel, including z.
        panel.legend()

    fig.suptitle(f"Window length: {window_length}", fontsize=20)

    # Show each figure as soon as it is built instead of keeping all of
    # them open until the end of the cell.
    plt.show()
    

<font size="+0.5">Compare the initial and filtered curves and find the <i><b>polyorder</b></i> for which the filtered curve describes the data best.</font>



In [None]:
# Settings for the polynomial-order sweep: the window length stays fixed
# while the polynomial order runs over 2, 3, ..., 14.
window_length = 51
polyorders = np.arange(2, 15)

In [None]:
# Sweep the Savitzky-Golay polynomial order (window length fixed at
# `window_length`). For every candidate, overlay the raw and the filtered
# signal of the first 500 samples, one panel per accelerometer axis.
for polyorder in polyorders:

    fig, ax = plt.subplots(1, 3, figsize=(10, 5))

    for panel, axis_name in zip(ax, ('x', 'y', 'z')):
        raw_column = f'{axis_name}_accelerometer'
        filtered_column = f'{raw_column}_fil'

        # The filter runs over the whole column; only the plot is limited
        # to the first 500 samples.
        data_to_transform[filtered_column] = savgol_filter(
            data_to_transform[raw_column].values, window_length, polyorder
        )

        panel.plot(data_to_transform[raw_column].values[:500],
                   label=f'{axis_name} accelerometer')
        panel.plot(data_to_transform[filtered_column].values[:500],
                   label=f'{axis_name} accelerometer filtered')
        # Legend on every panel, including z.
        panel.legend()

    # Title names the parameter that actually varies in this sweep; the
    # window length is constant here and would label every figure the same.
    fig.suptitle(f"Polyorder: {polyorder}", fontsize=20)

    # Show each figure as soon as it is built instead of keeping all of
    # them open until the end of the cell.
    plt.show()
    

In [None]:
# Polynomial order used for the final filtering. It has to be set explicitly:
# the `for polyorder in polyorders` sweep leaves the name bound to its last value.
polyorder = 5

<font size="+0.5">Use selected parameters for filtering accelerometer data.</font>

In [None]:
# Replace the raw x/y/z accelerometer columns of both splits with their
# Savitzky-Golay filtered versions, using the selected window length and
# polynomial order. The columns are overwritten in place, so running this
# cell a second time filters the already-filtered signal again.
for frame in (train, val):
    for column in ('x_accelerometer', 'y_accelerometer', 'z_accelerometer'):
        frame[column] = savgol_filter(frame[column].values, window_length, polyorder)

In [None]:
# Recompute the `*_fil` comparison columns on the working copy with the
# selected window length and polynomial order.
for axis_name in ('x', 'y', 'z'):
    source_column = f'{axis_name}_accelerometer'
    data_to_transform[f'{source_column}_fil'] = savgol_filter(
        data_to_transform[source_column].values, window_length, polyorder
    )

In [None]:
# Preview the first rows of `train`, whose accelerometer columns now hold the filtered values.
train.head()

<font size="+0.5">Plots showing the difference between the initial and filtered values.</font>

In [None]:
# Side-by-side comparison of raw vs. filtered signal for the first 500
# samples, one panel per accelerometer axis, using the selected parameters.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

first_samples = data_to_transform[:500]
for panel, axis_name in zip(ax, ('x', 'y', 'z')):
    raw_values = first_samples[f'{axis_name}_accelerometer'].values
    filtered_values = first_samples[f'{axis_name}_accelerometer_fil'].values

    panel.plot(raw_values, label=f'{axis_name} accelerometer')
    panel.plot(filtered_values, label=f'{axis_name} accelerometer filtered')
    panel.legend();

plt.show();

In [None]:
# Shuffle the rows of both splits. This happens after filtering because the
# Savitzky-Golay filter operates on neighbouring samples and therefore needs
# the original row order.
# A fixed seed makes the row order (and hence the saved CSV files) identical
# on every Restart & Run All.
SHUFFLE_SEED = 42

train = shuffle(train, random_state=SHUFFLE_SEED)
val = shuffle(val, random_state=SHUFFLE_SEED)

In [None]:
# Persist the filtered, shuffled splits; the DataFrame index is not written.
outputs = (
    (train, "data/train_filtered_accelerometer.csv"),
    (val, "data/val_filtered_accelerometer.csv"),
)
for frame, out_path in outputs:
    frame.to_csv(out_path, index=False)