<font size="+0.5">Load libraries and data.</font>

In [None]:
# Keep cell output readable: suppress scikit-learn's ConvergenceWarning messages.
import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings('ignore', category=ConvergenceWarning)

In [None]:
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

from scipy.signal import savgol_filter
from sklearn.utils import shuffle

In [None]:
# Read the train and validation splits from the local data directory.
train_path = 'data/train_accelerometer_gyroscope.csv'
val_path = 'data/val_accelerometer_gyroscope.csv'

train = pd.read_csv(train_path)
val = pd.read_csv(val_path)

# <center> Data filtering

### <center> Gyroscope data filtering

<font size="+0.5">Compare the initial and filtered curves and find the <i><b>window length</b></i> for which the filtered curve describes the data best.</font>

In [None]:
# Candidate window lengths for the sweep: 11, 21, ..., 141 (all odd).
window_lengths = np.arange(start=11, stop=151, step=10)

# Polynomial order held fixed while the window length is varied.
polyorder = 3

In [None]:
# Work on an independent copy so the filter experiments never modify `train` itself.
data_to_transform = train.copy(deep=True)

In [None]:
# Sweep the Savitzky-Golay window length while the polynomial order stays fixed.
# For every candidate a three-panel figure is drawn (one panel per gyroscope axis)
# comparing the first 500 raw samples with their filtered counterpart.
for window_length in window_lengths:

    fig, ax = plt.subplots(1, 3, figsize=(10, 5))

    for axis_name, panel in zip('xyz', ax):
        raw_col = f'{axis_name}_gyroscope'
        fil_col = f'{axis_name}_gyroscope_fil'

        # The filter runs over the whole column; only the plot is truncated to 500 samples.
        data_to_transform[fil_col] = savgol_filter(data_to_transform[raw_col].values, window_length, polyorder)

        panel.plot(data_to_transform[raw_col].values[:500], label=f'{axis_name} gyroscope')
        panel.plot(data_to_transform[fil_col].values[:500], label=f'{axis_name} gyroscope filtered')
        # Each of the three panels carries its own legend.
        panel.legend()

    fig.suptitle(f"Window length: {window_length}", fontsize=20)

    # Render immediately and release the figure so the sweep does not pile up open figures.
    plt.show()
    plt.close(fig)
    

<font size="+0.5">Compare the initial and filtered curves and find the <i><b>polyorder</b></i> for which the filtered curve describes the data best.</font>



In [None]:
# Candidate polynomial orders for the sweep: 2, 3, ..., 14.
polyorders = np.arange(2, 15)

# Window length held fixed while the polynomial order is varied.
window_length = 31

In [None]:
# Sweep the Savitzky-Golay polynomial order while the window length stays fixed.
# For every candidate a three-panel figure is drawn (one panel per gyroscope axis)
# comparing the first 500 raw samples with their filtered counterpart.
for polyorder in polyorders:

    fig, ax = plt.subplots(1, 3, figsize=(10, 5))

    for axis_name, panel in zip('xyz', ax):
        raw_col = f'{axis_name}_gyroscope'
        fil_col = f'{axis_name}_gyroscope_fil'

        # The filter runs over the whole column; only the plot is truncated to 500 samples.
        data_to_transform[fil_col] = savgol_filter(data_to_transform[raw_col].values, window_length, polyorder)

        panel.plot(data_to_transform[raw_col].values[:500], label=f'{axis_name} gyroscope')
        panel.plot(data_to_transform[fil_col].values[:500], label=f'{axis_name} gyroscope filtered')
        # Each of the three panels carries its own legend.
        panel.legend()

    # Title names the parameter being swept here, so the figures can be told apart.
    fig.suptitle(f"Polyorder: {polyorder}", fontsize=20)

    # Render immediately and release the figure so the sweep does not pile up open figures.
    plt.show()
    plt.close(fig)
    

In [None]:
# Polynomial order used by the filtering cells that follow; `window_length` keeps
# the value set before the polyorder sweep.
# NOTE(review): presumably picked by eye from the sweep plots — confirm.
polyorder = 4

<font size="+0.5">Use the selected parameters to filter the gyroscope data.</font>

In [None]:
# Replace each gyroscope axis of the training split with its Savitzky-Golay filtered version.
# The columns are overwritten in place, so re-running this cell filters already-filtered data.
for gyro_col in ('x_gyroscope', 'y_gyroscope', 'z_gyroscope'):
    raw_signal = train[gyro_col].values
    train[gyro_col] = savgol_filter(raw_signal, window_length, polyorder)

In [None]:
# Replace each gyroscope axis of the validation split with its Savitzky-Golay filtered version,
# using the same window length and polynomial order as for the training split.
# The columns are overwritten in place, so re-running this cell filters already-filtered data.
for gyro_col in ('x_gyroscope', 'y_gyroscope', 'z_gyroscope'):
    raw_signal = val[gyro_col].values
    val[gyro_col] = savgol_filter(raw_signal, window_length, polyorder)

<font size="+0.5">Plots showing the difference between the initial and filtered values.</font>

In [None]:
# Recompute the filtered preview columns on the untouched copy with the final parameters.
column_pairs = (
    ('x_gyroscope', 'x_gyroscope_fil'),
    ('y_gyroscope', 'y_gyroscope_fil'),
    ('z_gyroscope', 'z_gyroscope_fil'),
)
for raw_col, fil_col in column_pairs:
    data_to_transform[fil_col] = savgol_filter(data_to_transform[raw_col].values, window_length, polyorder)

In [None]:
# One panel per gyroscope axis: raw signal against its filtered version, first 500 rows only.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

preview = data_to_transform[:500]
panel_specs = (
    ('x_gyroscope', 'x gyroscope', 'x_gyroscope_fil', 'x gyroscope filtered'),
    ('y_gyroscope', 'y gyroscope', 'y_gyroscope_fil', 'y gyroscope filtered'),
    ('z_gyroscope', 'z gyroscope', 'z_gyroscope_fil', 'z gyroscope filtered'),
)

for panel, (raw_col, raw_label, fil_col, fil_label) in zip(ax, panel_specs):
    panel.plot(preview[raw_col].values, label=raw_label)
    panel.plot(preview[fil_col].values, label=fil_label)
    panel.legend()

plt.show();

In [None]:
# Shuffle the rows of both splits. A fixed random_state makes the row order
# (and therefore the CSV files written afterwards) identical on every run.
train = shuffle(train, random_state=42)
val = shuffle(val, random_state=42)

In [None]:
# Columns written to the output files: the three gyroscope axes plus the "event" column.
columns_to_save = [
    "x_gyroscope",
    "y_gyroscope",
    "z_gyroscope",
    "event",
]

In [None]:
# Write the selected columns of each split to CSV, leaving out the DataFrame index.
train_to_save = train.loc[:, columns_to_save]
val_to_save = val.loc[:, columns_to_save]

train_to_save.to_csv("data/train_filtered_gyroscope.csv", index=False)
val_to_save.to_csv("data/val_filtered_gyroscope.csv", index=False)