In [3]:
import pandas as pd

In [4]:
import numpy as np

In [5]:
import matplotlib.pyplot as plt

In [6]:
from scipy.signal import lfilter

In [7]:
from sklearn.ensemble import RandomForestClassifier

In [8]:
from sklearn.metrics import accuracy_score

In [9]:
### sampling rate, 's' = 52Hz and cut off frequency, 'fc' is assumed as 1Hz
rc = 1/(2*3.141*1)

In [10]:
dt = 1/52.0

In [11]:
### alpha: coefficient for low pass filtering of the raw acceleration signal
alpha = dt/(dt + rc)
beta = 1-alpha

In [12]:
df_var = pd.DataFrame()

In [13]:
### reading 15 activity files 
for j in range(1,16):
    df = pd.read_csv('C:/Users/Downloads/Activity/'+str(j)+'.csv', header = None)
    df.columns = ['id','ax','ay','az','label']
    df = df[df.label != 0]
    df_var = pd.concat([df_var, df])

In [14]:
df_var.index = range(len(df_var.label))

In [15]:
### creation of different features
### window of samples with sample length = 128 records with 50% overlapping(i.e 64 records overlap)
### 1. root mean squared value of acceleration in x,y and z directions
### 2. velocity is calculated in 3 directions; v = u + a(t) ; u is assumed to be 0 at the start of every window, t is 1/52 = 0.019 sec/sample
### 3. Low pass filter is applied with alpha, The DC components of the signal are calculated. 
### 4. AC components are then derived AX_AC = AX - AX_DC
### 5. RMS vectors of AC and DC components are calculated for both velocity and acceleration
### 6. Other features like mean, min, max, min-max, std, skew and kurtosis are calculated for the different features.


fin_df = pd.DataFrame()
dfseq = range(0, len(df_var.label), 64)
for window in range(len(dfseq)): 
    subdf = pd.DataFrame()
    if window+2 < len(dfseq):
        sub = df_var[dfseq[window]:dfseq[window+2]] 
        subdf = pd.concat([sub]) 
        subdf['acc_rms'] = pow((subdf.ax**2) + (subdf.ay**2) + (subdf.az**2), 0.5 )
        subdf['x_at'] = subdf.ax*0.019
        subdf['y_at'] = subdf.ay*0.019  
        subdf['z_at'] = subdf.az*0.019
        subdf['vx'] = subdf.x_at.cumsum()
        subdf['vy'] = subdf.y_at.cumsum()
        subdf['vz'] = subdf.z_at.cumsum()
        subdf['v_rms'] = pow((subdf.vx**2) + (subdf.vy**2) + (subdf.vz**2), 0.5)
        subdf['ax_dc'] = pd.Series(lfilter([alpha],[1,-beta], subdf['ax'].astype(float)), index = range(dfseq[window],dfseq[window+2]))
        subdf['ay_dc'] = pd.Series(lfilter([alpha],[1,-beta], subdf['ay'].astype(float)), index = range(dfseq[window],dfseq[window+2]))
        subdf['az_dc'] = pd.Series(lfilter([alpha],[1,-beta], subdf['az'].astype(float)), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_dc_rms'] = pow((subdf.ax_dc**2) + (subdf.ay_dc**2) + (subdf.az_dc**2), 0.5)
        subdf['ax_ac'] = subdf.ax - subdf.ax_dc
        subdf['ay_ac'] = subdf.ay - subdf.ay_dc
        subdf['az_ac'] = subdf.az - subdf.az_dc
        subdf['acc_ac_rms'] = pow((subdf.ax_ac**2) + (subdf.ay_ac**2) + (subdf.az_ac**2), 0.5)        
        subdf['x_dc_at'] = subdf.ax_dc*0.019
        subdf['y_dc_at'] = subdf.ay_dc*0.019  
        subdf['z_dc_at'] = subdf.az_dc*0.019
        subdf['vx_dc'] = subdf.x_dc_at.cumsum()
        subdf['vy_dc'] = subdf.y_dc_at.cumsum()
        subdf['vz_dc'] = subdf.z_dc_at.cumsum()
        subdf['v_dc_rms'] = pow((subdf.vx_dc**2) + (subdf.vy_dc**2) + (subdf.vz_dc**2), 0.5)        
        subdf['x_ac_at'] = subdf.ax_ac*0.019
        subdf['y_ac_at'] = subdf.ay_ac*0.019  
        subdf['z_ac_at'] = subdf.az_ac*0.019
        subdf['vx_ac'] = subdf.x_ac_at.cumsum()
        subdf['vy_ac'] = subdf.y_ac_at.cumsum()
        subdf['vz_ac'] = subdf.z_ac_at.cumsum()
        subdf['v_ac_rms'] = pow((subdf.vx_ac**2) + (subdf.vy_ac**2) + (subdf.vz_ac**2), 0.5)        
        subdf['acc_rms_mean'] = pd.Series([subdf.acc_rms.mean()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_dc_rms_mean'] = pd.Series([subdf.acc_dc_rms.mean()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_ac_rms_mean'] = pd.Series([subdf.acc_ac_rms.mean()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_rms_mean'] = pd.Series([subdf.v_rms.mean()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_dc_rms_mean'] = pd.Series([subdf.v_dc_rms.mean()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_ac_rms_mean'] = pd.Series([subdf.v_ac_rms.mean()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))        
        subdf['acc_rms_min'] = pd.Series([subdf.acc_rms.min()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_dc_rms_min'] = pd.Series([subdf.acc_dc_rms.min()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_ac_rms_min'] = pd.Series([subdf.acc_ac_rms.min()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_rms_min'] = pd.Series([subdf.v_rms.min()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_dc_rms_min'] = pd.Series([subdf.v_dc_rms.min()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_ac_rms_min'] = pd.Series([subdf.v_ac_rms.min()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))        
        subdf['acc_rms_max'] = pd.Series([subdf.acc_rms.max()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_dc_rms_max'] = pd.Series([subdf.acc_dc_rms.max()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_ac_rms_max'] = pd.Series([subdf.acc_ac_rms.max()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_rms_max'] = pd.Series([subdf.v_rms.max()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_dc_rms_max'] = pd.Series([subdf.v_dc_rms.max()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_ac_rms_max'] = pd.Series([subdf.v_ac_rms.max()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))        
        subdf['acc_rms_ampl'] = subdf['acc_rms_max'] - subdf['acc_rms_min']
        subdf['acc_dc_rms_ampl'] = subdf['acc_dc_rms_max'] - subdf['acc_dc_rms_min']
        subdf['acc_ac_rms_ampl'] = subdf['acc_ac_rms_max'] - subdf['acc_ac_rms_min']
        subdf['v_rms_ampl'] = subdf['v_rms_max'] - subdf['v_rms_min']
        subdf['v_dc_rms_ampl'] = subdf['v_dc_rms_max'] - subdf['v_dc_rms_min']
        subdf['v_ac_rms_ampl'] = subdf['v_ac_rms_max'] - subdf['v_ac_rms_min']        
        subdf['acc_rms_sd'] = pd.Series([subdf.acc_rms.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_dc_rms_sd'] = pd.Series([subdf.acc_dc_rms.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_ac_rms_sd'] = pd.Series([subdf.acc_ac_rms.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_rms_sd'] = pd.Series([subdf.v_rms.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_dc_rms_sd'] = pd.Series([subdf.v_dc_rms.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_ac_rms_sd'] = pd.Series([subdf.v_ac_rms.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))        
        subdf['acc_rms_skew'] = pd.Series([subdf.acc_rms.skew()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_dc_rms_skew'] = pd.Series([subdf.acc_dc_rms.skew()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_ac_rms_skew'] = pd.Series([subdf.acc_ac_rms.skew()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_rms_skew'] = pd.Series([subdf.v_rms.skew()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_dc_rms_skew'] = pd.Series([subdf.v_dc_rms.skew()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_ac_rms_skew'] = pd.Series([subdf.v_ac_rms.skew()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))        
        subdf['acc_rms_kurt'] = pd.Series([subdf.acc_rms.kurtosis()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_dc_rms_kurt'] = pd.Series([subdf.acc_dc_rms.kurtosis()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['acc_ac_rms_kurt'] = pd.Series([subdf.acc_ac_rms.kurtosis()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_rms_kurt'] = pd.Series([subdf.v_rms.kurtosis()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_dc_rms_kurt'] = pd.Series([subdf.v_dc_rms.kurtosis()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['v_ac_rms_kurt'] = pd.Series([subdf.v_ac_rms.kurtosis()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))        
        subdf['ax_std'] = pd.Series([subdf.ax.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['ay_std'] = pd.Series([subdf.ay.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['az_std'] = pd.Series([subdf.az.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['ax_dc_std'] = pd.Series([subdf.ax_dc.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['ay_dc_std'] = pd.Series([subdf.ay_dc.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['az_dc_std'] = pd.Series([subdf.az_dc.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['ax_ac_std'] = pd.Series([subdf.ax_ac.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['ay_ac_std'] = pd.Series([subdf.ay_ac.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['az_ac_std'] = pd.Series([subdf.az_ac.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))        
        subdf['vx_std'] = pd.Series([subdf.vx.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['vy_std'] = pd.Series([subdf.vy.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['vz_std'] = pd.Series([subdf.vz.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['vx_dc_std'] = pd.Series([subdf.vx_dc.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['vy_dc_std'] = pd.Series([subdf.vy_dc.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['vz_dc_std'] = pd.Series([subdf.vz_dc.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['vx_ac_std'] = pd.Series([subdf.vx_ac.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['vy_ac_std'] = pd.Series([subdf.vy_ac.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
        subdf['vz_ac_std'] = pd.Series([subdf.vz_ac.std()]* len(subdf), index = range(dfseq[window],dfseq[window+2]))
    else:        
        sub = df_var[dfseq[window]:]
        subdf = pd.concat([sub])
        subdf['acc_rms'] = pow((subdf.ax**2) + (subdf.ay**2) + (subdf.az**2), 0.5 )
        subdf['x_at'] = subdf.ax*0.019
        subdf['y_at'] = subdf.ay*0.019  
        subdf['z_at'] = subdf.az*0.019
        subdf['vx'] = subdf.x_at.cumsum()
        subdf['vy'] = subdf.y_at.cumsum()
        subdf['vz'] = subdf.z_at.cumsum()
        subdf['v_rms'] = pow((subdf.vx**2) + (subdf.vy**2) + (subdf.vz**2), 0.5)
        subdf['ax_dc'] = pd.Series(lfilter([alpha],[1,-beta], subdf['ax'].astype(float)), index = range(dfseq[window],len(df_var.label)))
        subdf['ay_dc'] = pd.Series(lfilter([alpha],[1,-beta], subdf['ay'].astype(float)), index = range(dfseq[window],len(df_var.label)))
        subdf['az_dc'] = pd.Series(lfilter([alpha],[1,-beta], subdf['az'].astype(float)), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_dc_rms'] = pow((subdf.ax_dc**2) + (subdf.ay_dc**2) + (subdf.az_dc**2), 0.5)
        subdf['ax_ac'] = subdf.ax - subdf.ax_dc
        subdf['ay_ac'] = subdf.ay - subdf.ay_dc
        subdf['az_ac'] = subdf.az - subdf.az_dc
        subdf['acc_ac_rms'] = pow((subdf.ax_ac**2) + (subdf.ay_ac**2) + (subdf.az_ac**2), 0.5)
        subdf['x_dc_at'] = subdf.ax_dc*0.019
        subdf['y_dc_at'] = subdf.ay_dc*0.019  
        subdf['z_dc_at'] = subdf.az_dc*0.019
        subdf['vx_dc'] = subdf.x_dc_at.cumsum()
        subdf['vy_dc'] = subdf.y_dc_at.cumsum()
        subdf['vz_dc'] = subdf.z_dc_at.cumsum()
        subdf['v_dc_rms'] = pow((subdf.vx_dc**2) + (subdf.vy_dc**2) + (subdf.vz_dc**2), 0.5)        
        subdf['x_ac_at'] = subdf.ax_ac*0.019
        subdf['y_ac_at'] = subdf.ay_ac*0.019  
        subdf['z_ac_at'] = subdf.az_ac*0.019
        subdf['vx_ac'] = subdf.x_ac_at.cumsum()
        subdf['vy_ac'] = subdf.y_ac_at.cumsum()
        subdf['vz_ac'] = subdf.z_ac_at.cumsum()
        subdf['v_ac_rms'] = pow((subdf.vx_ac**2) + (subdf.vy_ac**2) + (subdf.vz_ac**2), 0.5)
        subdf['acc_rms_mean'] = pd.Series([subdf.acc_rms.mean()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_dc_rms_mean'] = pd.Series([subdf.acc_dc_rms.mean()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_ac_rms_mean'] = pd.Series([subdf.acc_ac_rms.mean()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_rms_mean'] = pd.Series([subdf.v_rms.mean()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_dc_rms_mean'] = pd.Series([subdf.v_dc_rms.mean()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_ac_rms_mean'] = pd.Series([subdf.v_ac_rms.mean()]* len(subdf), index = range(dfseq[window],len(df_var.label)))        
        subdf['acc_rms_min'] = pd.Series([subdf.acc_rms.min()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_dc_rms_min'] = pd.Series([subdf.acc_dc_rms.min()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_ac_rms_min'] = pd.Series([subdf.acc_ac_rms.min()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_rms_min'] = pd.Series([subdf.v_rms.min()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_dc_rms_min'] = pd.Series([subdf.v_dc_rms.min()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_ac_rms_min'] = pd.Series([subdf.v_ac_rms.min()]* len(subdf), index = range(dfseq[window],len(df_var.label)))        
        subdf['acc_rms_max'] = pd.Series([subdf.acc_rms.max()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_dc_rms_max'] = pd.Series([subdf.acc_dc_rms.max()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_ac_rms_max'] = pd.Series([subdf.acc_ac_rms.max()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_rms_max'] = pd.Series([subdf.v_rms.max()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_dc_rms_max'] = pd.Series([subdf.v_dc_rms.max()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_ac_rms_max'] = pd.Series([subdf.v_ac_rms.max()]* len(subdf), index = range(dfseq[window],len(df_var.label)))        
        subdf['acc_rms_ampl'] = subdf['acc_rms_max'] - subdf['acc_rms_min']
        subdf['acc_dc_rms_ampl'] = subdf['acc_dc_rms_max'] - subdf['acc_dc_rms_min']
        subdf['acc_ac_rms_ampl'] = subdf['acc_ac_rms_max'] - subdf['acc_ac_rms_min']
        subdf['v_rms_ampl'] = subdf['v_rms_max'] - subdf['v_rms_min']
        subdf['v_dc_rms_ampl'] = subdf['v_dc_rms_max'] - subdf['v_dc_rms_min']
        subdf['v_ac_rms_ampl'] = subdf['v_ac_rms_max'] - subdf['v_ac_rms_min']        
        subdf['acc_rms_sd'] = pd.Series([subdf.acc_rms.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_dc_rms_sd'] = pd.Series([subdf.acc_dc_rms.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_ac_rms_sd'] = pd.Series([subdf.acc_ac_rms.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_rms_sd'] = pd.Series([subdf.v_rms.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_dc_rms_sd'] = pd.Series([subdf.v_dc_rms.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_ac_rms_sd'] = pd.Series([subdf.v_ac_rms.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))        
        subdf['acc_rms_skew'] = pd.Series([subdf.acc_rms.skew()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_dc_rms_skew'] = pd.Series([subdf.acc_dc_rms.skew()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_ac_rms_skew'] = pd.Series([subdf.acc_ac_rms.skew()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_rms_skew'] = pd.Series([subdf.v_rms.skew()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_dc_rms_skew'] = pd.Series([subdf.v_dc_rms.skew()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_ac_rms_skew'] = pd.Series([subdf.v_ac_rms.skew()]* len(subdf), index = range(dfseq[window],len(df_var.label)))        
        subdf['acc_rms_kurt'] = pd.Series([subdf.acc_rms.kurtosis()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_dc_rms_kurt'] = pd.Series([subdf.acc_dc_rms.kurtosis()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['acc_ac_rms_kurt'] = pd.Series([subdf.acc_ac_rms.kurtosis()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_rms_kurt'] = pd.Series([subdf.v_rms.kurtosis()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_dc_rms_kurt'] = pd.Series([subdf.v_dc_rms.kurtosis()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['v_ac_rms_kurt'] = pd.Series([subdf.v_ac_rms.kurtosis()]* len(subdf), index = range(dfseq[window],len(df_var.label)))        
        subdf['ax_std'] = pd.Series([subdf.ax.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['ay_std'] = pd.Series([subdf.ay.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['az_std'] = pd.Series([subdf.az.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['ax_dc_std'] = pd.Series([subdf.ax_dc.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['ay_dc_std'] = pd.Series([subdf.ay_dc.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['az_dc_std'] = pd.Series([subdf.az_dc.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['ax_ac_std'] = pd.Series([subdf.ax_ac.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['ay_ac_std'] = pd.Series([subdf.ay_ac.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['az_ac_std'] = pd.Series([subdf.az_ac.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))        
        subdf['vx_std'] = pd.Series([subdf.vx.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['vy_std'] = pd.Series([subdf.vy.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['vz_std'] = pd.Series([subdf.vz.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['vx_dc_std'] = pd.Series([subdf.vx_dc.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['vy_dc_std'] = pd.Series([subdf.vy_dc.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['vz_dc_std'] = pd.Series([subdf.vz_dc.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['vx_ac_std'] = pd.Series([subdf.vx_ac.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['vy_ac_std'] = pd.Series([subdf.vy_ac.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
        subdf['vz_ac_std'] = pd.Series([subdf.vz_ac.std()]* len(subdf), index = range(dfseq[window],len(df_var.label)))
    fin_df = pd.concat([fin_df, subdf])

fin_df = fin_df.drop(['id','x_at','y_at','z_at','x_dc_at','y_dc_at','z_dc_at','x_ac_at','y_ac_at','z_ac_at'], axis =1)    
fin_df.index = range(len(fin_df))

In [31]:
### sample the dataset into training and testing. taking records from start to 75% into training set and the rest into testing set
samplelimit =  int(0.75*len(fin_df))

In [32]:
train = fin_df[0:samplelimit]
target = fin_df.label
train = fin_df.drop(['label'], axis = 1)

In [33]:
test = fin_df[samplelimit:]
test_target = test.label
test = test.drop(['label'], axis = 1)

In [24]:
clf = RandomForestClassifier(n_estimators=100)

In [25]:
clf.fit(train, target)


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [41]:
pred = clf.predict(test)

In [42]:
### testing on the same training set with the trained classifier ; accuracy is 100%
pd.crosstab(test_target, pred, rownames = ['actual'], colnames=['pred'])

pred,1,2,3,4,5,6,7
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,67290,0,0,0,0,0,0
2,0,1856,0,0,0,0,0
3,0,0,22358,0,0,0,0
4,0,0,0,53720,0,0,0
5,0,0,0,0,6382,0,0
6,0,0,0,0,0,5834,0
7,0,0,0,0,0,0,167496


In [43]:
accuracy_score(test_target, pred)


1.0

In [44]:
imp = clf.feature_importances_

In [45]:
imp = pd.Series(imp, index = train.columns)

In [46]:
imp.sort(ascending = False)

In [47]:
plt.figure()

<matplotlib.figure.Figure at 0x199f29b0>

In [48]:
plt.bar(range(len(imp)), imp,color = 'r')

<Container object of 84 artists>

In [49]:
plt.show()

In [50]:
imp[0:10]

acc_rms_max        0.076892
vx_std             0.063014
vx_dc_std          0.062395
ay_std             0.060530
acc_rms_sd         0.049911
acc_ac_rms_sd      0.048791
acc_rms_min        0.042725
v_rms_ampl         0.036867
acc_ac_rms_mean    0.034734
az_std             0.034210
dtype: float64