In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
import scipy.stats as stats

In [None]:
data = pd.read_csv('/content/processeddataLab.csv')

In [None]:
data.head()

Unnamed: 0.1,Unnamed: 0,user_id,datetime,x,y,z,label
0,0,1,2018-07-25T14:58:46.247+1000,0.612,7.7,0.0,11
1,1,1,2018-07-25T14:55:30.396+1000,5.286,7.7,0.0,10
2,2,1,2018-07-25T14:55:30.402+1000,5.286,7.7,0.0,10
3,3,1,2018-07-25T14:58:46.247+1000,0.612,7.7,0.0,11
4,4,1,2018-07-25T14:55:30.396+1000,5.286,7.7,0.0,10


# New Section

In [None]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 270789 entries, 0 to 270788
Data columns (total 7 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   Unnamed: 0  270789 non-null  int64  
 1   user_id     270789 non-null  int64  
 2   datetime    270789 non-null  object 
 3   x           270789 non-null  float64
 4   y           270789 non-null  float64
 5   z           270789 non-null  float64
 6   label       270789 non-null  int64  
dtypes: float64(3), int64(3), object(1)
memory usage: 14.5+ MB


In [None]:
data.isnull().sum()

Unnamed: 0    0
user_id       0
datetime      0
x             0
y             0
z             0
label         0
dtype: int64

**Balancing**

In [None]:
cnt = data['label'].value_counts()
mn = min(cnt);
print(mn)

8992


In [None]:
labels = len(cnt) 
balanced_data = pd.DataFrame()
for i in range(1,labels+1):
  balanced_data = balanced_data.append(data[data['label']==i].head(mn).copy())
balanced_data.shape

(107904, 7)

In [None]:
balanced_data['label'].value_counts()

12    8992
11    8992
10    8992
9     8992
8     8992
7     8992
6     8992
5     8992
4     8992
3     8992
2     8992
1     8992
Name: label, dtype: int64

**Standardization**

In [None]:
X = balanced_data[['x','y','z']]
y = balanced_data['label']

In [None]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

scaled_X = pd.DataFrame(data=X, columns=['x', 'y', 'z'])
scaled_X['label'] = y.values

scaled_X

Unnamed: 0,x,y,z,label
0,-1.563224,0.522496,-0.136867,1
1,-1.563224,0.522496,-0.136867,1
2,0.747795,0.601846,-0.117374,1
3,0.747795,0.601846,-0.117374,1
4,0.747795,0.601846,-0.117374,1
...,...,...,...,...
107899,-0.089798,0.767897,-0.206457,12
107900,-0.147498,0.767897,-0.206457,12
107901,-0.147498,0.767897,-0.206457,12
107902,-0.205198,0.767897,-0.206457,12


**Framing**

In [None]:
frame_size = 30
hop_size = 10

In [None]:
def get_frames(df, frame_size, hop_size):
    
    N_FEATURES = 3
    
    frames = []
    labels = []
    
    for i in range(0, len(df)-frame_size, hop_size):
        x = df['x'].values[i: i+frame_size]
        y = df['y'].values[i: i+frame_size]
        z = df['z'].values[i: i+frame_size]
        
        label = stats.mode(df['label'][i: i+frame_size])[0][0]
        frames.append([x, y, z])
        labels.append(label)
        
    frames = np.asarray(frames).reshape(-1, frame_size, N_FEATURES)
    labels = np.asarray(labels)
    
    return frames, labels

In [None]:
Frame_X, y = get_frames(scaled_X, frame_size, hop_size)
Frame_X.shape, y.shape

((10788, 30, 3), (10788,))

**Feature Extraction (Mean, Median, Mode, Variance, Standard Deviation, RMS)**

In [None]:
X = []
N_FEATURES = 6
for i in range(Frame_X.shape[0]):
  mean_X = np.mean(Frame_X[i],axis=0)
  median_X = np.median(Frame_X[i],axis=0)
  mode_X = np.asarray(stats.mode(Frame_X[i])[0][0])
  variance_X = np.var(Frame_X[i],axis=0)
  std_deviation_X = np.std(Frame_X[i],axis=0)
  rms_X = np.sqrt(np.mean(np.square(Frame_X[i]),axis=0))
  comb = np.array([mean_X,median_X,mode_X,variance_X,std_deviation_X,rms_X])
  #print(np.squeeze(comb,axis=0).shape)
  X.append(np.squeeze(comb))
X = np.asarray(X)
X.shape

(10788, 6, 3)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0, stratify = y)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((8630, 6, 3), (2158, 6, 3), (8630,), (2158,))

# RANDOM FOREST

In [None]:
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(n_estimators = 1000, random_state = 42)
nsamples, nx, ny = X_train.shape
d2_X_train = X_train.reshape((nsamples, nx*ny))

rf.fit(d2_X_train, y_train);

In [None]:
nsamples, nx, ny = X_test.shape
d2_X_test = X_test.reshape((nsamples, nx*ny))
predictions = rf.predict(d2_X_test)

In [None]:
errors = abs (predictions - y_test)

In [None]:
print('Mean Absolute Error: ', round(np.mean(errors),2))

Mean Absolute Error:  1.34


# Performance

In [None]:
#Mean Absolute Percentage Error
mape = 100*(errors/y_test) 

In [None]:
accuracy = 100 - np.mean(mape)
print('Accuracy: ', round(accuracy,2), '%')

Accuracy:  64.76 %


In [None]:
#Score
print('Score: ',rf.score(d2_X_test, y_test)*100,'%')

Score:  71.15791816413237 %


# TRY

In [None]:
from sklearn.ensemble import RandomForestRegressor

nsamples, nx, ny = X_train.shape
d2_X_train = X_train.reshape((nsamples, nx*ny))

nsamples, nx, ny = X_test.shape
d2_X_test = X_test.reshape((nsamples, nx*ny))

In [None]:
for i in range (1):
  rf = RandomForestRegressor(n_estimators = 1000, random_state = 42)

  rf.fit(d2_X_train, y_train);

  predictions = rf.predict(d2_X_test)

  errors = abs (predictions - y_test)
  print('Mean Absolute Error: ', round(np.mean(errors),2))

  #Mean Absolute Percentage Error
  mape = 100*(errors/y_test) 
  accuracy = 100 - np.mean(mape)
  print('Accuracy: ', round(accuracy,2), '%')

  #Score
  print('Score: ',rf.score(d2_X_test, y_test)*100,'%')




Mean Absolute Error:  1.34
Accuracy:  64.76 %
Score:  71.15791816413237 %
