In [1]:
import time
from IPython.display import clear_output
import numpy    as np
import pandas   as pd
import seaborn  as sns
import matplotlib.pyplot as plt
import sklearn  as skl
from numpy import asarray

from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline      
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, balanced_accuracy_score, plot_confusion_matrix
from sklearn import set_config

set_config(display='diagram') # Useful for display the pipeline
from sklearn.tree          import DecisionTreeClassifier
from sklearn.ensemble      import RandomForestClassifier
from sklearn.ensemble      import ExtraTreesClassifier
from sklearn.ensemble      import AdaBoostClassifier
from sklearn.ensemble      import GradientBoostingClassifier
from sklearn.experimental  import enable_hist_gradient_boosting # Necesary for HistGradientBoostingClassifier
from sklearn.ensemble      import HistGradientBoostingClassifier
from xgboost               import XGBClassifier
from lightgbm              import LGBMClassifier
from catboost              import CatBoostClassifier
from sklearn.svm           import SVC

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [2]:
# Getting the dataset
data = pd.read_csv(r'C:\Users\rnr31\Documents\GitHub\apple_watch_tracker\dataset_5sec.csv')
data

Unnamed: 0.1,Unnamed: 0,id,time,activityrecognition#0,activityrecognition#1,android.sensor.accelerometer#mean,android.sensor.accelerometer#min,android.sensor.accelerometer#max,android.sensor.accelerometer#std,android.sensor.game_rotation_vector#mean,...,sound#mean,sound#min,sound#max,sound#std,speed#mean,speed#min,speed#max,speed#std,target,user
0,0,16170,78.0,,100.0,9.811476,9.758895,9.849411,0.014626,0.029340,...,,,,,0.000000,0.000000,0.000000,0.000000,Still,U12
1,1,15871,145.0,,100.0,9.939207,7.707437,17.146631,1.775944,0.999925,...,89.200210,89.065143,89.335277,0.191013,16.539349,16.539349,16.539349,0.628595,Car,U12
2,2,16811,150.0,,100.0,9.827178,9.804817,9.849262,0.011199,0.665215,...,,,,,0.000000,0.000000,0.000000,0.000000,Still,U12
3,3,15831,105.0,,77.0,9.673039,7.659674,12.304298,0.862553,0.996221,...,87.470377,87.470377,87.470377,2.284186,17.739895,17.739895,17.739895,0.628595,Car,U12
4,4,876,77.0,,100.0,9.993466,8.965621,10.891645,0.504117,0.563792,...,89.770732,89.770732,89.770732,0.006389,9.000000,9.000000,9.000000,,Car,U1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5888,5888,14125,4.0,,100.0,10.066610,4.564648,13.414389,3.373972,0.986319,...,,,,,,,,,Walking,U2
5889,5889,19582,32.0,,10.0,9.964271,8.768007,11.454213,0.665529,0.648602,...,,,,,31.960054,31.960054,31.960054,,Train,U3
5890,5890,16178,86.0,,100.0,9.812893,9.772881,9.862587,0.017633,0.024392,...,,,,,0.000000,0.000000,0.000000,0.000000,Still,U12
5891,5891,10973,199.0,,100.0,9.831144,9.807347,9.874450,0.013485,0.481212,...,,,,,1.090415,1.090415,1.090415,,Still,U10


In [3]:
data.drop(['Unnamed: 0','id', 'activityrecognition#0','user'],axis=1, inplace=True)
data_col = ['time','activityrecognition_1']
for i in data.columns[2:58]:
    b = i.split('.')[2].split('#')
    data_col.append(f'{b[0]}_{b[1]}')

for i in data.columns[58:66]:
    b = i.split('#')
    data_col.append(f'{b[0]}_{b[1]}')

data_col.append('target')
data = pd.DataFrame.from_records(data.values)
data.columns = data_col

data.head()


Unnamed: 0,time,activityrecognition_1,accelerometer_mean,accelerometer_min,accelerometer_max,accelerometer_std,game_rotation_vector_mean,game_rotation_vector_min,game_rotation_vector_max,game_rotation_vector_std,...,step_counter_std,sound_mean,sound_min,sound_max,sound_std,speed_mean,speed_min,speed_max,speed_std,target
0,78.0,100.0,9.811476,9.758895,9.849411,0.014626,0.02934,0.029014,0.029526,0.000119,...,,,,,,0.0,0.0,0.0,0.0,Still
1,145.0,100.0,9.939207,7.707437,17.146631,1.775944,0.999925,0.999903,0.999946,3e-05,...,,89.20021,89.065143,89.335277,0.191013,16.539349,16.539349,16.539349,0.628595,Car
2,150.0,100.0,9.827178,9.804817,9.849262,0.011199,0.665215,0.665213,0.665218,4e-06,...,,,,,,0.0,0.0,0.0,0.0,Still
3,105.0,77.0,9.673039,7.659674,12.304298,0.862553,0.996221,0.993781,0.998661,0.003451,...,,87.470377,87.470377,87.470377,2.284186,17.739895,17.739895,17.739895,0.628595,Car
4,77.0,100.0,9.993466,8.965621,10.891645,0.504117,0.563792,0.521799,0.672664,0.064792,...,,89.770732,89.770732,89.770732,0.006389,9.0,9.0,9.0,,Car


In [4]:
oe = OrdinalEncoder()
ct = asarray(data['target'])

data['target'] = oe.fit_transform(ct.reshape(-1,1))
data.head()

Unnamed: 0,time,activityrecognition_1,accelerometer_mean,accelerometer_min,accelerometer_max,accelerometer_std,game_rotation_vector_mean,game_rotation_vector_min,game_rotation_vector_max,game_rotation_vector_std,...,step_counter_std,sound_mean,sound_min,sound_max,sound_std,speed_mean,speed_min,speed_max,speed_std,target
0,78.0,100.0,9.811476,9.758895,9.849411,0.014626,0.02934,0.029014,0.029526,0.000119,...,,,,,,0.0,0.0,0.0,0.0,2.0
1,145.0,100.0,9.939207,7.707437,17.146631,1.775944,0.999925,0.999903,0.999946,3e-05,...,,89.20021,89.065143,89.335277,0.191013,16.539349,16.539349,16.539349,0.628595,1.0
2,150.0,100.0,9.827178,9.804817,9.849262,0.011199,0.665215,0.665213,0.665218,4e-06,...,,,,,,0.0,0.0,0.0,0.0,2.0
3,105.0,77.0,9.673039,7.659674,12.304298,0.862553,0.996221,0.993781,0.998661,0.003451,...,,87.470377,87.470377,87.470377,2.284186,17.739895,17.739895,17.739895,0.628595,1.0
4,77.0,100.0,9.993466,8.965621,10.891645,0.504117,0.563792,0.521799,0.672664,0.064792,...,,89.770732,89.770732,89.770732,0.006389,9.0,9.0,9.0,,1.0


In [5]:
x = data.drop(['target'], axis=1)
y = data['target']


In [6]:
x= x.fillna(0)
x

Unnamed: 0,time,activityrecognition_1,accelerometer_mean,accelerometer_min,accelerometer_max,accelerometer_std,game_rotation_vector_mean,game_rotation_vector_min,game_rotation_vector_max,game_rotation_vector_std,...,step_counter_max,step_counter_std,sound_mean,sound_min,sound_max,sound_std,speed_mean,speed_min,speed_max,speed_std
0,78.0,100.0,9.811476,9.758895,9.849411,0.014626,0.029340,0.029014,0.029526,0.000119,...,28966.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,145.0,100.0,9.939207,7.707437,17.146631,1.775944,0.999925,0.999903,0.999946,0.000030,...,0.0,0.0,89.200210,89.065143,89.335277,0.191013,16.539349,16.539349,16.539349,0.628595
2,150.0,100.0,9.827178,9.804817,9.849262,0.011199,0.665215,0.665213,0.665218,0.000004,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,105.0,77.0,9.673039,7.659674,12.304298,0.862553,0.996221,0.993781,0.998661,0.003451,...,0.0,0.0,87.470377,87.470377,87.470377,2.284186,17.739895,17.739895,17.739895,0.628595
4,77.0,100.0,9.993466,8.965621,10.891645,0.504117,0.563792,0.521799,0.672664,0.064792,...,0.0,0.0,89.770732,89.770732,89.770732,0.006389,9.000000,9.000000,9.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5888,4.0,100.0,10.066610,4.564648,13.414389,3.373972,0.986319,0.974523,0.992084,0.007186,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5889,32.0,10.0,9.964271,8.768007,11.454213,0.665529,0.648602,0.648343,0.648974,0.000331,...,0.0,0.0,0.000000,0.000000,0.000000,0.000000,31.960054,31.960054,31.960054,0.000000
5890,86.0,100.0,9.812893,9.772881,9.862587,0.017633,0.024392,0.024081,0.024696,0.000211,...,28966.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5891,199.0,100.0,9.831144,9.807347,9.874450,0.013485,0.481212,0.481209,0.481216,0.000002,...,6803.0,0.0,0.000000,0.000000,0.000000,0.000000,1.090415,1.090415,1.090415,0.000000


In [7]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3, random_state=0, stratify=y)

In [8]:

tree_classifiers = {
  "Decision Tree": DecisionTreeClassifier(),
  "Extra Trees": ExtraTreesClassifier(n_estimators=100),
  "Random Forest": RandomForestClassifier(n_estimators=100),
  "AdaBoost": AdaBoostClassifier(n_estimators=100),
  "Skl GBM": GradientBoostingClassifier(n_estimators=100),
  "Skl HistGBM": HistGradientBoostingClassifier(max_iter=100),
  "XGBoost": XGBClassifier(n_estimators=100),
  "LightGBM": LGBMClassifier(n_estimators=100),
  "CatBoost":  CatBoostClassifier(n_estimators=100),
}

In [9]:
tree_classifiers = {name: make_pipeline(model) for name, model in tree_classifiers.items()}

results = pd.DataFrame({'Model': [], 'Accuracy': [], 'Bal Acc.': [], 'Time': []})

In [10]:
rang = abs(y_train.max()) + abs(y_train.min())
for model_name, model in tree_classifiers.items():
    
    start_time = time.time()
    model.fit(x_train, y_train)
    total_time = time.time() - start_time
        
    pred = model.predict(x_test)
    
    results = results.append({"Model":    model_name,
                              "Accuracy": accuracy_score(y_test, pred)*100,
                              "Bal Acc.": balanced_accuracy_score(y_test, pred)*100,
                              "Time":     total_time},
                              ignore_index=True)
                              
                              
results_ord = results.sort_values(by=['Accuracy'], ascending=False, ignore_index=True)
results_ord.index += 1 
results_ord.style.bar(subset=['Accuracy', 'Bal Acc.'], vmin=0, vmax=100, color='#5fba7d')



Learning rate set to 0.5
0:	learn: 1.0807955	total: 229ms	remaining: 22.7s
1:	learn: 0.8046587	total: 276ms	remaining: 13.5s
2:	learn: 0.6443487	total: 319ms	remaining: 10.3s
3:	learn: 0.5168570	total: 360ms	remaining: 8.63s
4:	learn: 0.4454898	total: 397ms	remaining: 7.55s
5:	learn: 0.3661148	total: 435ms	remaining: 6.81s
6:	learn: 0.3281374	total: 473ms	remaining: 6.28s
7:	learn: 0.3014214	total: 513ms	remaining: 5.9s
8:	learn: 0.2837250	total: 554ms	remaining: 5.6s
9:	learn: 0.2587014	total: 593ms	remaining: 5.34s
10:	learn: 0.2320204	total: 631ms	remaining: 5.11s
11:	learn: 0.2163061	total: 671ms	remaining: 4.92s
12:	learn: 0.2029830	total: 711ms	remaining: 4.75s
13:	learn: 0.1887159	total: 751ms	remaining: 4.61s
14:	learn: 0.1790528	total: 796ms	remaining: 4.51s
15:	learn: 0.1661208	total: 833ms	remaining: 4.37s
16:	learn: 0.1519258	total: 870ms	remaining: 4.25s
17:	learn: 0.1469034	total: 910ms	remaining: 4.14s
18:	learn: 0.1363169	total: 950ms	remaining: 4.05s
19:	learn: 0.13441

Unnamed: 0,Model,Accuracy,Bal Acc.,Time
1,Extra Trees,99.038462,99.038588,0.418882
2,LightGBM,98.9819,98.982411,1.445136
3,Skl HistGBM,98.925339,98.925753,7.547784
4,XGBoost,98.699095,98.699925,3.070788
5,Random Forest,98.585973,98.58693,1.29753
6,CatBoost,98.529412,98.529953,4.302128
7,Skl GBM,98.246606,98.247627,22.385174
8,Decision Tree,94.400452,94.401658,0.188529
9,AdaBoost,86.934389,86.938269,2.105337


In [11]:
results_ord

Unnamed: 0,Model,Accuracy,Bal Acc.,Time
1,Extra Trees,99.038462,99.038588,0.418882
2,LightGBM,98.9819,98.982411,1.445136
3,Skl HistGBM,98.925339,98.925753,7.547784
4,XGBoost,98.699095,98.699925,3.070788
5,Random Forest,98.585973,98.58693,1.29753
6,CatBoost,98.529412,98.529953,4.302128
7,Skl GBM,98.246606,98.247627,22.385174
8,Decision Tree,94.400452,94.401658,0.188529
9,AdaBoost,86.934389,86.938269,2.105337


In [12]:
df = pd.read_csv(r'C:\Users\rnr31\Documents\GitHub\apple_watch_tracker\dataset_0.5sec.csv')
df

Unnamed: 0.1,Unnamed: 0,id,time,activityrecognition#0,activityrecognition#1,android.sensor.accelerometer#mean,android.sensor.accelerometer#min,android.sensor.accelerometer#max,android.sensor.accelerometer#std,android.sensor.game_rotation_vector#mean,...,sound#mean,sound#min,sound#max,sound#std,speed#mean,speed#min,speed#max,speed#std,target,user
0,0,156661,3.0,,62.0,10.053802,4.328630,14.419712,2.359507,0.629076,...,,,,,,,,,Bus,U12
1,1,156662,4.0,,62.0,10.687402,6.373194,16.329818,2.912315,0.577839,...,,,,,,,,,Bus,U12
2,2,156663,5.0,,62.0,11.816800,4.817410,16.854629,3.199759,0.651511,...,,,,,,,,,Bus,U12
3,3,156664,6.0,,62.0,9.401312,6.124588,12.230792,1.572354,0.713226,...,,,,,,,,,Bus,U12
4,4,156665,7.0,,62.0,10.389851,5.313790,38.533453,5.426978,0.707375,...,,,,,,,,,Bus,U12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62580,62580,82695,4066.0,,51.0,10.105329,10.089159,10.135641,0.026270,0.834765,...,89.761985,89.761985,89.761985,,0.00,0.00,0.00,,Walking,U1
62581,62581,77608,604.0,,100.0,9.544118,9.544118,9.544118,14.491194,0.965309,...,89.747010,89.747010,89.747010,,1.50,1.50,1.50,,Walking,U1
62582,62582,85219,1341.0,,100.0,14.258467,14.258467,14.258467,0.829544,0.773370,...,89.729460,89.729460,89.729460,,0.00,0.00,0.00,,Walking,U1
62583,62583,69368,1186.0,,90.0,10.726491,10.726491,10.726491,7.089701,0.789832,...,89.778343,89.778343,89.778343,,1.00,1.00,1.00,,Walking,U1


In [13]:
df.drop(['Unnamed: 0','id', 'activityrecognition#0','user'],axis=1, inplace=True)
data_col_new = ['time','activityrecognition_1']
for i in df.columns[2:58]:
    b = i.split('.')[2].split('#')
    data_col_new.append(f'{b[0]}_{b[1]}')

for i in df.columns[58:66]:
    b = i.split('#')
    data_col_new.append(f'{b[0]}_{b[1]}')

data_col_new.append('target')
df = pd.DataFrame.from_records(df.values)
df.columns = data_col_new

ct = asarray(df['target'])

df['target'] = oe.fit_transform(ct.reshape(-1,1))

In [14]:
x_new = data.drop(['target'], axis=1)
y_new = data['target']

x_new = x_new.fillna(0)

x_train_check, x_test_check, y_train_check, y_test_check = train_test_split(x_new, y_new, random_state=0, test_size=0.5, stratify=y_new) 



In [15]:
rang = abs(y_train_check.max()) + abs(y_train_check.min())
results_train_check = pd.DataFrame({'Model': [], 'Accuracy': [], 'Bal Acc.': [], 'Time': []})
for model_name, model in tree_classifiers.items():
    
    start_time = time.time()
    model.fit(x_train, y_train)
    total_time = time.time() - start_time
        
    pred = model.predict(x_train_check)
    
    results_train_check = results_train_check.append({"Model":    model_name,
                            "Accuracy": accuracy_score(y_train_check, pred)*100,
                            "Bal Acc.": balanced_accuracy_score(y_train_check, pred)*100,
                            "Time":     total_time},
                            ignore_index=True)
    

results_train_check_ord = results_train_check.sort_values(by=['Accuracy'], ascending=False, ignore_index=True)
results_train_check_ord.index += 1 
results_train_check_ord.style.bar(subset=['Accuracy', 'Bal Acc.'], vmin=0, vmax=100, color='#5fba7d')



Learning rate set to 0.5
0:	learn: 1.0807955	total: 41ms	remaining: 4.06s
1:	learn: 0.8046587	total: 82.6ms	remaining: 4.05s
2:	learn: 0.6443487	total: 120ms	remaining: 3.88s
3:	learn: 0.5168570	total: 159ms	remaining: 3.82s
4:	learn: 0.4454898	total: 195ms	remaining: 3.7s
5:	learn: 0.3661148	total: 234ms	remaining: 3.66s
6:	learn: 0.3281374	total: 274ms	remaining: 3.64s
7:	learn: 0.3014214	total: 314ms	remaining: 3.61s
8:	learn: 0.2837250	total: 352ms	remaining: 3.56s
9:	learn: 0.2587014	total: 393ms	remaining: 3.54s
10:	learn: 0.2320204	total: 433ms	remaining: 3.5s
11:	learn: 0.2163061	total: 473ms	remaining: 3.47s
12:	learn: 0.2029830	total: 516ms	remaining: 3.45s
13:	learn: 0.1887159	total: 553ms	remaining: 3.4s
14:	learn: 0.1790528	total: 598ms	remaining: 3.39s
15:	learn: 0.1661208	total: 637ms	remaining: 3.35s
16:	learn: 0.1519258	total: 676ms	remaining: 3.3s
17:	learn: 0.1469034	total: 715ms	remaining: 3.25s
18:	learn: 0.1363169	total: 762ms	remaining: 3.25s
19:	learn: 0.1344186

Unnamed: 0,Model,Accuracy,Bal Acc.,Time
1,Decision Tree,100.0,100.0,0.178446
2,Extra Trees,100.0,100.0,0.42959
3,Random Forest,100.0,100.0,1.280576
4,Skl HistGBM,100.0,100.0,7.423118
5,XGBoost,100.0,100.0,3.03688
6,LightGBM,100.0,100.0,1.44015
7,CatBoost,99.966056,99.966102,4.604455
8,Skl GBM,99.796334,99.796322,22.091629
9,AdaBoost,88.357094,88.361256,2.103487


In [16]:
results_train_check_ord

Unnamed: 0,Model,Accuracy,Bal Acc.,Time
1,Decision Tree,100.0,100.0,0.178446
2,Extra Trees,100.0,100.0,0.42959
3,Random Forest,100.0,100.0,1.280576
4,Skl HistGBM,100.0,100.0,7.423118
5,XGBoost,100.0,100.0,3.03688
6,LightGBM,100.0,100.0,1.44015
7,CatBoost,99.966056,99.966102,4.604455
8,Skl GBM,99.796334,99.796322,22.091629
9,AdaBoost,88.357094,88.361256,2.103487


In [17]:
rang = abs(y_test_check.max()) + abs(y_test_check.min())
results_test_check = pd.DataFrame({'Model': [], 'Accuracy': [], 'Bal Acc.': [], 'Time': []})
for model_name, model in tree_classifiers.items():
    
    start_time = time.time()
    model.fit(x_train, y_train)
    total_time = time.time() - start_time
        
    pred = model.predict(x_test_check)

    results_test_check = results_test_check.append({"Model":    model_name,
                        "Accuracy": accuracy_score(y_test_check, pred)*100,
                        "Bal Acc.": balanced_accuracy_score(y_test_check, pred)*100,
                        "Time":     total_time},
                        ignore_index=True)
    
    


results_test_check_ord = results_test_check.sort_values(by=['Accuracy'], ascending=False, ignore_index=True)
results_test_check_ord.index += 1 
results_test_check_ord.style.bar(subset=['Accuracy', 'Bal Acc.'], vmin=0, vmax=100, color='#5fba7d')



Learning rate set to 0.5
0:	learn: 1.0807955	total: 76.9ms	remaining: 7.61s
1:	learn: 0.8046587	total: 124ms	remaining: 6.07s
2:	learn: 0.6443487	total: 162ms	remaining: 5.24s
3:	learn: 0.5168570	total: 205ms	remaining: 4.92s
4:	learn: 0.4454898	total: 243ms	remaining: 4.62s
5:	learn: 0.3661148	total: 288ms	remaining: 4.52s
6:	learn: 0.3281374	total: 331ms	remaining: 4.39s
7:	learn: 0.3014214	total: 371ms	remaining: 4.27s
8:	learn: 0.2837250	total: 410ms	remaining: 4.15s
9:	learn: 0.2587014	total: 452ms	remaining: 4.07s
10:	learn: 0.2320204	total: 502ms	remaining: 4.06s
11:	learn: 0.2163061	total: 544ms	remaining: 3.99s
12:	learn: 0.2029830	total: 592ms	remaining: 3.96s
13:	learn: 0.1887159	total: 636ms	remaining: 3.91s
14:	learn: 0.1790528	total: 678ms	remaining: 3.84s
15:	learn: 0.1661208	total: 719ms	remaining: 3.77s
16:	learn: 0.1519258	total: 768ms	remaining: 3.75s
17:	learn: 0.1469034	total: 805ms	remaining: 3.67s
18:	learn: 0.1363169	total: 845ms	remaining: 3.6s
19:	learn: 0.134

Unnamed: 0,Model,Accuracy,Bal Acc.,Time
1,LightGBM,99.389209,99.389427,2.655284
2,Skl HistGBM,99.355277,99.355413,7.769192
3,Extra Trees,99.219545,99.219648,0.511667
4,XGBoost,99.219545,99.219878,3.435544
5,Random Forest,99.185612,99.186037,1.323494
6,CatBoost,99.117747,99.11801,5.565374
7,Skl GBM,98.91415,98.914678,22.547732
8,Decision Tree,96.74245,96.742709,0.291222
9,AdaBoost,87.275195,87.278918,2.127347


In [18]:
results_test_check_ord

Unnamed: 0,Model,Accuracy,Bal Acc.,Time
1,LightGBM,99.389209,99.389427,2.655284
2,Skl HistGBM,99.355277,99.355413,7.769192
3,Extra Trees,99.219545,99.219648,0.511667
4,XGBoost,99.219545,99.219878,3.435544
5,Random Forest,99.185612,99.186037,1.323494
6,CatBoost,99.117747,99.11801,5.565374
7,Skl GBM,98.91415,98.914678,22.547732
8,Decision Tree,96.74245,96.742709,0.291222
9,AdaBoost,87.275195,87.278918,2.127347
