In [None]:
import time
from IPython.display import clear_output
import numpy    as np
import pandas   as pd
import seaborn  as sns
import matplotlib.pyplot as plt
import sklearn  as skl
from numpy import asarray

from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline      
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, balanced_accuracy_score, plot_confusion_matrix
from sklearn import set_config

set_config(display='diagram') # Useful for displaying the pipeline
from sklearn.tree          import DecisionTreeClassifier
from sklearn.ensemble      import RandomForestClassifier
from sklearn.ensemble      import ExtraTreesClassifier
from sklearn.ensemble      import AdaBoostClassifier
from sklearn.ensemble      import GradientBoostingClassifier
from sklearn.experimental  import enable_hist_gradient_boosting # Necesary for HistGradientBoostingClassifier
from sklearn.ensemble      import HistGradientBoostingClassifier
from xgboost               import XGBClassifier
from lightgbm              import LGBMClassifier
from catboost              import CatBoostClassifier
from sklearn.svm           import SVC

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [None]:
# Read dataset_5sec.csv from its location on the local disk and show it
dataset_5sec_path = r'C:\Users\rnr31\Documents\GitHub\apple_watch_tracker\dataset_5sec.csv'
data = pd.read_csv(dataset_5sec_path)
data

In [None]:
# Remove the columns that are not carried forward into the renamed frame.
data = data.drop(columns=['Unnamed: 0', 'id', 'activityrecognition#0', 'user'])

# Names at positions 2..57 are dot-separated: take the third dot-separated
# piece and split it on '#'. Names at positions 58..65 are split on '#' directly.
dotted_parts = [name.split('.')[2].split('#') for name in data.columns[2:58]]
plain_parts = [name.split('#') for name in data.columns[58:66]]

# The first two columns and the last one receive fixed names; every other
# column becomes "<first part>_<second part>".
data_col = ['time', 'activityrecognition_1']
data_col.extend(f'{parts[0]}_{parts[1]}' for parts in dotted_parts + plain_parts)
data_col.append('target')

# Rebuild the frame from its raw values, then attach the new column names.
data = pd.DataFrame.from_records(data.values)
data.columns = data_col

data.head()


In [None]:

# Integer code assigned to each label found in the 'target' column.
target_codes = {'Bus': 0, 'car': 1, 'Still': 2, 'Train': 3, 'Walking': 4}

# Labels are compared after trimming whitespace and lower-casing, so a
# difference in capitalisation (e.g. 'Car' vs 'car') still resolves to a code.
codes_by_key = {label.lower(): code for label, code in target_codes.items()}
label_keys = data['target'].astype(str).str.strip().str.lower()
encoded_target = label_keys.map(codes_by_key)

# A plain dict.get lookup turns every unrecognised label into None without any
# warning; stop here instead and report the offending labels.
unknown_labels = sorted(data.loc[encoded_target.isna(), 'target'].astype(str).unique())
if unknown_labels:
    raise ValueError(
        f"Unmapped values in 'target': {unknown_labels}. "
        f"Known labels: {sorted(target_codes)}. "
        "Re-running this cell on an already encoded column also triggers this error."
    )

data['target'] = encoded_target.astype(int)
data.head()

In [None]:
# Separate the label column from the feature columns.
y = data['target']
x = data.drop(columns=['target'])


In [None]:
# Replace every missing feature value with 0, then show the result.
x = x.fillna(value=0)
x

In [None]:
# Stratified hold-out split with a fixed seed: 30% of the rows form the test set.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    stratify=y,
    test_size=0.3,
    random_state=0,
)

In [None]:

# One seed shared by every estimator, so that repeated runs of the notebook fit
# the same models and therefore produce the same comparison table.
SEED = 0

# Candidate tree-based classifiers, keyed by the label shown in the results.
tree_classifiers = {
  "Decision Tree": DecisionTreeClassifier(random_state=SEED),
  "Extra Trees": ExtraTreesClassifier(n_estimators=100, random_state=SEED),
  "Random Forest": RandomForestClassifier(n_estimators=100, random_state=SEED),
  "AdaBoost": AdaBoostClassifier(n_estimators=100, random_state=SEED),
  "Skl GBM": GradientBoostingClassifier(n_estimators=100, random_state=SEED),
  "Skl HistGBM": HistGradientBoostingClassifier(max_iter=100, random_state=SEED),
  "XGBoost": XGBClassifier(n_estimators=100, random_state=SEED),
  "LightGBM": LGBMClassifier(n_estimators=100, random_state=SEED),
  "CatBoost":  CatBoostClassifier(n_estimators=100, random_state=SEED),
}

In [None]:
# Wrap each estimator in a single-step pipeline, keeping the same dictionary keys.
tree_classifiers = dict(
    (label, make_pipeline(estimator)) for label, estimator in tree_classifiers.items()
)

# Start with an empty results table that holds the four report columns.
result_columns = ('Model', 'Accuracy', 'Bal Acc.', 'Time')
results = pd.DataFrame({column: [] for column in result_columns})

In [None]:
# Fit every pipeline on the training split, time the fit, and score the
# predictions made for the held-out test split.
result_rows = []
for model_name, model in tree_classifiers.items():

    start_time = time.time()
    model.fit(x_train, y_train)
    total_time = time.time() - start_time  # seconds spent in fit() only

    pred = model.predict(x_test)

    # Rows are collected in a plain list: DataFrame.append is deprecated since
    # pandas 1.4 and removed in 2.0, and it copied the whole frame on each call.
    result_rows.append({"Model":    model_name,
                        "Accuracy": accuracy_score(y_test, pred)*100,
                        "Bal Acc.": balanced_accuracy_score(y_test, pred)*100,
                        "Time":     total_time})

# Building the table from scratch keeps this cell safe to re-run: rows from an
# earlier execution are not carried over.
results = pd.DataFrame(result_rows, columns=['Model', 'Accuracy', 'Bal Acc.', 'Time'])

# Rank by accuracy (best first) and number the rows starting from 1.
results_ord = results.sort_values(by=['Accuracy'], ascending=False, ignore_index=True)
results_ord.index += 1
results_ord.style.bar(subset=['Accuracy', 'Bal Acc.'], vmin=0, vmax=100, color='#5fba7d')

In [None]:
# Show the accuracy-ranked results as a plain (unstyled) DataFrame.
results_ord

In [None]:
# Read dataset_0.5sec.csv from its location on the local disk and show it
dataset_05sec_path = r'C:\Users\rnr31\Documents\GitHub\apple_watch_tracker\dataset_0.5sec.csv'
df = pd.read_csv(dataset_05sec_path)
df

In [None]:
# Remove the columns that are not carried forward into the renamed frame.
df = df.drop(columns=['Unnamed: 0', 'id', 'activityrecognition#0', 'user'])

# The first two columns and the last one receive fixed names; every other
# column becomes "<first part>_<second part>".
data_col_new = ['time', 'activityrecognition_1']
# Positions 2..57: dot-separated names, keep the third piece and split it on '#'.
for i in df.columns[2:58]:
    b = i.split('.')[2].split('#')
    data_col_new.append(f'{b[0]}_{b[1]}')

# Positions 58..65: names are split on '#' directly.
for i in df.columns[58:66]:
    b = i.split('#')
    data_col_new.append(f'{b[0]}_{b[1]}')

data_col_new.append('target')
# Rebuild the frame from its raw values, then attach the new column names.
df = pd.DataFrame.from_records(df.values)
df.columns = data_col_new

# The encoder has to be created before it is used; no earlier cell defines
# `oe`, so calling oe.fit_transform directly raised NameError.
# NOTE(review): OrdinalEncoder numbers the categories in sorted order; confirm
# that these codes match the integer labels the models were trained on.
oe = OrdinalEncoder()

ct = asarray(df['target'])

# fit_transform returns a 2-D (n_samples, 1) array; flatten it to one value per row.
df['target'] = oe.fit_transform(ct.reshape(-1,1)).ravel()

In [None]:
# Features and labels come from the second dataset (`df`). Reading them from
# `data` would re-use the rows the models were trained on and leave `df` unused.
x_new = df.drop(['target'], axis=1)
y_new = df['target']

# Same missing-value handling as for the training features.
x_new = x_new.fillna(0)

# Stratified 50/50 split of the second dataset with a fixed seed.
x_train_check, x_test_check, y_train_check, y_test_check = train_test_split(x_new, y_new, random_state=0, test_size=0.5, stratify=y_new)



In [None]:
# Refit every pipeline on the original training split (so that the Time column
# is populated) and score its predictions for x_train_check.
train_check_rows = []
for model_name, model in tree_classifiers.items():

    start_time = time.time()
    model.fit(x_train, y_train)
    total_time = time.time() - start_time  # seconds spent in fit() only

    pred = model.predict(x_train_check)

    # Rows are collected in a plain list: DataFrame.append is deprecated since
    # pandas 1.4 and removed in 2.0, and it copied the whole frame on each call.
    train_check_rows.append({"Model":    model_name,
                             "Accuracy": accuracy_score(y_train_check, pred)*100,
                             "Bal Acc.": balanced_accuracy_score(y_train_check, pred)*100,
                             "Time":     total_time})

results_train_check = pd.DataFrame(train_check_rows,
                                   columns=['Model', 'Accuracy', 'Bal Acc.', 'Time'])

# Rank by accuracy (best first) and number the rows starting from 1.
results_train_check_ord = results_train_check.sort_values(by=['Accuracy'], ascending=False, ignore_index=True)
results_train_check_ord.index += 1
results_train_check_ord.style.bar(subset=['Accuracy', 'Bal Acc.'], vmin=0, vmax=100, color='#5fba7d')

In [None]:
# Show the accuracy-ranked results for x_train_check as a plain (unstyled) DataFrame.
results_train_check_ord

In [None]:
# NOTE(review): `rang` is not read by any cell visible here; it is kept in case
# a later cell depends on it.
rang = abs(y_test_check.max()) + abs(y_test_check.min())

# Refit every pipeline on the original training split (so that the Time column
# is populated) and score its predictions for x_test_check.
test_check_rows = []
for model_name, model in tree_classifiers.items():

    start_time = time.time()
    model.fit(x_train, y_train)
    total_time = time.time() - start_time  # seconds spent in fit() only

    pred = model.predict(x_test_check)

    # Rows are collected in a plain list: DataFrame.append is deprecated since
    # pandas 1.4 and removed in 2.0, and it copied the whole frame on each call.
    test_check_rows.append({"Model":    model_name,
                            "Accuracy": accuracy_score(y_test_check, pred)*100,
                            "Bal Acc.": balanced_accuracy_score(y_test_check, pred)*100,
                            "Time":     total_time})

results_test_check = pd.DataFrame(test_check_rows,
                                  columns=['Model', 'Accuracy', 'Bal Acc.', 'Time'])

# Rank by accuracy (best first) and number the rows starting from 1.
results_test_check_ord = results_test_check.sort_values(by=['Accuracy'], ascending=False, ignore_index=True)
results_test_check_ord.index += 1
results_test_check_ord.style.bar(subset=['Accuracy', 'Bal Acc.'], vmin=0, vmax=100, color='#5fba7d')

In [None]:
# Show the accuracy-ranked results for x_test_check as a plain (unstyled) DataFrame.
results_test_check_ord