In [19]:
# import libraries

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split


from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

from sklearn.metrics import accuracy_score # Accuracy metrics 

import pickle

In [4]:
# load data

coords = pd.read_csv("/work/img_coords.csv")
coords.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z541,v541,x542,y542,z542,v542,x543,y543,z543,v543
0,c0_safe,0.545086,0.168519,-0.356953,1.0,0.534952,0.129982,-0.338494,1.0,0.541106,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,c0_safe,0.451869,0.340804,-0.414619,1.0,0.433471,0.310055,-0.396678,1.0,0.435723,...,-0.010021,0.0,0.927008,0.3822,-0.007705,0.0,0.937594,0.383414,-0.003735,0.0
2,c0_safe,0.424279,0.346203,-0.320999,1.0,0.402844,0.316319,-0.321175,1.0,0.399517,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,c0_safe,0.335687,0.278907,-0.669487,1.0,0.320887,0.222015,-0.63829,1.0,0.334692,...,-0.05334,0.0,0.78792,0.217617,-0.065578,0.0,0.792619,0.21414,-0.07263,0.0
4,c0_safe,0.427172,0.244115,-0.251969,1.0,0.408947,0.205869,-0.227807,1.0,0.414374,...,0.000705,0.0,1.004587,0.404081,0.019041,0.0,0.994604,0.404329,0.023989,0.0


In [12]:
# split X and y

X = coords.drop('class', axis=1)
y = coords['class']
y.value_counts()

c0_safe               2489
c3_texting_left       2346
c4_phonecall_left     2326
c2_phonecall_right    2317
c1_texting_right      2267
c6_talking            2129
c5_behind             2002
Name: class, dtype: int64

In [14]:
# train_test split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [16]:
# define pipelines

pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression()),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [17]:
# fit models

fit_models = {}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [18]:
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [20]:
# check accuracy

for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    print(algo, accuracy_score(y_test, yhat))



lr 0.7587654839386941
rc 0.7801805584715515
rf 0.8253201763594373
gb 0.8194415284484569


In [21]:
# save random forest model

with open('rf_baseline.pkl', 'wb') as f:
    pickle.dump(fit_models['rf'], f)



<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=7c555c04-0c9e-4bda-be05-56c77451a586' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>