# YMCA Pose Model Training

Train a multinomial logistic regression classifier (scikit-learn `LogisticRegression`) on the collected pose data.

In [1]:
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import make_pipeline

import warnings
# Silences every warning raised for the rest of the session.
# NOTE(review): this presumably also hides solver convergence warnings from the
# grid search further down — confirm that is intended.
warnings.filterwarnings("ignore")

In [2]:
def get_data(file_name):
    """
    Read a headerless pose CSV from the ``data/`` directory and split it into
    features and labels.

    Column 0 of the file holds the label; every remaining column is a feature.

    :param file_name: name of the CSV file inside ``data/``
    :return: X - DataFrame with the feature columns (columns 1..n)
             y - Series with the labels (column 0), returned unencoded
             classes - sorted unique labels when y has object dtype,
                       otherwise an empty list
    """
    df = pd.read_csv(f'data/{file_name}', header=None)
    X = df.loc[:, 1:]
    y = df.loc[:, 0]

    classes = []
    if y.dtype == object:
        # The encoder is only fitted to discover the class names; the encoded
        # labels themselves are not needed because y is returned as-is.
        classes = LabelEncoder().fit(y).classes_

    return X, y, classes


In [3]:
# Load features, raw string labels, and the list of distinct class names.
X, y, classes = get_data("ymca_training.csv")

In [4]:
# Feature matrix: one row per sample, 18 numeric feature columns.
X

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,0.561728,0.771587,-0.162252,0.436373,0.765288,-0.192959,0.597814,0.907482,-0.184370,0.379148,0.898178,-0.220386,0.626990,1.002246,-0.325643,0.382508,1.036272,-0.322547
1,0.551079,0.761984,-0.039519,0.432480,0.753380,-0.034821,0.626731,0.830323,-0.214382,0.371372,0.817650,-0.230957,0.649688,0.800309,-0.455547,0.343538,0.817020,-0.526281
2,0.544549,0.760656,-0.034900,0.428598,0.756397,-0.028312,0.630615,0.749785,-0.210157,0.350203,0.784802,-0.218243,0.644441,0.682965,-0.422997,0.334618,0.700532,-0.436179
3,0.537734,0.753881,0.045089,0.425675,0.749667,0.044266,0.622343,0.720850,-0.056557,0.348894,0.736559,-0.049486,0.644950,0.618357,-0.227115,0.328643,0.628556,-0.184775
4,0.530375,0.736013,-0.238045,0.426229,0.739088,-0.164763,0.599038,0.680531,-0.326870,0.348011,0.710933,-0.169867,0.629410,0.550343,-0.439407,0.329981,0.583502,-0.280747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2730,0.596576,0.418279,-0.101931,0.417590,0.420099,-0.076686,0.730446,0.392611,-0.263164,0.285284,0.425948,-0.254256,0.624012,0.262082,-0.313953,0.372351,0.303846,-0.262556
2731,0.599782,0.417877,-0.101922,0.418046,0.418203,-0.088106,0.735210,0.402261,-0.248099,0.287979,0.421756,-0.255778,0.626346,0.268012,-0.276362,0.375217,0.298596,-0.236444
2732,0.604329,0.410070,-0.099348,0.422566,0.414207,-0.081391,0.740079,0.401420,-0.224092,0.293555,0.405108,-0.224094,0.638126,0.268476,-0.232376,0.379879,0.281148,-0.135601
2733,0.605879,0.407112,-0.112233,0.423121,0.412330,-0.097919,0.744672,0.397736,-0.258928,0.294266,0.400259,-0.278894,0.648069,0.268211,-0.288286,0.382379,0.276100,-0.246851


In [5]:
# Label for each row of X (kept as the original strings).
y

0       Y
1       Y
2       Y
3       Y
4       Y
       ..
2730    M
2731    M
2732    M
2733    M
2734    M
Name: 0, Length: 2735, dtype: object

In [6]:
# Distinct labels found in y.
classes

array(['A', 'C', 'M', 'Y', 'dance'], dtype=object)

## Create Model Pipeline

In [7]:
# Base classifier; penalty, C, solver and max_iter are set by the grid search.
# NOTE(review): `multi_class` appears to be deprecated in recent scikit-learn
# releases — confirm against the installed version.
logreg = LogisticRegression(multi_class='multinomial')

In [8]:
# Standardize the features, then classify. make_pipeline names each step after
# its lowercased class name ('standardscaler', 'logisticregression'); the
# 'logisticregression__' prefixes in the parameter grid refer to that step name.
pipeline = make_pipeline(StandardScaler(), logreg)

In [9]:
# Hyper-parameter grid for the search. Keys use the '<step name>__<parameter>'
# form so they are routed to the logistic-regression step of the pipeline.
# This must be a plain dict: a trailing comma after the closing bracket would
# silently turn it into a 1-tuple, which is not a documented param_grid type.
param_grid = {
    "logisticregression__penalty": ["l2"],
    "logisticregression__C": [10, 1, 0.1, 0.01],
    "logisticregression__solver": ["newton-cg", "sag", "lbfgs"],
    "logisticregression__max_iter": [100, 200, 300],
}


In [10]:
# Try every combination in param_grid, scoring each with 5-fold cross-validation.
# The scaler is part of the pipeline, so it is refit on each training fold.
grid = GridSearchCV(pipeline, param_grid, cv=5)
grid.fit(X, y)


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('logisticregression',
                                        LogisticRegression(multi_class='multinomial'))]),
             param_grid=({'logisticregression__C': [10, 1, 0.1, 0.01],
                          'logisticregression__max_iter': [100, 200, 300],
                          'logisticregression__penalty': ['l2'],
                          'logisticregression__solver': ['newton-cg', 'sag',
                                                         'lbfgs']},))

In [11]:
# Pull the winning configuration off the fitted search once, then report it.
best_score = grid.best_score_    # mean cross-validated score of the best candidate
best_params = grid.best_params_  # parameter combination that achieved it
best_model = grid.best_estimator_  # the selected pipeline, as stored by the search

print(best_score)
print(best_params)
print(best_model)



0.9875685557586837
{'logisticregression__C': 10, 'logisticregression__max_iter': 100, 'logisticregression__penalty': 'l2', 'logisticregression__solver': 'newton-cg'}
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('logisticregression',
                 LogisticRegression(C=10, multi_class='multinomial',
                                    solver='newton-cg'))])


## Model Prediction

Note: the sample predicted below is drawn from the training data, so this section only demonstrates how to call the model; it is for demo purposes and is not a measure of accuracy on unseen data.

In [12]:
# (rows, feature columns) — the row count bounds the random index chosen next.
X.shape

(2735, 18)

In [13]:
import random

# Pick one row at random. randrange excludes the upper bound, so the index is
# always a valid position; randint(0, n) could return n and overflow iloc.
index = random.randrange(X.shape[0])
x_new = X.iloc[index]
y_actual = y.iloc[index]
print(f"Actual Label: {y_actual}")
# predict expects a 2-D input, so the single row is wrapped in a list.
pred = best_model.predict([x_new])
print(f"Pred Label: {pred[0]}")

Actual Label: A
Pred Label: A
