# Synchro Project
- [github link](https://github.com/romainmartinez/envergo)

**TODO**
- Test `Dyn` both as a feature AND as a label

## 0. Setup

In [1]:
# Common imports
import scipy.io as sio
import pandas as pd
import numpy as np
import os

# Figures
import matplotlib.pyplot as plt
import seaborn as sns
# Apply the seaborn theme first: `sns.set_context` rewrites the font-size
# rcParams (axis and tick label sizes included), so the explicit overrides
# below must come after it or they are silently discarded.
sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    # Font type 42 embeds TrueType fonts so text stays editable in exported PDF/PS figures.
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# to make this notebook's output stable across runs
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

## 1. Load data

In [2]:
# Load the feature and target matrices from MATLAB files.
# NOTE (from the original author): NaN values were already removed upstream.
DATA_PATH = './data/'

# Column labels for the feature matrix and the target matrix, in column order.
X_description = [
    'AddL', 'AddR', 'AbdL', 'AbdR', 'ErL', 'ErR', 'IrL', 'IrR',
    'ExtL', 'ExtR', 'FlexL', 'FlexR', 'Dyn',
]
y_description = ['BodyBoost', 'MeanEggBeater', 'MaxEggBeater']

# `loadmat` returns a dict keyed by MATLAB variable name; pull out the arrays.
X_mat = sio.loadmat(os.path.join(DATA_PATH, 'X.mat'))['TableauNorm']
y_mat = sio.loadmat(os.path.join(DATA_PATH, 'y.mat'))['TestDataN']

In [3]:
# Wrap the raw arrays in labelled DataFrames (one named column per feature / target).
X = pd.DataFrame(data=X_mat, columns=X_description)
y = pd.DataFrame(data=y_mat, columns=y_description)

In [4]:
# Peek at the first feature row to check the column labels.
X.head(n=1)

Unnamed: 0,AddL,AddR,AbdL,AbdR,ErL,ErR,IrL,IrR,ExtL,ExtR,FlexL,FlexR,Dyn
0,3.342563,3.406646,3.047468,2.90981,1.216772,1.320411,0.866297,1.072785,2.091772,2.906646,2.734177,3.15269,0.134644


In [5]:
# Peek at the first target row to check the column labels.
y.head(n=1)

Unnamed: 0,BodyBoost,MeanEggBeater,MaxEggBeater
0,8.75,7.333333,8.0


## 2. Classification

In [6]:
# Label Encoder
from sklearn.preprocessing import LabelEncoder

# Encode every target column independently: each distinct score in a column
# becomes an integer class index.
# The fitted encoders are kept (one per target) so predicted class indices can
# be mapped back to the original scores with
# `label_encoders[col].inverse_transform(...)`.
label_encoders = {col: LabelEncoder().fit(y[col]) for col in y.columns}
y_cat = y.apply(lambda scores: label_encoders[scores.name].transform(scores))

In [7]:
# split data
from sklearn.model_selection import train_test_split

# Hold out 20 % of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_cat,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

In [8]:
from sklearn.multioutput import MultiOutputClassifier
from xgboost import XGBClassifier

# Multi-output wrapper around an XGBoost classifier with default hyper-parameters,
# trained on the encoded targets.
xgboost_multi_clf = MultiOutputClassifier(estimator=XGBClassifier())
xgboost_multi_clf.fit(X_train, y_train)



MultiOutputClassifier(estimator=XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=100, nthread=-1,
       objective='binary:logistic', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1),
           n_jobs=1)

In [9]:
# Predict the encoded class of every target for the held-out samples.
y_pred_clf = xgboost_multi_clf.predict(X=X_test)

In [10]:
# evaluate
from sklearn.metrics import classification_report

# Print one classification report per target: column i of the predictions is
# compared with column i of `y_test` and labelled with `y_description[i]`.
for i in range(y_pred_clf.shape[1]):
    print(y_description[i])
    print(classification_report(y_test.iloc[:, i], y_pred_clf[:, i]))
    print('-' * 10)

BodyBoost
             precision    recall  f1-score   support

          0       0.00      0.00      0.00         0
          1       0.00      0.00      0.00         1
          2       0.00      0.00      0.00         0
          3       0.00      0.00      0.00         1
          4       0.00      0.00      0.00         1
          5       0.00      0.00      0.00         2
          6       0.00      0.00      0.00         4
          7       0.00      0.00      0.00         2

avg / total       0.00      0.00      0.00        11

----------
MeanEggBeater
             precision    recall  f1-score   support

          2       0.00      0.00      0.00         2
          4       0.00      0.00      0.00         0
          5       0.00      0.00      0.00         1
          7       0.00      0.00      0.00         1
         10       0.00      0.00      0.00         0
         12       0.00      0.00      0.00         1
         13       0.00      0.00      0.00         0
       

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [11]:
# Show the raw predictions: one row per test sample, one column of encoded
# class indices per target.
y_pred_clf

array([[ 5,  4,  2],
       [ 2, 13,  4],
       [ 5, 19,  5],
       [ 1, 15,  7],
       [ 7, 15,  4],
       [ 5, 19,  5],
       [ 1, 10,  3],
       [ 0,  4,  4],
       [ 3, 19,  5],
       [ 7, 15,  4],
       [ 5,  4,  5]])

## 3. Regression

In [13]:
# split data
from sklearn.model_selection import train_test_split

# Same 80/20 split and seed as before, but with the raw (un-encoded) scores as targets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

In [14]:
# train model
from sklearn.multioutput import MultiOutputRegressor
from xgboost import XGBRegressor

# Multi-output wrapper around an XGBoost regressor with default hyper-parameters.
xgboost_multi_reg = MultiOutputRegressor(estimator=XGBRegressor())
xgboost_multi_reg.fit(X_train, y_train)

MultiOutputRegressor(estimator=XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=100, nthread=-1,
       objective='reg:linear', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1),
           n_jobs=1)

In [15]:
# predict
# Predicted scores for the held-out samples, one column per target.
y_pred_reg = xgboost_multi_reg.predict(X=X_test)

In [17]:
# evaluate
from sklearn.metrics import mean_squared_error

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Parameters
    ----------
    y_true : array-like
        Reference values. They are used as the denominator, so a zero entry
        produces an infinite or NaN result.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        ``mean(|(y_true - y_pred) / y_true|) * 100``.
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    relative_errors = np.abs((actual - predicted) / actual)
    return relative_errors.mean() * 100

# Report RMSE and MAPE for every target: column i of the predictions is
# compared with column i of `y_test` and labelled with `y_description[i]`.
for i in range(y_pred_reg.shape[1]):
    target_true = y_test.iloc[:, i]
    target_pred = y_pred_reg[:, i]

    print(y_description[i])
    # RMSE is reported instead of MSE so the error is in the same units as the score.
    rmse = np.sqrt(mean_squared_error(target_true, target_pred))
    print(f'rmse = {rmse:.3f}')
    mape = mean_absolute_percentage_error(target_true, target_pred)
    print(f'mape = {mape:.3f}')
    print('-' * 10)

BodyBoost
rmse = 0.564
mape = 5.626
----------
MeanEggBeater
rmse = 0.818
mape = 8.808
----------
MaxEggBeater
rmse = 1.091
mape = 11.606
----------
