<div class="alert alert-block alert-success">
    <h1 style="margin-bottom:0.5cm"><b>Train decoder</b></h1>
    <p style="margin-bottom:0.25cm">This notebook trains a 'decoder' that predicts finger positions from electromyographic (EMG) signals, using angular finger-position data and EMG data recorded from specific forearm muscles. It follows the usual steps of a data-analysis process and tests different alternatives for building the model.</p>
</div>

## Import all necessary packages

In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.base import clone
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from yellowbrick.classifier import ROCAUC, PrecisionRecallCurve
from yellowbrick.classifier import confusion_matrix

## Data preprocessing

[ToDo]
- For now, all preprocessing is done in the Noraxon software

## Load data

In [None]:
# Load the recording from CSV; the relative path is resolved against the
# notebook's working directory.
data = pd.read_csv("data/data16.csv")
# Bare last expression so Jupyter renders the loaded frame.
data

### Rename column

In [None]:
# Rename the 'index' finger column to 'indice' so that attribute-style access
# (data.indice, used by the histogram cell) does not collide with DataFrame.index.
data = data.rename(columns={'index': 'indice'})

## Exploratory Data Analysis (EDA)

### Delete first and last rows "without data"

In [None]:
# Remove the leading row (index label 0), which carries no data.
# The labels are passed in one call instead of being dropped one at a time.
data.drop(index=list(range(0, 1)), inplace=True)

In [None]:
# Remove the trailing rows without data: index labels 2686 through 2715 inclusive.
# NOTE(review): these labels are hard-coded, presumably for data/data16.csv — confirm
# before reusing the notebook with another recording.
data.drop(index=list(range(2686, 2716)), inplace=True)

In [None]:
# Render the frame again to check the result of the row drops above.
data

### Preprocessing for classification

In [None]:
# Derive one binary target per finger: 1 when the sample's angle is above that
# finger's mean angle over the whole frame, 0 otherwise.
for finger in ('thumb', 'indice', 'long', 'ring', 'small'):
    angle = data[finger]
    data[f'{finger}_bool'] = (angle > angle.mean()).astype(int)

In [None]:
# Render the frame to check the newly added *_bool columns.
data

### Manage missing (NaN) values

In [None]:
# Total number of missing cells across every column of the frame.
print(f"NaN values in data: {data.isnull().values.sum()}")

### Check duplicated rows

In [None]:
#print("Duplicated rows in data: "+str(data.duplicated().sum()))
#data.drop_duplicates(inplace=True)
#data.duplicated().sum()

### Data information

In [None]:
# (rows, columns) of the frame after cleaning and adding the *_bool columns.
data.shape

In [None]:
# Storage dtype of every column.
data.dtypes

In [None]:
# Per-column non-null counts and dtypes, plus overall memory usage.
data.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
data.describe()

### Histograms

In [None]:
def _plot_hist(series, ax, title, xlabel):
    """Draw a 15-bin histogram of `series` on `ax` using the notebook's shared styling."""
    series.plot(kind='hist', bins=15, color='lightsteelblue', edgecolor='black', ax=ax)
    ax.set_title(title, size=14)
    ax.set_xlabel(xlabel, size=12)
    ax.set_ylabel('Frequency', size=12)


# 3 x 5 grid: row 1 = finger angles, row 2 = binarised finger positions,
# row 3 = EMG channels (only four, so the last panel stays unused).
fig, (ax1, ax2, ax3) = plt.subplots(nrows=3, ncols=5, figsize=(30, 20))

# (column name, display name) for each finger, in panel order.
fingers = [('thumb', 'Thumb'), ('indice', 'Index'), ('long', 'Long'),
           ('ring', 'Ring'), ('small', 'Small')]

for col, (column, name) in enumerate(fingers):
    # Each panel is labelled through its own axes object, so a title can never
    # land on a neighbouring panel (previously the 'Small' labels were written
    # onto the 'Ring' axes).
    _plot_hist(data[column], ax1[col],
               f'Distribution of {name} angle',
               f'{name} angle (º)')
    _plot_hist(data[f'{column}_bool'], ax2[col],
               f'Distribution of {name} position',
               f'{name} position (0-closed/1-opened)')

for col in range(4):
    name = f'Muscle{col + 1}'
    # All four EMG panels are titled "activity" (Muscle3/Muscle4 used to say "angle").
    _plot_hist(data[f'muscle{col + 1}'], ax3[col],
               f'Distribution of {name} activity',
               f'{name} EMG (uV)')

# There is no fifth EMG channel: hide the empty frame in the bottom-right corner.
ax3[4].axis('off')

plt.show()

### Pairplot

In [None]:
# Pairwise scatter plots of the four EMG channels (lower triangle only, no
# diagonal plots), coloured by the binarised index-finger position.
pairplot_columns = ['muscle1', 'muscle2', 'muscle3', 'muscle4', 'indice_bool']
sns.pairplot(
    data[pairplot_columns],
    hue="indice_bool",
    corner=True,
    diag_kind=None,
    grid_kws={'despine': False},
);

### Correlation matrix
Tips:
* Selecting only moderate to strong relations with target value
* Remove strongly correlated features

In [None]:
# Pairwise correlation between the four EMG channels, drawn as an annotated
# heatmap whose diverging colour map is centred on zero.
emg_channels = ['muscle1', 'muscle2', 'muscle3', 'muscle4']
corrmat = data[emg_channels].corr(numeric_only=True)
sns.heatmap(
    corrmat,
    annot=True,
    square=True,
    cmap="coolwarm",
    center=0,
);

### Checking variance of all features
Set a threshold value and remove features accordingly (very low variance across all samples means no predictive power)

In [None]:
# Variance of each EMG channel.
data[['muscle1','muscle2','muscle3','muscle4']].var()

In [None]:
# Flag the channels whose variance exceeds the threshold (hard-coded to 100 here).
data[['muscle1','muscle2','muscle3','muscle4']].var() > 100

## Linear Regression

### Model Training

#### Train-test splitting

In [None]:
# Features: the four EMG channels. Targets: the five continuous finger angles.
feature_columns = ['muscle1', 'muscle2', 'muscle3', 'muscle4']
target_columns = ['thumb', 'indice', 'long', 'ring', 'small']
X = data[feature_columns]
y = data[target_columns]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
for subset in (X_train, y_train, X_test, y_test):
    print(subset.shape)

#### Feature scaling (Standardization or Normalization)

In [None]:
# Standardization (disabled): the alternative below lives inside a bare string
# literal, so none of it is executed.
'''
feature_scaler = StandardScaler().fit(X_train)
target_scaler = StandardScaler().fit(y_train)

scaled_X_train = feature_scaler.transform(X_train)
scaled_X_test = feature_scaler.transform(X_test)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)
'''

# Normalization: learn the min-max range from the training features only, then
# apply that same transform to both splits.
# NOTE(review): scaled_X_train / scaled_X_test are not read by any later cell in
# this notebook; the regression below is fitted on the unscaled X_train.
feature_scaler = MinMaxScaler()
feature_scaler.fit(X_train)
#target_scaler = MinMaxScaler().fit(y_train)

scaled_X_train, scaled_X_test = (
    feature_scaler.transform(split) for split in (X_train, X_test)
)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)

#### Create model

In [None]:
# Ordinary least-squares regressor created with scikit-learn's default settings.
lr_model = LinearRegression()

#### Train model

In [None]:
# Fit one multi-output regression: four EMG channels -> five finger angles.
# The unscaled training features are used here, not scaled_X_train.
lr_model.fit(X_train, y_train)

In [None]:
# Fitted intercept term(s) of the regression.
lr_model.intercept_

In [None]:
# Fitted coefficients of the regression.
lr_model.coef_

### Model Evaluation

#### Predict with the model

In [None]:
# Predict the held-out rows and wrap the raw array in a DataFrame that reuses the
# target column names, so each finger can be addressed by name below.
# The new frame gets a fresh 0..n-1 index; it does not carry y_test's row labels.
y_pred = pd.DataFrame(lr_model.predict(X_test), columns=y_test.columns)

In [None]:
# Export the predictions as CSV text without the index column; the output file
# is named "y_pred" (no extension) in the working directory.
y_pred.to_csv("y_pred", index=False)

In [None]:
# Export the ground-truth test targets the same way, to a file named "y_test".
y_test.to_csv("y_test", index=False)

### Scores and Plotting function

In [None]:
def scores(y_test, y_pred):
    """Print a fixed set of regression metrics for one prediction vector.

    Parameters
    ----------
    y_test : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values, in the same order as ``y_test``.

    Returns
    -------
    None
        The report is written to standard output only.
    """
    report_parts = [
        "Model performance on test data\n",
        f"- Explained variance score: {metrics.explained_variance_score(y_test, y_pred):.4f}\n",
        f"- Max error:                {metrics.max_error(y_test, y_pred):.4f}\n",
        f"- Mean absolute error:      {metrics.mean_absolute_error(y_test, y_pred):.4f}\n",
        f"- Mean squared error:       {metrics.mean_squared_error(y_test, y_pred):.4f}\n",
        f"- Median absolute error:    {metrics.median_absolute_error(y_test, y_pred):.4f}\n",
        f"- R2 score:                 {metrics.r2_score(y_test, y_pred):.4f}\n",
    ]
    # print() separates multiple arguments with a single space; joining the
    # pieces on " " yields exactly the same text.
    print(" ".join(report_parts))

In [None]:
def plot_vector(y_values, color='b', label=None):
    """Plot a sequence against its position (0, 1, 2, ...) on the current axes.

    Several calls in a row overlay their traces on the same figure; the caller
    is responsible for adding a title and calling ``plt.show()``.

    Parameters
    ----------
    y_values : sequence
        Values to plot; only their order is used for the x axis, not any index
        labels they may carry.
    color : str, default 'b'
        Matplotlib colour of the line.
    label : str or None, default None
        Legend entry for the line. When None, no legend is requested.

    Returns
    -------
    None
    """
    x_values = range(len(y_values))
    plt.plot(x_values, y_values, color=color, label=label)
    plt.xlabel('Index')
    plt.ylabel('Y Values')
    plt.grid(True)
    # Only ask for a legend when this trace is labelled; calling plt.legend()
    # on axes without any labelled artist makes matplotlib emit a warning.
    if label is not None:
        plt.legend()

#### Evaluate All

In [None]:
# Aggregate metrics over all five finger angles at once. This mirrors scores()
# without the max-error line (presumably because that metric does not accept
# multi-output targets — confirm).
overall_report = [
    "Model performance on test data\n",
    f"- Explained variance score: {metrics.explained_variance_score(y_test, y_pred):.4f}\n",
    f"- Mean absolute error:      {metrics.mean_absolute_error(y_test, y_pred):.4f}\n",
    f"- Mean squared error:       {metrics.mean_squared_error(y_test, y_pred):.4f}\n",
    f"- Median absolute error:    {metrics.median_absolute_error(y_test, y_pred):.4f}\n",
    f"- R2 score:                 {metrics.r2_score(y_test, y_pred):.4f}\n",
]
# Joining on " " reproduces the spacing print() inserts between several arguments.
print(" ".join(overall_report))

#### Evaluate Thumb

In [None]:
# Regression metrics for the thumb angle only.
scores(y_test["thumb"], y_pred["thumb"])

In [None]:
# Overlay measured (red) and predicted (green) thumb angles, sample by sample
# in test-set order.
for series, colour, tag in ((y_test.thumb, 'r', 'Test'), (y_pred.thumb, 'g', 'Pred')):
    plot_vector(series, color=colour, label=tag)
plt.title('Thumb')
plt.show()

In [None]:
# Calculate the correlation coefficient
correlation_coefficient = np.corrcoef(y_test.thumb, y_pred.thumb)[0, 1]

print("Correlation coefficient: {:.2f}".format(correlation_coefficient))

#### Evaluate Index

In [None]:
# Regression metrics for the index-finger angle only.
scores(y_test["indice"], y_pred["indice"])

In [None]:
# Overlay measured (red) and predicted (green) index-finger angles, sample by
# sample in test-set order.
for series, colour, tag in ((y_test.indice, 'r', 'Test'), (y_pred.indice, 'g', 'Pred')):
    plot_vector(series, color=colour, label=tag)
plt.title('Index')
plt.show()

In [None]:
# Calculate the correlation coefficient
correlation_coefficient = np.corrcoef(y_test.indice, y_pred.indice)[0, 1]

print("Correlation coefficient: {:.2f}".format(correlation_coefficient))

#### Evaluate Long

In [None]:
# Regression metrics for the long-finger angle only.
scores(y_test["long"], y_pred["long"])

In [None]:
# Overlay measured (red) and predicted (green) long-finger angles, sample by
# sample in test-set order.
for series, colour, tag in ((y_test.long, 'r', 'Test'), (y_pred.long, 'g', 'Pred')):
    plot_vector(series, color=colour, label=tag)
plt.title('Long')
plt.show()

In [None]:
# Calculate the correlation coefficient
correlation_coefficient = np.corrcoef(y_test.long, y_pred.long)[0, 1]

print("Correlation coefficient: {:.2f}".format(correlation_coefficient))

#### Evaluate Ring

In [None]:
# Regression metrics for the ring-finger angle only.
scores(y_test["ring"], y_pred["ring"])

In [None]:
# Overlay measured (red) and predicted (green) ring-finger angles, sample by
# sample in test-set order.
for series, colour, tag in ((y_test.ring, 'r', 'Test'), (y_pred.ring, 'g', 'Pred')):
    plot_vector(series, color=colour, label=tag)
plt.title('Ring')
plt.show()

In [None]:
# Calculate the correlation coefficient
correlation_coefficient = np.corrcoef(y_test.ring, y_pred.ring)[0, 1]

print("Correlation coefficient: {:.2f}".format(correlation_coefficient))

#### Evaluate Small

In [None]:
# Regression metrics for the small-finger angle only.
scores(y_test["small"], y_pred["small"])

In [None]:
# Overlay measured (red) and predicted (green) small-finger angles, sample by
# sample in test-set order.
for series, colour, tag in ((y_test.small, 'r', 'Test'), (y_pred.small, 'g', 'Pred')):
    plot_vector(series, color=colour, label=tag)
plt.title('Small')
plt.show()

In [None]:
# Calculate the correlation coefficient
correlation_coefficient = np.corrcoef(y_test.small, y_pred.small)[0, 1]

print("Correlation coefficient: {:.2f}".format(correlation_coefficient))

### Save model

In [None]:
# Persist the regression fitted on the training split.
# NOTE(review): assumes the 'model/' directory already exists — TODO confirm.
joblib.dump(lr_model, 'model/lr_model.pkl')

"Re-train with all data" & save:

In [None]:
# Fit a second regression on every available row (train and test together) and
# persist it separately from the train-only model.
# X and y here are the regression features/targets defined in the train-test
# splitting cell above; this cell must run before the LDA sections rebind them.
lr_model_full = LinearRegression().fit(X, y)
joblib.dump(lr_model_full, 'model/lr_model_full.pkl')

## LDA

### Evaluate function

In [None]:
def evaluate_classifier_extended(model, X_train,X_test,y_train,y_test):
    """Print test-set metrics and draw diagnostic plots for a fitted classifier.

    The report consists of: weighted accuracy/precision/recall/F1, per-class
    precision, a yellowbrick confusion matrix, scikit-learn's classification
    report, ROC and precision-recall curves, and a two-feature decision-boundary
    plot over the training data.

    Parameters
    ----------
    model : fitted classifier
        Estimator that has already been fitted on ``X_train``. It is NOT
        modified by this function.
    X_train, X_test : pandas.DataFrame
        Feature matrices. ``X_train`` needs at least three columns because the
        boundary plot uses the columns at positions 0 and 2.
    y_train, y_test : array-like
        Class labels matching the rows of ``X_train`` / ``X_test``.

    Returns
    -------
    The same fitted ``model`` object that was passed in.
    """
    y_pred = model.predict(X_test)

    # Average scores
    print("Model performance on test data\n",
          f"- Accuracy:  {metrics.accuracy_score(y_test, y_pred):.4f}\n",
          f"- Precision: {metrics.precision_score(y_test, y_pred, average='weighted'):.4f}\n",
          f"- Recall:    {metrics.recall_score(y_test, y_pred, average='weighted'):.4f}\n",
          f"- F1 score:  {metrics.f1_score(y_test, y_pred, average='weighted'):.4f}\n",
          )
    # Scores for each class; joined dynamically so any number of classes works
    # (for two classes the text is identical to the former fixed format).
    per_class_precision = metrics.precision_score(y_test, y_pred, average=None)
    print("Model precision on test data for each class: "
          + " ".join(f"{value:.4f}" for value in per_class_precision))

    # Confusion matrix (yellowbrick quick method; draws the figure itself)
    confusion_matrix(model,
                     X_train,
                     y_train,
                     X_test,
                     y_test,
                     is_fitted=True,
                     support=True,
                     cmap='RdYlGn')

    # Report: classification_report expects (y_true, y_pred) in that order;
    # swapping them exchanges precision with recall and changes the support column.
    print(classification_report(y_test, y_pred))

    # ROC curves
    roc_viz = ROCAUC(model,
                     is_fitted=True,
                     per_class=True,
                     micro=False,
                     macro=False,
                     size=(900, 500))
    roc_viz.fit(X_train, y_train)
    roc_viz.score(X_test, y_test)
    roc_viz.show();

    # Precision-recall curve; the encoder is only used to list the class labels.
    encoder = LabelEncoder().fit(y_train)
    pr_viz = PrecisionRecallCurve(model,
                                  classes=encoder.classes_,
                                  iso_f1_curves=True,
                                  per_class=True,
                                  micro=False,
                                  size=(900, 500))
    pr_viz.fit(X_train, y_train)
    pr_viz.score(X_test, y_test)
    pr_viz.show()

    # Decision boundaries in the plane of two features (positions 0 and 2).
    selected_features = X_train.columns[[0, 2]]
    x_feature, y_feature = selected_features

    # A 2-D boundary needs a 2-feature classifier. Fit an unfitted copy so the
    # caller's full-feature model stays intact, and train it on exactly the two
    # features that are drawn on the axes. A plain array is used so predicting
    # on the raw grid below matches how the copy was fitted.
    boundary_model = clone(model).fit(X_train[selected_features].to_numpy(), y_train)

    N = 300  # grid resolution per axis

    def _axis_values(column):
        """Evenly spaced values covering the column's range, padded by 0.2 on each side."""
        values = X_train[column]
        return np.linspace(np.floor(values.min()) - 0.2,
                           np.ceil(values.max()) + 0.2,
                           N)

    grid_x, grid_y = np.meshgrid(_axis_values(x_feature), _axis_values(y_feature))

    # Training points, coloured by whether the label is non-zero. The hue column
    # keeps its historical name "IMP_GT5", which is also shown as legend title.
    X_train_help = X_train.copy()
    X_train_help["IMP_GT5"] = np.asarray(y_train) != 0

    g = sns.FacetGrid(X_train_help, hue="IMP_GT5", height=5)
    g.map(plt.scatter, x_feature, y_feature).add_legend()

    # Classify every grid node in one vectorised call instead of N*N single calls.
    grid_points = np.column_stack([grid_x.ravel(), grid_y.ravel()])
    Z = boundary_model.predict(grid_points).reshape(grid_x.shape)

    # Plot the filled and boundary contours
    g.ax.contourf(grid_x, grid_y, Z, 2, alpha=.1, colors=('blue', 'orange', 'green'))
    g.ax.contour(grid_x, grid_y, Z, 2, alpha=1, colors=('blue', 'orange', 'green'))

    # Add axis and title
    g.ax.set_xlabel(x_feature)
    g.ax.set_ylabel(y_feature)
    g.ax.set_title('Decision Boundaries with Train Data Scaled')
    g.fig.set_figheight(8)
    g.fig.set_figwidth(12)

    return model

### Thumb

#### Train-test splitting

In [None]:
# Features: the four EMG channels. Target: binarised thumb position.
feature_columns = ['muscle1', 'muscle2', 'muscle3', 'muscle4']
X = data[feature_columns]
y = data['thumb_bool']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
for subset in (X_train, y_train, X_test, y_test):
    print(subset.shape)

#### Feature scaling (Standardization or Normalization)

In [None]:
# Standardization (disabled): the alternative below lives inside a bare string
# literal, so none of it is executed.
'''
feature_scaler = StandardScaler().fit(X_train)
target_scaler = StandardScaler().fit(y_train)

scaled_X_train = feature_scaler.transform(X_train)
scaled_X_test = feature_scaler.transform(X_test)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)
'''

# Normalization: learn the min-max range from the training features only, then
# apply that same transform to both splits.
# NOTE(review): scaled_X_train / scaled_X_test are not read by any later cell in
# this notebook; the thumb classifier is fitted on the unscaled X_train.
feature_scaler = MinMaxScaler()
feature_scaler.fit(X_train)
#target_scaler = MinMaxScaler().fit(y_train)

scaled_X_train, scaled_X_test = (
    feature_scaler.transform(split) for split in (X_train, X_test)
)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)

#### Create model

In [None]:
# LDA classifier for the binarised thumb position, created with default settings.
lda_thumb = LinearDiscriminantAnalysis()

#### Train model

In [None]:
# Fit on the training split (labels flattened to 1-D), then predict the
# held-out features.
lda_thumb.fit(X_train, np.ravel(y_train))
y_pred = lda_thumb.predict(X_test)

#### Evaluate model

In [None]:
# Metrics and diagnostic plots for the thumb classifier. The call is the cell's
# last expression, so the returned estimator is rendered as cell output.
evaluate_classifier_extended(
    model=lda_thumb,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

### Index

#### Train-test splitting

In [None]:
# Features: the four EMG channels. Target: binarised index-finger position.
feature_columns = ['muscle1', 'muscle2', 'muscle3', 'muscle4']
X = data[feature_columns]
y = data['indice_bool']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
for subset in (X_train, y_train, X_test, y_test):
    print(subset.shape)

#### Feature scaling (Standardization or Normalization)

In [None]:
# Standardization (disabled): the alternative below lives inside a bare string
# literal, so none of it is executed.
'''
feature_scaler = StandardScaler().fit(X_train)
target_scaler = StandardScaler().fit(y_train)

scaled_X_train = feature_scaler.transform(X_train)
scaled_X_test = feature_scaler.transform(X_test)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)
'''

# Normalization: learn the min-max range from the training features only, then
# apply that same transform to both splits.
# NOTE(review): scaled_X_train / scaled_X_test are not read by any later cell in
# this notebook; the index classifier is fitted on the unscaled X_train.
feature_scaler = MinMaxScaler()
feature_scaler.fit(X_train)
#target_scaler = MinMaxScaler().fit(y_train)

scaled_X_train, scaled_X_test = (
    feature_scaler.transform(split) for split in (X_train, X_test)
)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)

#### Create model

In [None]:
# LDA classifier for the binarised index-finger position, created with default settings.
lda_index = LinearDiscriminantAnalysis()

#### Train model

In [None]:
# Fit on the training split (labels flattened to 1-D), then predict the
# held-out features.
lda_index.fit(X_train, np.ravel(y_train))
y_pred = lda_index.predict(X_test)

#### Evaluate model

In [None]:
# Metrics and diagnostic plots for the index-finger classifier. The call is the
# cell's last expression, so the returned estimator is rendered as cell output.
evaluate_classifier_extended(
    model=lda_index,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

### Long

#### Train-test splitting

In [None]:
# Features: the four EMG channels. Target: binarised long-finger position.
feature_columns = ['muscle1', 'muscle2', 'muscle3', 'muscle4']
X = data[feature_columns]
y = data['long_bool']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
for subset in (X_train, y_train, X_test, y_test):
    print(subset.shape)

#### Feature scaling (Standardization or Normalization)

In [None]:
# Standardization (disabled): the alternative below lives inside a bare string
# literal, so none of it is executed.
'''
feature_scaler = StandardScaler().fit(X_train)
target_scaler = StandardScaler().fit(y_train)

scaled_X_train = feature_scaler.transform(X_train)
scaled_X_test = feature_scaler.transform(X_test)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)
'''

# Normalization: learn the min-max range from the training features only, then
# apply that same transform to both splits.
# NOTE(review): scaled_X_train / scaled_X_test are not read by any later cell in
# this notebook; the long-finger classifier is fitted on the unscaled X_train.
feature_scaler = MinMaxScaler()
feature_scaler.fit(X_train)
#target_scaler = MinMaxScaler().fit(y_train)

scaled_X_train, scaled_X_test = (
    feature_scaler.transform(split) for split in (X_train, X_test)
)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)

#### Create model

In [None]:
# LDA classifier for the binarised long-finger position, created with default settings.
lda_long = LinearDiscriminantAnalysis()

#### Train model

In [None]:
# Fit on the training split (labels flattened to 1-D), then predict the
# held-out features.
lda_long.fit(X_train, np.ravel(y_train))
y_pred = lda_long.predict(X_test)

#### Evaluate model

In [None]:
# Metrics and diagnostic plots for the long-finger classifier. The call is the
# cell's last expression, so the returned estimator is rendered as cell output.
evaluate_classifier_extended(
    model=lda_long,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

### Ring

#### Train-test splitting

In [None]:
# Features: the four EMG channels. Target: binarised ring-finger position.
feature_columns = ['muscle1', 'muscle2', 'muscle3', 'muscle4']
X = data[feature_columns]
y = data['ring_bool']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
for subset in (X_train, y_train, X_test, y_test):
    print(subset.shape)

#### Feature scaling (Standardization or Normalization)

In [None]:
# Standardization (disabled): the alternative below lives inside a bare string
# literal, so none of it is executed.
'''
feature_scaler = StandardScaler().fit(X_train)
target_scaler = StandardScaler().fit(y_train)

scaled_X_train = feature_scaler.transform(X_train)
scaled_X_test = feature_scaler.transform(X_test)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)
'''

# Normalization: learn the min-max range from the training features only, then
# apply that same transform to both splits.
# NOTE(review): scaled_X_train / scaled_X_test are not read by any later cell in
# this notebook; the ring-finger classifier is fitted on the unscaled X_train.
feature_scaler = MinMaxScaler()
feature_scaler.fit(X_train)
#target_scaler = MinMaxScaler().fit(y_train)

scaled_X_train, scaled_X_test = (
    feature_scaler.transform(split) for split in (X_train, X_test)
)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)

#### Create model

In [None]:
# LDA classifier for the binarised ring-finger position, created with default settings.
lda_ring = LinearDiscriminantAnalysis()

#### Train model

In [None]:
# Fit on the training split (labels flattened to 1-D), then predict the
# held-out features.
lda_ring.fit(X_train, np.ravel(y_train))
y_pred = lda_ring.predict(X_test)

#### Evaluate model

In [None]:
# Metrics and diagnostic plots for the ring-finger classifier. The call is the
# cell's last expression, so the returned estimator is rendered as cell output.
evaluate_classifier_extended(
    model=lda_ring,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

### Small

#### Train-test splitting

In [None]:
# Features: the four EMG channels. Target: binarised small-finger position.
feature_columns = ['muscle1', 'muscle2', 'muscle3', 'muscle4']
X = data[feature_columns]
y = data['small_bool']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
for subset in (X_train, y_train, X_test, y_test):
    print(subset.shape)

#### Feature scaling (Standardization or Normalization)

In [None]:
# Standardization (disabled): the alternative below lives inside a bare string
# literal, so none of it is executed.
'''
feature_scaler = StandardScaler().fit(X_train)
target_scaler = StandardScaler().fit(y_train)

scaled_X_train = feature_scaler.transform(X_train)
scaled_X_test = feature_scaler.transform(X_test)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)
'''

# Normalization: learn the min-max range from the training features only, then
# apply that same transform to both splits.
# NOTE(review): scaled_X_train / scaled_X_test are not read by any later cell in
# this notebook; the small-finger classifier is fitted on the unscaled X_train.
feature_scaler = MinMaxScaler()
feature_scaler.fit(X_train)
#target_scaler = MinMaxScaler().fit(y_train)

scaled_X_train, scaled_X_test = (
    feature_scaler.transform(split) for split in (X_train, X_test)
)

#scaled_y_train = target_scaler.transform(y_train)
#scaled_y_test = target_scaler.transform(y_test)

#### Create model

In [None]:
# LDA classifier for the binarised small-finger position, created with default settings.
lda_small = LinearDiscriminantAnalysis()

#### Train model

In [None]:
# Fit on the training split (labels flattened to 1-D), then predict the
# held-out features.
lda_small.fit(X_train, np.ravel(y_train))
y_pred = lda_small.predict(X_test)

#### Evaluate model

In [None]:
# Metrics and diagnostic plots for the small-finger classifier. The call is the
# cell's last expression, so the returned estimator is rendered as cell output.
evaluate_classifier_extended(
    model=lda_small,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

### Save models

In [None]:
# Persist the five per-finger LDA classifiers, one pickle file each.
# The objects are written in whatever state they are in when this cell runs.
# NOTE(review): assumes the 'model/' directory already exists — TODO confirm.
joblib.dump(lda_thumb, 'model/lda_thumb.pkl')
joblib.dump(lda_index, 'model/lda_index.pkl')
joblib.dump(lda_long, 'model/lda_long.pkl')
joblib.dump(lda_ring, 'model/lda_ring.pkl')
joblib.dump(lda_small, 'model/lda_small.pkl')

### "Re-train with all data" & save

In [None]:
#fullmodel = LinearRegression()
#fullmodel.fit(X, y)
#joblib.dump(fullmodel, 'model/fullmodel.pkl')

## EXTRA: Load model

In [None]:
#loaded_model = joblib.load('model/model.pkl')