In [17]:
import os
import pickle
from math import sqrt

import numpy as np
import pandas as pd
import tensorflow
from keras.layers import Dense, Input, concatenate, Activation, Dropout
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.utils import to_categorical
from matplotlib import pyplot
from micromlgen import port
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, f1_score, classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

In [18]:
# Single seed reused below for NumPy, TensorFlow and the train/test split.
RANDOM_SEED = 42

In [19]:
# Seed the global TensorFlow and NumPy random generators from the shared
# constant so repeated runs start from the same random state.
tensorflow.random.set_seed(seed=RANDOM_SEED)
np.random.seed(seed=RANDOM_SEED)

In [20]:
# Load the pickled feature array.
# NOTE(review): pickle.load can execute arbitrary code while deserializing —
# only point these paths at files you trust.
with open('data/X_paper.pkl', mode='rb') as features_file:
    X = pickle.load(features_file)

# Load the matching pickled labels.
with open('data/y_paper.pkl', mode='rb') as labels_file:
    y = pickle.load(labels_file)

In [21]:
# Show the dimensions of the loaded feature array.
X.shape

(1350, 5)

In [22]:
#X = np.concatenate((X[:250], X[750:1000], X[1500:]), axis=0)
#y = np.concatenate((y[:250], y[750:1000], y[1500:]), axis=0)

Split into train and test sets

In [23]:
# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the shared seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=RANDOM_SEED,
)

In [24]:
#y_train = pd.get_dummies(y_train).values
#y_test = pd.get_dummies(y_test).values

In [25]:
# Display the training labels as a quick sanity check of the split.
y_train

array([6, 4, 3, ..., 5, 4, 5], dtype=int32)

In [26]:
# Size of the second axis of the training array, i.e. the number of feature
# columns each sample has.
X_train.shape[1]

5

## Model Evaluation

### Random Forest

In [27]:
# Random forest with 50 trees. It is seeded from the notebook-wide
# RANDOM_SEED rather than a second hard-coded 42, so changing that one
# constant re-seeds the split and the model together.
model = RandomForestClassifier(n_estimators=50, random_state=RANDOM_SEED)
#model = MLPClassifier(random_state=RANDOM_SEED)

In [28]:
# Train the classifier on the training partition. fit() returns the
# estimator, so its repr is shown as this cell's output.
model.fit(X_train, y_train)

RandomForestClassifier(n_estimators=50, random_state=42)

In [29]:
# Predict class labels for the held-out test samples.
y_pred = model.predict(X_test)

In [30]:
# Weighted F1 on the test set: per-class F1 scores averaged using each
# class's support as its weight.
f1_score(y_true=y_test, y_pred=y_pred, average="weighted")

0.7120090552468551

In [31]:
# Per-class precision, recall, F1 and support on the test set.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.73      0.63      0.68        30
           1       0.64      0.95      0.77        40
           2       0.61      0.35      0.44        40
           3       0.82      0.70      0.76        40
           4       0.65      0.88      0.74        40
           5       0.71      0.55      0.62        40
           6       0.93      1.00      0.96        40

    accuracy                           0.73       270
   macro avg       0.73      0.72      0.71       270
weighted avg       0.73      0.73      0.71       270



In [32]:
# Confusion matrix on the test set (rows: true class, columns: predicted class).
test_confusion = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(test_confusion)

[[19  3  5  0  2  1  0]
 [ 1 38  0  0  1  0  0]
 [ 6 15 14  0  5  0  0]
 [ 0  0  0 28  3  6  3]
 [ 0  1  2  0 35  2  0]
 [ 0  2  2  6  8 22  0]
 [ 0  0  0  0  0  0 40]]


# Export the model and save it to a file

In [17]:
# Generate the exported model source first, then write it out. Opening a file
# with 'w' truncates it immediately, so calling port() before open() means a
# failed conversion cannot leave behind an empty or clobbered header.
header_source = port(model, optimize=False)
with open('rf_model_paper_3.h', 'w') as header_file:  # change path if needed
    header_file.write(header_source)

In [18]:
# Persist the fitted estimator with pickle so it can be reloaded from Python.
# The target directory is created first (no-op if it already exists) so the
# dump does not fail with FileNotFoundError when 'models/' is missing.
model_path = 'models/rf_model_paper_3.pkl'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, 'wb') as model_file:
    pickle.dump(model, model_file)
