# Import Libraries

In [24]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from math import sqrt
from sklearn.metrics import roc_auc_score, f1_score, classification_report, confusion_matrix, accuracy_score
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error, classification_report
from keras.models import Sequential, Model 
from keras.layers import Dense, Input, concatenate, Activation, Dropout
from keras.optimizers import Adam
import tensorflow
from keras.utils import to_categorical
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from micromlgen import port
import m2cgen as m2c

In [25]:
# Single seed reused by the NumPy/TensorFlow seeding and the train/test split below.
RANDOM_SEED = 42

In [26]:
# Seed the global NumPy and TensorFlow RNGs so that a Restart & Run All
# starts from the same random state every time.
np.random.seed(RANDOM_SEED)
# TensorFlow 2.x exposes the global seed as tensorflow.random.set_seed; the
# top-level tensorflow.set_random_seed only exists on 1.x installs. Prefer the
# current API and fall back to the legacy one so the cell runs on both.
if hasattr(tensorflow, "random") and hasattr(tensorflow.random, "set_seed"):
    tensorflow.random.set_seed(RANDOM_SEED)
else:
    tensorflow.set_random_seed(RANDOM_SEED)

# Load Data

In [27]:
# X and y are read from two separate pickle files (presumably the feature
# matrix and the matching labels — confirm against whatever produced them).
# pickle.load can execute arbitrary code, so only point these paths at files
# from a trusted source.
with open('../data/X_paper.pkl', 'rb') as features_file:
    X = pickle.load(features_file)

with open('../data/y_paper.pkl', 'rb') as labels_file:
    y = pickle.load(labels_file)

In [28]:
# Keep three row ranges (0-249, 750-999 and 1500 onwards) and drop the rest.
# The ranges are defined once and applied to both arrays so X and y stay aligned.
# NOTE: this cell overwrites X and y, so re-running it without re-running the
# load cell slices the already-reduced arrays a second time.
kept_rows = (slice(None, 250), slice(750, 1000), slice(1500, None))
X = np.concatenate([X[rows] for rows in kept_rows], axis=0)
y = np.concatenate([y[rows] for rows in kept_rows], axis=0)

In [29]:
# Hold out 20% of the rows for testing. stratify=y keeps the class proportions
# the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=RANDOM_SEED,
)

# Train Model

In [30]:
# XGBoost classifier built with no arguments, i.e. library-default
# hyperparameters (the repr printed below lists the values in effect for the
# saved run).
# NOTE(review): the classification report further down shows labels 0, 1, 4, 5;
# recent XGBoost releases appear to require labels 0..n_classes-1 — confirm the
# installed version accepts non-contiguous labels, or encode y first.
model = XGBClassifier()
# Left as the last expression so the notebook displays the fitted estimator.
model.fit(X_train, y_train)






XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, objective='multi:softprob', predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=None)

In [31]:
# Predict on both splits in one pass so train and test metrics can be compared.
pred_train, pred_test = (
    model.predict(features) for features in (X_train, X_test)
)


In [32]:
# Fraction of correctly classified rows on each split.
acc_train = accuracy_score(y_true=y_train, y_pred=pred_train)
acc_test = accuracy_score(y_true=y_test, y_pred=pred_test)

In [33]:
# Show train and test accuracy side by side; a large gap would point to overfitting.
print(f'Train: {acc_train}, Test: {acc_test}')

Train: 0.83, Test: 0.82


In [34]:
# Test-set F1 with average="weighted": per-class F1 scores averaged by class support.
# Left as the last expression so the notebook displays the value.
f1_score(y_true=y_test, y_pred=pred_test, average="weighted")

0.8011049060229389

In [35]:
# Per-class precision / recall / F1 on the test split. print() is needed here
# because the report is a multi-line string.
test_report = classification_report(y_true=y_test, y_pred=pred_test)
print(test_report)

              precision    recall  f1-score   support

           0       0.87      0.90      0.89        30
           1       0.86      0.95      0.90        20
           4       0.77      0.85      0.81        40
           5       0.67      0.20      0.31        10

    accuracy                           0.82       100
   macro avg       0.79      0.73      0.73       100
weighted avg       0.81      0.82      0.80       100



In [36]:
# Test-set confusion matrix: rows are true classes, columns are predicted
# classes, both in sorted label order.
test_confusion = confusion_matrix(y_true=y_test, y_pred=pred_test)
print(test_confusion)

[[27  1  2  0]
 [ 0 19  1  0]
 [ 3  2 34  1]
 [ 1  0  7  2]]


# Export Model

In [38]:
# Generate the C source for the trained model BEFORE opening the output file:
# opening with mode 'w' truncates it immediately, so a failure inside
# export_to_c would otherwise leave an empty header (or wipe a previous export).
c_source = m2c.export_to_c(model)
with open('xgb_model_paper.h', 'w') as f:  # change path if needed
    f.write(c_source)