### Imports

In [1]:
import sys
# Extend the module search path with the sibling ../ml/ directory. The local
# modules imported in the next cell (preprocessing, model, metrics) presumably
# live there — confirm against the repository layout.
sys.path.append("../ml/")

In [2]:
import os

import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier

from preprocessing import splip_x_y, generate_train_test, scale_data
from model import Classifier
from metrics import classification_metrics, multiple_run_metrics

### Reading data

In [3]:
# Build the CSV location relative to the notebook's working directory and load
# it into a DataFrame.
filename = 'diabetes.csv'
path = os.path.join('../data/', filename)
df = pd.read_csv(path)
# Preview the first rows as a quick sanity check of the columns and values.
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


### Single run of MLP model

In [4]:
# Seed NumPy's global generator before any model is built or fitted.
# scikit-learn components left at random_state=None draw from this global
# generator, so a fresh "Restart & Run All" reproduces the same numbers while
# successive fits (e.g. the repeated runs further down) still differ from one
# another.
# NOTE(review): this only covers the train/test split if Classifier does not
# set its own random state — confirm in model.py.
SEED = 42
np.random.seed(SEED)

# Size the single hidden layer to the number of input features.
# Assumes the target is the only non-feature column in `df`.
nvar = df.shape[1] - 1

mlp = MLPClassifier(
    hidden_layer_sizes=(nvar,),
    activation='tanh',
    solver='lbfgs',
    alpha=1.0,       # L2 regularisation strength
    verbose=False,
    max_iter=1000,
)

# Display the estimator so the effective (non-default) parameters are recorded.
mlp

MLPClassifier(activation='tanh', alpha=1.0, hidden_layer_sizes=(8,),
              max_iter=1000, solver='lbfgs')

In [5]:
# Wrap the configured MLP in the project's Classifier helper; its single_run
# and multiple_runs methods are used in the cells below.
classifier = Classifier(model=mlp)
classifier

<model.Classifier at 0x4fb6448>

In [6]:
# Run the classifier once on `df` with 'Outcome' as the target.
# NOTE(review): test_size=0.25 / stratify=True presumably mean a 75/25
# train/test split stratified on the target — confirm in model.Classifier.
result = classifier.single_run(df=df, target_col='Outcome', test_size=0.25, stratify=True)

In [7]:
# Inspect which entries the single run returned.
print(result.keys())

dict_keys(['train', 'test'])


### Metrics

In [8]:
# Each entry of `result` is unpacked into classification_metrics, which returns
# four values: accuracy, precision, recall and F-score.
# NOTE(review): the recorded output suggests accuracy is a scalar while the
# other three are per-class arrays — confirm in metrics.py.
train_acc, train_prec, train_rec, train_fscore = classification_metrics(*result['train'])
test_acc, test_prec, test_rec, test_fscore = classification_metrics(*result['test'])

In [9]:
print('accuracy:', train_acc.round(2), test_acc.round(2))
print('precision:', train_prec.round(2), test_prec.round(2))
print('recall:', train_rec.round(2), test_rec.round(2))
print('fscore:', train_fscore.round(2), test_fscore.round(2))

accuracy: 0.84 0.73
precision: [0.86 0.8 ] [0.77 0.64]
recall: [0.9  0.74] [0.84 0.54]
fscore: [0.88 0.77] [0.8  0.58]


### Multiple runs

In [10]:
# Repeat the experiment with n=30, using the same test_size and stratify
# settings as the single run above.
# This rebinds `result`, so the single-run output is no longer reachable
# through that name after this cell.
# NOTE(review): scaler=None presumably means the features are used unscaled —
# confirm in model.Classifier.
result = classifier.multiple_runs(n=30, df=df, target_col='Outcome', test_size=0.25, stratify=True, scaler=None)

In [11]:
# Quick look at the container returned by multiple_runs: size, type and keys.
len(result), type(result), result.keys()

(2, dict, dict_keys(['train', 'test']))

In [12]:
# Aggregate the training-side results of the repeated runs.
# The returned dict has 'mean' and 'std' entries; in the recorded output the
# 'std' entry only covers accuracy.
train_res = multiple_run_metrics(result['train'])
print(train_res)

{'mean': accuracy                                     0.847467
precision                [0.8671666666666665, 0.8061]
recall       [0.9041333333333333, 0.7413666666666668]
fscore       [0.8852666666666666, 0.7723333333333333]
dtype: object, 'std': accuracy    0.014222
dtype: float64}


In [13]:
# Confirm the keys of the aggregated training summary.
train_res.keys()

dict_keys(['mean', 'std'])

In [14]:
# Aggregate the test-side results of the repeated runs, in the same way as the
# training-side results.
test_res = multiple_run_metrics(result['test'])
print(test_res)

{'mean': accuracy                                     0.737567
precision    [0.7860333333333333, 0.6378333333333333]
recall       [0.8213333333333335, 0.5809666666666667]
fscore       [0.8027666666666666, 0.6061333333333335]
dtype: object, 'std': accuracy    0.028003
dtype: float64}


In [21]:
# Number of decimal places used when rounding the summary table below.
NDIGITS = 2

In [26]:
# Side-by-side table of the mean train/test metrics, rounded to NDIGITS places.
# Each row is one metric; the metric names double as the row index.
metric_names = ['accuracy', 'precision', 'recall', 'fscore']
pd.DataFrame(
    data=[
        (
            train_res['mean'][metric].round(NDIGITS),
            test_res['mean'][metric].round(NDIGITS),
        )
        for metric in metric_names
    ],
    columns=['train_mean', 'test_mean'],
    index=metric_names,
)

Unnamed: 0,train_mean,test_mean
accuracy,0.85,0.74
precision,"[0.87, 0.81]","[0.79, 0.64]"
recall,"[0.9, 0.74]","[0.82, 0.58]"
fscore,"[0.89, 0.77]","[0.8, 0.61]"
