# Classification: Multi-layer Perceptron Classifier

In [None]:
# IMPORT DATA
# Read the dataset from the relative path 'diabetes.csv' into a pandas DataFrame.
# Later cells use a column named 'target' as the class label.
import pandas as pd
data = pd.read_csv('diabetes.csv')

In [None]:
# Preview the first rows of the DataFrame (rendered as the cell output).
data.head()

In [None]:
#Print class freq. through pandas 
print(data.groupby('target').size())

#some imports to plot 
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.style.use('ggplot')

#Visualize Class Counts
sns.countplot(y=data.target ,data=data)
plt.xlabel("count of each class")
plt.ylabel("classes")
plt.show()

In [None]:
# Summarise the DataFrame: column names, dtypes, non-null counts and memory usage.
# verbose=True asks pandas for the full per-column summary.
data.info(verbose=True)

In [None]:
# Descriptive statistics for every column; include="all" covers columns of all dtypes.
data.describe(include="all")

In [None]:
# Explanatory variables: every column except the last one
X = data.iloc[:, :-1]

# Target variable: the last column
y = data.iloc[:, -1]

# Names of the explanatory columns, kept as a plain list
column_names = X.columns.tolist()

# Uncomment to inspect the two pieces:
# X.head()
# y.head()

In [None]:
from sklearn.model_selection import train_test_split

# SPLIT DATA INTO TRAIN AND TEST SET
# 30% of the rows are held out for testing (the library default would be 25%).
# Rows are shuffled by default; stratify=y keeps the class proportions of y in
# both partitions, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    stratify=y,
    random_state=123,
)

print(X_train.shape)

## Multi-layer Perceptron classifier
![MLPC](multilayerperceptron_network.png)


In [None]:
# Multi-layer Perceptron classifier: fit one small network and predict the test set.
from sklearn.neural_network import MLPClassifier

model = MLPClassifier(hidden_layer_sizes=(5, 2),
                      activation='relu', alpha=0.01, batch_size='auto',
                      learning_rate='constant', learning_rate_init=0.001,
                      max_iter=200, solver='lbfgs', tol=0.0001,
                      validation_fraction=0.2, verbose=True,
                      random_state=123)  # seed the weight initialisation so re-runs give the same model

# hidden_layer_sizes : The ith element represents the number of neurons in the ith hidden layer, default (100,).
# activation : {‘identity’, ‘logistic’, ‘tanh’, ‘relu’}, default ‘relu’
#     Activation function for the hidden layer.
#         ‘identity’, no-op activation, useful to implement linear bottleneck, returns f(x) = x
#         ‘logistic’, the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)).
#         ‘tanh’, the hyperbolic tan function, returns f(x) = tanh(x).
#         ‘relu’, the rectified linear unit function, returns f(x) = max(0, x)
# solver : {‘lbfgs’, ‘sgd’, ‘adam’}, default ‘adam’
#         ‘lbfgs’ is an optimizer in the family of quasi-Newton methods.
#         ‘sgd’ refers to stochastic gradient descent.
#         ‘adam’ refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba
# alpha : float, optional, default 0.0001
#     L2 penalty (regularization term) parameter.
# batch_size : Size of minibatches for stochastic optimizers. When set to “auto”, batch_size=min(200, n_samples).
#     Not used here: with solver=‘lbfgs’ the classifier does not use minibatches.
# learning_rate : {‘constant’, ‘invscaling’, ‘adaptive’}, default ‘constant’ (only used when solver=‘sgd’)
#     ‘constant’ is a constant learning rate given by ‘learning_rate_init’.
#     ‘invscaling’ gradually decreases the learning rate at each time step ‘t’ using an inverse scaling exponent of ‘power_t’. effective_learning_rate = learning_rate_init / pow(t, power_t)
#     ‘adaptive’ keeps the learning rate constant to ‘learning_rate_init’ as long as training loss keeps decreasing. Each time two consecutive epochs fail to decrease training loss by at least tol, or fail to increase validation score by at least tol if ‘early_stopping’ is on, the current learning rate is divided by 5.
# learning_rate_init : double, optional, default 0.001 (only used when solver=‘sgd’ or ‘adam’)
# max_iter : Maximum number of iterations.
# tol : Tolerance for the optimization.
# verbose : Whether to print progress messages to stdout.
# validation_fraction : The proportion of training data to set aside as validation set for early stopping.
#     Only used when early_stopping=True, which is not set here.
# random_state : Seed for the random weight/bias initialisation; the same value as the
#     train/test split seed is used so the whole notebook is repeatable.

# Train on the training partition, then show the predicted classes for the test partition.
model.fit(X_train, y_train)
model.predict(X_test)

In [None]:
# Multi-layer Perceptron classifier: base estimator and hyper-parameter grid for the grid search.
from sklearn.neural_network import MLPClassifier

# Seed the estimator so every candidate starts from the same random initialisation
# and the cross-validation scores are repeatable between runs.
classifier = MLPClassifier(random_state=123)

# Candidate values tried by the grid search (2 architectures x 1 max_iter x 2 alphas = 4 settings).
# hidden_layer_sizes : The ith element represents the number of neurons in the ith hidden layer.
parameters = {"hidden_layer_sizes":[(5, 2),(10,8,5)],  "max_iter": [200], "alpha": [0.00001,0.1]}

In [None]:
# DEFINE YOUR GRIDSEARCH
from sklearn.model_selection import GridSearchCV

# GridSearchCV performs an exhaustive search over the specified parameter values
# for an estimator. For a classifier with an integer cv it uses a Stratified
# K-Folds cross-validator (folds preserve the percentage of samples of each class).
# With refit=True the model is retrained on the whole training set using the
# best parameters found.
gs = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=3,
    refit=True,
    n_jobs=-1,
    verbose=50,
)

In [None]:
# TRAIN YOUR CLASSIFIER
# Run the grid search on the training partition only; the test set stays untouched.
# The result of fit() is bound back to gs.
gs = gs.fit(X_train, y_train)

In [None]:
# Summarise the grid search: best setting first, then one line per candidate
print('***GRIDSEARCH RESULTS***')
print("Best score: %f using %s" % (gs.best_score_, gs.best_params_))

# Pull the per-candidate mean score, score standard deviation and parameter dict
results = gs.cv_results_
means, stds, params = (
    results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)

# Each zipped triple is (mean, std, params) and feeds the format string directly
for summary in zip(means, stds, params):
    print("%f (%f) with: %r" % summary)

In [None]:
# TEST ON YOUR TEST SET
# best_estimator_ is the model GridSearchCV refitted with the best parameters (refit=True above).
best_model = gs.best_estimator_
# Predicted class for every row of the held-out test partition.
y_pred = best_model.predict(X_test)

In [None]:
# This is your prediction on the TEST SET (displayed as the cell output).
y_pred

In [None]:
# Evaluate the predictions against the test labels that were set aside
from sklearn.metrics import f1_score

test_f1 = f1_score(y_test, y_pred)

print('***RESULTS ON TEST SET***')
print("f1_score: ", test_f1)

In [None]:
# Further per-class metrics for the test-set predictions
from sklearn.metrics import classification_report

report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Confusion matrix of test labels versus predictions, printed as a raw array
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)
print(cm)

In [None]:
# Plot the confusion matrix of y_pred against y_test as an annotated heatmap.
# confusion_matrix puts the true classes on the rows and the predicted classes
# on the columns, so the axes are labelled to make the figure readable on its own.
ax = sns.heatmap(confusion_matrix(y_test, y_pred),
                 annot=True,    # write the count inside each cell
                 fmt='d',       # format the counts as integers
                 cmap="Blues")
ax.set(xlabel='Predicted label', ylabel='True label', title='Confusion matrix (test set)');


In [None]:
from sklearn import metrics
import matplotlib.pyplot as plt

# Fit a larger network for the ROC analysis.
# NOTE: this rebinds `model` and `y_pred`, replacing the values set by earlier cells.
# random_state seeds the weight initialisation so the curve and the AUC are repeatable.
model=MLPClassifier(hidden_layer_sizes=(100, 20), alpha=1e-05, max_iter=200,
                    random_state=123)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# predict_proba gives the probabilities for the target (0 and 1 in your case);
# column 1 is used below as the score of the positive class.
y_probs = model.predict_proba(X_test)

# False/true positive rates over the decision thresholds
fpr, tpr, thresholds=metrics.roc_curve(y_test,  y_probs[:,1])

plt.plot(fpr, tpr, label='ROC')
# Diagonal reference line: the ROC of a classifier that guesses at random
plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend()
plt.show()

# Area under the ROC curve, computed from the same positive-class scores
auc = metrics.roc_auc_score(y_test, y_probs[:,1])
print('AUC: %.2f' % auc)