In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input directory,
# then print them one per line.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Switch the working directory to the sibling "input" folder and show its entries.
# NOTE(review): the path is relative, so the result depends on the kernel's
# current working directory when this cell runs.
os.chdir("../input")
os.listdir(".")

## Load the Data

In [None]:
# Load the EEG table into a DataFrame. The path is relative and is therefore
# resolved against the working directory set by the earlier os.chdir call.
eeg_csv_path = "../input/eeg-clean/eeg_clean.csv"
df = pd.read_csv(eeg_csv_path)


In [None]:
# Install a global "ignore" filter so no warning is shown for the rest of the session.
# NOTE(review): this also hides warnings raised while the models are being fitted;
# consider narrowing the filter to specific categories.
from warnings import filterwarnings
filterwarnings(action='ignore')

In [None]:
# Preview the first five rows of the raw table
df.head(n=5)

In [None]:
# Print the frame summary (columns, non-null counts, dtypes) before any preprocessing
df.info()

In [None]:
# Number of missing values in each column
df.isna().sum()

In [None]:
# Class balance of the target column
eye_counts = df["eye"].value_counts()
print(eye_counts)

In [None]:
# Encode the target column: "Open" -> 1, every other label -> 0.
# The dtype guard keeps this cell idempotent: once the column is numeric, a second
# run would compare integers with "Open" and silently overwrite every label with 0.
if not pd.api.types.is_numeric_dtype(df["eye"]):
    # Vectorized comparison instead of a Python-level loop; int64 matches the
    # dtype pandas infers for a list of Python ints.
    df["eye"] = (df["eye"] == "Open").astype("int64")

In [None]:
# Print the frame summary again to check the dtype of "eye" after the label encoding
df.info()

In [None]:
# Feature matrix: every column except the target, as a NumPy array
feature_frame = df.drop(columns=["eye"])
X = feature_frame.values

# Target vector: the encoded "eye" column
y = df["eye"].values

In [None]:
# Standardize the features column-wise (zero mean, unit variance).
# NOTE(review): the scaler is fitted on the whole of X before the train/test split
# that follows, so test-set statistics influence the scaling of the training data.
from sklearn.preprocessing import StandardScaler

Scaler = StandardScaler()
X = Scaler.fit(X).transform(X)

# Show the first three standardized rows
X[:3]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# Shuffle the samples and hold out 30% of them as the test set (seeded for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Baseline multi-layer perceptron: library defaults, no hyper-parameter tuning
mlpc = MLPClassifier(random_state=0)

# Train the baseline; the fitted estimator is the cell output
mlpc.fit(X_train, y_train)

In [None]:
# Predict the test-set labels with the baseline (untuned) model
y_pred = mlpc.predict(X_test) # one predicted class label per row of X_test

In [None]:
import sklearn.metrics as metrics

# Share of test samples whose predicted label matches the true label
baseline_accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", baseline_accuracy)

# F1 score averaged over the classes, weighted by each class's support
baseline_f1 = metrics.f1_score(y_true=y_test, y_pred=y_pred, average='weighted')
print("f1_weighted:", baseline_f1)

## Grid Search Cross Validation

In [None]:
# Hyper-parameter search with 5-fold cross-validation.
# The grid maps each MLPClassifier argument to the candidate values to try;
# every combination is evaluated.
# alpha is the L2 penalty (regularization) strength.
from sklearn.model_selection import GridSearchCV

mlpc_params = dict(
    alpha=[0.1, 0.01, 0.001],
    hidden_layer_sizes=[(100, 100), (100, 100, 100)],
    solver=["adam", "sgd"],
    activation=["relu", "logistic"],
)

# Fresh, unfitted estimator for the search. This rebinds `mlpc`, so the baseline
# model fitted earlier is no longer reachable under this name.
mlpc = MLPClassifier(random_state=0)

mlpc_cv_model = GridSearchCV(
    estimator=mlpc,
    param_grid=mlpc_params,
    cv=5,        # 5-fold cross-validation
    n_jobs=-1,   # run on all available processors
    verbose=2,   # progress messages; higher values print more
)
mlpc_cv_model.fit(X_train, y_train)

# Report the winning parameter combination
print(f"The best parameters: {mlpc_cv_model.best_params_}")

In [None]:
# Final model: the estimator selected by the grid search.
# GridSearchCV was created with its default refit=True, so best_estimator_ has
# already been retrained on the whole of X_train / y_train with the best
# parameters. Fitting it a second time would only repeat that training.
mlpc_tuned = mlpc_cv_model.best_estimator_

# Show the fitted estimator as the cell output
mlpc_tuned

In [None]:
# K-fold f1_weighted

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# 5-fold splitter. The shuffle is seeded (like every other random step in this
# notebook) so the fold assignment, and therefore the scores, are reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=0)

# NOTE(review): cross_val_score clones mlpc_tuned and refits the clone on folds of
# the *test* split, so these scores do not measure the model trained on X_train.
# Confirm whether X_train / y_train were intended here.
cv_results_kfold = cross_val_score(mlpc_tuned, X_test, y_test, cv=kf, scoring='f1_weighted')

# Per-fold scores followed by their mean
print("K-fold Cross Validation f1_weigted Results: ",cv_results_kfold)
print("K-fold Cross Validation f1_weigted Results Mean: ",cv_results_kfold.mean())

In [None]:
# K-fold accuracy

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# 5-fold splitter. The shuffle is seeded (like every other random step in this
# notebook) so the fold assignment, and therefore the scores, are reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=0)

# NOTE(review): cross_val_score clones mlpc_tuned and refits the clone on folds of
# the *test* split, so these scores do not measure the model trained on X_train.
# Confirm whether X_train / y_train were intended here.
cv_results_kfold = cross_val_score(mlpc_tuned, X_test, y_test, cv=kf, scoring='accuracy')

# Per-fold scores followed by their mean
print("K-fold Cross Validation accuracy Results: ",cv_results_kfold)
print("K-fold Cross Validation accuracy Results Mean: ",cv_results_kfold.mean())

In [None]:
# Tuned model prediction
# Predict the test-set labels with the tuned model; this overwrites the baseline's y_pred
y_pred = mlpc_tuned.predict(X_test)

In [None]:
# Test-set scores of the tuned model
import sklearn.metrics as metrics

# F1 score averaged over the classes, weighted by each class's support
tuned_f1 = metrics.f1_score(y_true=y_test, y_pred=y_pred, average='weighted')
print("f1_weighted:", tuned_f1)

# Share of test samples whose predicted label matches the true label
tuned_accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print("accuracy:", tuned_accuracy)

In [None]:
#%% Confusion Matrix and Classification Report
from sklearn.metrics import confusion_matrix, classification_report

# Text report of the per-class metrics for the current y_pred
model_report = classification_report(y_true=y_test, y_pred=y_pred)
print(model_report)

In [None]:
# Confusion matrix of the true test labels against the current y_pred.
# Both arrays are passed to confusion_matrix as-is; no conversion is applied here.
model_conf = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(model_conf)

In [None]:
#%% ROC-AUC Curve
import matplotlib.pyplot as plt

# A ROC curve needs a continuous score for the positive class. Passing the hard
# 0/1 predictions (y_pred) collapses the curve to a single operating point and
# distorts the AUC, so the class-1 probability is used instead.
probs = mlpc_tuned.predict_proba(X_test)
# predict_proba columns follow mlpc_tuned.classes_; with the 0/1 target encoded
# above, column 1 is the probability of class 1.
positive_scores = probs[:, 1]

fpr, tpr, threshold = metrics.roc_curve(y_test, positive_scores)
roc_auc = metrics.auc(fpr, tpr)

plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line: the expected curve of a random classifier
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# Only one title is set; the earlier plt.title("ROC") call was always overwritten by this one
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()