In [23]:
from sklearn.neural_network import MLPClassifier
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from stephentools import get_file
from stephentools import data_d3
from stephentools import analyse_feature_importance
from stephentools import visualize_decision_tree
from sklearn.preprocessing import StandardScaler

In [24]:
# Seed reused by the estimators and splits further down.
rs = 42

# NOTE(review): this `df` is overwritten by the unpacking just below; the call
# is kept in case get_file() has side effects — confirm whether it is needed.
df = get_file()

# data_d3() returns seven values; presumably the frame, the feature/target
# pair and a ready-made train/test split (going by the names) — TODO confirm.
(df, X, y,
 X_train, X_test,
 y_train, y_test) = data_d3()

In [25]:
# Baseline network: library defaults apart from the iteration cap and the seed.
model_2 = MLPClassifier(random_state=rs, max_iter=250)
model_2.fit(X_train, y_train)

# Predict the test split once and reuse it for the accuracy line and the report.
# accuracy_score(y, model.predict(X)) is exactly what model.score(X, y) computes.
y_pred = model_2.predict(X_test)

print("Train accuracy:", accuracy_score(y_train, model_2.predict(X_train)))
print("Test accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

print(model_2)

Train accuracy: 0.8616317530319736
Test accuracy: 0.799055967633176
              precision    recall  f1-score   support

       False       0.79      0.92      0.85       940
        True       0.82      0.58      0.68       543

    accuracy                           0.80      1483
   macro avg       0.80      0.75      0.77      1483
weighted avg       0.80      0.80      0.79      1483

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=250, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=42, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)


In [26]:
# Hand-tuned network: one hidden layer of 165 tanh units with stronger L2
# regularisation (alpha) than the library default.
model_z = MLPClassifier(
    hidden_layer_sizes=165,
    activation='tanh',
    solver='adam',
    alpha=0.05,
    max_iter=200,
    random_state=42,
)
model_z.fit(X_train, y_train)

# Predict the test split once and reuse it for the accuracy line and the report.
# accuracy_score(y, model.predict(X)) is exactly what model.score(X, y) computes.
y_pred = model_z.predict(X_test)

print("Train accuracy:", accuracy_score(y_train, model_z.predict(X_train)))
print("Test accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

print(model_z)

Train accuracy: 0.8748621830209482
Test accuracy: 0.8091706001348618
              precision    recall  f1-score   support

       False       0.82      0.89      0.86       940
        True       0.78      0.66      0.72       543

    accuracy                           0.81      1483
   macro avg       0.80      0.78      0.79      1483
weighted avg       0.81      0.81      0.81      1483

MLPClassifier(activation='tanh', alpha=0.05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=165, learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=42, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)




In [None]:
# Search space: only the hidden-layer width varies.
# range(145, 180, 20) yields 145 and 165, so two candidates are evaluated.
params = {
    'hidden_layer_sizes': [(width,) for width in range(145, 180, 20)],
    'activation': ['tanh'],
    'solver': ['adam'],
    'alpha': [0.05],
}

# 10-fold cross-validated search, using every available core.
cv = GridSearchCV(
    estimator=MLPClassifier(random_state=rs),
    param_grid=params,
    cv=10,
    return_train_score=True,
    n_jobs=-1,
)
cv.fit(X_train, y_train)

y_pred = cv.predict(X_test)

print("Train accuracy:", cv.score(X_train, y_train))
print("Test accuracy:", cv.score(X_test, y_test))
print(classification_report(y_test, y_pred))
print(cv.best_params_)

In [None]:
import pickle

# Restore the previously saved decision tree together with its ROC data.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only open a DT.pickle that you produced yourself.
with open('DT.pickle', 'rb') as f:
    (dt_best,
     roc_index_dt_best,
     fpr_dt_best,
     tpr_dt_best) = pickle.load(f)

print(dt_best)

In [None]:
# Pass the unpickled tree and X's column names to the stephentools helper.
# NOTE(review): the helper's output format is not visible from this notebook.
analyse_feature_importance(dt_best, X.columns)

In [None]:
from sklearn.feature_selection import SelectFromModel

# prefit=True makes SelectFromModel use dt_best as it is instead of refitting
# it; the same selector is then applied to both splits.
selectmodel = SelectFromModel(estimator=dt_best, prefit=True)
X_train_sel_model, X_test_sel_model = (
    selectmodel.transform(split) for split in (X_train, X_test)
)

print(X_train_sel_model.shape)

In [None]:
# Network trained on the reduced feature set produced by SelectFromModel.
# The estimator has to be bound to `cv_sel_model`, the name that is fitted on
# the next line and evaluated in the following cell. It used to be assigned to
# `model_z`, which left `cv_sel_model` undefined (NameError on a fresh kernel).
cv_sel_model = MLPClassifier(
    hidden_layer_sizes=16,
    max_iter=200,
    solver='adam',
    random_state=42,
    activation='tanh',
    alpha=0.05,
)
cv_sel_model.fit(X_train_sel_model, y_train)

In [None]:
# Score the reduced-feature model on the matching reduced-feature matrices.
y_pred = cv_sel_model.predict(X_test_sel_model)

train_acc = cv_sel_model.score(X_train_sel_model, y_train)
print("Train accuracy:", train_acc)
test_acc = cv_sel_model.score(X_test_sel_model, y_test)
print("Test accuracy:", test_acc)

print(classification_report(y_test, y_pred))

In [None]:
# Display the training matrix returned by selectmodel.transform.
X_train_sel_model

In [None]:
# Same architecture as the earlier model_z, but with the iteration cap raised
# to 1000 and trained on the full feature set again.
model_z = MLPClassifier(
    hidden_layer_sizes=165,
    activation='tanh',
    solver='adam',
    alpha=0.05,
    max_iter=1000,
    random_state=42,
)
model_z.fit(X_train, y_train)

# Predict the test split once and reuse it for the accuracy line and the report.
# accuracy_score(y, model.predict(X)) is exactly what model.score(X, y) computes.
# `y_pred` from this cell is what the later cells wrap into `final`.
y_pred = model_z.predict(X_test)

print("Train accuracy:", accuracy_score(y_train, model_z.predict(X_train)))
print("Test accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

print(model_z)

In [None]:
# Display the test-set predictions left behind by the most recently run model cell.
y_pred

In [None]:
# Wrap the predictions in a one-column DataFrame.
# Built from the predict() output, it gets a fresh 0..n-1 index rather than
# the row labels of the test split — keep that in mind when joining it back.
final=pd.DataFrame(y_pred)

In [None]:
# Display the frame returned by data_d3().
df

In [None]:
# Display the predictions DataFrame built from y_pred.
final

In [None]:
# Display the feature matrix returned by data_d3().
X

In [None]:
from sklearn.model_selection import train_test_split

# Split the frame again so the held-out rows (`Test`) and their true labels
# (`actual`) are available as pandas objects.
# NOTE(review): stratification uses `y` from data_d3() rather than `v`, and
# anything that later pairs `Test` with model predictions by position assumes
# this split reproduces the one made inside data_d3() (same test_size, seed
# and stratification) — confirm against stephentools.
o = df.drop(columns='covid19_positive')
v = df['covid19_positive']
Big, Test, real, actual = train_test_split(
    o,
    v,
    test_size=0.29,
    random_state=rs,
    stratify=y,
)

In [None]:
# Attach the ground truth and the model predictions to the held-out rows.
# Work on a copy: train_test_split hands back a slice of the source frame, and
# writing columns into it can trigger SettingWithCopyWarning.
Test = Test.copy()

# `actual` comes from the same split as `Test`, so it shares Test's row labels
# and label-based alignment is correct here.
Test['Real'] = actual

# `final` was built from the raw predict() output and carries a 0..n-1 index,
# while `Test` keeps the original row labels. Assigning the DataFrame directly
# makes pandas align on index, which scatters predictions onto the wrong rows
# and fills the rest with NaN. Assign by position instead.
# NOTE(review): positional assignment assumes the rows of `Test` are in the
# same order as the X_test the model predicted on — confirm the two splits match.
predicted = final.iloc[:, 0].to_numpy()
if len(predicted) != len(Test):
    raise ValueError(
        f"prediction count ({len(predicted)}) does not match the number of "
        f"test rows ({len(Test)}); the two train/test splits are not aligned"
    )
Test['Prediction'] = predicted


In [None]:
# Display the held-out rows with the added 'Real' and 'Prediction' columns.
Test

In [None]:
# Keep only the rows whose 'Prediction' equals True.
# This rebinds `Test`, so the cell is not idempotent with respect to the full
# test set: re-run the split cell to get the unfiltered rows back.
Test=Test[Test['Prediction']==True]

In [None]:
# Export the filtered rows to Excel.
# The second argument of DataFrame.to_excel is the sheet name, not a file mode.
# Passing it by keyword keeps the sheet called 'r' exactly as before, but
# avoids the positional form that newer pandas releases deprecate.
Test.to_excel('NN_Generalization.xlsx', sheet_name='r')

In [None]:
# Display the rows that remain after filtering on 'Prediction'.
Test

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Distribution plot of the 'height' column for the rows currently in `Test`.
sns.displot(Test['height'])

In [None]:
# Plot the frame's columns against 'height'.
# DataFrame.plot always plots the frame it is called on and has no `data`
# parameter; the stray `data=Test` was forwarded to the plotting backend, where
# it is redundant at best and may be rejected, so it is dropped.
Test.plot(x='height')

In [None]:
import matplotlib.pyplot as plt