### Import the libraries

In [21]:
import pandas as pd
import numpy as np
import math
import itertools
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')


from sklearn import model_selection, metrics
from sklearn.model_selection import train_test_split
from scipy import stats


from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, r2_score, f1_score,mean_squared_error

from sklearn.neural_network import MLPRegressor


from numpy.random import seed
seed(1)

### Defining the functions

In [22]:
def complete_model(trainX, trainy, testX, testy, model):
    """Fit ``model`` on the training split and score it on the evaluation split.

    Parameters
    ----------
    trainX, trainy
        Features and targets handed to ``model.fit``.
    testX, testy
        Features handed to ``model.predict`` and the targets the resulting
        predictions are compared against.
    model
        Estimator exposing ``fit`` and ``predict``; it is fitted in place.

    Returns
    -------
    The value returned by ``calculation(testy, predictions)``.
    """
    model.fit(trainX, trainy)
    predictions = model.predict(testX)
    scores = calculation(testy, predictions)
    return scores

In [23]:
def calculation(y_test, y_pred):
    """Compute regression metrics for predictions against observed targets.

    Parameters
    ----------
    y_test
        Observed target values.
    y_pred
        Predicted target values, aligned with ``y_test``.

    Returns
    -------
    list
        ``[mse, rmse, r2, pearson_statistic, pearson_p_value]`` where the
        last two entries are the first and second items of the
        ``stats.pearsonr`` result.
    """
    squared_error = mean_squared_error(y_test, y_pred)
    root_squared_error = np.sqrt(squared_error)
    determination = metrics.r2_score(y_test, y_pred)
    # pearsonr yields (correlation, p-value); unpack instead of indexing.
    correlation, p_value = stats.pearsonr(y_test, y_pred)
    return [squared_error, root_squared_error, determination, correlation, p_value]

In [24]:
def print_output(model_name, val, output_val):
    """Print a bold header followed by five metrics formatted to 3 decimals.

    Parameters
    ----------
    model_name
        Model label inserted into the header line.
    val
        Evaluation label inserted into the header line.
    output_val
        Indexable holding, in order, MSE, RMSE, R2, PCC and p-value at
        positions 0 to 4.
    """
    # \033[1m ... \033[0m switches bold on and off in ANSI-capable terminals.
    header = '\033[1mOptimized {} model {} performance: \033[0m'
    print(header.format(model_name, val))

    # One template per metric, in the same order as the entries of output_val.
    line_templates = (
        "MSE:         {0:.3f}",
        "RMSE: {0:.3f}",
        "R2: {0:.3f}",
        "PCC:    {0:.3f}",
        "p-value:    {0:.3f}",
    )
    for position, template in enumerate(line_templates):
        print(template.format(output_val[position]))

### Read the data

In [25]:
## Read the training file
overall_data = pd.read_csv("../SARS/sars_boruta_grepped_features_train.csv")

### Remove the last column from the file and replace the '?' placeholder with 0
### (chained, so the frame is rebuilt from the file instead of mutated in place)
overall_data = overall_data.iloc[:, :-1].replace('?', 0)

### Split the data: 'Name' is the regression target, every column from position 1 on is an input
# NOTE(review): iloc[:, 1:] assumes 'Name' sits at position 0 -- confirm against the CSV,
# otherwise the target ends up among the features.
# Cast to float before np.nan_to_num: on object-dtype arrays (what a column that
# contained '?' is read as) nan_to_num returns the data unchanged, so NaNs would survive.
X = np.nan_to_num(overall_data.iloc[:, 1:].to_numpy(dtype=float))
y = np.nan_to_num(overall_data.loc[:, 'Name'].to_numpy(dtype=float))


overall_data.head()

Unnamed: 0,Name,AATS3s,AATS4s,AATSC0c,AATSC1p,MATS1p,MATS1i,VE1_Dzs,VE2_Dzs,TDB5e,...,PubchemFP497,PubchemFP609,PubchemFP622,PubchemFP674,KRFP2018,KRFP2135,KRFP4772,AD2D174,KRFPC2135,KRFPC2975
0,5.18,2.034846,1.948337,0.007198,-0.014797,-0.059715,-0.211553,0.493891,0.02245,34.024208,...,0,0,0,0,0,0,0,0,0.0,9.0
1,5.1,2.168422,2.021814,0.011219,-0.007268,-0.029721,-0.185476,0.45791,0.019909,32.854418,...,0,0,0,0,0,0,0,0,0.0,9.0
2,5.89,2.586843,2.484821,0.01104,0.001238,0.004958,-0.123107,0.285269,0.011411,35.089963,...,0,0,0,0,0,0,0,0,0.0,5.0
3,5.3,6.735119,5.824755,0.039992,-0.005964,-0.029783,-0.20113,0.229938,0.012774,43.154668,...,0,0,0,0,0,0,0,0,0.0,9.0
4,4.03,2.308201,2.173413,0.006154,-0.008504,-0.034845,-0.098218,0.413094,0.014753,35.676718,...,0,0,0,0,0,0,0,0,0.0,8.0


In [26]:
### Read the test dataset

test = pd.read_csv("../SARS/sars_boruta_grepped_features_test.csv", low_memory=False)

## Remove the last column and replace the '?' placeholder with 0 in the test dataset
## (chained, so the frame is rebuilt from the file instead of mutated in place)
test = test.iloc[:, :-1].replace('?', 0)

## Splitting the file into x and y variables [test dataset]
# NOTE(review): iloc[:, 1:] assumes 'Name' sits at position 0 -- confirm against the CSV,
# otherwise the target ends up among the features.
# Cast to float before np.nan_to_num: on object-dtype arrays (what a column that
# contained '?' is read as) nan_to_num returns the data unchanged, so NaNs would survive.
X_val = np.nan_to_num(test.iloc[:, 1:].to_numpy(dtype=float))
y_val = np.nan_to_num(test.loc[:, 'Name'].to_numpy(dtype=float))

test.head()

Unnamed: 0,Name,AATS3s,AATS4s,AATSC0c,AATSC1p,MATS1p,MATS1i,VE1_Dzs,VE2_Dzs,TDB5e,...,PubchemFP497,PubchemFP609,PubchemFP622,PubchemFP674,KRFP2018,KRFP2135,KRFP4772,AD2D174,KRFPC2135,KRFPC2975
0,4.81,4.042813,4.499288,0.023375,0.014046,0.056894,-0.016811,0.585259,0.02251,39.603293,...,0,0,0,0,0,0,0.0,0.0,0.0,8.0
1,5.12,2.117682,1.930347,0.005958,-0.00061,-0.00198,-0.189903,0.291171,0.014559,33.969583,...,0,0,0,0,0,0,0.0,0.0,0.0,5.0
2,5.18,2.702546,2.663669,0.012685,-0.000658,-0.002881,-0.074437,0.063316,0.001376,36.420713,...,0,0,0,0,0,0,0.0,0.0,0.0,19.0
3,4.3,5.116505,5.279007,0.026623,0.041073,0.183468,0.224729,0.157879,0.004934,41.44085,...,0,0,0,0,0,0,0.0,0.0,0.0,4.0
4,4.3,3.252772,3.482981,0.01499,0.00608,0.021805,0.020338,0.073163,0.00236,39.557025,...,0,0,0,0,0,0,0.0,0.0,0.0,6.0


### Call the defined functions: grid search over the MLPRegressor hyperparameters

In [27]:
# Headers of the per-run metrics table. The five metric headers follow the order
# of the list returned by calculation(); its first entry is the mean squared
# error, so that column is labelled 'MSE'.
column_names = ['Range_val', 'Model_info', 'MSE', 'RMSE', 'R2', 'PCC', 'p-val']

In [28]:

# Hyperparameter values explored by the grid search.
activations = ['tanh', 'relu']
solvers = ['sgd', 'adam']
learning_rates = ['constant', 'invscaling', 'adaptive']

# Every (activation, solver, learning_rate) combination, in product order.
paras = list(itertools.product(activations, solvers, learning_rates))

# Collect one row per (combination, split) and build the frame once at the end
# instead of appending to a DataFrame row by row.
rows = []

# Singular loop names keep the three lists above intact after the cell has run.
for i, (activation, solver, learning_rate) in enumerate(paras):
    model = MLPRegressor(activation=activation, solver=solver, learning_rate=learning_rate, random_state=10)
    print(model)

    # The label depends only on the combination, so build it once per combination.
    name = 'ANN_'+'_paras_'+str(i)+'_activation_'+str(activation)+'_solver_'+str(solver)+'_learning_'+str(learning_rate)

    # Two 90/10 splits of the training data, seeded 0 and 1.
    for j in range(2):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=j)
        output_val = complete_model(trainX=X_train, trainy=y_train, testX=X_test, testy=y_test, model=model)
        rows.append([str(j), name, output_val[0], output_val[1], output_val[2], output_val[3], output_val[4]])

output = pd.DataFrame(rows, columns=column_names)
output.to_csv(r'''ann_training_metrics_all.csv''')

MLPRegressor(activation='tanh', random_state=10, solver='sgd')
MLPRegressor(activation='tanh', learning_rate='invscaling', random_state=10,
             solver='sgd')
MLPRegressor(activation='tanh', learning_rate='adaptive', random_state=10,
             solver='sgd')
MLPRegressor(activation='tanh', random_state=10)
MLPRegressor(activation='tanh', learning_rate='invscaling', random_state=10)
MLPRegressor(activation='tanh', learning_rate='adaptive', random_state=10)
MLPRegressor(random_state=10, solver='sgd')
MLPRegressor(learning_rate='invscaling', random_state=10, solver='sgd')
MLPRegressor(learning_rate='adaptive', random_state=10, solver='sgd')
MLPRegressor(random_state=10)
MLPRegressor(learning_rate='invscaling', random_state=10)
MLPRegressor(learning_rate='adaptive', random_state=10)


### Print the performance of the best-performing ANN model

In [None]:
### ANN
# MLPRegressor is the estimator imported above that accepts the
# activation / solver / learning_rate arguments used here.
optimized_ann = MLPRegressor(activation='relu', solver='adam', learning_rate='adaptive', random_state=7)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=51)
### Optimized ANN model training performance
# Reported on the 10% hold-out split of the training file (X_test / y_test).
print_output('ANN', 'training', complete_model(X_train, y_train, X_test, y_test, optimized_ann))
### Optimized ANN model testing performance
# Reported on the separate test file (X_val / y_val); complete_model refits on X_train first.
print_output('ANN', 'testing', complete_model(X_train, y_train, X_val, y_val, optimized_ann))