In [1]:
#Multi-layer perceptron/Deep learning
#import the libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
import numpy as np
from sklearn.metrics import mean_squared_error as MSE


In [2]:
#achieve randomness
np.random.seed(1)

In [3]:
#load data 
data = pd.read_excel("nnData.xlsx")
data.head()

Unnamed: 0,Experiemnt(celcius),Molecular weight(g/mol),Unnamed: 2
0,-65,312.54,0
1,-57,276.41,4
2,-49,304.47,4
3,-45,328.49,6
4,-33,102.13,0


In [4]:
# #rename the columns

# data.rename(columns = {'Unnamed: 2':'B'}, inplace = True)
# data.head()

In [5]:
#get the statistics
data.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Experiemnt(celcius),62.0,34.822581,40.726003,-65.0,12.25,33.5,69.0,96.0
Molecular weight(g/mol),62.0,285.441973,88.401461,88.10512,254.915,282.465,334.34962,480.84936
Unnamed: 2,62.0,0.774194,1.31098,0.0,0.0,0.0,1.0,6.0


In [6]:
#extract the predictors
predX = data.iloc[:,1:]
print("The shape of the training data:", predX.shape)
predX.head()

The shape of the training data: (62, 2)


Unnamed: 0,Molecular weight(g/mol),Unnamed: 2
0,312.54,0
1,276.41,4
2,304.47,4
3,328.49,6
4,102.13,0


In [7]:
#extract the target
targY = (data.iloc[:,0]).to_frame()
print("The shape of the target data:", targY.shape)
targY.head()

The shape of the target data: (62, 1)


Unnamed: 0,Experiemnt(celcius)
0,-65
1,-57
2,-49
3,-45
4,-33


In [8]:
X_train, X_test, y_train, y_test = train_test_split(predX, targY, test_size=0.30, random_state=42)

print("The distribution of training and testing data after partition:")
print("training predictors:", X_train.shape)
print("testing predictors:", X_test.shape)
print("training target:", y_train.shape)
print("testing target:", y_test.shape)

The distribution of training and testing data after partition:
training predictors: (43, 2)
testing predictors: (19, 2)
training target: (43, 1)
testing target: (19, 1)


In [9]:
#normalise/scale the data
#fit only to the training data
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [10]:
# Now apply the transformations to the predi ctors:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [18]:
#Define the parameters for tunning model
cv = 3 # how namy folds within each validation 
verbose = 0 # print progress messages during CV
n_jobs = 1  # determine the number of cores used


#declare the MLP
def tune_nn(X, Y):
    # Number of neurons in the hidden layer , # tuple hidden layer sizes == DNN
    numParams = 6;
    startSize=10; stopSize = 50;
    layerOne = [int(x) for x in np.random.randint(startSize, stopSize, numParams)] 
    layerTwo = [int(x) for x in np.random.randint(startSize, stopSize, numParams)]
    layerThree = [int(x) for x in np.random.randint(startSize,stopSize,numParams)]
    hidden_layer_sizes = list(zip(layerOne, layerTwo, layerThree))

    # activation for the hidden layer 
    activation = ['relu', "logistic", "tanh"]
    # type of optimization techniques use
    solver = ["lbfgs", "sgd", "adam"]
    # Control regularization
    alpha = [0.001, 0.01, 0.05, 0.1]
    #Number of leaves
    learning_rate = ['constant', 'adaptive']
    random_grid = {
    'hidden_layer_sizes': hidden_layer_sizes,
    'activation': activation,
    'solver': ['sgd', 'adam'],
    'alpha': alpha,
    'learning_rate': learning_rate}
    nn = MLPRegressor(max_iter= 500)
    nn_random = GridSearchCV(estimator = nn, param_grid = random_grid, cv = cv, 
                                   verbose = verbose, n_jobs = n_jobs)  
#     nn_random = GridSearchCV(estimator = nn, param_distributions = random_grid, n_iter = n_iter, cv = cv, 
#                                    verbose = verbose, random_state = random_state, n_jobs = n_jobs)  
    nn_random.fit(X, Y)
    nn_random.fit(X, Y)
    print("Best Parameters: ", nn_random.best_params_, "Best Score: ", nn_random.best_score_)
    return(nn_random.best_params_,nn_random.best_estimator_, nn_random.cv_results_)

In [19]:
# #initialise deep NN
# #read more on the mlp -- https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
# #declare 
# param_list = {"hidden_layer_sizes": [(13,13,12),(8,2,3)], "activation": ["identity", "logistic", "tanh", "relu"], "solver": ["lbfgs", "sgd", "adam"], "alpha": [0.00005,0.0005]}
# mlp = MLPRegressor()
# reg = GridSearchCV(mlp, param_grid=param_list) 
# # mlp = MLPRegressor(hidden_layer_sizes=(13,13, 12), activation = 'relu',  max_iter=500)
# reg.fit(X_train, y_train.values.ravel())

In [20]:
 best_Param, bestModel, all_results = tune_nn(X_train, y_train.values.ravel())















Best Parameters:  {'activation': 'tanh', 'alpha': 0.1, 'hidden_layer_sizes': (13, 17, 30), 'learning_rate': 'adaptive', 'solver': 'sgd'} Best Score:  0.6468512609499772




In [14]:
#print the best parameter
print("The best parameter is:", '\n', best_Param)

The best parameter is: 
 {'activation': 'tanh', 'alpha': 0.01, 'hidden_layer_sizes': (21, 22, 28), 'learning_rate': 'adaptive', 'solver': 'sgd'}


In [15]:
#get the performance for training
ypred_tr = bestModel.predict(X_train)

mseTrain = MSE(y_train, ypred_tr, squared=False)

print("The training performance is: ", mseTrain)

The training performance is:  17.162704818305023


In [16]:
#get the performance for training
ypred_ts = bestModel.predict(X_test)

mseTest = MSE(y_test, ypred_ts, squared=False)

print("The testing performance is: ", mseTest)

The testing performance is:  33.735276790758064


In [17]:
#send the result to excel sheet

##form a dataframe for both target_set and predicted set
outPutDftr = pd.DataFrame(list(zip(y_train.values.ravel(), ypred_tr)), columns = ['Target_tr', 'Predicted_tr'])
outPutDfts = pd.DataFrame(list(zip(y_test.values.ravel(),  ypred_ts)), columns = ['Target_ts', 'Predicted_ts'])

##save the file as CSV
outPutDfts.to_csv('modelOutputrfrts.csv')
outPutDftr.to_csv('modelOutputrfrtr.csv')