In [6]:
#Imports

from sklearn import tree
from matplotlib import pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn import model_selection
from pprint import pprint
from matplotlib.pyplot import scatter 
import pandas as pd
np.set_printoptions(suppress=True)

In [7]:
#Data filtering functions (gets rid of lines containing the same scores for all configurations)

def data_filter(data):
    for line in data:
        print(np.array(line[-5:]))
        if np.array_equal(np.array(line[-5:]),np.zeros(5)):
            data.remove(line)
    return data
       

In [8]:
#Importing Data from file
dataset = np.genfromtxt("all-data-filtered", delimiter='')

#Input
Input = dataset[:, :-8]

#Features
features_file = open("Data-Extraction/features", "r")
features = features_file.readlines()

#Applying scaling
sc = StandardScaler()
Input = sc.fit_transform(Input)

In [9]:
import time

#Function that, given a configuration, outputs a table summarizing the best parameters to consider for that configuration
#Note: c is the configuration number: goes from 1 to 8
def tableConfig(c):
    
    hidden_layer_sizes=[(100,)]
    #activation=['identity', 'logistic', 'tanh', 'relu']
    activation=['identity']
    #early_stopping =[False,True]
    validation_fonction=[0.1,0.5]
    n_iter_no_change=[10,100]
    max_iter=[1000,5000]
    learning_rate_init=[0.005,0.1]
    data = []
    
    Output=dataset[:, c-9]
    
    x_train, x_test, y_train, y_test = model_selection.train_test_split(Input, Output, random_state=1, test_size = 0.20)
    x_test, x_validation, y_test, y_validation = model_selection.train_test_split(x_test, y_test, test_size=0.25, random_state=42)
    
    for i in hidden_layer_sizes: 
        for j in activation: 
            #for k in early_stopping: 
            for l in validation_fonction: 
                for m in n_iter_no_change: 
                    for n in max_iter:
                        for o in learning_rate_init:

                            mlp = MLPRegressor(
                                hidden_layer_sizes=i,
                                activation= j,
                                #early_stopping = True,
                                validation_fraction=l,
                                n_iter_no_change=m,
                                max_iter= n, 
                                learning_rate_init=o            
                                )
    
                            start = time.time()
                            mlp.fit(x_train, y_train)
                            stop = time.time()
                            
                            data.append({'hidden_layer_sizes':i,
                                'activation':j,
                                #'early_stopping': k, 
                                'validation_fraction':l,
                                'n_iter_no_change':m, 
                                'max_iter': n, 
                                'learning_rate_init':o, 
                                'test_score': mlp.score(x_test, y_test),
                                'training time': stop - start
                                })

    table = pd.DataFrame.from_dict(data)
    table = table.replace(np.nan, '-')
    table = table.sort_values(by='test_score', ascending=False)
    #print("Configuration ",str(c+1),": ")
    return table.iloc[0]
    



In [10]:
treeConfigs=[]

for i in range(0,8):
    aux=[]
    for j in range(0,8):
        aux.append(tableConfig(i)[j])
    treeConfigs.append(aux)
    
print(np.array(treeConfigs))



[[(100,) 'identity' 0.5 100 5000 0.1 0.9999993606719897
  0.07188034057617188]
 [(100,) 'identity' 0.5 10 5000 0.1 0.26974697901958644
  0.07917547225952148]
 [(100,) 'identity' 0.5 10 1000 0.1 -0.05570352027086645
  0.09380125999450684]
 [(100,) 'identity' 0.1 10 1000 0.1 0.15473447144660168
  0.10932397842407227]
 [(100,) 'identity' 0.5 10 1000 0.1 0.393093368561661 0.341839075088501]
 [(100,) 'identity' 0.5 10 5000 0.1 0.298874937286811 0.06266999244689941]
 [(100,) 'identity' 0.1 100 1000 0.1 0.5043532716906206
  0.23255586624145508]
 [(100,) 'identity' 0.1 10 5000 0.1 0.16729280431006488
  0.09374403953552246]]


  print(np.array(treeConfigs))


In [11]:
print(len(treeConfigs))

print(treeConfigs[0][0])

8
(100,)


In [12]:
#Choice of the best parameters for the 8 Regressors

mlps=[] #List of 8 MLPs for each configuration
#Parameters of MLPRegressor

for c in treeConfigs:
    mlp = MLPRegressor(
        hidden_layer_sizes=c[0],
        activation= c[1],
        validation_fraction=c[2],
        n_iter_no_change=c[3],
        max_iter= c[4], 
        learning_rate_init=c[5] 
    )
    mlps.append(mlp)



In [13]:
print(len(mlps))
print(np.array(mlps))

8
[MLPRegressor(activation='identity', learning_rate_init=0.1, max_iter=5000,
             n_iter_no_change=100, validation_fraction=0.5)
 MLPRegressor(activation='identity', learning_rate_init=0.1, max_iter=5000,
             validation_fraction=0.5)
 MLPRegressor(activation='identity', learning_rate_init=0.1, max_iter=1000,
             validation_fraction=0.5)
 MLPRegressor(activation='identity', learning_rate_init=0.1, max_iter=1000)
 MLPRegressor(activation='identity', learning_rate_init=0.1, max_iter=1000,
             validation_fraction=0.5)
 MLPRegressor(activation='identity', learning_rate_init=0.1, max_iter=5000,
             validation_fraction=0.5)
 MLPRegressor(activation='identity', learning_rate_init=0.1, max_iter=1000,
             n_iter_no_change=100)
 MLPRegressor(activation='identity', learning_rate_init=0.1, max_iter=5000)]


In [14]:
Configs=[]
trainScores=[]
testScores=[]
xTrain=[]
yTrain=[]
xTest=[]
yTest=[]
Gaps=[]
Trees=[]
gap=0



#Collecting Trains & Test vectors for MLP Regressor Evaluation
for i in range(1,9):
    Configs.append("C"+str(i))
    Output=dataset[:,i-9]
    #print("Configuration ",i," : ")
    #print(Output)

    #Data splitting: training set & test set: 20% | 80%
    x_train, x_test, y_train, y_test = model_selection.train_test_split(Input, Output, random_state=1, test_size = 0.20)
    x_test, x_validation, y_test, y_validation = model_selection.train_test_split(x_test, y_test, test_size=0.25, random_state=42)
 
    xTrain.append(x_train)
    yTrain.append(y_train)
    xTest.append(x_test)
    yTest.append(y_test)
    
for i in range(0,8):
    
    #Training
    mlps[i].fit(xTrain[i], yTrain[i])

    #Analyzing scores
    #print('Train score ',1,': ', mlp.score(x_train, y_train))
    #print('Test score ',1,': ', mlp.score(x_test, y_test))
    #print('\n')
    trainScores.append(mlp.score(xTrain[i], yTrain[i])*100)
    testScores.append(mlp.score(xTest[i], yTest[i])*100)
    gap+=abs((mlp.score(xTrain[i], yTrain[i])-mlp.score(xTest[i], yTest[i]))*100)
    
gap/=len(Configs)
scatter(Configs,trainScores,c='red',label='Train Score')
scatter(Configs,testScores,c='blue',label='Test Score')
plt.xlabel("Configurations")
plt.ylabel("Scores")
plt.legend()
plt.show()

print(gap,"%")
Gaps.append(gap)

NotFittedError: This MLPRegressor instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.