### Install and Import Libraries

In [None]:
# # Installing dependencies
# %pip install niapy
# %pip install openpyxl 
# %pip install pillow
# %pip install -U scikit-learn
# %pip install -U seaborn

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import openpyxl
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import seaborn as sns
from sklearn.model_selection import train_test_split
import pickle

In [2]:
# Import dataset training
dataset_train_raw = pd.read_excel('dataset/boston_train.xlsx')
dataset_train_raw

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.40771,0.0,6.20,1,0.5070,6.164,91.3,3.0480,8,307,17.4,395.24,21.46,21.7
1,19.60910,0.0,18.10,0,0.6710,7.313,97.9,1.3163,24,666,20.2,396.90,13.44,15.0
2,6.71772,0.0,18.10,0,0.7130,6.749,92.6,2.3236,24,666,20.2,0.32,17.44,13.4
3,1.51902,0.0,19.58,1,0.6050,8.375,93.9,2.1620,5,403,14.7,388.45,3.32,50.0
4,9.59571,0.0,18.10,0,0.6930,6.404,100.0,1.6390,24,666,20.2,376.11,20.31,12.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399,0.02009,95.0,2.68,0,0.4161,8.034,31.9,5.1180,4,224,14.7,390.55,2.88,50.0
400,0.04981,21.0,5.64,0,0.4390,5.998,21.4,6.8147,4,243,16.8,396.90,8.43,23.4
401,0.08199,0.0,13.92,0,0.4370,6.009,42.3,5.5027,4,289,16.0,396.90,10.40,21.7
402,0.37578,0.0,10.59,1,0.4890,5.404,88.6,3.6650,4,277,18.6,395.24,23.98,19.3


In [3]:
# Import dataset testing
dataset_test_raw = pd.read_excel('dataset/boston_test.xlsx')
dataset_test_raw

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.01951,17.5,1.38,0,0.4161,7.104,59.5,9.2229,3,216,18.6,393.24,8.05,33.0
1,0.14866,0.0,8.56,0,0.5200,6.727,79.9,2.7778,5,384,20.9,394.76,9.42,27.5
2,25.04610,0.0,18.10,0,0.6930,5.987,100.0,1.5888,24,666,20.2,396.90,26.77,5.6
3,3.67367,0.0,18.10,0,0.5830,6.312,51.9,3.9917,24,666,20.2,388.62,10.58,21.2
4,9.51363,0.0,18.10,0,0.7130,6.728,94.1,2.4961,24,666,20.2,6.68,18.71,14.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,0.06211,40.0,1.25,0,0.4290,6.490,44.4,8.7921,1,335,19.7,396.90,5.98,22.9
98,0.06911,45.0,3.44,0,0.4370,6.739,30.8,6.4798,5,398,15.2,389.71,4.69,30.5
99,0.09252,30.0,4.93,0,0.4280,6.606,42.2,6.1899,6,300,16.6,383.78,7.37,23.3
100,0.26363,0.0,8.56,0,0.5200,6.229,91.2,2.5451,5,384,20.9,391.23,15.55,19.4


### Data Preprocessing

In [4]:
# Normalizing data values for data train
normalizer = StandardScaler()
normalized_data_train = dataset_train_raw.copy()
cols = normalized_data_train.columns
for col in cols:
    normalized_data_train[col] = normalizer.fit_transform(normalized_data_train[[col]])

normalized_data_train

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,-0.368403,-0.469675,-0.731832,3.595975,-0.423937,-0.165276,0.807027,-0.341388,-0.177758,-0.595458,-0.492442,0.433353,1.213595,-0.079498
1,1.976926,-0.469675,1.020825,-0.278089,0.989336,1.499446,1.045658,-1.215395,1.654827,1.527104,0.809686,0.450740,0.093792,-0.829790
2,0.402325,-0.469675,1.020825,-0.278089,1.351272,0.682298,0.854030,-0.707000,1.654827,1.527104,0.809686,-3.703069,0.652297,-1.008965
3,-0.232663,-0.469675,1.238803,3.595975,0.420580,3.038119,0.901033,-0.788561,-0.521368,-0.027865,-1.748065,0.362234,-1.319227,3.089649
4,0.753854,-0.469675,1.020825,-0.278089,1.178922,0.182447,1.121587,-1.052525,1.654827,1.527104,0.809686,0.232984,1.053025,-1.154544
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399,-0.415748,3.654826,-1.250265,-0.278089,-1.207270,2.544063,-1.340654,0.703361,-0.635905,-1.086189,-1.748065,0.384230,-1.380663,3.089649
400,-0.412118,0.442057,-0.814310,-0.278089,-1.009929,-0.405784,-1.720295,1.559702,-0.635905,-0.973853,-0.771469,0.450740,-0.605736,0.110875
401,-0.408187,-0.469675,0.405186,-0.278089,-1.027164,-0.389847,-0.964629,0.897523,-0.635905,-0.701881,-1.143506,0.450740,-0.330673,-0.079498
402,-0.372303,-0.469675,-0.085263,3.595975,-0.579053,-1.266398,0.709405,-0.029982,-0.635905,-0.772831,0.065613,0.433353,1.565453,-0.348259


In [5]:
# Normalizing data values for data test
normalizer = StandardScaler()
normalized_data_test = dataset_test_raw.copy()
cols = normalized_data_test.columns
for col in cols:
    normalized_data_test[col] = normalizer.fit_transform(normalized_data_test[[col]])

normalized_data_test

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,-0.434277,0.164242,-1.356362,-0.25,-1.159000,1.063571,-0.250490,2.050090,-0.763038,-1.175589,0.071374,0.359597,-0.592096,0.985078
1,-0.421369,-0.558748,-0.345025,-0.25,-0.251230,0.558183,0.433346,-0.516506,-0.529665,-0.159197,1.111399,0.381079,-0.396481,0.442221
2,2.066947,-0.558748,0.998729,-0.25,1.260263,-0.433827,1.107125,-0.989995,1.687377,1.546890,0.794869,0.411322,2.080841,-1.719338
3,-0.069071,-0.558748,0.998729,-0.25,0.299198,0.001853,-0.505252,-0.033101,1.687377,1.546890,0.794869,0.294305,-0.230850,-0.179598
4,0.514590,-0.558748,0.998729,-0.25,1.435002,0.559523,0.909349,-0.628686,1.687377,1.546890,0.794869,-5.103502,0.929993,-0.801416
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,-0.430019,1.093800,-1.374673,-0.25,-1.046293,0.240472,-0.756662,1.878535,-0.996410,-0.455645,0.568777,0.411322,-0.887661,-0.011805
98,-0.429320,1.300369,-1.066201,-0.25,-0.976398,0.574269,-1.212553,0.957721,-0.529665,-0.074497,-1.466053,0.309709,-1.071854,0.738325
99,-0.426980,0.680663,-0.856328,-0.25,-1.055030,0.395976,-0.830409,0.842275,-0.412978,-0.667393,-0.832995,0.225903,-0.689190,0.027675
100,-0.409879,-0.558748,-0.345025,-0.25,-0.251230,-0.109413,0.812137,-0.609173,-0.529665,-0.159197,1.111399,0.331191,0.478792,-0.357260


In [6]:
# Label and Feature grouping
train_features = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
X_train = normalized_data_train[train_features].to_numpy()
y_train = normalized_data_train['PRICE'].to_numpy()
X_test = normalized_data_test[train_features].to_numpy()
y_test = normalized_data_test['PRICE'].to_numpy()

In [7]:
# Splitting Data Training and Data Validation
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.3)


### MLP

In [150]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import cross_val_score
from niapy.problems import Problem
from niapy.task import OptimizationType, Task
from niapy.algorithms.basic import GeneticAlgorithm

def get_hyperparameters_mlp(x):
    # Define each hyperparameter
    hidden_layer_sizes = round(50 + x[0]*150)
    activations = ('identity', 'logistic', 'tanh', 'relu')
    activation = activations[round(x[1]*3)]
    solvers = ('lbfgs', 'sgd', 'adam')
    solver = solvers[round(x[2]*2)]
    learning_rates = ('constant', 'invscaling', 'adaptive')
    learning_rate = learning_rates[round(x[5]*2)]
    max_iter = 10000

    params = {
        'hidden_layer_sizes': hidden_layer_sizes,
        'activation': activation,
        'solver': solver,
        'learning_rate': learning_rate,
        'max_iter': max_iter
    }
    return params
    
def get_regressor_mlp(x):
    """Get regressor from solution `x`."""
    params = get_hyperparameters_mlp(x)
    return MLPRegressor(**params)

class MLPRegressorHyperparameterOptimization(Problem):
    def __init__(self, X_val, y_val):
        super().__init__(dimension=6, lower=0, upper=1)
        self.X_val = X_val
        self.y_val = y_val

    def _evaluate(self, x):
        model = get_regressor_mlp(x)
        scores = cross_val_score(model, self.X_val, self.y_val, cv=2, n_jobs=-1)
        return scores.mean()

In [151]:
# Default Optimization
problem = MLPRegressorHyperparameterOptimization(X_val, y_val)

# Running the optimization
list_best = []
task = Task(problem, max_iters=50, optimization_type=OptimizationType.MAXIMIZATION)
algorithm = GeneticAlgorithm()
best_params_mlp, best_accuracy_mlp = algorithm.run(task)
list_best.append([best_params_mlp, best_accuracy_mlp])

# Writing the list data to the default_test file
fileIn = open('default_test', 'wb')
pickle.dump(list_best, fileIn)
fileIn.close()

In [173]:
# Default Performance

# Getting the list data from default_test file
fileOut = open('default_test', 'rb')
list_default = pickle.load(fileOut)
fileOut.close()

# Assigning the hyperparameter
default_parameter = list_default[0][0]
print('Best parameters:', get_hyperparameters_mlp(default_parameter))
default_optimized = get_regressor_mlp(default_parameter)

# Calculating average score
total_default_score = 0
for i in range(50):
    default_optimized.fit(X_train, y_train)
    default_optimized_score = default_optimized.score(X_test, y_test)
    total_default_score += default_optimized_score
print('Average default model accuracy: ', total_default_score/50)

Best parameters: {'hidden_layer_sizes': 152, 'activation': 'relu', 'solver': 'adam', 'learning_rate': 'adaptive', 'max_iter': 10000}
Average default model accuracy:  0.8592398757614765


In [153]:
# Population Variation in Optimization
problem = MLPRegressorHyperparameterOptimization(X_val, y_val)

# Running the optimization
list_best = []
task = Task(problem, max_iters=50, optimization_type=OptimizationType.MAXIMIZATION)
algorithm = GeneticAlgorithm(population_size=50)
best_params_mlp, best_accuracy_mlp = algorithm.run(task)
list_best.append([best_params_mlp, best_accuracy_mlp])

# Writing the list data to the population_test file
fileIn = open('population_test', 'wb')
pickle.dump(list_best, fileIn)
fileIn.close()

In [168]:
# Population Variation Performance

# Getting the list data from population_test file
fileOut = open('population_test', 'rb')
list_population = pickle.load(fileOut)
fileOut.close()

# Assigning the hyperparameter
population_parameter = list_population[0][0]
print('Best parameters:', get_hyperparameters_mlp(population_parameter))
population_optimized = get_regressor_mlp(population_parameter)

# Calculating average score
total_population_score = 0
for i in range(50):
    population_optimized.fit(X_train, y_train)
    population_optimized_score = population_optimized.score(X_test, y_test)
    total_population_score += population_optimized_score
print('Average population model accuracy: ', total_population_score/50)

Best parameters: {'hidden_layer_sizes': 132, 'activation': 'relu', 'solver': 'lbfgs', 'learning_rate': 'invscaling', 'max_iter': 10000}
Average population model accuracy:  0.7507962776558409


In [155]:
# Mutation Variation in Optimization
problem = MLPRegressorHyperparameterOptimization(X_val, y_val)

# Running the optimization
list_best = []
task = Task(problem, max_iters=50, optimization_type=OptimizationType.MAXIMIZATION)
algorithm = GeneticAlgorithm(mutation_rate=0.5)
best_params_mlp, best_accuracy_mlp = algorithm.run(task)
list_best.append([best_params_mlp, best_accuracy_mlp])

# Writing the list data to the mutation_test file
fileIn = open('mutation_test', 'wb')
pickle.dump(list_best, fileIn)
fileIn.close()

In [172]:
# Mutation Variation Performance

# Getting the list data from mutation_test file
fileOut = open('mutation_test', 'rb')
list_mutation = pickle.load(fileOut)
fileOut.close()

# Assigning the hyperparameter
mutation_parameter = list_mutation[0][0]
print('Best parameters:', get_hyperparameters_mlp(mutation_parameter))
mutation_optimized = get_regressor_mlp(mutation_parameter)

# Calculating average score
total_mutation_score = 0
for i in range(50):
    mutation_optimized.fit(X_train, y_train)
    mutation_optimized_score = mutation_optimized.score(X_test, y_test)
    total_mutation_score += mutation_optimized_score
print('Average mutation model accuracy: ', total_mutation_score/50)

Best parameters: {'hidden_layer_sizes': 175, 'activation': 'relu', 'solver': 'lbfgs', 'learning_rate': 'invscaling', 'max_iter': 10000}
Average mutation model accuracy:  0.7709048052519222


In [159]:
# Iteration Variation in Optimization
problem = MLPRegressorHyperparameterOptimization(X_val, y_val)

# Running the optimization
list_best = []
task = Task(problem, max_iters=100, optimization_type=OptimizationType.MAXIMIZATION)
algorithm = GeneticAlgorithm()
best_params_mlp, best_accuracy_mlp = algorithm.run(task)
list_best.append([best_params_mlp, best_accuracy_mlp])

# Writing the list data to the iteration_test file
fileIn = open('iteration_test', 'wb')
pickle.dump(list_best, fileIn)
fileIn.close()

In [170]:
# Iteration Variation Performance

# Getting the list data from iteration_test file
fileOut = open('iteration_test', 'rb')
list_iteration = pickle.load(fileOut)
fileOut.close()

# Assigning the hyperparameter
iteration_parameter = list_iteration[0][0]
print('Best parameters:', get_hyperparameters_mlp(iteration_parameter))
iteration_optimized = get_regressor_mlp(iteration_parameter)

# Calculating average score
total_iteration_score = 0
for i in range(50):
    iteration_optimized.fit(X_train, y_train)
    iteration_optimized_score = iteration_optimized.score(X_test, y_test)
    total_iteration_score += iteration_optimized_score
print('Average iteration model accuracy: ', total_iteration_score/50)

Best parameters: {'hidden_layer_sizes': 153, 'activation': 'relu', 'solver': 'adam', 'learning_rate': 'invscaling', 'max_iter': 10000}
Average iteration model accuracy:  0.8591639395269003


In [161]:
# Crossover Variation in Optimization
problem = MLPRegressorHyperparameterOptimization(X_val, y_val)

# Running the optimization
list_best = []
task = Task(problem, max_iters=50, optimization_type=OptimizationType.MAXIMIZATION)
algorithm = GeneticAlgorithm(crossover_rate=0.5)
best_params_mlp, best_accuracy_mlp = algorithm.run(task)
list_best.append([best_params_mlp, best_accuracy_mlp])

# Writing the list data to the crossover_rate_test file
fileIn = open('crossover_rate_test', 'wb')
pickle.dump(list_best, fileIn)
fileIn.close()

In [171]:
# Crossover Variation Performance

# Getting the list data from crossover_rate_test file
fileOut = open('crossover_rate_test', 'rb')
list_crossover = pickle.load(fileOut)
fileOut.close()

# Assigning the hyperparameter
crossover_parameter = list_crossover[0][0]
print('Best parameters:', get_hyperparameters_mlp(crossover_parameter))
crossover_optimized = get_regressor_mlp(crossover_parameter)

# Calculating average score
total_crossover_score = 0
for i in range(50):
    crossover_optimized.fit(X_train, y_train)
    crossover_optimized_score = crossover_optimized.score(X_test, y_test)
    total_crossover_score += crossover_optimized_score
print('Average crossover model accuracy: ', total_crossover_score/50)

Best parameters: {'hidden_layer_sizes': 198, 'activation': 'relu', 'solver': 'lbfgs', 'learning_rate': 'invscaling', 'max_iter': 10000}
Average crossover model accuracy:  0.7766949707299573
