In [1]:
from sklearn.model_selection import GridSearchCV
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from sklearn.inspection import permutation_importance
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV

In [2]:
train_values = pd.read_csv('train_values.csv', dtype= {'building_id': np.int32,\
                                              'geo_level_1_id': np.int8,\
                                              'geo_level_2_id': np.int16,\
                                              'geo_level_3_id': np.int16,\
                                              'count_floors_pre_eq': np.int16,\
                                              'age': np.int16,\
                                              'area_percentage': np.int8,\
                                              'height_percentage': np.int8,\
                                              'land_surface_condition': 'category',\
                                              'foundation_type': 'category',\
                                              'roof_type': 'category',\
                                              'ground_floor_type':'category',\
                                              'other_floor_type': 'category',\
                                              'position': 'category',\
                                              'plan_configuration':'category',\
                                              'has_superstructure_adobe_mud':'boolean',\
                                              'has_superstructure_mud_mortar_stone':'boolean',\
                                              'has_superstructure_stone_flag':'boolean',\
                                              'has_superstructure_cement_mortar_stone':'boolean',\
                                              'has_superstructure_mud_mortar_brick':'boolean',\
                                              'has_superstructure_cement_mortar_brick':'boolean',\
                                              'has_superstructure_timber':'boolean',\
                                              'has_superstructure_bamboo':'boolean',\
                                              'has_superstructure_rc_non_engineered':'boolean',\
                                              'has_superstructure_rc_engineered':'boolean',\
                                              'has_superstructure_other':'boolean',\
                                              'legal_ownership_status':'category',\
                                              'count_families': np.int16,\
                                              'has_secondary_use':'boolean',\
                                              'has_secondary_use_agriculture':'boolean',\
                                              'has_secondary_use_hotel':'boolean',\
                                              'has_secondary_use_rental':'boolean',\
                                              'has_secondary_use_institution':'boolean',\
                                              'has_secondary_use_school':'boolean',\
                                              'has_secondary_use_industry':'boolean',\
                                              'has_secondary_use_health_post':'boolean',\
                                              'has_secondary_use_gov_office':'boolean',\
                                              'has_secondary_use_use_police':'boolean',\
                                              'has_secondary_use_other':'boolean'
                                              })
train_labels = pd.read_csv("train_labels.csv")
test_values = pd.read_csv("test_values.csv")

In [3]:
x_pre = pd.get_dummies(train_values).set_index('building_id')
y_pre = train_labels.loc[:,'damage_grade']

x = x_pre
y = y_pre
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 23)

In [4]:
hiper = {"hidden_layer_sizes" : [10,20,30,40,50,100,200,500,1000], 
         "activation": ["identity", "logistic", "tanh", "relu"],
        "solver": ["lbfgs","sgd","adam"],
        "learning_rate":['constant', "invscaling", "adaptive"],
        }
nn_mlp = MLPClassifier()
rn_sr = RandomizedSearchCV(nn_mlp,param_distributions = hiper, scoring = 'f1_micro',cv=5,n_jobs=-1)


In [5]:
rn_sr.fit(x_train, y_train)

RandomizedSearchCV(cv=5, estimator=MLPClassifier(), n_jobs=-1,
                   param_distributions={'activation': ['identity', 'logistic',
                                                       'tanh', 'relu'],
                                        'hidden_layer_sizes': [10, 20, 30, 40,
                                                               50, 100, 200,
                                                               500, 1000],
                                        'learning_rate': ['constant',
                                                          'invscaling',
                                                          'adaptive'],
                                        'solver': ['lbfgs', 'sgd', 'adam']},
                   scoring='f1_micro')

In [6]:
print(rn_sr.best_score_)
bst_parms = rn_sr.best_params_
print(bst_parms)

0.5698244435917115
{'solver': 'adam', 'learning_rate': 'constant', 'hidden_layer_sizes': 40, 'activation': 'tanh'}


In [8]:
mlp_class = MLPClassifier(hidden_layer_sizes = bst_parms["hidden_layer_sizes"],
                         activation = bst_parms["activation"],
                         solver = bst_parms["solver"],
                         learning_rate = bst_parms["learning_rate"])
model0 = mlp_class.fit(x_train, y_train)

model0.predict(x_test)
print("Training Score: {}".format(mlp_class.score(x_train, y_train)))
print("Test Score: {}".format(mlp_class.score(x_test, y_test)))

Training Score: 0.569392747505756
Test Score: 0.5682738243702155


In [9]:
gd_sr = GridSearchCV(nn_mlp,param_grid = hiper, scoring = 'f1_micro',cv=5,n_jobs=-1)

In [None]:
gd_sr.fit(x_train,y_train)

In [None]:
print(gd_sr.best_score_)
bst_parms1 = gd_sr.best_params_
print(bst_parms)

In [None]:
mlp_class1 = MLPClassifier(hidden_layer_sizes = bst_parms1["hidden_layer_sizes"],
                         activation = bst_parms1["activation"],
                         solver = bst_parms1["solver"],
                         learning_rate = bst_parms1["learning_rate"])
model1 = mlp_class.fit(x_train, y_train)

model1.predict(x_test)
print("Training Score: {}".format(mlp_class1.score(x_train, y_train)))
print("Test Score: {}".format(mlp_class1.score(x_test, y_test)))