In [1]:
import joblib
import json
import pandas
import warnings
import numpy

import sklearn.neural_network
import sklearn.model_selection
import sklearn.exceptions

# Ignore every warning of category sklearn.exceptions.ConvergenceWarning from
# here on (presumably to keep the training output below readable).
# NOTE(review): this also hides the signal that a fit stopped before it
# converged — confirm that is acceptable for the models trained here.
warnings.filterwarnings("ignore", category=sklearn.exceptions.ConvergenceWarning)

# Instructions

- Read **the training data** from the CSV file and set the `id` column as the index


In [2]:
# Load the training table and use its `id` column as the row index.
data_train = pandas.read_csv('./data/features.train.csv', index_col='id')
data_train

Unnamed: 0_level_0,duration,credit_amount,age,employment,own_telephone,foreign_worker,installment_commitment,residence_since,existing_credits,num_dependents,...,property_magnitude_life insurance,property_magnitude_car,property_magnitude_real estate,other_payment_plans_none,other_payment_plans_stores,other_payment_plans_bank,housing_for free,housing_rent,housing_own,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
253,0.277814,0.320904,-0.080669,2.0,0.0,1.0,1.0,2.0,1.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
667,2.289342,0.130313,-0.765274,2.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
85,-0.727949,-0.642249,-0.594123,2.0,1.0,1.0,3.0,1.0,1.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
969,-0.811763,0.246356,0.347209,2.0,0.0,1.0,0.0,1.0,1.0,1.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
75,-0.727949,-0.602162,2.572174,4.0,0.0,1.0,3.0,3.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
835,-0.727949,-0.758292,1.031813,2.0,0.0,1.0,3.0,3.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0
192,0.529255,0.237916,0.004906,2.0,1.0,1.0,3.0,1.0,0.0,1.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
629,-0.979390,0.208730,2.401022,4.0,0.0,1.0,0.0,3.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
559,-0.225068,-0.460801,-0.422972,1.0,0.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


In [3]:
# Hyper-parameter grid handed to the grid search: only `alpha` is varied.
search_parameters = {
    # Wider alternative grid, kept for reference:
    # 'alpha': [0.2, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001]
    #
    # Ten evenly spaced values 0.001, 0.002, ..., 0.010.
    # The grid is built from integers and divided once, so every entry is the
    # closest float to its decimal value. numpy.arange with a float step
    # accumulates rounding error and yields values such as
    # 0.009000000000000001 and 0.010000000000000002.
    'alpha': numpy.arange(1, 11) / 1000
}

In [4]:
# Base classifier for the search: a multi-layer perceptron with two hidden
# layers (20 and 10 units) and logistic activations, trained with SGD.
model = sklearn.neural_network.MLPClassifier(
    # Architecture
    hidden_layer_sizes=(20, 10),
    activation='logistic',
    # Optimiser: constant learning rate, momentum switched off
    solver='sgd',
    learning_rate='constant',
    learning_rate_init=0.1,
    momentum=0.0,
    nesterovs_momentum=False,
    # Training loop
    batch_size=32,
    shuffle=True,
    max_iter=2000,
    # Regularisation strength; the grid search over `search_parameters`
    # supplies its own `alpha` values in place of this one.
    alpha=0.0,
    # Fixed seed
    random_state=0,
)

In [5]:
# Wrap the base classifier in a cross-validated grid search over
# `search_parameters`.
# NOTE(review): `model` is rebound from the bare classifier to the search
# object, so re-running this cell on its own would wrap a search inside
# another search. Re-run the classifier cell first.
model = sklearn.model_selection.GridSearchCV(
    model,               # estimator
    search_parameters,   # param_grid
    verbose=2,           # verbose progress output
    n_jobs=-1,           # -1: run fits in parallel on all available cores
)

In [6]:
# Run the search: every column except `label` is passed as a feature and
# `label` is the target.
model.fit(
    data_train.drop(columns=['label']),
    data_train['label'],
)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [7]:
# Put the search's cross-validation results into a DataFrame and display it.
cv_results = pandas.DataFrame.from_dict(model.cv_results_)
cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,2.901165,2.31794,0.003097,0.00047,0.001,{'alpha': 0.001},0.78,0.733333,0.773333,0.713333,0.786667,0.757333,0.028783,10
1,3.004087,2.46621,0.003764,0.001672,0.002,{'alpha': 0.002},0.78,0.74,0.773333,0.713333,0.786667,0.758667,0.027777,5
2,2.593358,2.271103,0.002048,0.000192,0.003,{'alpha': 0.003},0.78,0.74,0.773333,0.713333,0.786667,0.758667,0.027777,5
3,2.09507,1.364303,0.002501,0.000722,0.004,{'alpha': 0.004},0.78,0.74,0.773333,0.76,0.786667,0.768,0.016546,4
4,2.00382,1.08284,0.002525,0.000501,0.005,{'alpha': 0.005},0.78,0.74,0.773333,0.766667,0.786667,0.769333,0.016111,3
5,1.554941,0.868395,0.002347,0.000515,0.006,{'alpha': 0.006},0.78,0.74,0.773333,0.773333,0.786667,0.770667,0.016111,2
6,1.783271,0.913148,0.002757,0.001086,0.007,{'alpha': 0.007},0.78,0.74,0.773333,0.773333,0.793333,0.772,0.017588,1
7,1.37624,0.205042,0.002149,0.000316,0.008,{'alpha': 0.008},0.78,0.74,0.773333,0.706667,0.793333,0.758667,0.031383,5
8,1.382648,0.139024,0.002464,0.000637,0.009,{'alpha': 0.009000000000000001},0.78,0.74,0.773333,0.706667,0.793333,0.758667,0.031383,5
9,1.421652,0.164297,0.002633,0.00078,0.01,{'alpha': 0.010000000000000002},0.78,0.74,0.773333,0.706667,0.793333,0.758667,0.031383,5


In [8]:
# Show the candidates ordered by rank, best-ranked (rank 1) first.
cv_results.sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
6,1.783271,0.913148,0.002757,0.001086,0.007,{'alpha': 0.007},0.78,0.74,0.773333,0.773333,0.793333,0.772,0.017588,1
5,1.554941,0.868395,0.002347,0.000515,0.006,{'alpha': 0.006},0.78,0.74,0.773333,0.773333,0.786667,0.770667,0.016111,2
4,2.00382,1.08284,0.002525,0.000501,0.005,{'alpha': 0.005},0.78,0.74,0.773333,0.766667,0.786667,0.769333,0.016111,3
3,2.09507,1.364303,0.002501,0.000722,0.004,{'alpha': 0.004},0.78,0.74,0.773333,0.76,0.786667,0.768,0.016546,4
1,3.004087,2.46621,0.003764,0.001672,0.002,{'alpha': 0.002},0.78,0.74,0.773333,0.713333,0.786667,0.758667,0.027777,5
2,2.593358,2.271103,0.002048,0.000192,0.003,{'alpha': 0.003},0.78,0.74,0.773333,0.713333,0.786667,0.758667,0.027777,5
7,1.37624,0.205042,0.002149,0.000316,0.008,{'alpha': 0.008},0.78,0.74,0.773333,0.706667,0.793333,0.758667,0.031383,5
8,1.382648,0.139024,0.002464,0.000637,0.009,{'alpha': 0.009000000000000001},0.78,0.74,0.773333,0.706667,0.793333,0.758667,0.031383,5
9,1.421652,0.164297,0.002633,0.00078,0.01,{'alpha': 0.010000000000000002},0.78,0.74,0.773333,0.706667,0.793333,0.758667,0.031383,5
0,2.901165,2.31794,0.003097,0.00047,0.001,{'alpha': 0.001},0.78,0.733333,0.773333,0.713333,0.786667,0.757333,0.028783,10


In [9]:
# Display the parameter setting the grid search selected as best.
model.best_params_

{'alpha': 0.007}