In [0]:
import numpy as np         # linear algebra
import sklearn as sk       # machine learning
import pandas as pd        # reading in data files, data cleaning
import matplotlib.pyplot as plt   # for plotting
import seaborn as sns      # visualization tool

file_id = 'car.data'
link = 'https://archive.ics.uci.edu/ml/machine-learning-databases/car/{FILE_ID}'
csv_url = link.format(FILE_ID = file_id)

data = pd.read_csv(csv_url)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1727 entries, 0 to 1726
Data columns (total 7 columns):
vhigh      1727 non-null object
vhigh.1    1727 non-null object
2          1727 non-null object
2.1        1727 non-null object
small      1727 non-null object
low        1727 non-null object
unacc      1727 non-null object
dtypes: object(7)
memory usage: 94.5+ KB


In [0]:
data.columns=['price', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'score']
data.head()

Unnamed: 0,price,maint,doors,persons,lug_boot,safety,score
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


In [0]:
data['price'].replace(['vhigh', 'high', 'med', 'low'], [4, 3, 2, 1], inplace=True)
data['lug_boot'].replace(['big', 'med', 'small'], [3, 2, 1], inplace=True)
data['maint'].replace(['vhigh', 'high', 'med', 'low'], [4, 3, 2, 1], inplace=True)
data['safety'].replace(['high', 'med', 'low'], [3, 2, 1], inplace=True)
data['doors'].replace(['2', '3', '4', '5more'], [2, 3, 4, 5], inplace=True)
data['persons'].replace(['2', '4', 'more'], [2, 4, 5], inplace=True)
data['score'].replace(['unacc', 'acc', 'good', 'vgood'], [1, 2, 3, 4], inplace=True)
data.head()

Unnamed: 0,price,maint,doors,persons,lug_boot,safety,score
0,4,4,2,2,1,2,1
1,4,4,2,2,1,3,1
2,4,4,2,2,2,1,1
3,4,4,2,2,2,2,1
4,4,4,2,2,2,3,1


In [0]:
def z_trans(data):
  return (data - data.mean()) / data.std()

data['p'] = z_trans(data['price'])
data['m'] = z_trans(data['maint'])
data['s'] = z_trans(data['safety'])
data['d'] = z_trans(data['doors'])
data['pp'] = z_trans(data['persons'])
data['lug'] = z_trans(data['lug_boot'])

data.head()

Unnamed: 0,price,maint,doors,persons,lug_boot,safety,score,p,m,s,d,pp,lug
0,4,4,2,2,1,2,1,1.34234,1.34234,-0.000709,-1.34234,-1.336997,-1.225277
1,4,4,2,2,1,3,1,1.34234,1.34234,1.223859,-1.34234,-1.336997,-1.225277
2,4,4,2,2,2,1,1,1.34234,1.34234,-1.225277,-1.34234,-1.336997,-0.000709
3,4,4,2,2,2,2,1,1.34234,1.34234,-0.000709,-1.34234,-1.336997,-0.000709
4,4,4,2,2,2,3,1,1.34234,1.34234,1.223859,-1.34234,-1.336997,-0.000709


Some general rules about the hidden layer are the following based on this paper:* Approximating Number of Hidden layer neurons in Multiple Hidden Layer BPNN Architecture* by Saurabh Karsoliya.

In general:

*   The number of hidden layer neurons are 2/3 (or 70% to 90%) of the size of the input layer.
*   The number of hidden layer neurons should be less than twice of the number of neurons in input layer.
*   The size of the hidden layer neurons is between the input layer size and the output layer size.



In [0]:
X = data[['p', 'm', 's', 'd', 'pp', 'lug']]
y = data['score']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(5, 2), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [0]:
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)

from sklearn.metrics import accuracy_score

print('Training set R^2 =', accuracy_score(y_train, y_pred_train))
print('Test set R^2 =', accuracy_score(y_test, y_pred_test))

Training set R^2 = 0.9160028964518465
Test set R^2 = 0.8728323699421965


In [0]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(max_iter=100)

parameter_space = {
    'hidden_layer_sizes': [(3,3,3), (5,2), (3,2), (5,5), (5,6), (2,3), (3,3), (4,4), (6, 6, 6), (6, 6), (6, 2), (6, 3), (6, 4), (6, 5)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam', 'lbfgs'],
    'alpha': [0.0001, 0.05, 1e-5, 0.01, 0.001],
    'learning_rate': ['constant', 'adaptive', 'invscaling'],
}

from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3)
clf.fit(X_train, y_train)

# Best paramete set
print('Best parameters found:\n', clf.best_params_)

# All results
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
    
y_true, y_pred = y_test , clf.predict(X_test)

from sklearn.metrics import classification_report
print('Results on the test set:')
print(classification_report(y_true, y_pred))



Best parameters found:
 {'activation': 'tanh', 'alpha': 0.01, 'hidden_layer_sizes': (5, 5), 'learning_rate': 'invscaling', 'solver': 'lbfgs'}
0.773 (+/-0.012) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (3, 3, 3), 'learning_rate': 'constant', 'solver': 'sgd'}
0.810 (+/-0.022) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (3, 3, 3), 'learning_rate': 'constant', 'solver': 'adam'}
0.846 (+/-0.010) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (3, 3, 3), 'learning_rate': 'constant', 'solver': 'lbfgs'}
0.757 (+/-0.068) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (3, 3, 3), 'learning_rate': 'adaptive', 'solver': 'sgd'}
0.792 (+/-0.027) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (3, 3, 3), 'learning_rate': 'adaptive', 'solver': 'adam'}
0.846 (+/-0.010) for {'activation': 'identity', 'alpha': 0.0001, 'hidden_layer_sizes': (3, 3, 3), 'learning_rate': 'adaptive', 'solver

In [0]:
clf = MLPClassifier(activation='tanh', alpha=0.01, hidden_layer_sizes=(5, 5), learning_rate='invscaling', solver='lbfgs')

clf.fit(X_train, y_train)

y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)

print('Model ran', clf.n_iter_, 'iterations using', clf.out_activation_, 'function')
print('loss =', clf.loss_)
print('Training set R^2 =', accuracy_score(y_train, y_pred_train))
print('Test set R^2 =', accuracy_score(y_test, y_pred_test))
print('coefficients =', clf.coefs_)


Model ran 201 iterations using softmax function
loss = 0.016453678620426935
Training set R^2 = 0.9942070963070239
Test set R^2 = 0.9884393063583815
coefficients = [array([[-3.6083916 , -0.05708744,  0.04287446, -4.09568286,  4.5456664 ],
       [ 0.46741491, -0.06099174,  0.03186461, -3.84666479,  4.3088371 ],
       [-5.08217235,  6.14873563,  0.51743006, -1.08129268,  0.24500454],
       [-0.05004012, -0.19771971,  0.19527946,  0.14225162,  0.04687904],
       [-1.86377776,  6.74861008, -0.11416303, -2.23700146, -0.05838594],
       [-0.46764833, -0.53283164,  0.45474734, -0.44172129,  0.16140118]]), array([[-2.54267018, -1.84470041,  0.43096763, -0.76810462, -1.99864309],
       [ 2.24598036, -1.65437016,  3.72352202, -0.00640185,  2.1706632 ],
       [ 4.98811218,  4.02654404,  3.93753102, -4.16746593,  0.1933029 ],
       [-2.13614723, -0.1298306 ,  3.47708563, -0.16756904,  0.51911859],
       [ 1.77332583, -5.34665323, -0.95956788,  2.78544961, -4.39429169]]), array([[-3.3358085