### import packages

In [5]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

### Random Sample Data 

In [7]:
# Fixed seed so the synthetic data set is identical on every
# Restart Kernel -> Run All; without it the results below cannot be reproduced.
DATA_SEED = 21
rng = np.random.default_rng(DATA_SEED)

# 1000 samples x 16 features, each value drawn uniformly from [0, 100).
x = rng.uniform(0, 100, (1000, 16))
# Binary labels drawn independently of the features: 1 = accelerate, 0 = brake.
y = rng.integers(2, size=1000)

In [9]:
# Wrap the raw feature matrix in a DataFrame so its columns can be named.
data = pd.DataFrame(data=x)

### Prefix
- 앞차 (car ahead): 'f'
- 옆차 (car alongside): 's'
- 뒷차 (car behind): 'l'

In [10]:
# Name the 16 feature columns: four measurements for each of the three
# prefixed cars (f_, s_, l_), followed by four features without a prefix.
data.columns = [
    'f_distance', 'f_size', 'f_speed', 'f_price',
    's_distance', 's_size', 's_speed', 's_price',
    'l_distance', 'l_size', 'l_speed', 'l_price',
    'altitude', 'status', 'slope', 'weather',
]

#### pandas DataFrame, size: 1000 × 16

In [11]:
# Preview only the first ten rows rather than rendering the whole 1000-row frame.
data.head(10)

Unnamed: 0,f_distance,f_size,f_speed,f_price,s_distance,s_size,s_speed,s_price,l_distance,l_size,l_speed,l_price,altitude,status,slope,weather
0,60.681565,79.590212,86.622260,15.109308,49.550627,24.687129,33.960805,77.070117,60.550609,6.196457,39.586626,54.965475,21.305484,60.192807,42.053858,19.239693
1,81.385792,1.639839,5.747231,1.305640,29.013413,25.800537,57.402992,37.423779,3.638031,31.300968,73.402845,13.375304,76.096143,6.854451,99.825116,9.368599
2,94.750704,3.360956,9.705905,76.513742,90.403438,52.759549,65.658562,38.416184,95.097696,93.337193,81.032183,21.896646,66.379955,37.785787,2.738350,22.780221
3,49.823128,95.093569,57.110996,30.591088,0.393801,73.499451,43.922896,88.452499,8.361755,87.554688,50.130107,46.385883,58.140782,88.247018,82.185620,80.339064
4,50.530070,79.502984,83.337614,94.655294,22.988858,22.733027,82.513783,26.553865,11.827262,5.525154,90.655837,94.747166,21.019548,57.831451,57.676941,27.713481
5,74.916071,7.199023,16.504806,96.450630,4.444030,26.241204,48.779465,34.947917,3.999004,53.447736,18.721700,90.999593,3.154849,13.984246,97.794025,93.832456
6,82.436396,65.525983,70.021217,45.066047,42.701908,83.395252,4.751869,40.255526,29.128695,21.650737,10.354087,82.120007,62.365065,11.087538,57.852304,37.996553
7,6.083611,66.537929,40.059809,61.564967,52.520199,89.981495,34.907577,12.119353,58.945952,99.387656,25.029301,42.388209,54.217976,51.955111,16.528186,16.613288
8,79.416465,62.373708,18.397144,80.404381,85.127105,15.765028,47.497655,7.436928,22.048999,30.008044,28.834728,18.750124,62.997424,21.617973,3.362319,3.991108
9,88.942880,82.449975,58.340798,19.452181,80.585126,42.618310,63.004864,80.806725,40.261911,98.733555,31.728514,61.313403,73.296509,94.503606,23.288957,25.961148


### MLP Classifier with Adam

In [53]:
# MLP classifier with three hidden layers of 100 units each, using the
# 'adam' solver.
#
# Original author's notes (translated; not verified by this notebook):
#   - hidden_layer_sizes: multi-layer perceptrons were introduced to handle
#     more complex training data, but accuracy is said to drop as layers are
#     added because of vanishing gradients; stacking one or two layers is
#     reported to give the best performance.
#   - solver: 'lbfgs', 'sgd' and 'adam' were tried, and 'adam' was reported
#     to perform best.
#   - max_iter: number of training iterations.
#   - activation: ReLU, f(t) = max(0, t); the author cites AlexNet, where the
#     ReLU + dropout combination was reported to outperform other
#     activation functions.
clf = MLPClassifier(
    hidden_layer_sizes=(100, 100, 100),
    activation='relu',
    solver='adam',
    alpha=1e-4,
    tol=1e-9,
    max_iter=500,
    random_state=21,
)

In [54]:
# Hold out 25% of the rows for testing; random_state fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    y,
    test_size=0.25,
    random_state=27,
)

In [55]:
# fit() returns the estimator itself, so training and prediction are chained.
y_pred = clf.fit(x_train, y_train).predict(x_test)



In [56]:
# Fraction of held-out samples whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.484

### MLP Classifier with SGD

In [44]:
# Same layer sizes and hyper-parameters as `clf`, but with the 'sgd' solver.
# `activation` is not passed here, so the library default applies.
# `verbose=10` is truthy, so the loss is printed after every iteration.
clf_sgd = MLPClassifier(
    hidden_layer_sizes=(100, 100, 100),
    solver='sgd',
    alpha=1e-4,
    tol=1e-9,
    max_iter=500,
    random_state=21,
    verbose=10,
)

# SGD: instead of averaging the gradient over the entire data set for each update, a 'mini-batch' is formed from a subset of the data, and the gradient of that single batch alone is used to update all parameters.

In [45]:
# Train the SGD model and predict on the test split in one chained call.
# Note: this rebinds `y_pred`, so any earlier predictions under that name are replaced.
y_pred = clf_sgd.fit(x_train, y_train).predict(x_test)

Iteration 1, loss = 7.78097714
Iteration 2, loss = 2.54225407
Iteration 3, loss = 0.92263943
Iteration 4, loss = 0.83791894
Iteration 5, loss = 0.78972587
Iteration 6, loss = 0.74213904
Iteration 7, loss = 0.69747032
Iteration 8, loss = 0.66796697
Iteration 9, loss = 0.64411302
Iteration 10, loss = 0.62454035
Iteration 11, loss = 0.60619506
Iteration 12, loss = 0.59146601
Iteration 13, loss = 0.57940321
Iteration 14, loss = 0.56863041
Iteration 15, loss = 0.55908419
Iteration 16, loss = 0.54991731
Iteration 17, loss = 0.54180500
Iteration 18, loss = 0.53382717
Iteration 19, loss = 0.52630240
Iteration 20, loss = 0.52324921
Iteration 21, loss = 0.51333225
Iteration 22, loss = 0.50715988
Iteration 23, loss = 0.50083597
Iteration 24, loss = 0.49500993
Iteration 25, loss = 0.48733899
Iteration 26, loss = 0.48060686
Iteration 27, loss = 0.47610846
Iteration 28, loss = 0.47402330
Iteration 29, loss = 0.46482763
Iteration 30, loss = 0.45963383
Iteration 31, loss = 0.45391990
Iteration 32, los

Iteration 253, loss = 0.03874602
Iteration 254, loss = 0.03891843
Iteration 255, loss = 0.03855408
Iteration 256, loss = 0.03783996
Iteration 257, loss = 0.03792084
Iteration 258, loss = 0.03752236
Iteration 259, loss = 0.03701795
Iteration 260, loss = 0.03662636
Iteration 261, loss = 0.03639466
Iteration 262, loss = 0.03598397
Iteration 263, loss = 0.03565767
Iteration 264, loss = 0.03541002
Iteration 265, loss = 0.03527672
Iteration 266, loss = 0.03490256
Iteration 267, loss = 0.03459000
Iteration 268, loss = 0.03438432
Iteration 269, loss = 0.03409966
Iteration 270, loss = 0.03372374
Iteration 271, loss = 0.03344192
Iteration 272, loss = 0.03311676
Iteration 273, loss = 0.03292715
Iteration 274, loss = 0.03271927
Iteration 275, loss = 0.03244301
Iteration 276, loss = 0.03227832
Iteration 277, loss = 0.03195140
Iteration 278, loss = 0.03164145
Iteration 279, loss = 0.03142259
Iteration 280, loss = 0.03129292
Iteration 281, loss = 0.03102296
Iteration 282, loss = 0.03079023
Iteration 



In [46]:
# Fraction of held-out samples the SGD-trained model labelled correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.48