In [0]:
import pandas as pd
import numpy as np
data=pd.read_csv('train.csv')

In [0]:
X=data.iloc[:,1:-1]
Y=data.iloc[:,0]

In [0]:
X=pd.get_dummies(X,columns=['Accident_Type_Code'])

In [0]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X=sc.fit_transform(X)

In [0]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.15,random_state=0)

## Neural Networks

### Baseline Model and function

In [0]:
import tensorflow as tf
from keras import backend as K
from keras.models import Sequential
from keras.layers.core import Dense,Activation
from keras.utils import np_utils,to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore")

In [0]:
sess=tf.Session()
K.set_session(sess)
hidden_layers=8
neurons_num=128
feature_dim=X_train.shape[1]

In [0]:
def create_model(optimizer='Adamax',activation='relu',kernel_initializer='normal',neurons_num=128,hidden_layers=8):
  model=Sequential()
  model.add(Dense(feature_dim,input_dim=feature_dim,kernel_initializer=kernel_initializer,activation=activation))
  for _ in range(0,hidden_layers-1):
    model.add(Dense(neurons_num,kernel_initializer=kernel_initializer,activation=activation))
    model.add(Dense(4,kernel_initializer='normal',activation='softmax'))
    model.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=["accuracy"])
    return model

In [9]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
le.fit(Y_train)
print(le.classes_)
Y_train=le.transform(Y_train)
Y_test=le.transform(Y_test)

['Highly_Fatal_And_Damaging' 'Minor_Damage_And_Injuries'
 'Significant_Damage_And_Fatalities'
 'Significant_Damage_And_Serious_Injuries']


### Tuning Batch and Epochs

In [10]:
model=KerasClassifier(build_fn=create_model,verbose=0)
batch_size=[10,20,40,60,100]
epochs=[10,25,50,100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid=GridSearchCV(estimator=model,param_grid=param_grid,n_jobs=-1,cv=3)
grid_result = grid.fit(X_train, Y_train)






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where








In [11]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.937058 using {'batch_size': 60, 'epochs': 100}
0.926470 (0.004497) with: {'batch_size': 10, 'epochs': 10}
0.933294 (0.005680) with: {'batch_size': 10, 'epochs': 25}
0.932000 (0.003156) with: {'batch_size': 10, 'epochs': 50}
0.934353 (0.004251) with: {'batch_size': 10, 'epochs': 100}
0.923882 (0.001023) with: {'batch_size': 20, 'epochs': 10}
0.932352 (0.004669) with: {'batch_size': 20, 'epochs': 25}
0.933294 (0.003970) with: {'batch_size': 20, 'epochs': 50}
0.934941 (0.003613) with: {'batch_size': 20, 'epochs': 100}
0.925882 (0.004448) with: {'batch_size': 40, 'epochs': 10}
0.932117 (0.006205) with: {'batch_size': 40, 'epochs': 25}
0.932587 (0.006250) with: {'batch_size': 40, 'epochs': 50}
0.933411 (0.004058) with: {'batch_size': 40, 'epochs': 100}
0.913763 (0.013846) with: {'batch_size': 60, 'epochs': 10}
0.932118 (0.001693) with: {'batch_size': 60, 'epochs': 25}
0.935646 (0.003848) with: {'batch_size': 60, 'epochs': 50}
0.937058 (0.002853) with: {'batch_size': 60, 'epochs': 10

### Tuning Optimizer

In [0]:
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=60, verbose=0)
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

In [22]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.937764 using {'optimizer': 'Adamax'}
0.923764 (0.004168) with: {'optimizer': 'SGD'}
0.936117 (0.003405) with: {'optimizer': 'RMSprop'}
0.931647 (0.003635) with: {'optimizer': 'Adagrad'}
0.935882 (0.003063) with: {'optimizer': 'Adadelta'}
0.936470 (0.002755) with: {'optimizer': 'Adam'}
0.937764 (0.001341) with: {'optimizer': 'Adamax'}
0.934235 (0.003848) with: {'optimizer': 'Nadam'}


### Tuning Activation function

In [0]:
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=60, verbose=0)
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

In [25]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.934823 using {'activation': 'relu'}
0.901999 (0.004611) with: {'activation': 'softmax'}
0.917412 (0.002018) with: {'activation': 'softplus'}
0.933647 (0.001319) with: {'activation': 'softsign'}
0.934823 (0.004371) with: {'activation': 'relu'}
0.928470 (0.007046) with: {'activation': 'tanh'}
0.910471 (0.001750) with: {'activation': 'sigmoid'}
0.877766 (0.017700) with: {'activation': 'hard_sigmoid'}
0.617647 (0.002148) with: {'activation': 'linear'}


### Tuning Kernel Initializer

In [0]:
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=60, verbose=0)
kernel_initializer = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
param_grid = dict(kernel_initializer=kernel_initializer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

In [28]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.935529 using {'kernel_initializer': 'normal'}
0.934235 (0.003007) with: {'kernel_initializer': 'uniform'}
0.933412 (0.000603) with: {'kernel_initializer': 'lecun_uniform'}
0.935529 (0.003770) with: {'kernel_initializer': 'normal'}
0.305059 (0.001805) with: {'kernel_initializer': 'zero'}
0.934470 (0.004602) with: {'kernel_initializer': 'glorot_normal'}
0.935176 (0.004584) with: {'kernel_initializer': 'glorot_uniform'}
0.927176 (0.002802) with: {'kernel_initializer': 'he_normal'}
0.927999 (0.006895) with: {'kernel_initializer': 'he_uniform'}


### Tuning Number of Neurons

In [0]:
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=60, verbose=0)
neurons_num = [1, 2, 4, 8, 16, 32, 64, 128, 256]
param_grid = dict(neurons_num=neurons_num)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

In [31]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.939647 using {'neurons_num': 128}
0.660477 (0.065096) with: {'neurons_num': 1}
0.871882 (0.005953) with: {'neurons_num': 2}
0.866938 (0.056949) with: {'neurons_num': 4}
0.934353 (0.001807) with: {'neurons_num': 8}
0.936470 (0.001009) with: {'neurons_num': 16}
0.933294 (0.003179) with: {'neurons_num': 32}
0.936117 (0.004271) with: {'neurons_num': 64}
0.939647 (0.001763) with: {'neurons_num': 128}
0.937176 (0.002891) with: {'neurons_num': 256}


### Tuning Hidden Layers

In [0]:
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=60, verbose=0)
hidden_layers = [1,2,4,8,10,12,14,16,18,20]
param_grid = dict(hidden_layers=hidden_layers)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, Y_train)

In [34]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.937176 using {'hidden_layers': 2}
nan (nan) with: {'hidden_layers': 1}
0.937176 (0.006081) with: {'hidden_layers': 2}
0.936588 (0.004331) with: {'hidden_layers': 4}
0.934705 (0.002572) with: {'hidden_layers': 8}
0.934824 (0.003667) with: {'hidden_layers': 10}
0.937058 (0.002321) with: {'hidden_layers': 12}
0.935529 (0.003247) with: {'hidden_layers': 14}
0.935999 (0.003770) with: {'hidden_layers': 16}
0.936470 (0.004858) with: {'hidden_layers': 18}
0.934940 (0.005341) with: {'hidden_layers': 20}


## Predicting and Writing to CSV

### Using Best Model

In [0]:
model=create_model("Adamax",'relu','normal',128,2)

In [36]:
from keras.callbacks import TensorBoard
from time import time

tensorboard=TensorBoard(log_dir="logs/{}".format(time()))
model.fit(X_train,Y_train,epochs=100,batch_size=60,verbose=2,callbacks=[tensorboard],validation_split=0.15)

Train on 7225 samples, validate on 1275 samples












Epoch 1/100
 - 0s - loss: 1.2396 - acc: 0.4454 - val_loss: 1.0509 - val_acc: 0.5827






Epoch 2/100
 - 0s - loss: 0.9494 - acc: 0.6061 - val_loss: 0.8599 - val_acc: 0.6361
Epoch 3/100
 - 0s - loss: 0.7724 - acc: 0.6728 - val_loss: 0.7075 - val_acc: 0.7067
Epoch 4/100
 - 0s - loss: 0.6430 - acc: 0.7412 - val_loss: 0.5980 - val_acc: 0.7686
Epoch 5/100
 - 0s - loss: 0.5445 - acc: 0.7916 - val_loss: 0.5135 - val_acc: 0.8149
Epoch 6/100
 - 0s - loss: 0.4696 - acc: 0.8328 - val_loss: 0.4478 - val_acc: 0.8565
Epoch 7/100
 - 0s - loss: 0.4142 - acc: 0.8591 - val_loss: 0.4025 - val_acc: 0.8745
Epoch 8/100
 - 0s - loss: 0.3734 - acc: 0.8821 - val_loss: 0.3672 - val_acc: 0.8910
Epoch 9/100
 - 0s - loss: 0.3425 - acc: 0.8963 - val_loss: 0.3407 - val_acc: 0.8965
Epoch 10/100
 - 0s - loss: 0.3188 - acc: 0.9077 - val_loss: 0.3194 - val_acc: 0.9035
Epoch 11/100
 - 0s - loss: 0.3006 - acc: 0.9118 - val_loss: 0.3009 - val_acc: 0.9122
Epoch 12/100
 - 0s - loss: 0.2863 - acc: 0.9189 - val_loss: 0.2881 - val_acc: 0.9059
Epoch 13/100
 - 0s - loss: 0.2748 - acc: 0.9217 - val_loss: 0.2762 - val_

<keras.callbacks.History at 0x7fdcf4143a90>

In [37]:
scores=model.evaluate(X_test,Y_test,verbose=2)
print("\nTest %s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


Test acc: 93.93%


### Writing to CSV

In [0]:
X_validate=pd.read_csv('test.csv')
acc_id=X_validate.iloc[:,-1]
X_validate=X_validate.iloc[:,:-1]

In [0]:
X_validate=pd.get_dummies(X_validate,columns=['Accident_Type_Code'])

In [0]:
X_validate=sc.fit_transform(X_validate)

In [41]:
Y_validate=model.predict_classes(X_validate)
Y_validate

array([0, 2, 3, ..., 3, 3, 0])

In [42]:
Y_validate=le.inverse_transform(Y_validate)
Y_validate

array(['Highly_Fatal_And_Damaging', 'Significant_Damage_And_Fatalities',
       'Significant_Damage_And_Serious_Injuries', ...,
       'Significant_Damage_And_Serious_Injuries',
       'Significant_Damage_And_Serious_Injuries',
       'Highly_Fatal_And_Damaging'], dtype=object)

In [0]:
Y_final=pd.concat([acc_id,pd.Series(Y_validate)],axis=1)

In [44]:
Y_final.reset_index()
Y_final

Unnamed: 0,Accident_ID,0
0,1,Highly_Fatal_And_Damaging
1,10,Significant_Damage_And_Fatalities
2,14,Significant_Damage_And_Serious_Injuries
3,17,Highly_Fatal_And_Damaging
4,21,Significant_Damage_And_Fatalities
...,...,...
2495,12484,Highly_Fatal_And_Damaging
2496,12487,Significant_Damage_And_Serious_Injuries
2497,12488,Significant_Damage_And_Serious_Injuries
2498,12491,Significant_Damage_And_Serious_Injuries


In [0]:
Y_final.to_csv('final.csv')

In [17]:
K.clear_session()











