## Preprocessing

In [1]:
!nvidia-smi -L

GPU 0: NVIDIA GeForce RTX 3070 Laptop GPU (UUID: GPU-54856215-75bb-1551-b47a-9e3ba83e1e40)


In [2]:
import pandas as pd
data = pd.read_csv('data.csv')

In [3]:
X = data.iloc[:, 1:-1]
Y = data.iloc[:, 0]


In [4]:
X = pd.get_dummies(X, columns=['Accident_Type_Code'])


In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)


In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=0, shuffle=True)

X_test, X_val, Y_test, Y_val = train_test_split(
    X_test, Y_test, test_size=0.3, random_state=0, shuffle=True)


In [7]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(Y_train)
print(le.classes_)
Y_train = le.transform(Y_train)
Y_test = le.transform(Y_test)
Y_val = le.transform(Y_val)


['Highly_Fatal_And_Damaging' 'Minor_Damage_And_Injuries'
 'Significant_Damage_And_Fatalities'
 'Significant_Damage_And_Serious_Injuries']


## Neural Network


### Baseline Model and function


In [8]:
from tensorflow import keras

import warnings
warnings.filterwarnings("ignore")


In [9]:
def build_model(hp):
    feature_dim = X_train.shape[1]

    # Defining the choice of hyperparameters
    kernel_init = hp.Choice(
        'kernel_init', ['uniform', 'lecun_uniform', 'normal', 'zero',
                        'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'])

    activation = hp.Choice('activation', ['softmax', 'softplus', 'softsign',
                                          'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear'])

    optimizer = hp.Choice('optimizer', ['SGD', 'RMSprop', 'Adagrad',
                                        'Adadelta', 'Adam', 'Adamax', 'Nadam'])

    hidden_layers = hp.Choice(
        'hidden_layers', [1, 2, 4, 8, 10, 12, 14, 16, 18, 20])

    dropout = hp.Boolean('dropout')

    neurons_num = hp.Choice('neurons_num', [1, 2, 4, 8, 16, 32, 64, 128, 256])

    # Model Definition
    model = keras.Sequential()

    model.add(keras.layers.Dense(feature_dim, input_dim=feature_dim,
              kernel_initializer=kernel_init, activation=activation))

    for _ in range(0, hidden_layers-1):
        model.add(keras.layers.Dense(
            neurons_num, kernel_initializer=kernel_init, activation=activation))

    if dropout:
        model.add(keras.layers.Dropout(0.2))

    model.add(keras.layers.Dense(
        4, kernel_initializer='normal', activation='softmax'))
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy', metrics=["accuracy"])

    return model


In [10]:
# Function to create the model keras model after finding the hyperparameters
def create_model(kernel_init, activation, optimizer, hidden_layers, dropout, neurons_num):
    feature_dim = X_train.shape[1]
    model = keras.Sequential()

    model.add(keras.layers.Dense(feature_dim, input_dim=feature_dim,
              kernel_initializer=kernel_init, activation=activation))

    for _ in range(0, hidden_layers-1):
        model.add(keras.layers.Dense(
            neurons_num, kernel_initializer=kernel_init, activation=activation))

    if dropout:
        model.add(keras.layers.Dropout(0.2))

    model.add(keras.layers.Dense(
        4, kernel_initializer='normal', activation='softmax'))
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy', metrics=["accuracy"])

    return model


In [11]:
import keras_tuner as kt

tuner = kt.Hyperband(
    hypermodel=build_model,
    objective="val_accuracy",
    project_name="hyper_tuning",
    directory="my_dir",
)


In [12]:
tuner.search_space_summary()

Search space summary
Default search space size: 6
kernel_init (Choice)
{'default': 'uniform', 'conditions': [], 'values': ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'], 'ordered': False}
activation (Choice)
{'default': 'softmax', 'conditions': [], 'values': ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear'], 'ordered': False}
optimizer (Choice)
{'default': 'SGD', 'conditions': [], 'values': ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam'], 'ordered': False}
hidden_layers (Choice)
{'default': 1, 'conditions': [], 'values': [1, 2, 4, 8, 10, 12, 14, 16, 18, 20], 'ordered': True}
dropout (Boolean)
{'default': False, 'conditions': []}
neurons_num (Choice)
{'default': 1, 'conditions': [], 'values': [1, 2, 4, 8, 16, 32, 64, 128, 256], 'ordered': True}


In [13]:
# uncomment before proceeding
# tuner.search(X_train, Y_train, epochs=10, validation_data=(X_val, Y_val))

In [15]:
# Get the top 2 models.
# models = tuner.get_best_models(num_models=2)
# best_model = models[0]

# Build the model.
# best_model.build()
# best_model.summary()

## Predicting and Writing to CSV


### Using Best Model


In [16]:
model = create_model("normal", "softplus", "Nadam", 4, False, 64)


In [17]:
from time import time

tensorboard = keras.callbacks.TensorBoard(log_dir="logs/{}".format(time()))

earlystopping = keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    min_delta=0,
    patience=3,
    verbose=1,
    mode="auto",
    baseline=None,
    restore_best_weights=False,
)


In [19]:
model.fit(X_train, Y_train, epochs=100, batch_size=60, verbose=2,
          callbacks=[tensorboard, earlystopping], validation_split=0.15)


Epoch 1/100
107/107 - 2s - loss: 1.3216 - accuracy: 0.3967 - val_loss: 1.2180 - val_accuracy: 0.5093 - 2s/epoch - 16ms/step
Epoch 2/100
107/107 - 2s - loss: 1.1926 - accuracy: 0.5170 - val_loss: 1.1817 - val_accuracy: 0.4978 - 2s/epoch - 16ms/step
Epoch 3/100
107/107 - 2s - loss: 1.0494 - accuracy: 0.5581 - val_loss: 1.0090 - val_accuracy: 0.6133 - 2s/epoch - 15ms/step
Epoch 4/100
107/107 - 2s - loss: 0.8768 - accuracy: 0.6389 - val_loss: 0.7915 - val_accuracy: 0.6933 - 2s/epoch - 17ms/step
Epoch 5/100
107/107 - 2s - loss: 0.6901 - accuracy: 0.7216 - val_loss: 0.5962 - val_accuracy: 0.8000 - 2s/epoch - 16ms/step
Epoch 6/100
107/107 - 2s - loss: 0.5413 - accuracy: 0.7896 - val_loss: 0.5076 - val_accuracy: 0.8000 - 2s/epoch - 15ms/step
Epoch 7/100
107/107 - 2s - loss: 0.4766 - accuracy: 0.8304 - val_loss: 0.4361 - val_accuracy: 0.8311 - 2s/epoch - 15ms/step
Epoch 8/100
107/107 - 2s - loss: 0.4224 - accuracy: 0.8380 - val_loss: 0.4202 - val_accuracy: 0.8444 - 2s/epoch - 15ms/step
Epoch 9/

<keras.callbacks.History at 0x20e75a4ac70>

In [20]:
scores = model.evaluate(X_test, Y_test, verbose=2)
print("\nTest %s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


55/55 - 0s - loss: 0.2421 - accuracy: 0.9251 - 206ms/epoch - 4ms/step

Test accuracy: 92.51%


In [21]:
model.save('model.h5')


### Writing to CSV


In [None]:
# X_validate = pd.read_csv('test.csv')
# acc_id = X_validate.iloc[:, -1]
# X_validate = X_validate.iloc[:, :-1]


In [None]:
# X_validate = pd.get_dummies(X_validate, columns=['Accident_Type_Code'])
# X_validate = sc.fit_transform(X_validate)


In [None]:
# Y_validate = model.predict_classes(X_validate)
# Y_validate = le.inverse_transform(Y_validate)
# Y_validate


In [None]:
# Y_final = pd.concat([acc_id, pd.Series(Y_validate)], axis=1)
# Y_final.reset_index()
# Y_final.to_csv('final.csv')
