In [1]:
import math
import time
# Saved in the d2l package for later use
class Timer(object):
    """Record multiple running times."""
    def __init__(self):
        self.times = []
        self.start()

    def start(self):
        # Start the timer
        self.start_time = time.time()

    def stop(self):
        # Stop the timer and record the time in a list
        self.times.append(time.time() - self.start_time)
        return self.times[-1]

    def avg(self):
        # Return the average time
        return sum(self.times)/len(self.times)

    def sum(self):
        # Return the sum of time
        return sum(self.times)

    def cumsum(self):
        # Return the accumuated times
        return np.array(self.times).cumsum().tolist()

In [2]:
%matplotlib inline
import d2l
import pandas as pd
import mxnet
import numpy
from mxnet import np
from mxnet import autograd, init, gluon
from mxnet import ndarray as nd

In [3]:
# Accuracy for a single prediction
def accuracy(output, label):
    return nd.mean(output.argmax(axis = 1) == label).asscalar()

# Accuracy for the whole test data iteretor
def evaluate_accuracy(data_iterator, net):
    acc = 0.
    for data, label in data_iterator:
        output = net(data)
        #print(output)
        acc += accuracy(output, label)
        #break
    return acc/len(data_iterator)

In [4]:
accident = pd.read_csv('../data/US_Accidents_2019_Segment.csv')

In [5]:
accident = accident.dropna(subset = ['Severity']) # Make sure each the label has no nan values
accident.shape

(953630, 13)

In [6]:
accident.columns

Index(['Wind_Speed(mph)', 'Precipitation(in)', 'Temperature(F)',
       'Wind_Chill(F)', 'Wind_Direction', 'Humidity(%)', 'Pressure(in)',
       'Visibility(mi)', 'Severity', 'Distance(mi)', 'Side',
       'Weather_Condition', 'Sunrise_Sunset'],
      dtype='object')

In [21]:
# Randomly sample some data from each type of label
df2 = accident[accident.Severity == 2].sample(30000)
df3 = accident[accident.Severity == 3].sample(30000)
df4 = accident[accident.Severity == 4]

In [22]:
# put each sampled data together
tempData = pd.concat([df2,df3,df4])
# make the label strat from 0
tempData.Severity = tempData.Severity -2
# make a random array
sampler = numpy.random.permutation(tempData.shape[0])
# randomly sort the data
tempData = tempData.take(sampler)

In [23]:
numeric_features = tempData.dtypes[tempData.dtypes == 'float64'].index
tempData[numeric_features] = tempData[numeric_features].apply(lambda x: (x - x.mean()/x.std()))
#tempData

In [24]:
# using one-hot encoding to express nominal data
dummyData = pd.get_dummies(tempData.dropna())
dummyData = dummyData.astype('float32')

In [25]:
dummyData.shape

(63797, 104)

## Using 7:3 Train-Test Data to Train

In [26]:
sampler = numpy.random.permutation(dummyData.shape[0])
# randomly take 70% of data to be the train data and 30% to be the test data
train_data = dummyData.take(sampler[0:int(len(sampler)*0.7)])
test_data = dummyData.take(sampler[int(len(sampler)*0.7):])

train_features = train_data.drop(columns = 'Severity')
test_features = test_data.drop(columns ='Severity')

train_labels = train_data['Severity']
test_labels = test_data['Severity']

In [27]:
batch_size = 512
#turn the dataset to ndarray, and add it to data loader for later use
train_features_array = nd.array(train_features.values, dtype = 'float32')
test_features_array = nd.array(test_features.values, dtype = 'float32')

train_labels_array = nd.array(train_labels.values, dtype = 'float32')
test_labels_array = nd.array(test_labels.values, dtype = 'float32')

train_iter = d2l.load_array((train_features_array, train_labels_array), batch_size)
test_iter = d2l.load_array((test_features_array, test_labels_array), batch_size)

In [28]:
def train(net, train_data, test_data, num_epoch,batch_size):
    for epoch in range(num_epoch):
        train_loss = 0.
        train_acc = 0.
        # iterate the training data to network
        for data, label in train_data:
            with autograd.record():
                output = net(data)
                # compute the loss 
                loss = softmax_cross_entropy(output, label)
            # calculate the gradient of loss
            loss.backward()
            # used adam to upgrate the parameters
            trainer.step(batch_size = 32)
            # calculate the loss and accuracy of the module
            train_loss += nd.mean(loss).asscalar()
            train_acc += accuracy(output, label)
        # calculate the accuracy of test data
        test_acc = evaluate_accuracy(test_data, net)
        # print the result every 5 epochs
        if epoch % 5 == 0:
            print('Epoch: %d, Loss: %f, Train acc: %f, Test acc: %f' % 
                  (epoch, train_loss/len(train_data), train_acc/len(train_data), test_acc))


In [29]:
dropout1 , dropout2 = 0.15,0.4

# bulid a neural network with two hidden layer and droup out rate for each hidden layer is 0.15 and 0.4
net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(64, activation = 'relu'),
        gluon.nn.Dropout(dropout1),
        gluon.nn.Dense(16, activation = 'relu'),
        gluon.nn.Dropout(dropout2),
        gluon.nn.Dense(3)) # 3 labels

net.initialize(init.Normal(sigma=0.01))
# Loss function is cross entropy of softmax
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate':.001})

# Train the module, the result is not good
batch_size = 512
num_epoch = 100
timer = Timer()
train(net, train_iter, test_iter, num_epoch, batch_size)
'%.5f sec' % timer.stop()

Epoch: 0, Loss: 1.088227, Train acc: 0.373768, Test acc: 0.466485
Epoch: 5, Loss: 0.934964, Train acc: 0.539424, Test acc: 0.542321
Epoch: 10, Loss: 0.896186, Train acc: 0.565953, Test acc: 0.571512
Epoch: 15, Loss: 0.875305, Train acc: 0.582327, Test acc: 0.589828
Epoch: 20, Loss: 0.864193, Train acc: 0.585355, Test acc: 0.594765
Epoch: 25, Loss: 0.858195, Train acc: 0.589232, Test acc: 0.602164
Epoch: 30, Loss: 0.841868, Train acc: 0.594243, Test acc: 0.608973
Epoch: 35, Loss: 0.845170, Train acc: 0.592568, Test acc: 0.609122
Epoch: 40, Loss: 0.838805, Train acc: 0.595537, Test acc: 0.616132
Epoch: 45, Loss: 0.839641, Train acc: 0.595606, Test acc: 0.600887
Epoch: 50, Loss: 0.839284, Train acc: 0.594997, Test acc: 0.600232
Epoch: 55, Loss: 0.837469, Train acc: 0.591077, Test acc: 0.587477
Epoch: 60, Loss: 0.828360, Train acc: 0.598037, Test acc: 0.478410
Epoch: 65, Loss: 0.830244, Train acc: 0.598680, Test acc: 0.611515
Epoch: 70, Loss: 0.837616, Train acc: 0.595001, Test acc: 0.5798

'148.77922 sec'

## k-fold validation 

In [12]:
# use the whole dataset to be the training data
# test data is a segment of training data
train_features_data = dummyData.drop(columns ='Severity')
train_labels_data = dummyData['Severity']

In [13]:
def get_k_fold_data(k, i, features, labels):
    fold_size = features.shape[0]//k
    f_train , l_train = None, None
    f_test, l_test = None, None
    # iterate each data segment
    for j in range(k):
        index = slice(j*fold_size, (j+1)*fold_size)
        f_part = features[index]
        l_part = labels[index]
        # i-th data segment will be the test data in i-th iteration of k-fold
        if j == i:
            f_test = f_part
            l_test = l_part
        elif f_train is None:
            f_train = f_part
            l_train = l_part
        else:
            # other data segment will concated together to be training data
            f_train = pd.concat((f_train, f_part), axis = 0)
            l_train = pd.concat((l_train, l_part), axis = 0)
    
    # using ndarray will cause this step as the shape of ndarray will change after extraction of data from ndarray
    # so we transfrom the dataset to array after slicing, this question won't happen in np.array
    ftrain = nd.array(f_train.values, dtype = 'float32')
    ltrain = nd.array(l_train.values, dtype = 'float32')
    ftest = nd.array(f_test.values, dtype = 'float32')
    ltest = nd.array(l_test.values, dtype = 'float32')
    return ftrain, ltrain, ftest, ltest

In [14]:
def train_k_fold(k, net, train_features, train_labels, num_epoch, batch_size):
    for i in range(k):
        # extract training data and test data and turn them into data iterator
        trainFeatureTemp, trainLabelTemp, testFeatureTemp, testLabelTemp= get_k_fold_data(k,i,train_features, train_labels)
        train_iter = d2l.load_array((trainFeatureTemp, trainLabelTemp), batch_size)
        test_iter = d2l.load_array((testFeatureTemp, testLabelTemp), batch_size)
        # the same training process
        for epoch in range(num_epoch):
            train_loss = 0.
            train_acc = 0.
            for data, label in train_iter:
                with autograd.record():
                    output = net(data)
                    loss = softmax_cross_entropy(output, label)
                loss.backward()
                trainer.step(batch_size)
                train_loss += nd.mean(loss).asscalar()
                train_acc += accuracy(output, label)
            test_acc = evaluate_accuracy(test_iter, net)
            # print the result for each 5 epoches
            if (epoch+1)% 5 == 0 :
                print('K: %d, Epoch: %d, Loss: %f, Train acc: %f, Test acc: %f' % 
                  (i ,epoch + 1, train_loss/len(train_iter), train_acc/len(train_iter), test_acc))

In [18]:
dropout1 , dropout2 = 0.15,0.45
net = gluon.nn.Sequential()
# we bulid a neural network with two hidden layer.
net.add(gluon.nn.Dense(64, activation = 'relu'),
        gluon.nn.Dropout(dropout1),
        gluon.nn.Dense(16, activation = 'relu'),
        gluon.nn.Dropout(dropout2),
        gluon.nn.Dense(3))

# initialize the parameters of neural net
net.initialize(init.Normal(sigma=0.01))
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate':.001})

# 3-fold cross validation
k = 3
num_epoch = 50
batch_size = 512
timer = Timer()
train_k_fold(k,net, train_features_data, train_labels_data, num_epoch, batch_size)
'%.5f sec' % timer.stop()

K: 0, Epoch: 5, Loss: 0.942420, Train acc: 0.524151, Test acc: 0.548083
K: 0, Epoch: 10, Loss: 0.893661, Train acc: 0.559687, Test acc: 0.578456
K: 0, Epoch: 15, Loss: 0.876442, Train acc: 0.570292, Test acc: 0.575463
K: 0, Epoch: 20, Loss: 0.865400, Train acc: 0.578780, Test acc: 0.589434
K: 0, Epoch: 25, Loss: 0.859876, Train acc: 0.583418, Test acc: 0.582577
K: 0, Epoch: 30, Loss: 0.861189, Train acc: 0.584826, Test acc: 0.599368
K: 0, Epoch: 35, Loss: 0.863054, Train acc: 0.585131, Test acc: 0.600528
K: 0, Epoch: 40, Loss: 0.857032, Train acc: 0.585543, Test acc: 0.590339
K: 0, Epoch: 45, Loss: 0.862229, Train acc: 0.584179, Test acc: 0.605824
K: 0, Epoch: 50, Loss: 0.848974, Train acc: 0.589441, Test acc: 0.596036
K: 1, Epoch: 5, Loss: 0.839138, Train acc: 0.597433, Test acc: 0.615107
K: 1, Epoch: 10, Loss: 0.839056, Train acc: 0.600026, Test acc: 0.592458
K: 1, Epoch: 15, Loss: 0.832848, Train acc: 0.600591, Test acc: 0.617149
K: 1, Epoch: 20, Loss: 0.835314, Train acc: 0.601345,

'214.58635 sec'

In [17]:
dropout1 , dropout2 = 0.15,0.45
net = gluon.nn.Sequential()
# we bulid a neural network with two hidden layer. 
net.add(gluon.nn.Dense(64, activation = 'relu'),
        gluon.nn.Dropout(dropout1),
        gluon.nn.Dense(16, activation = 'relu'),
        gluon.nn.Dropout(dropout2),
        gluon.nn.Dense(3))

net.initialize(init.Normal(sigma=0.01))
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate':.001})


#train_k_fold(k, net, train_features, train_labels, num_epoch, batch_size)
# the result is not good neither
k = 3
num_epoch = 50
batch_size = 512
timer = Timer()
train_k_fold(k,net, train_features_data, train_labels_data, num_epoch, batch_size)
'%.5f sec' % timer.stop()

K: 0, Epoch: 5, Loss: 0.949653, Train acc: 0.525005, Test acc: 0.526515
K: 0, Epoch: 10, Loss: 0.900550, Train acc: 0.562258, Test acc: 0.570208
K: 0, Epoch: 15, Loss: 0.883537, Train acc: 0.572217, Test acc: 0.569165
K: 0, Epoch: 20, Loss: 0.868496, Train acc: 0.582038, Test acc: 0.571536
K: 0, Epoch: 25, Loss: 0.858387, Train acc: 0.580471, Test acc: 0.576969
K: 0, Epoch: 30, Loss: 0.847056, Train acc: 0.588301, Test acc: 0.604543
K: 0, Epoch: 35, Loss: 0.859027, Train acc: 0.584104, Test acc: 0.577208
K: 0, Epoch: 40, Loss: 0.859064, Train acc: 0.583046, Test acc: 0.579786
K: 0, Epoch: 45, Loss: 0.845339, Train acc: 0.585502, Test acc: 0.611285
K: 0, Epoch: 50, Loss: 0.848499, Train acc: 0.589779, Test acc: 0.580684
K: 1, Epoch: 5, Loss: 0.849997, Train acc: 0.591321, Test acc: 0.589962
K: 1, Epoch: 10, Loss: 0.844705, Train acc: 0.595061, Test acc: 0.614676
K: 1, Epoch: 15, Loss: 0.847025, Train acc: 0.597855, Test acc: 0.619027
K: 1, Epoch: 20, Loss: 0.844524, Train acc: 0.599210,

'234.15255 sec'

In [30]:
# using loop to record the data requires too much time
# so we collect the hyper parameters and training results manually

results = {'layers' : [2,2,2,2,3,3,3,3,3,3,3,3,3],
'hidden_neuron':[32,128,128,128,(64,16),(64,16),(64,16),(64,16),(64,16),(64,16),(64,16),(64,16),(64,16)],
'first_droupout':[None,None,None,None,0.15,0.15,0.15,0.15,0.15,0.15,0.20, None,0.15],
'second_droupout':[None,None,None,None,0.45,0.45,0.45,0.45,0.45,0.45,0.50, None, 0.45],
'learning_rate': [.001,.001,.01,.1,.001,.001,.0001,.001,.0001,.0001,.0001,.0001,1],
'k' : [4,4,3,3,3,3,3,4,4,4,4,4,3],
'num_epoch': [100,50,50,50,50,100,100,100,50,100,100,100,50],
'batch_size' : [512,512,512,512,512,512,512,512,512,512,512,512,512],
'test_acc' : [0.631811,0.631336,0.617333,0.350696,0.620943,0.622151,0.623949,0.626174,0.609783,0.634133,0.627020,0.632617,0.350419]
}

In [31]:
result = pd.DataFrame(results)
result

Unnamed: 0,layers,hidden_neuron,first_droupout,second_droupout,learning_rate,k,num_epoch,batch_size,test_acc
0,2,32,,,0.001,4,100,512,0.631811
1,2,128,,,0.001,4,50,512,0.631336
2,2,128,,,0.01,3,50,512,0.617333
3,2,128,,,0.1,3,50,512,0.350696
4,3,"(64, 16)",0.15,0.45,0.001,3,50,512,0.620943
5,3,"(64, 16)",0.15,0.45,0.001,3,100,512,0.622151
6,3,"(64, 16)",0.15,0.45,0.0001,3,100,512,0.623949
7,3,"(64, 16)",0.15,0.45,0.001,4,100,512,0.626174
8,3,"(64, 16)",0.15,0.45,0.0001,4,50,512,0.609783
9,3,"(64, 16)",0.15,0.45,0.0001,4,100,512,0.634133
