In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf

In [2]:
crashes_df = pd.read_csv("Resources\crashes_cleaned_df.csv")
crashes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56414 entries, 0 to 56413
Columns: 159 entries, Unnamed: 0 to STAT_DIV_NAME_Metro
dtypes: float64(7), int64(151), object(1)
memory usage: 68.4+ MB


In [3]:
original_data = pd.read_csv("Resources/Road_Crashes_for_five_Years_Victoria.csv")
original_data = original_data.drop(columns=["X", "Y", "ACCIDENT_NO", "SRNS", "SRNS_ALL", "DIVIDED", "DIVIDED_ALL", "UNKNOWN", "NODE_ID", "OBJECTID"])
original_data = original_data.dropna()

In [5]:
LGA_to_replace = ["MILDURA",
"MACEDON RANGES",
"MURRINDINDI",
"COLAC OTWAY",
"MOORABOOL",
"SOUTH GIPPSLAND",
"SURF COAST",
"CAMPASPE",
"BASS COAST",
"MOIRA",
"WANGARATTA",
"GOLDEN PLAINS",
"WARRNAMBOOL",
"WODONGA",
"CORANGAMITE",
"MANSFIELD",
"MOYNE",
"ALPINE",
"HEPBURN",
"GLENELG",
"STRATHBOGIE",
"SWAN HILL",
"HORSHAM",
"INDIGO",
"BENALLA",
"NORTHERN GRAMPIANS",
"MOUNT ALEXANDER",
"SOUTHERN GRAMPIANS",
"TOWONG",
"PYRENEES",
"ARARAT",
"CENTRAL GOLDFIELDS",
"LODDON",
"GANNAWARRA",
"BULOKE",
"HINDMARSH",
"WEST WIMMERA",
"YARRIAMBIACK",
"(MOUNT HOTHAM)",
"QUEENSCLIFFE",
"(LAKE MOUNTAIN)",
"(MOUNT BULLER)",
"(FALLS CREEK)",
"(MOUNT BAW BAW)",
"(FRENCH ISLAND)",
" ",
"(MOUNT STIRLING)"]

for lga in LGA_to_replace:
    original_data['LGA_NAME'] = original_data['LGA_NAME'].replace(lga,"Other")

# Check to make sure binning was successful
original_data['LGA_NAME'].value_counts()

LGA_NAME
Other                   8384
MELBOURNE               3049
CASEY                   2420
GEELONG                 2273
DANDENONG               1936
HUME                    1870
BRIMBANK                1683
MONASH                  1663
WHITTLESEA              1656
MORELAND                1619
YARRA RANGES            1582
YARRA                   1395
DAREBIN                 1377
KINGSTON                1355
WYNDHAM                 1270
BOROONDARA              1231
WHITEHORSE              1208
MORNINGTON PENINSULA    1163
STONNINGTON             1162
KNOX                    1134
PORT PHILLIP            1103
GLEN EIRA               1083
BENDIGO                 1077
BALLARAT                1040
FRANKSTON               1004
MELTON                   955
MOONEE VALLEY            905
CARDINIA                 882
MAROONDAH                851
BANYULE                  841
MARIBYRNONG              792
HOBSONS BAY              770
MANNINGHAM               732
SHEPPARTON               730
BAYSI

In [6]:
original_data = original_data.drop(original_data.loc[original_data['REGION_NAME'] == " "].index)

In [7]:
model_data_df = crashes_df[['DAY_OF_WEEK_1', 'DAY_OF_WEEK_2', 'DAY_OF_WEEK_3', 'DAY_OF_WEEK_4', 'DAY_OF_WEEK_5',
                                 'DAY_OF_WEEK_6', 'DAY_OF_WEEK_7', 'ACCIDENT_TYPE_Collision with a fixed object',
                                 'ACCIDENT_TYPE_Collision with vehicle', 'ACCIDENT_TYPE_Fall from or in moving vehicle',
                                 'ACCIDENT_TYPE_No collision and no object struck', 'ACCIDENT_TYPE_Other accident',
                                 'ACCIDENT_TYPE_Struck Pedestrian', 'ACCIDENT_TYPE_Struck animal',
                                 'ACCIDENT_TYPE_Vehicle overturned (no collision)',
                                 'ACCIDENT_TYPE_collision with some other object', 'LIGHT_CONDITION_Dark No street lights',
                                 'LIGHT_CONDITION_Dark Street lights off', 'LIGHT_CONDITION_Dark Street lights on',
                                 'LIGHT_CONDITION_Dark Street lights unknown', 'LIGHT_CONDITION_Day',
                                 'LIGHT_CONDITION_Dusk/Dawn', 'LIGHT_CONDITION_Unk.', 'ROAD_GEOMETRY_Cross intersection',
                                 'ROAD_GEOMETRY_Dead end', 'ROAD_GEOMETRY_Multiple intersection',
                                 'ROAD_GEOMETRY_Not at intersection', 'ROAD_GEOMETRY_Private property',
                                 'ROAD_GEOMETRY_Road closure', 'ROAD_GEOMETRY_T intersection', 'ROAD_GEOMETRY_Unknown',
                                 'ROAD_GEOMETRY_Y intersection','SPEED_ZONE_100 km/hr', 'SPEED_ZONE_110 km/hr',
                                 'SPEED_ZONE_40 km/hr', 'SPEED_ZONE_50 km/hr', 'SPEED_ZONE_60 km/hr', 'SPEED_ZONE_70 km/hr',
                                 'SPEED_ZONE_80 km/hr', 'SPEED_ZONE_90 km/hr', 'SPEED_ZONE_Camping grounds or off road',
                                 'SPEED_ZONE_Not known', 'SPEED_ZONE_Other speed limit', 'TOTAL_PERSONS', 'INJ_OR_FATAL',
                                 'MALES', 'FEMALES', 'UNLICENCSED', 'RMA_Arterial Highway', 'RMA_Arterial Other',
                                 'RMA_Freeway', 'RMA_Local Road', 'RMA_Non Arterial', 'RMA_ALL_Arterial Highway',
                                 'RMA_ALL_Arterial Highway,Arterial Other', 'RMA_ALL_Arterial Highway,Local Road',
                                 'RMA_ALL_Arterial Other', 'RMA_ALL_Arterial Other,Arterial Highway',
                                 'RMA_ALL_Arterial Other,Local Road', 'RMA_ALL_Freeway', 'RMA_ALL_Freeway,Arterial Other',
                                 'RMA_ALL_Local Road', 'RMA_ALL_Local Road,Arterial Highway',
                                 'RMA_ALL_Local Road,Arterial Other', 'RMA_ALL_Other', 'SEVERITY_Fatal accident',
                                 'SEVERITY_Non injury accident', 'SEVERITY_Other injury accident',
                                 'SEVERITY_Serious injury accident', 'REGION_NAME_EASTERN REGION',
                                 'REGION_NAME_METROPOLITAN NORTH WEST REGION', 'REGION_NAME_METROPOLITAN SOUTH EAST REGION',
                                 'REGION_NAME_NORTH EASTERN REGION', 'REGION_NAME_NORTHERN REGION', 
                                 'REGION_NAME_SOUTH WESTERN REGION', 'REGION_NAME_WESTERN REGION', 
                                 'STAT_DIV_NAME_Country', 'STAT_DIV_NAME_Metro']]
model_data_df.head()

Unnamed: 0,DAY_OF_WEEK_1,DAY_OF_WEEK_2,DAY_OF_WEEK_3,DAY_OF_WEEK_4,DAY_OF_WEEK_5,DAY_OF_WEEK_6,DAY_OF_WEEK_7,ACCIDENT_TYPE_Collision with a fixed object,ACCIDENT_TYPE_Collision with vehicle,ACCIDENT_TYPE_Fall from or in moving vehicle,...,SEVERITY_Serious injury accident,REGION_NAME_EASTERN REGION,REGION_NAME_METROPOLITAN NORTH WEST REGION,REGION_NAME_METROPOLITAN SOUTH EAST REGION,REGION_NAME_NORTH EASTERN REGION,REGION_NAME_NORTHERN REGION,REGION_NAME_SOUTH WESTERN REGION,REGION_NAME_WESTERN REGION,STAT_DIV_NAME_Country,STAT_DIV_NAME_Metro
0,0,0,0,1,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,1,0
1,0,0,0,1,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,1
2,0,0,0,1,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,0,1
3,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,1,0,1,0
4,0,0,0,0,1,0,0,0,1,0,...,0,1,0,0,0,0,0,0,1,0


In [19]:
original_data['DEG_URBAN_NAME'].value_counts()

DEG_URBAN_NAME
MELB_URBAN                 35274
RURAL_VICTORIA             11836
LARGE_PROVINCIAL_CITIES     3265
SMALL_CITIES                2975
TOWNS                       1785
MELBOURNE_CBD                749
SMALL_TOWNS                  530
Name: count, dtype: int64

In [20]:
original_data.loc[original_data["DEG_URBAN_NAME"] == 'MELB_URBAN', 'DEG_URBAN_NAME_ENCODED'] = 0
original_data.loc[original_data["DEG_URBAN_NAME"] == 'RURAL_VICTORIA', 'DEG_URBAN_NAME_ENCODED'] = 1
original_data.loc[original_data["DEG_URBAN_NAME"] == 'LARGE_PROVINCIAL_CITIES', 'DEG_URBAN_NAME_ENCODED'] = 2
original_data.loc[original_data["DEG_URBAN_NAME"] == 'SMALL_CITIES', 'DEG_URBAN_NAME_ENCODED'] = 3
original_data.loc[original_data["DEG_URBAN_NAME"] == 'TOWNS', 'DEG_URBAN_NAME_ENCODED'] = 4
original_data.loc[original_data["DEG_URBAN_NAME"] == 'MELBOURNE_CBD', 'DEG_URBAN_NAME_ENCODED'] = 5
original_data.loc[original_data["DEG_URBAN_NAME"] == 'SMALL_TOWNS', 'DEG_URBAN_NAME_ENCODED'] = 6
original_data['DEG_URBAN_NAME_ENCODED'].value_counts()

DEG_URBAN_NAME_ENCODED
0.0    35274
1.0    11836
2.0     3265
3.0     2975
4.0     1785
5.0      749
6.0      530
Name: count, dtype: int64

In [22]:
scaler = StandardScaler().fit(model_data_df)

X_scaled = scaler.transform(model_data_df)

In [27]:
y = original_data['DEG_URBAN_NAME_ENCODED']
y

0        2.0
1        0.0
2        0.0
4        1.0
5        1.0
        ... 
60684    1.0
60685    0.0
60686    0.0
60687    1.0
60688    0.0
Name: DEG_URBAN_NAME_ENCODED, Length: 56414, dtype: float64

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=78)

In [37]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu", input_dim=78))
nn_model.add(tf.keras.layers.Dense(units=16, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=7))

# Compile the Sequential model together and customise metrics
nn_model.compile(loss="SparseCategoricalCrossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=35)

# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
441/441 - 1s - loss: 1.9456 - accuracy: 0.0718 - 974ms/epoch - 2ms/step
Loss: 1.9456424713134766, Accuracy: 0.0718235969543457


In [35]:
# Evaluate the performance of model using the loss and predictive accuracy of the model on the test dataset.
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

441/441 - 1s - loss: 1.9458 - accuracy: 0.1088 - 811ms/epoch - 2ms/step
Loss: 1.9457803964614868, Accuracy: 0.10883437097072601
