In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import time


from sklearn.neural_network import MLPClassifier

from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import classification_report,confusion_matrix

# Load the one-hot encoded crash table (which also carries the cluster columns)
# and the numeric-only export; the latter is used here only as the source of
# the integer CRASH_SEV_CODE label.
df = pd.read_csv("crash_data_median_clustered.csv")
df2 = pd.read_csv("crash_data_only_numeric_values.csv")

# `assign` aligns the label column on the row index. If the two files had a
# different number of rows, unmatched rows would get NaN labels and the
# `fillna(0)` below would silently relabel them as severity code 0.
if len(df) != len(df2):
    raise ValueError(
        f"Row count mismatch between feature file ({len(df)}) and "
        f"label file ({len(df2)}); cannot attach CRASH_SEV_CODE safely."
    )

# add crash_sev_code to one_hot_encoded
df = df.assign(CRASH_SEV_CODE=df2["CRASH_SEV_CODE"])

# Remove the CRASH_SEV_* columns (presumably the one-hot form of the target,
# which would leak the label into the features -- confirm against the CSV) and
# the unscaled CLUSTER column, so CLUSTER_SCALED is the cluster feature used.
# To train on the raw cluster id instead, drop 'CLUSTER_SCALED' here rather
# than 'CLUSTER'.
df = df.drop(
    columns=[
        'CRASH_SEV_F',
        'CRASH_SEV_M',
        'CRASH_SEV_N',
        'CRASH_SEV_S',
        'CLUSTER',
    ]
)

# Shuffle with a fixed seed so a "Restart & Run All" reproduces the same rows
# in the same order (the seed value matches the one used for resampling and
# for the train/test split elsewhere in this notebook).
df = sklearn.utils.shuffle(df, random_state=40)
df = df.fillna(0)

# Not used by the visible cells; kept in case later cells rely on it.
encode = LabelEncoder()

# Names of the 83 model input columns, grouped by column-name prefix.
# NOTE: the LG_REGION_DESC_* names carry trailing padding spaces; they must be
# kept exactly as written to match the column headers.
features = [
    # plain numeric columns
    "CRASH_YEAR",
    "NUM_LANES",
    "SPD_LIM",
    # TRAFFIC_CTRL_*
    "TRAFFIC_CTRL_Give Way Sign",
    "TRAFFIC_CTRL_Nil",
    "TRAFFIC_CTRL_Points Man",
    "TRAFFIC_CTRL_School Patrol",
    "TRAFFIC_CTRL_Stop Sign",
    "TRAFFIC_CTRL_Traffic Signal",
    # MULTI_VEH_*
    "MULTI_VEH_Cyclist(s)+Pedestrian(s) only",
    "MULTI_VEH_Cyclists only",
    "MULTI_VEH_Multi vehicle",
    "MULTI_VEH_Other",
    "MULTI_VEH_Others without non-parked veh",
    "MULTI_VEH_Single vehicle",
    "MULTI_VEH_Vehicle(s)+Cyclist(s) only",
    "MULTI_VEH_Vehicle(s)+Pedestrian(s)",
    "MULTI_VEH_Vehicle(s)+multiple other types",
    # HOLIDAY_*
    "HOLIDAY_Christmas/New Year",
    "HOLIDAY_Easter",
    "HOLIDAY_Labour Weekend",
    "HOLIDAY_None",
    "HOLIDAY_Queens Birthday",
    # LG_REGION_DESC_* (space-padded names)
    "LG_REGION_DESC_0",
    "LG_REGION_DESC_Auckland            ",
    "LG_REGION_DESC_Bay of Plenty       ",
    "LG_REGION_DESC_Canterbury          ",
    "LG_REGION_DESC_Gisborne            ",
    "LG_REGION_DESC_Hawkes Bay          ",
    "LG_REGION_DESC_Manawatu/Wanganui   ",
    "LG_REGION_DESC_Nelson/Marlborough  ",
    "LG_REGION_DESC_Northland           ",
    "LG_REGION_DESC_Otago               ",
    "LG_REGION_DESC_Southland           ",
    "LG_REGION_DESC_Taranaki            ",
    "LG_REGION_DESC_Waikato             ",
    "LG_REGION_DESC_Wellington          ",
    "LG_REGION_DESC_West Coast          ",
    # JUNCTION_TYPE_*
    "JUNCTION_TYPE_Driveway",
    "JUNCTION_TYPE_Multi Rd Join",
    "JUNCTION_TYPE_Roundabout",
    "JUNCTION_TYPE_T Type Junction",
    "JUNCTION_TYPE_X Type Junction",
    "JUNCTION_TYPE_Y Type Junction",
    # DIRN_ROLE1_DESC_*
    "DIRN_ROLE1_DESC_0",
    "DIRN_ROLE1_DESC_East",
    "DIRN_ROLE1_DESC_North",
    "DIRN_ROLE1_DESC_South",
    "DIRN_ROLE1_DESC_West",
    # INTSN_MIDBLOCK_*
    "INTSN_MIDBLOCK_Intersection",
    "INTSN_MIDBLOCK_Mid Block",
    # FLAT_HILL_*
    "FLAT_HILL_Flat",
    "FLAT_HILL_Hill",
    # ROAD_CURVATURE_*
    "ROAD_CURVATURE_Easy Curve",
    "ROAD_CURVATURE_Moderate Curve",
    "ROAD_CURVATURE_Severe Curve",
    "ROAD_CURVATURE_Straight Road",
    # ROAD_MARKINGS_*
    "ROAD_MARKINGS_Centre Line",
    "ROAD_MARKINGS_No Marks",
    "ROAD_MARKINGS_No Passing Lines",
    "ROAD_MARKINGS_Painted Island",
    "ROAD_MARKINGS_Ped Crossing",
    "ROAD_MARKINGS_Raised Island",
    # ROAD_SURFACE_*
    "ROAD_SURFACE_Sealed",
    "ROAD_SURFACE_Unsealed",
    # ROAD_WET_*
    "ROAD_WET_Dry",
    "ROAD_WET_Ice/ Snow",
    "ROAD_WET_Wet",
    # URBAN_*
    "URBAN_Openroad",
    "URBAN_Urban",
    # LIGHT_*
    "LIGHT_Bright Sun",
    "LIGHT_Dark",
    "LIGHT_Overcast",
    "LIGHT_Twilight",
    # STREET_LIGHT_*
    "STREET_LIGHT_None",
    "STREET_LIGHT_Off",
    "STREET_LIGHT_On",
    # WEATHER_A_*
    "WEATHER_A_Fine",
    "WEATHER_A_Heavy Rain",
    "WEATHER_A_Light Rain",
    "WEATHER_A_Mist",
    "WEATHER_A_Snow",
    # cluster feature
    "CLUSTER_SCALED",
]

# Split the frame by severity code. The counts in the trailing comments were
# recorded from an earlier run of this notebook.
df_non_injury = df[df.CRASH_SEV_CODE == 2]  # 480452 samples
df_minor = df[df.CRASH_SEV_CODE == 1]       # 150834 samples
df_serious = df[df.CRASH_SEV_CODE == 3]     # 37347  samples
df_fatal = df[df.CRASH_SEV_CODE == 0]       # 6178   samples

# Balance the classes by shrinking every other class to the size of the
# class-0 subset, which is kept in full. The target size is derived from the
# data rather than hard-coded so it stays correct if the input file changes.
n_per_class = len(df_fatal)

# Downsample WITHOUT replacement: drawing with replacement would put duplicate
# rows into the balanced set, and those duplicates could then land on both
# sides of the train/test split and inflate the test scores.
df_minor_downsampled = resample(
    df_minor, replace=False, n_samples=n_per_class, random_state=40
)
df_serious_downsampled = resample(
    df_serious, replace=False, n_samples=n_per_class, random_state=40
)
df_non_injury_downsampled = resample(
    df_non_injury, replace=False, n_samples=n_per_class, random_state=40
)

df_downsampled = pd.concat(
    [
        df_non_injury_downsampled,
        df_minor_downsampled,
        df_serious_downsampled,
        df_fatal,
    ]
)

# Displayed as the cell output: every class should show the same count.
df_downsampled.CRASH_SEV_CODE.value_counts()

3    6178
2    6178
1    6178
0    6178
Name: CRASH_SEV_CODE, dtype: int64

In [3]:
# Feature matrix: every column except the target. Target: the severity code.
X = df_downsampled.drop(columns=["CRASH_SEV_CODE"]).values
y = df_downsampled["CRASH_SEV_CODE"].values

# Hold out 10% for testing. `stratify=y` keeps the class balance built during
# downsampling identical in both the train and the test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=40, stratify=y
)

# Fit the scaler on the training partition only and reuse its statistics for
# the test partition, so no information from the test rows reaches training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X_train.shape)
print(X_test.shape)

(22240, 83)
(2472, 83)


In [None]:
# Two hidden layers (83 -> 41). MLPClassifier adds the output layer itself,
# sized from the classes seen in `fit`, so no trailing "1" belongs here: a
# one-unit hidden layer would squeeze everything through a single activation
# before the multi-class output. `random_state` makes weight initialisation
# and batch shuffling repeatable.
mlp = MLPClassifier(
    hidden_layer_sizes=(83, 41),
    activation='relu',
    solver='adam',
    max_iter=500,
    verbose=True,
    random_state=40,
)

# perf_counter is a monotonic clock intended for measuring elapsed time.
start = time.perf_counter()
mlp.fit(X_train, y_train)
stop = time.perf_counter()

predict_train = mlp.predict(X_train)
predict_test = mlp.predict(X_test)

# Report on every class present in the training labels, in sorted order, so
# the rows of the classification report line up with the rows of the confusion
# matrix and a class the model never predicts still appears in the report
# instead of being silently omitted.
class_labels = np.unique(y_train)

print(f"\n Training time: {stop - start}s")
print("\n training data\n")
print(confusion_matrix(y_train, predict_train, labels=class_labels))
print("\n")
print(classification_report(y_train, predict_train, labels=class_labels))
print("\n---------------------------------------------------------------------------------------\n")
print("\n test data\n")
print(confusion_matrix(y_test, predict_test, labels=class_labels))
print("\n")
print(classification_report(y_test, predict_test, labels=class_labels))