In [31]:
# Load the merged / cleaned dataset
import pandas as pd 
df = pd.read_csv('../../datasets/merged.csv')

In [32]:
# display some first few rows again
df.head()

Unnamed: 0,ACCIDENT_NO,ACCIDENT_DATE,ACCIDENT_TIME,DAY_WEEK_DESC,NODE_ID,SPEED_ZONE,SEVERITY,NODE_TYPE,LGA_NAME,DEG_URBAN_NAME,LATITUDE,LONGITUDE,POSTCODE_CRASH
0,T20230010953,2023-05-10,15:10:00,Wednesday,774076,50,3,N,DANDENONG,MELB_URBAN,-37.978294,145.20834,3175.0
1,T20240016992,2024-06-26,08:30:00,Wednesday,806039,777,3,N,YARRA,MELB_URBAN,-37.827618,144.99759,3121.0
2,T20160012456,2016-06-01,15:30:00,Wednesday,304411,999,3,N,PORT PHILLIP,MELB_URBAN,-37.83835,144.94193,3207.0
3,T20120009795,2012-04-25,05:20:00,Wednesday,251602,100,3,N,BENDIGO,RURAL_VICTORIA,-36.809982,144.1787,3551.0
4,T20150001493,2015-01-22,09:24:00,Thursday,281862,80,3,I,WODONGA,RURAL_VICTORIA,-36.17658,146.941,3691.0


In [33]:
# function to convert accident time to just 4 categories: morning, afternoon, evening, late night
def categorize_accident_time(accident_time):
    # extract hour 
    hour = accident_time.split(':')[0]

    # then return the categories based on hours
    hour = int(hour)
    if 5 <= hour < 12:
        return 'Morning'
    elif 12 <= hour < 17:
        return 'Afternoon'
    elif 17 <= hour < 21:
        return 'Evening'
    else:
        return 'Late Night'
    
# now apply this change with the column 'ACCIDENT_TIME'
df['ACCIDENT_TIME'] = df['ACCIDENT_TIME'].apply(categorize_accident_time)

# check the updated dataframe
df.head()

Unnamed: 0,ACCIDENT_NO,ACCIDENT_DATE,ACCIDENT_TIME,DAY_WEEK_DESC,NODE_ID,SPEED_ZONE,SEVERITY,NODE_TYPE,LGA_NAME,DEG_URBAN_NAME,LATITUDE,LONGITUDE,POSTCODE_CRASH
0,T20230010953,2023-05-10,Afternoon,Wednesday,774076,50,3,N,DANDENONG,MELB_URBAN,-37.978294,145.20834,3175.0
1,T20240016992,2024-06-26,Morning,Wednesday,806039,777,3,N,YARRA,MELB_URBAN,-37.827618,144.99759,3121.0
2,T20160012456,2016-06-01,Afternoon,Wednesday,304411,999,3,N,PORT PHILLIP,MELB_URBAN,-37.83835,144.94193,3207.0
3,T20120009795,2012-04-25,Morning,Wednesday,251602,100,3,N,BENDIGO,RURAL_VICTORIA,-36.809982,144.1787,3551.0
4,T20150001493,2015-01-22,Morning,Thursday,281862,80,3,I,WODONGA,RURAL_VICTORIA,-36.17658,146.941,3691.0


## Now check some node_id with the most number of accident counts

In [34]:
# Get the top 10 NODE_IDs with the highest accident counts
top_node_ids = df['NODE_ID'].value_counts()

# get top 110
top_node_ids.head(10)

NODE_ID
65743     92
29420     76
65831     76
36335     70
65799     68
43917     60
65851     58
229963    50
10592     47
36856     46
Name: count, dtype: int64

In [35]:
# Let's filter the NODE_IDs that have more than 30 accidents
high_accident_nodes = top_node_ids[top_node_ids > 30]
print("NODE_IDs with more than 30 accidents:")
print(high_accident_nodes)
print("The number of node_id with more than 30 accidents: ", len(high_accident_nodes))

NODE_IDs with more than 30 accidents:
NODE_ID
65743     92
29420     76
65831     76
36335     70
65799     68
          ..
70726     31
45881     31
234609    31
34518     31
221459    31
Name: count, Length: 70, dtype: int64
The number of node_id with more than 30 accidents:  70


In [36]:
most_accident_node = df[df['NODE_ID'] == 65743]

In [37]:
most_accident_node

Unnamed: 0,ACCIDENT_NO,ACCIDENT_DATE,ACCIDENT_TIME,DAY_WEEK_DESC,NODE_ID,SPEED_ZONE,SEVERITY,NODE_TYPE,LGA_NAME,DEG_URBAN_NAME,LATITUDE,LONGITUDE,POSTCODE_CRASH
849,T20140017066,2014-08-17,Evening,Sunday,65743,40,3,I,MELBOURNE,MELBOURNE_CBD,-37.810318,144.96136,3000.0
850,T20140017066,2014-08-17,Evening,Sunday,65743,40,3,I,MELBOURNE,MELB_URBAN,-37.810318,144.96136,3000.0
14333,T20120026801,2012-11-28,Morning,Wednesday,65743,50,2,I,MELBOURNE,MELBOURNE_CBD,-37.810318,144.96136,3000.0
14334,T20120026801,2012-11-28,Morning,Wednesday,65743,50,2,I,MELBOURNE,MELB_URBAN,-37.810318,144.96136,3000.0
15018,T20180008558,2018-05-05,Afternoon,Saturday,65743,40,3,I,MELBOURNE,MELBOURNE_CBD,-37.810318,144.96136,3000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
181030,T20140017055,2014-08-15,Late Night,Friday,65743,40,3,I,MELBOURNE,MELB_URBAN,-37.810318,144.96136,3000.0
181658,T20190021596,2019-10-31,Afternoon,Thursday,65743,40,3,I,MELBOURNE,MELBOURNE_CBD,-37.810318,144.96136,3000.0
181659,T20190021596,2019-10-31,Afternoon,Thursday,65743,40,3,I,MELBOURNE,MELB_URBAN,-37.810318,144.96136,3000.0
182079,T20240014397,2024-06-03,Evening,Monday,65743,40,3,I,MELBOURNE,MELBOURNE_CBD,-37.810318,144.96136,3000.0
