In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import os
import gc
import json

import folium
from folium.plugins import MarkerCluster, HeatMap, HeatMapWithTime
from shapely.geometry import Point, shape
import geopandas as gpd
from pyproj import CRS

from math import cos, asin, sqrt, pi

In [None]:
# Project root directory. Set the ROAD_SAFETY_DIR environment variable to run the
# notebook on another machine; the original local path remains the fallback.
path = os.environ.get('ROAD_SAFETY_DIR', r'C:\Users\nukis\Documents\Projects\08. Road Safety')

In [None]:
# Load the cleaned crash data produced by an earlier preparation step.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
df = pd.read_pickle(os.path.join(path, '01. Data', 'Prepared data', 'road_safety_cleaned.pkl'))

In [None]:
# Show every row and every column whenever a frame is displayed
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', len(df.columns))

In [None]:
# Make sure both coordinate columns are float64 before they are used for mapping
coord_cols = ['Lat', 'Lon']
df[coord_cols] = df[coord_cols].astype('float64')
df.head(2)

In [None]:
# Frequency of each distinct value in the 'Position Type' column
df['Position Type'].value_counts()

In [None]:
# Path to the suburb-boundary GeoJSON file (Suburbs_GDA2020.geojson)

suburb_geo = os.path.join(path, '01. Data', 'Original Data', 'Suburbs_geojson', 'Suburbs_GDA2020.geojson')

In [None]:
# Load the suburb GeoJSON. The encoding is set explicitly so the read does not
# depend on the platform default (cp1252 on Windows), which can fail on UTF-8 JSON.
with open(suburb_geo, encoding='utf-8') as jsonFile:
    geo_data = json.load(jsonFile)

In [None]:
# Summarise the collection instead of echoing the entire GeoJSON into the cell output
{
    'type': geo_data.get('type'),
    'feature_count': len(geo_data['features']),
    'sample_properties': [feature['properties'] for feature in geo_data['features'][:3]],
}

In [None]:
# Keep only the GeoJSON features whose postcode occurs in the crash data

# Build the lookup once. A set gives constant-time membership tests, whereas the
# original rebuilt the list of unique postcodes for every single feature.
crash_postcodes = set(df['Postcode'].unique().tolist())

geo_postcode = [
    feature
    for feature in geo_data['features']
    if feature['properties']['postcode'] in crash_postcodes
]

In [None]:
# Report how many features survived the postcode filter rather than dumping them all
len(geo_postcode)

In [None]:
# Wrap the retained features in a GeoJSON FeatureCollection

new_json = {
    'type': 'FeatureCollection',
    'features': geo_postcode,
}

In [None]:
# Save the updated GeoJSON object.
# The context manager guarantees the handle is flushed and closed (the original
# left the file object open); the encoding is explicit for portability.
with open('cleaned_geodata.json', 'w', encoding='utf-8') as cleaned_file:
    json.dump(new_json, cleaned_file, sort_keys=True, indent=4, separators=(',', ': '))

In [None]:
# One point geometry per crash, built from its longitude/latitude pair.
# points_from_xy is vectorised and replaces the per-row Point(...) construction.
locs_geometry = gpd.points_from_xy(df['Lon'], df['Lat'])

# Coordinate Reference System: EPSG:4326 is the common WGS84 latitude/longitude system
crs = CRS('EPSG:4326')

locs_gdf = gpd.GeoDataFrame(df, crs=crs, geometry=locs_geometry)

In [None]:
# Preview the first rows of the GeoDataFrame with its point geometry
locs_gdf.head()

### Circle Marker Map

In [None]:
# Base map for the circle markers
crash_map = folium.Map(location=[-34.921230, 138.599503], zoom_start=11)

# One toggleable layer per severity class
feature_ft = folium.FeatureGroup(name='Fatality')
feature_si = folium.FeatureGroup(name='Serious Injury')
feature_mi = folium.FeatureGroup(name='Minor Injury')
feature_pd = folium.FeatureGroup(name='Property Damage')

# 'Target' value -> (layer, marker colour). One lookup table replaces four
# copy-pasted branches that differed only in these two values.
severity_layers = {
    'Fatal': (feature_ft, '#FF0700'),
    'Si': (feature_si, '#FF5722'),
    'Mi': (feature_mi, '#087FBF'),
    'Pdo': (feature_pd, '#007944'),
}

for i, v in locs_gdf.iterrows():
    # Rows with any other 'Target' value are not drawn, as before;
    # skipping them first also avoids building a tooltip that is never used.
    if v['Target'] not in severity_layers:
        continue
    layer, colour = severity_layers[v['Target']]

    popup = """
    Suburb : <b>%s</b><br>
    Sex : <b>%s</b><br>
    Age : <b>%s</b><br>
    Total Casualty : <b>%d</b><br>
    """ % (v['Suburb'], v['Sex'], v['Age'], v['Total Cas'])

    folium.CircleMarker(location=[v['Lat'], v['Lon']],
                        radius=1,
                        tooltip=popup,
                        color=colour,
                        fill_color=colour,
                        fill=True).add_to(layer)

# Attach the layers in the same order as before, then the layer switcher
for layer, _ in severity_layers.values():
    layer.add_to(crash_map)
folium.LayerControl(collapsed=False).add_to(crash_map)

In [None]:
# Render the interactive circle-marker map
crash_map

### Density Map (Based on Postcode)

In [None]:
# Per postcode: number of crash records and summed casualties
total_accident = (
    df.groupby('Postcode')
      .agg({'Postcode': 'count', 'Total Cas': 'sum'})
      .rename(columns={'Postcode': 'Number of Accidents'})
      .reset_index()
)
total_accident.head()

In [None]:
def accident_map(datatoplot, x, y, legend, geo_path='cleaned_geodata.json',
                 location=(-34.921230, 138.599503), zoom_start=11):
    """Build a base map and a postcode choropleth layer for it.

    The choropleth is NOT added to the map here; the caller is expected to do
    ``result['chor'].add_to(result['fol'])``.

    Parameters
    ----------
    datatoplot : table passed to ``folium.Choropleth`` as ``data``.
    x : name of the key column, matched against ``feature.properties.postcode``.
    y : name of the value column that drives the fill colour.
    legend : text used as the legend name.
    geo_path : GeoJSON file to draw; defaults to the file written earlier in
        this notebook, so existing calls behave as before.
    location : (lat, lon) centre of the base map.
    zoom_start : initial zoom level of the base map.

    Returns
    -------
    dict with keys ``'fol'`` (the ``folium.Map``) and ``'chor'`` (the
    ``folium.Choropleth``).
    """
    fol = folium.Map(location=list(location), zoom_start=zoom_start)

    chor = folium.Choropleth(
        geo_data=geo_path,
        data=datatoplot,
        columns=[x, y],
        key_on='feature.properties.postcode',
        fill_color='OrRd', fill_opacity=0.6, line_opacity=0.2,
        nan_fill_color='grey',  # postcodes with no matching data row
        legend_name=legend
    )
    return {'fol': fol, 'chor': chor}

In [None]:
# Draw the accidents-per-postcode choropleth

plot_sum = accident_map(total_accident, 'Postcode', 'Number of Accidents', 'Number of Accidents')
postcode_map, postcode_layer = plot_sum['fol'], plot_sum['chor']

# The helper returns the layer detached, so attach it and add a layer switcher
postcode_layer.add_to(postcode_map)
folium.LayerControl().add_to(postcode_map)

postcode_map

### Heat Map

In [None]:
# Number of crash records at each distinct (Lat, Lon) location
total_accident2 = (
    df.groupby(['Lat', 'Lon'])
      .agg({'Postcode': 'count'})
      .rename(columns={'Postcode': 'Number of Accidents'})
      .reset_index()
)
total_accident2.head()

In [None]:
# Plotting heat map

# Size strings must be written "100%". The original "%100" is not a valid size,
# and newer folium releases raise ValueError when parsing it.
map_heat = folium.Map([-34.921230, 138.599503], zoom_start=9, width="100%", height="100%")

# Each row is [Lat, Lon, Number of Accidents]; the count acts as the point weight
HeatMap(total_accident2).add_to(map_heat)

map_heat

### Cluster Map

In [None]:
# Plotting as cluster

# "100%" is the valid size string; "%100" fails folium's size parsing in newer releases
map_cluster = folium.Map([-34.921230, 138.599503], zoom_start=10, width="100%", height="100%")

markerCluster = MarkerCluster(control=False).add_to(map_cluster)

# Iterate the coordinate columns directly. This is cheaper than iterrows(), and
# unlike df.at[label, col] it still yields scalars when index labels repeat.
for lat, lng in zip(df['Lat'], df['Lon']):
    folium.Marker(location=[lat, lng]).add_to(markerCluster)

map_cluster

### Calculating New Feature: Distance to City Center

In [None]:
def distance(lat1, lon1, lat2, lon2):
    """Haversine great-circle distance between two points given in degrees.

    The central angle is scaled by 12742 (2 x 6371 km, Earth's mean diameter),
    so the result is in kilometres, rounded to two decimal places.
    """
    deg_to_rad = pi / 180
    lat_term = 0.5 - cos((lat2 - lat1) * deg_to_rad) / 2
    lon_term = cos(lat1 * deg_to_rad) * cos(lat2 * deg_to_rad) * (1 - cos((lon2 - lon1) * deg_to_rad)) / 2
    haversine = lat_term + lon_term
    return round(12742 * asin(sqrt(haversine)), 2)

In [None]:
# Reference point used as the city centre (latitude, longitude in degrees)
city_lat, city_lon = -34.929, 138.601

# Distance (km) from each crash to the city centre.
# Columns are read by name; the original used positional indices 25 and 26,
# which silently pick the wrong data if the column order ever changes.
# NOTE(review): assumes positions 25/26 were 'Lat'/'Lon' — confirm against df.columns.
distance_to_cc = [
    distance(city_lat, city_lon, lat, lon)
    for lat, lon in zip(df['Lat'], df['Lon'])
]

In [None]:
# Store the per-row distance to the city centre (km) as a new feature column
df['Distance To CC'] = distance_to_cc

In [None]:
# Count crash records per (distance, casualty total, severity) combination,
# listing the combinations with the most casualties first.
# groupby/agg already returns a DataFrame, so the extra pd.DataFrame(...) wrap is dropped.
dist_to_cc = (
    df.groupby(['Distance To CC', 'Total Cas', 'Target'])
      .agg({'Distance To CC': 'count'})
      .rename(columns={'Distance To CC': 'Counts'})
      .reset_index()
      .sort_values(by='Total Cas', ascending=False)
)
dist_to_cc.head()

In [None]:
# Column-wise maxima of the aggregated table (useful for choosing axis limits)
dist_to_cc.max()

In [None]:
hue_order = ['Pdo', 'Mi', 'Si', 'Fatal']
palette = ['#007944', '#087FBF', '#FF5722', '#FF0700']

# Create the wide figure first, then apply the seaborn style
plt.figure(figsize=(20, 6))
sns.set(style="ticks")

# Colour encodes severity; marker size encodes total casualties
sc = sns.scatterplot(
    data=dist_to_cc,
    x='Distance To CC',
    y='Counts',
    hue='Target',
    hue_order=hue_order,
    size='Total Cas',
    sizes=(10, 300),
    palette=palette,
)

# Axis labels, limits and a tick every 100 units along x
sc.set(
    ylabel='Number of Accidents',
    xlabel='Distance to Downtown',
    ylim=(-1, 50),
    xlim=(-5, 1200),
    xticks=np.arange(0, 1300, 100),
)
sc.set_xticklabels(list(range(0, 1300, 100)))
plt.show()

In [None]:
# Group minor injury, serious injury and fatality into one class;
# property-damage-only crashes keep their own class.

injury_or_death = df['Target'].isin(['Mi', 'Si', 'Fatal'])
df.loc[injury_or_death, 'Severity'] = 'Injury/Death'
df.loc[df['Target'] == 'Pdo', 'Severity'] = 'PDO'

In [None]:
# Preview the frame, now including the 'Distance To CC' and 'Severity' columns
df.head()

In [None]:
# Persist the enriched frame in both pickle and CSV form
prepared_dir = os.path.join(path, '01. Data', 'Prepared data')
df.to_pickle(os.path.join(prepared_dir, 'road_safety_cleaned_Tab.pkl'))
df.to_csv(os.path.join(prepared_dir, 'road_safety_cleaned_Tab.csv'), sep=',')

In [None]:
# Force a garbage-collection pass to release unreferenced objects
gc.collect()