In [716]:
import pandas as pd
import calendar
import folium
import numpy as np
import statsmodels.api as sm
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [686]:
df = pd.read_csv('Baton_Rouge_Traffic_Incidents.csv')

In [687]:
# Convert to datetime
df['CRASH DATE'] = pd.to_datetime(df['CRASH DATE'])

# Remove years 20', 22' for MoM analysis
df = df[~df['CRASH DATE'].dt.year.isin([2020, 2022])]

# Get month
df['month'] = df['CRASH DATE'].dt.month

# Get hour
df['Hour'] = pd.to_datetime(df['CRASH DATE']).dt.hour

In [688]:
total_vehicles = df['TOTAL VEHICLES'].sum()

In [689]:
total_accidents = df['INCIDENT NUMBER'].count()

In [724]:
# Count accidents per hr
hourly_accidents = df.groupby('Hour')['INCIDENT NUMBER'].count().reset_index(name='Count')

# Bins
hour_ranges = ['12AM-1AM', '1AM-2AM', '2AM-3AM', '3AM-4AM', '4AM-5AM', '5AM-6AM', '6AM-7AM', '7AM-8AM',
               '8AM-9AM', '9AM-10AM', '10AM-11AM', '11AM-12PM', '12PM-1PM', '1PM-2PM', '2PM-3PM', '3PM-4PM',
               '4PM-5PM', '5PM-6PM', '6PM-7PM', '7PM-8PM', '8PM-9PM', '9PM-10PM', '10PM-11PM', '11PM-12AM']

# Bin hr ranges
hourly_accidents['Hour Range'] = pd.cut(hourly_accidents['Hour'], bins=24, labels=hour_ranges)

# Add hr range to df
df['hour_range'] = pd.cut(df['Hour'], bins=24, labels=hour_ranges)

# Rename count and rm hour col
hourly_accidents = hourly_accidents.groupby('Hour Range')['Count'].sum().reset_index(name='Total Accidents')

# Create bar char
fig = px.bar(hourly_accidents, x='Hour Range', y='Total Accidents', color='Total Accidents', color_continuous_scale='darkmint', title='Accidents by Hour Range')
fig.update_layout(title_font=dict(size=20), title_x=0.5)
fig.show()

<h2> Get quick delta of accidents pertaining to different conditions

In [691]:
df[df['WEATHER'] == 'SNOW']['INCIDENT NUMBER'].count() / total_accidents *100

0.02323285125166986

<h2> Single viz hour 🔟

In [725]:
# Hr range with least crashes
n_crashes = int(hourly_accidents[hourly_accidents['Hour Range'] == '3PM-4PM']['Total Accidents'])
fig = go.Figure(go.Indicator(
    mode = "number",
    value = n_crashes,
    title = {"text": "Hour Range With Most Accidents <br><br><b> 3PM - 4PM<b>", "font": {"size": 20}},
    number = {'font_color':'#FF6A74'}
))

fig.show()

In [726]:
# Hr range with least crashes
n_crashes = int(hourly_accidents[hourly_accidents['Hour Range'] == '3AM-4AM']['Total Accidents'])
fig = go.Figure(go.Indicator(
    mode = "number",
    value = n_crashes,
    title = {"text": "Hour Range With Least Accidents <br><br><b> 3AM - 4AM<b>", "font": {"size": 20}},
    number = {'font_color':'#009b00'}
))

fig.show()

In [694]:
# Get mos name and count accidents per mo
df_by_month = df.groupby(df['CRASH DATE'].dt.strftime('%B'))['INCIDENT NUMBER'].count().reset_index()
df['month_name'] = pd.to_datetime(df['month'], format='%m').dt.month_name()

df_by_month['CRASH DATE'] = df_by_month['CRASH DATE'].apply(lambda x: calendar.month_name[list(calendar.month_name).index(x)])

In [695]:
month_to_num = {month: i for i, month in enumerate(calendar.month_name) if month}
df_by_month['month_num'] = df_by_month['CRASH DATE'].map(month_to_num)
df_by_month = df_by_month.sort_values('month_num')

<h2> Number of accidents per month 📈

In [696]:
fig = px.line(df_by_month, x='CRASH DATE', y='INCIDENT NUMBER', markers=True, labels={'CRASH DATE': 'Month', 'INCIDENT NUMBER': 'Number of Accidents'})
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<h2> Single value viz month 📅

In [727]:
n_crashes = int(df_by_month[df_by_month['CRASH DATE'] == 'September']['INCIDENT NUMBER'])
fig = go.Figure(go.Indicator(
    mode = "number",
    value = n_crashes,
    title = {"text": "<b> September<b>", "font": {"size": 20}},
    number = {'font_color':'#FF6A74'}
))

fig.show()

In [728]:
n_crashes = int(df_by_month[df_by_month['CRASH DATE'] == 'February']['INCIDENT NUMBER'])
fig = go.Figure(go.Indicator(
    mode = "number",
    value = n_crashes,
    title = {"text": "<b> February<b>", "font": {"size": 20}},
    number = {'font_color':'#009b00'}
))

fig.show()

In [729]:
n_crashes = int(df_by_month['INCIDENT NUMBER'].sum())
fig = go.Figure(go.Indicator(
    mode = "number",
    value = n_crashes,
    title = {"text": "<b>2021", "font": {"size": 20}},
    number = {'font_color':'#6A5ACD'}
))

fig.show()

In [730]:
weather_counts = df['WEATHER'].value_counts()                 
weather_counts = weather_counts.loc[['CLEAR', 'CLOUDY', 'RAIN']]
fig = px.bar(weather_counts, x=weather_counts.index, y=weather_counts.values)
fig.update_layout(title='Accidents per Weather Condition')
fig.show()

In [731]:
n_crashes = weather_counts.loc['RAIN']
fig = go.Figure(go.Indicator(
    mode = "number",
    value = n_crashes,
    title = {"text": "Accidents in Rain Conditions<b>", "font": {"size": 20}},
    number = {'font_color':'Gray'}
))

fig.show()

<h2>Poisson - GLM 📏

In [706]:
# New df counts indicidents per mo, hr, weather type, and lighting type
df_risk = df.groupby(['month_name','hour_range','WEATHER','LIGHTING'])['INCIDENT NUMBER'].count().reset_index(name="n_crashes")

In [732]:
# Get dummies of categorical data
X = pd.get_dummies(df_risk[['month_name', 'hour_range', 'WEATHER', 'LIGHTING']], drop_first=True)

# fit a Poisson GLM to n_crashes
model = sm.GLM(df_risk['n_crashes'], X, family=sm.families.Poisson()).fit()

# print coefs
print(model.summary())

# save summary
# with open('model_summary.txt', 'w') as file:
#     file.write(str(model.summary()))

                 Generalized Linear Model Regression Results                  
Dep. Variable:              n_crashes   No. Observations:                18432
Model:                            GLM   Df Residuals:                    18384
Model Family:                 Poisson   Df Model:                           47
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -13216.
Date:                Sun, 26 Feb 2023   Deviance:                       20946.
Time:                        22:35:02   Pearson chi2:                 1.22e+05
No. Iterations:                    12   Pseudo R-squ. (CS):             0.9946
Covariance Type:            nonrobust                                         
                                                        coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------

In [721]:
# extract top 5 coefs
coefs = pd.Series(model.params)
top_coefs = coefs.nlargest(5)
for idx, val in top_coefs.items():
    hour_range = idx.split('_')[-1]
    print(f"{hour_range[-7:]}: {round(val, 2)}")

3PM-4PM: 2.89
5PM-6PM: 2.88
4PM-5PM: 2.83
2PM-3PM: 2.73
2PM-1PM: 2.6


<h2> Maps 🗺️

In [684]:
df = df[df['LATITUDE'].notnull() & df['LONGITUDE'].notnull()]
df['location'] = list(zip(df['LATITUDE'], df['LONGITUDE']))
sep_crashes = df[df['month'] == 9]

map = folium.Map(location=[30.4515, -91.1871], zoom_start=11)

for location in sep_crashes['location']:
    folium.Marker(location=location).add_to(map)

# map.save('crash_map.html')

In [503]:
map_crashes = folium.Map(location=[30.4583, -91.1403], zoom_start=11)

# Extract lat and long
crash_locations = sep_crashes[['LATITUDE', 'LONGITUDE']].dropna()

# Create a heatmap
heat_data = [[row['LATITUDE'], row['LONGITUDE']] for index, row in crash_locations.iterrows()]
HeatMap(heat_data).add_to(map_crashes)
heat_map = HeatMap(heat_data, radius=2)

# map_crashes.save('heat_map.html')

In [504]:
marker_cluster = MarkerCluster().add_to(k)

# add markers
for lat, lon in zip(sep_crashes['LATITUDE'], sep_crashes['LONGITUDE']):
    folium.Marker(location=[lat, lon]).add_to(marker_cluster)

# k.save('markers_map.html')