# USA Car Accidents

## imports:

### External dependencies

In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import dash_bootstrap_components as dbc
from dash import Dash, Input, Output, html, dcc
import plotly.express as px
import os
import dash

### Internal dependencies

In [4]:
# internal dependencies
from constants import *
from graphs import *
from utility_functions import *

SyntaxError: invalid syntax. Perhaps you forgot a comma? (graphs.py, line 45)

## Configuration parameters:

In [None]:
# Dataset locations are relative to the notebook's working directory.
# They are assembled with os.path.join so the separator matches the host
# platform; a hard-coded backslash only resolves on Windows.

# population dataset path
Path_to_population_dataset = os.path.join('Dataset', 'US_Population.csv')

# accident dataset path
Path_to_accident_dataset = os.path.join('Dataset', 'US_Accidents_Sampled.csv')

# sampling factor (>=1); forwarded to the chart builders by the callbacks
Sampling_Factor = 100
if Sampling_Factor < 1:
    # fail early instead of letting an invalid factor reach the charts
    raise ValueError("Sampling_Factor must be >= 1")

## Load and edit datasets

### Population Dataset

In [None]:
# Load the US population dataset and discard rows with missing values.
# DataFrame.dropna() returns a new frame rather than modifying in place,
# so its result has to be assigned for the rows to actually be removed.
df_pop = pd.read_csv(Path_to_population_dataset).dropna()
df_pop.head()

Unnamed: 0,Year,AL,AK,AZ,AR,CA,CO,CT,DE,FL,...,SD,TN,TX,UT,VT,VA,WA,WV,WI,WY
0,2016,4860545,741522,6941072,2989918,39250017,5540545,3576452,952065,20612439,...,865454,6651194,27862596,3051217,624594,8411808,7288000,1831102,5778708,585501
1,2017,4874747,739786,7044008,3001345,39536653,5607154,3573880,961939,20928863,...,869666,6715984,28304596,3101833,623657,8470020,7423362,1818157,5790186,584910
2,2018,4887681,735139,7158024,3009733,39776830,5691287,3573297,971180,21244317,...,878698,6771631,28704330,3153550,624344,8517685,7535591,1804291,5807406,578668
3,2019,4903185,731158,7278717,3017804,39576757,5758736,3571520,981822,21538187,...,882235,6829174,29145505,3205958,624358,8565256,7614893,1792147,5822434,577601
4,2020,4921532,727890,7421401,3029887,39368078,5845526,3565287,990837,21899341,...,886667,6886834,29618533,3251617,623989,8616207,7693612,1778070,5837466,567025


### Accident Dataset

In [None]:

# Load the accident dataset and clean it in a single pipeline:
#   1. drop the columns that are not needed
#   2. drop rows that contain null values
#   3. parse 'Start_Time' / 'End_Time' as datetimes and derive 'Year'
#   4. store 'Severity' as strings
#   5. convert the temperature from Fahrenheit to Celsius and rename the column
#   6. discard 2016, 2017, 2018 and 2023 because those years are incomplete
df_acc = (
    pd.read_csv(Path_to_accident_dataset)
    .drop(columns=['Source', 'End_Lat', 'End_Lng', 'Description', 'Airport_Code'])
    .dropna()
    .assign(
        Start_Time=lambda acc: pd.to_datetime(acc['Start_Time'], format='ISO8601'),
        End_Time=lambda acc: pd.to_datetime(acc['End_Time'], format='ISO8601'),
        # evaluated after Start_Time above, so it reads the parsed datetimes
        Year=lambda acc: acc['Start_Time'].dt.year,
        Severity=lambda acc: acc['Severity'].astype(str),
        # the column name is not a valid keyword, hence the dict unpacking
        **{'Temperature(F)': lambda acc: (acc['Temperature(F)'] - 32) * 5.0 / 9.0},
    )
    .rename(columns={'Temperature(F)': 'Temperature(C)'})
    .loc[lambda acc: ~acc['Year'].isin([2016, 2017, 2018, 2023])]
)

df_acc.head()

Unnamed: 0.1,Unnamed: 0,ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,Distance(mi),Street,City,...,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight,Year
1,4123847,A-4154878,2,2022-11-22 16:09:03,2022-11-22 17:26:55,36.12612,-86.904076,0.99,I-40 W,Nashville,...,False,False,False,False,False,Day,Day,Day,Day,2022
2,7006131,A-7055452,2,2020-06-24 20:28:00,2020-06-24 21:02:05,45.02394,-93.28464,0.0,I-94 E,Minneapolis,...,False,False,False,False,False,Day,Day,Day,Day,2020
3,1191012,A-1200791,2,2021-01-08 18:01:03,2021-01-08 18:47:15,30.471621,-91.089096,0.0,S Choctaw Dr,Baton Rouge,...,False,False,False,True,False,Night,Night,Day,Day,2021
7,801563,A-811271,3,2021-11-19 18:47:57,2021-11-19 19:17:25,40.00996,-75.064369,0.0,Fraley St,Philadelphia,...,False,False,False,False,False,Night,Night,Night,Night,2021
8,1398450,A-1408279,3,2020-08-01 13:00:16,2020-08-01 14:00:59,29.784163,-95.531845,0.0,Bunker Hill Rd,Houston,...,False,False,False,False,False,Day,Day,Day,Day,2020


In [None]:
# Print a summary of the cleaned accident frame (columns, non-null counts, dtypes).
df_acc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 44305 entries, 1 to 69999
Data columns (total 43 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   Unnamed: 0             44305 non-null  int64         
 1   ID                     44305 non-null  object        
 2   Severity               44305 non-null  object        
 3   Start_Time             44305 non-null  datetime64[ns]
 4   End_Time               44305 non-null  datetime64[ns]
 5   Start_Lat              44305 non-null  float64       
 6   Start_Lng              44305 non-null  float64       
 7   Distance(mi)           44305 non-null  float64       
 8   Street                 44305 non-null  object        
 9   City                   44305 non-null  object        
 10  County                 44305 non-null  object        
 11  State                  44305 non-null  object        
 12  Zipcode                44305 non-null  object        
 13  Countr

## Dashboard

### input components:

#### first input group:

In [None]:
# First input group: two equal-width columns holding the time-interval
# dropdown and the view-mode radio buttons.
input1 = dbc.Col(
    children=dbc.Row(
        children=[
            dbc.Col(
                children=[
                    html.P("Time Interval:"),
                    dbc.Select(
                        id="Time-Interval-Select",
                        value='Monthly',
                        # label and value are the same string for every entry
                        options=[
                            {"label": interval, "value": interval}
                            for interval in ("Monthly", "Yearly", "Daily", "Hourly")
                        ],
                    ),
                ],
                width=6,
            ),
            dbc.Col(
                children=[
                    html.P("View Mode:"),
                    dbc.RadioItems(
                        id='ViewMode-Select',
                        value='Grouped',
                        options=[
                            {"label": mode, "value": mode}
                            for mode in ("Grouped", "Separated")
                        ],
                    ),
                ],
                width=6,
            ),
        ]
    )
)

#### second input group:

In [None]:
# Second input group: dropdown selecting the period ("all" or a single year).
input2 = dbc.Col(
    children=[
        html.P("Period:"),
        dbc.Select(
            id="Time-Interval-Select-pie",
            value='all',
            # the "All" entry is followed by one entry per selectable year
            options=[{"label": "All", "value": "all"}]
            + [
                {"label": year, "value": year}
                for year in ("2019", "2020", "2021", "2022")
            ],
        ),
    ]
)

#### third input group:

In [None]:
# Third input group: ordering dropdown and "All / Top 10" radio buttons.
input3 = dbc.Col(
    children=dbc.Row(
        children=[
            dbc.Col(
                children=[
                    html.P("Order:"),
                    dbc.Select(
                        id="Accident_per_populetion_order_select",
                        value='None',
                        options=[
                            {"label": label, "value": value}
                            for label, value in (
                                ("None", "None"),
                                ("Worst to best", "WorstToBest"),
                                ("Best to worst", "BestToWorst"),
                            )
                        ],
                    ),
                ]
            ),
            dbc.Col(
                children=[
                    html.P("Show:"),
                    dbc.RadioItems(
                        id='Show-Select',
                        value='Top 10',
                        options=[
                            {"label": label, "value": value}
                            for label, value in (("All", "all"), ("Top 10", "Top 10"))
                        ],
                    ),
                ]
            ),
        ]
    )
)

### Graphs:

Graph functions are defined in `graphs.py`.

#### Time Distribution graph:

In [None]:
# Column holding the graph that the time-distribution callback fills in.
TimeDistributionBAR = dbc.Col(
    children=[dcc.Graph(id='Temporal-Distribution-acc-graph')],
)

#### Severity Distribution graph:

In [None]:
# Column holding the graph that the severity-distribution callback fills in.
SeverityDistributionPIE = dbc.Col(
    children=[dcc.Graph(id='Severity-distribution-pie')],
)

#### Worst Best graph:

In [None]:
# Column holding the graph that the worst/best callback fills in.
WorstBestBAR = dbc.Col(
    children=[dcc.Graph(id='Accident_per_populetion_barchart')],
)

#### Temperature graph:

In [None]:
# Half-width column (6 of 12) holding the temperature graph.
TemperatureDistributionPIE = dbc.Col(
    children=[dcc.Graph(id='Temperature_PIE')],
    width=6,
)

#### Location graph:

In [None]:
# Half-width column (6 of 12) holding the location scatter graph.
LocationCoordsSCATTER = dbc.Col(
    children=[dcc.Graph(id='Location_Graph_Scatter')],
    width=6,
)

### Dashboard Layout

In [None]:
# Dash application using the Bootstrap stylesheet.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# centred page title
title = dbc.Row(
    children=dbc.Col(children=html.H1("USA Accidents"), width="auto"),
    justify="center",
    className="mb-2",
)

# row with the three input groups
FirstInputRow = dbc.Row(children=[input1, input2, input3])

# left column, upper row: time distribution and severity distribution
FirstRow = dbc.Row(
    children=[TimeDistributionBAR, SeverityDistributionPIE],
    className="mb-2",
)

# left column, lower row: temperature and location
SecondRow = dbc.Row(children=[TemperatureDistributionPIE, LocationCoordsSCATTER])

# left column (8 of 12): the two graph rows stacked
lcol = dbc.Col(children=[FirstRow, SecondRow], width=8)

# right column (4 of 12): the worst/best bar chart
rcol = dbc.Col(children=[dbc.Row(WorstBestBAR)], width=4)

# app layout: title, inputs, then the two graph columns side by side
app.layout = dbc.Container(
    children=[
        dbc.Col(
            children=[
                title,
                FirstInputRow,
                dbc.Row(children=[lcol, rcol]),
            ]
        )
    ],
    fluid=True,
    style={'width': '100%', 'backgroundColor': BACKGROUND_COLOR},
)

# NOTE(review): the callback cells come after this cell in the notebook, so on
# a top-to-bottom run the server is started before they are registered --
# confirm this ordering is intended.
app.run(debug=True)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[18], line 3, in WorstBestPieChart(
    time_interval='all',
    order_by='None',
    show='Top 10'
)
      1 @app.callback(Output("Accident_per_populetion_barchart", "figure"),[Input("Time-Interval-Select-pie", "value"),Input("Accident_per_populetion_order_select", "value"),Input("Show-Select", "value")])
      2 def WorstBestPieChart(time_interval, order_by, show):
----> 3     fig = BestWorstAcc(df_acc, df_pop, time_interval, order_by, (show == 'all'), Sampling_Factor)
        df_acc =        Unnamed: 0         ID Severity          Start_Time            End_Time  \
1         4123847  A-4154878        2 2022-11-22 16:09:03 2022-11-22 17:26:55   
2         7006131  A-7055452        2 2020-06-24 20:28:00 2020-06-24 21:02:05   
3         1191012  A-1200791        2 2021-01-08 18:01:03 2021-01-08 18:47:15   
7          801563   A-81

### Callbacks:

#### Time Distribution callback:

In [None]:
@app.callback(
    Output("Temporal-Distribution-acc-graph", "figure"),
    [
        Input("Time-Interval-Select", "value"),
        Input("ViewMode-Select", "value"),
        Input("Time-Interval-Select-pie", "value"),
    ],
)
def TDABarchart(time_interval, ViewMode, year):
    """Return the figure for the 'Temporal-Distribution-acc-graph' graph.

    'Grouped' delegates to SingleBarChart and 'Separated' to MultiBarChart,
    both called with df_acc, the time interval, Sampling_Factor and the year.
    Any other view mode yields None.
    """
    if ViewMode == 'Grouped':
        build_chart = SingleBarChart
    elif ViewMode == 'Separated':
        build_chart = MultiBarChart
    else:
        return None
    return build_chart(df_acc, time_interval, Sampling_Factor, year)

#### Severity Distribution callback:

In [None]:
@app.callback(
    Output("Severity-distribution-pie", "figure"),
    [Input("Time-Interval-Select-pie", "value")],
)
def SDPiechart(time_interval):
    """Return the figure PieChart builds from df_acc for the selected period."""
    return PieChart(df_acc, time_interval)

#### Worst Best callback:

In [None]:
@app.callback(
    Output("Accident_per_populetion_barchart", "figure"),
    [
        Input("Time-Interval-Select-pie", "value"),
        Input("Accident_per_populetion_order_select", "value"),
        Input("Show-Select", "value"),
    ],
)
def WorstBestPieChart(time_interval, order_by, show):
    """Return the figure for the 'Accident_per_populetion_barchart' graph.

    Delegates to BestWorstAcc with the accident and population frames, the
    selected period and ordering, a flag that is True when `show` equals
    'all', and Sampling_Factor.
    """
    show_all = show == 'all'
    return BestWorstAcc(df_acc, df_pop, time_interval, order_by, show_all, Sampling_Factor)

#### Temperature callback:

In [None]:
@app.callback(
    Output("Temperature_PIE", "figure"),
    [Input("Time-Interval-Select-pie", "value")],
)
def TemperaturePieChart(year):
    """Return the figure TemperaturePIE builds from df_acc for the selected period."""
    return TemperaturePIE(df_acc, year)

#### Location callback:

In [None]:
@app.callback(
    Output("Location_Graph_Scatter", "figure"),
    [
        Input("Time-Interval-Select-pie", "value"),
        Input("ViewMode-Select", "value"),
    ],
)
def LocationScatter(year, viewmode):
    """Return the figure for the 'Location_Graph_Scatter' graph.

    Delegates to LocationScatterPlot with df_acc, the selected period and a
    flag that is True when the view mode equals 'Separated'.
    """
    separated = viewmode == 'Separated'
    return LocationScatterPlot(df_acc, year, separated)