In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import time
import os
from scipy.stats import linregress
from citipy import citipy

In [None]:
# Robert's CSV import: read the raw accident export into `df` and preview it
file = 'Resources/MN_Accidents_Dec20.csv'
df = pd.read_csv(filepath_or_buffer=file)
# Show the first five rows
df.head(n=5)


In [None]:
# Remove every column that the rest of the notebook does not use
unneeded_columns = [
    "Source",
    "End_Lat",
    "End_Lng",
    "Distance(mi)",
    "Side",
    "Zipcode",
    "Country",
    "Timezone",
    "Airport_Code",
    "Weather_Timestamp",
    "Wind_Chill(F)",
    "Humidity(%)",
    "Pressure(in)",
    "Wind_Direction",
    "Wind_Speed(mph)",
    "Precipitation(in)",
    "Amenity",
    "Bump",
    "Crossing",
    "Give_Way",
    "Junction",
    "No_Exit",
    "Railway",
    "Roundabout",
    "Station",
    "Stop",
    "Traffic_Calming",
    "Turning_Loop",
    "Sunrise_Sunset",
    "Civil_Twilight",
    "Nautical_Twilight",
    "Astronomical_Twilight",
]
df = df.drop(columns=unneeded_columns)


In [None]:
# Preview the first five rows of df
df.head(n=5)

In [None]:
# Convert the Start_Time column to datetime so rows can be filtered by date
start_time_parsed = pd.to_datetime(df['Start_Time'])
df['Start_Time'] = start_time_parsed
# Show the dtype of every column
df.dtypes

### Severity
Shows the severity of the accident, a number between 1 and 4, where 1 indicates the least impact on traffic (i.e., short delay as a result of the accident) and 4 indicates a significant impact on traffic (i.e., long delay). Note that severity reported by different sources may differ in their underlying impact on traffic, so please separate data from different sources when doing severity-based analysis.

In [None]:
# Keep only accidents that started during calendar year 2020.
# A half-open interval [2020-01-01, 2021-01-01) is used because a bare date such as
# '2020-12-31' is compared as midnight at the START of that day, so the previous
# inclusive bound (<= '2020-12-31') dropped every accident on Dec 31 after 00:00:00.
# The original author recorded 42,442 rows with the old bound; re-check the count
# after running this cell.
started_in_2020 = (df["Start_Time"] >= '2020-01-01') & (df["Start_Time"] < '2021-01-01')
df_2020 = df[started_in_2020]
df_2020.describe()

In [None]:
# 2020 accidents rated severity 4 on the 1-4 scale
# (172 rows according to the original author's note)
is_severity_4 = df_2020["Severity"] == 4
df_2020_severe4 = df_2020[is_severity_4]
df_2020_severe4.describe()

In [None]:
# 2020 accidents rated severity 3 on the 1-4 scale
# (3,265 rows according to the original author's note)
is_severity_3 = df_2020["Severity"] == 3
df_2020_severe3 = df_2020[is_severity_3]
df_2020_severe3.describe()

In [None]:
# 2020 accidents rated severity 2 on the 1-4 scale
# (38,972 rows according to the original author's note)
is_severity_2 = df_2020["Severity"] == 2
df_2020_severe2 = df_2020[is_severity_2]
df_2020_severe2.describe()

In [None]:
# 2020 accidents rated severity 1 on the 1-4 scale
# (33 rows according to the original author's note)
is_severity_1 = df_2020["Severity"] == 1
df_2020_severe1 = df_2020[is_severity_1]
df_2020_severe1.describe()

### Break up the 2020 Dataframe into 4 parts

In [None]:
# Break up the 2020 DataFrame into 4 parts (1 of 4) and write to .csv
# Positions 0-10610 (10,611 rows).
# iloc's stop position is exclusive, so the stop must equal the start of part 2
# (10611). The previous stop of 10610 left the row at position 10610 out of every part.
df_1 = df_2020.iloc[0:10611, :]
# Forward slash so the file lands inside the Resources folder on every OS
# (a backslash is a path separator only on Windows).
df_1.to_csv('Resources/df_1.csv', index=False)
df_1.describe()

In [None]:
# Break up the 2020 DataFrame into 4 parts (2 of 4) and write to .csv
# Positions 10611-21221 (10,611 rows).
# iloc's stop position is exclusive, so the stop must equal the start of part 3
# (21222). The previous stop of 21221 left the row at position 21221 out of every part.
df_2 = df_2020.iloc[10611:21222, :]
# Forward slash so the file lands inside the Resources folder on every OS
# (a backslash is a path separator only on Windows).
df_2.to_csv('Resources/df_2.csv', index=False)
df_2.describe()

In [None]:
# Break up the 2020 DataFrame into 4 parts (3 of 4) and write to .csv
# Positions 21222-31832 (10,611 rows).
# iloc's stop position is exclusive, so the stop must equal the start of part 4
# (31833). The previous stop of 31832 left the row at position 31832 out of every part.
df_3 = df_2020.iloc[21222:31833, :]
# Forward slash so the file lands inside the Resources folder on every OS
# (a backslash is a path separator only on Windows).
df_3.to_csv('Resources/df_3.csv', index=False)
df_3.describe()

In [None]:
# Break up the 2020 DataFrame into 4 parts (4 of 4) and write to .csv
# Everything from position 31833 to the end (10,609 rows when df_2020 has 42,442 rows).
# The slice is left open-ended so no trailing rows are lost if df_2020 ever holds
# more rows than the previously hard-coded stop of 42442.
df_4 = df_2020.iloc[31833:, :]
# Forward slash so the file lands inside the Resources folder on every OS
# (a backslash is a path separator only on Windows).
df_4.to_csv('Resources/df_4.csv', index=False)
df_4.describe()

### Combine speed limit data into a single dataframe and save as .csv file

In [None]:
# The four per-part CSV files that include a "Speed Limit" column
data_file_list = [
    "Resources/df_1WithSpeed.csv",
    "Resources/df_2WithSpeed.csv",
    "Resources/df_3WithSpeed.csv",
    "Resources/df_4WithSpeed.csv",
]

# Load each file into its own DataFrame
list_of_df = [pd.read_csv(path) for path in data_file_list]

# Stack the parts, keep only rows that have a speed limit, and discard the
# "Unnamed: 0" column
combined_data = (
    pd.concat(list_of_df)
    .dropna(subset=["Speed Limit"])
    .drop(columns="Unnamed: 0")
)

# Write the merged result to disk without the index
combined_data.to_csv("Resources/AccidentDataWithSpeed.csv", index=False)

# Data Visualization

### Create bar graphs showing the number of accidents by speed limit and severity

In [None]:
#Jon's bar graphs start here

In [None]:
# Read the merged accident / speed-limit data back from disk
file = 'Resources/AccidentDataWithSpeed.csv'
speeddf = pd.read_csv(file)

# Number of non-null accident IDs for every (Severity, Speed Limit) pair
by_severity_and_speed = speeddf.groupby(["Severity", "Speed Limit"])
count = by_severity_and_speed["ID"].count()
count

In [None]:
# Distinct speed-limit values present in the data.
# Values noted by the original author: [20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70]
speed_limit_column = speeddf["Speed Limit"]
speeds = speed_limit_column.unique()

### Bar Graph (Severity 1 & 2 Combined)

In [None]:
# Bar chart: accidents of severity 1 and 2 (combined) per speed-limit zone
# Layout adapted from https://python-graph-gallery.com/11-grouped-barplot/

# Accident counts per zone, in the same order as the tick labels (20mph ... 70mph)
low_sev = [1, 10, 2247, 682, 1109, 1378, 1119, 8333, 8903, 5041, 6813]

# One tick label per 5-mph zone: '20mph', '25mph', ..., '70mph'
zone_labels = [f"{mph}mph" for mph in range(20, 75, 5)]

# Bar geometry: thickness and x positions 0 .. n-1
barwidth = 0.5
r1 = np.arange(len(low_sev))

# Title and axis captions
plt.title("Minnesota Traffic Accidents By Severity and MPH Zones", fontweight='bold')
plt.xlabel("Speed Limits", fontweight='bold')
plt.ylabel("Number of Traffic Accidents", fontweight='bold')

# Draw the bars
plt.bar(
    r1,
    low_sev,
    width=barwidth,
    edgecolor='white',
    label='Severity 1 & 2 (of 4)',
    color='orange',
)

# Put a vertical zone label under each bar
plt.xticks(list(range(len(low_sev))), zone_labels, rotation=90)

# Legend, then render
plt.legend()
plt.show()

### Bar Graph (Severity 3)

In [None]:
# Bar chart: accidents of severity 3 per speed-limit zone
# Layout adapted from https://python-graph-gallery.com/11-grouped-barplot/

# Accident counts per zone, in the same order as the tick labels (20mph ... 70mph)
sev3 = [0, 0, 259, 78, 116, 162, 64, 900, 776, 246, 442]

# One tick label per 5-mph zone: '20mph', '25mph', ..., '70mph'
zone_labels = [f"{mph}mph" for mph in range(20, 75, 5)]

# Bar geometry: thickness and x positions 0 .. n-1
barwidth = 0.5
r1 = np.arange(len(sev3))

# Title and axis captions
plt.title("Minnesota Traffic Accidents By Severity and MPH Zones", fontweight='bold')
plt.xlabel("Speed Limits", fontweight='bold')
plt.ylabel("Number of Traffic Accidents", fontweight='bold')

# Draw the bars
plt.bar(
    r1,
    sev3,
    width=barwidth,
    edgecolor='white',
    label='Severity 3 (of 4)',
    color='green',
)

# Put a vertical zone label under each bar
plt.xticks(list(range(len(sev3))), zone_labels, rotation=90)

# Legend, then render
plt.legend()
plt.show()

### Bar Graph (Severity 4)

In [None]:
# Bar chart: accidents of severity 4 per speed-limit zone
# Example found: https://python-graph-gallery.com/11-grouped-barplot/

# Set height of bars: severity 4 accident counts per zone, in the same order
# as the tick labels below (20mph ... 70mph)
sev4 = [0, 0, 2, 2, 7, 2, 6, 78, 27, 9, 21]

# Set width of bar
barwidth = 0.5

# Set position of bar (x positions 0 .. n-1)
r1 = np.arange(len(sev4))

# Make the plot.
# The heights must be sev4: this cell previously passed sev3, so the chart labelled
# "Severity 4" actually displayed the severity 3 counts.
plt.bar(r1, sev4, width=barwidth, edgecolor='white', label='Severity 4 (of 4)', color='red')

# Label each bar with its speed-limit zone
plt.xlabel("Speed Limits", fontweight='bold')
plt.xticks([r for r in range(len(sev4))], ['20mph', '25mph', '30mph', '35mph', '40mph', '45mph',
                                           '50mph', '55mph', '60mph', '65mph', '70mph'], rotation=90)

# Add ylabel and graph title
plt.ylabel("Number of Traffic Accidents", fontweight='bold')
plt.title("Minnesota Traffic Accidents By Severity and MPH Zones", fontweight='bold')

# Create legend and show graph
plt.legend()
plt.show()

### Bar Graph (Severity 1-4)

In [None]:
# Grouped bar chart: accidents per speed-limit zone, one bar per severity level (1-4)
# Example found: https://python-graph-gallery.com/11-grouped-barplot/

# Set width of bar.
# Groups are spaced 1.0 apart and each holds four bars, so 4 * barWidth must stay
# below 1.0. The previous width of 0.3 made each group 1.2 wide, which drew the
# Severity 4 bar of one zone on top of the Severity 1 bar of the next zone.
barWidth = 0.2

# Set height of bars: counts per zone, in the same order as the tick labels below
sev1 = [0, 0, 0, 2, 0, 0, 1, 14, 9, 4, 0]
sev2 = [1, 10, 2247, 680, 1109, 1378, 1118, 8319, 8894, 5037, 6813]
sev3 = [0, 0, 259, 78, 116, 162, 64, 900, 776, 246, 442]
sev4 = [0, 0, 2, 2, 7, 2, 6, 78, 27, 9, 21]

# Set position of bar: each severity is shifted one bar width right of the previous one
r1 = np.arange(len(sev1))
r2 = [x + barWidth for x in r1]
r3 = [x + barWidth for x in r2]
r4 = [x + barWidth for x in r3]

# Make the plot
plt.bar(r1, sev1, width=barWidth, edgecolor='white', label='Severity 1')
plt.bar(r2, sev2, width=barWidth, edgecolor='white', label='Severity 2')
plt.bar(r3, sev3, width=barWidth, edgecolor='white', label='Severity 3')
plt.bar(r4, sev4, width=barWidth, edgecolor='white', label='Severity 4')

# Add xticks to the middle of each group of bars.
# The four bar centres sit at r, r + w, r + 2w and r + 3w, so the group centre is
# r + 1.5 * w (the previous offset of one bar width was left of centre).
plt.xlabel("Speed Limits", fontweight='bold')
plt.xticks([r + 1.5 * barWidth for r in range(len(sev1))], ['20mph', '25mph', '30mph', '35mph', '40mph', '45mph',
                                                            '50mph', '55mph', '60mph', '65mph', '70mph'], rotation=90)
# Add ylabel and graph title
plt.ylabel("Number of Traffic Accidents", fontweight='bold')
plt.title("Minnesota Traffic Accidents By Severity and MPH Zones", fontweight='bold')

# Create legend and show graph
plt.legend()
plt.show()

In [None]:
#Jon's bar graphs end here

In [None]:
#Calvin's gmaps start here

In [None]:
#Calvin's gmap ends here

In [None]:
#Robert's regression analysis starts here

In [None]:
#Robert's regression ends here

In [None]:
#Mike can do some cool stuff here

In [None]:
#Mike can stop doing cool stuff here