# Traffic Incident Management in Nashville, TN

### Loading the libraries

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import datetime
import math
import re

from shapely.geometry import Point
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster

%matplotlib inline

### Importing the data

In [2]:
# Load the 2021 incident master file plus the daily and hourly weather tables.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which is the documented remedy for the
# mixed-type DtypeWarning.
# NOTE(review): it is not visible which of the three files triggered the
# warning, so the option is applied to all of them.
traffic_df = pd.read_csv('../data/MASTER File 2021.csv', low_memory=False)
d_weather = pd.read_csv('../data/Daily Weather Formatted.csv', low_memory=False)
h_weather = pd.read_csv('../data/Hourly Weather Formatted.csv', low_memory=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


### Reviewing the Traffic Dataframe

In [None]:
# Print the traffic frame's dimensions as a (rows, columns) tuple.
print(traffic_df.shape)

In [None]:
#keep in case
#print(traffic_df.crs)

In [None]:
# Preview the first five rows of the traffic frame.
traffic_df.head(n=5)

In [None]:
# Preview the last five rows of the traffic frame.
traffic_df.tail(n=5)

In [None]:
# Print a summary of the traffic frame (columns, non-null counts, dtypes).
traffic_df.info()

In [None]:
# Show the column labels of the traffic frame.
traffic_df.columns

In [None]:
# Frequency of each incident type, most common first.
traffic_df['Incident Type'].value_counts(sort=True, ascending=False)

In [None]:
# Count how many rows have a missing incident type (True) versus a value (False).
traffic_df['Incident Type'].isna().value_counts()

In [None]:
# List the distinct incident types in order of first appearance.
traffic_df.loc[:, 'Incident Type'].unique()

In [None]:
# Keep only the incidents whose County is exactly 'Davidson'.
davidson = traffic_df.loc[traffic_df['County'].eq('Davidson')]

### Geospatial coding: update the 'geometry' column

In [None]:
# Build one shapely Point per row, ordered (x=Longitude, y=Latitude); both
# coordinates are cast to float first.
traffic_df['geometry'] = traffic_df.apply(
    lambda row: Point((float(row['Longitude']), float(row['Latitude']))),
    axis=1,
)

In [None]:
# Preview the first five rows of the traffic frame.
traffic_df.head(n=5)

In [None]:
# Wrap the traffic frame in a GeoDataFrame, naming the geometry column
# explicitly and attaching a coordinate reference system so later spatial
# operations (joins, reprojection) know how to interpret the points.
# NOTE(review): EPSG:4326 assumes Latitude/Longitude are WGS84 degrees —
# confirm against the data source.
traffic_geo = gpd.GeoDataFrame(traffic_df, geometry='geometry', crs='EPSG:4326')

In [None]:
# Preview the first five rows of the geo-enabled frame.
traffic_geo.head(n=5)

In [None]:
# Show the class of traffic_geo to check the conversion above.
type(traffic_geo)

### Save the number of rows and columns as a tuple

In [None]:
# Keep the (rows, columns) tuple and report both numbers in one sentence.
rows_and_cols = traffic_df.shape
print('There are {} rows and {} columns.\n'.format(*rows_and_cols))

In [None]:
# Compile the list of dataframes to be merged.
# The previous entries (car_accident0 .. car_accident4) are not defined in the
# visible cells of this notebook; the frames it actually loads are the traffic
# incidents and the two weather tables.
# NOTE(review): confirm these are the frames intended for the merge.
data_frames = [traffic_df, h_weather, d_weather]

In [None]:
# Stack the three frames row-wise (axis=0); columns are unioned and sorted.
# NOTE(review): this appends weather rows below incident rows rather than
# joining weather onto incidents by date/time — confirm that is intended.
traffic_2021 = pd.concat(objs=[traffic_df, h_weather, d_weather], axis=0, sort=True)
traffic_2021.head(n=2)

In [None]:
# The {} placeholder is filled with the number of rows.
# NOTE(review): car_accidents_2010_2020 is not defined in the visible cells.
accident_total = len(car_accidents_2010_2020)
print('There are {} accidents in the data'.format(accident_total))

In [None]:
# Show every column when a frame is displayed.
pd.set_option('display.max_columns', 1000)

# Drop columns that are not needed for the analysis. Each label appears once
# ('property_damage' used to be listed twice) and `columns=` states the axis
# explicitly.
# NOTE(review): `df` is not defined in the visible cells and these labels do
# not match the traffic columns listed in this notebook — confirm the target.
df.drop(
    columns=[
        'property_damage', 'accident_number', 'reporting_officer',
        'collision_type', 'weather', 'illumination', 'harmful_codes',
        'state_name', 'rpa', 'geocoded_column',
        ':@computed_region_wvby_4s8j', ':@computed_region_3aw5_2wv7',
        ':@computed_region_p6sk_2acq', ':@computed_region_gxvr_9jxz',
    ],
    inplace=True,
)
df.tail()

# columns listed
'Region', 'Incident ID', 'Incident Type', 'Full Location ',
       'Start Date', 'Start Time', 'Duration (mins)', 'Lanes\nBlocked',
       'Blockage\nDuration', 'Resp Time', 'Rural/\nUrban', 'Key ', 'County',
       'Interstate', 'Route', 'Direction', 'Unnamed: 16', 'Description',
       'Inc Mile Marker', 'Latitude', 'Longitude'

### top _ sorted 

In [None]:
# Five rows with the highest Internet_Users_Pct, largest first.
# NOTE(review): gdp_and_internet_use_2014 is not defined in the visible cells.
(
    gdp_and_internet_use_2014
    .sort_values(by='Internet_Users_Pct', ascending=False)
    .head(n=5)
)

### Reviewing Weather Data

In [None]:
# Preview the first five rows of the hourly weather table.
h_weather.head(n=5)

In [None]:
# Show the column labels of the hourly weather table.
h_weather.columns

In [None]:
# Preview the first five rows of the daily weather table.
d_weather.head(n=5)

In [None]:
# Show the column labels of the daily weather table.
d_weather.columns

### Date/Time

In [None]:
# Parse 'Date and Time' once, then store the full timestamp plus its date and
# time-of-day parts as separate columns.
timestamps = pd.to_datetime(car_accidents_2010_2020['Date and Time'])
car_accidents_2010_2020['DateTime'] = timestamps
car_accidents_2010_2020['date'] = timestamps.dt.date
car_accidents_2010_2020['time'] = timestamps.dt.time

In [None]:
# Use the parsed timestamp as the row index, then preview three rows.
car_accidents_2010_2020 = car_accidents_2010_2020.set_index(keys='DateTime')
car_accidents_2010_2020.head(n=3)

### Histograms

In [None]:
# Frequency table for a single column.
# NOTE(review): 'column name' is a placeholder — it is not among the columns
# listed in this notebook; replace it with a real label before running.
traffic_df['column name'].value_counts()

In [None]:
# Coerce the vehicle-count column to a numeric dtype, then draw its histogram.
# NOTE(review): this label is not among the traffic columns listed in this
# notebook — confirm the column exists.
vehicle_col = 'number_of_motor_vehicles'
traffic_df[vehicle_col] = pd.to_numeric(traffic_df[vehicle_col])
traffic_df.hist(column=[vehicle_col])

In [None]:
# Share of accidents per collision type, bars ordered by the type label.
type_share = car_accidents_2010_2020['Collision Type'].value_counts(normalize=True).sort_index()
share_ax = type_share.plot.bar()
share_ax.grid()
share_ax.set_title('collision Type')
share_ax.set_xlabel('Collision Type')
share_ax.set_ylabel('Fraction');

In [None]:
# Number of accidents per collision type, most common first.
# value_counts() is not normalized here, so the y-axis shows raw counts and is
# labelled accordingly (the label previously read 'Fraction').
car_accidents_2010_2020['Collision Type'].value_counts().plot.bar()
plt.grid()
plt.title('Collision Type')
plt.xlabel('Collision Type')
plt.ylabel('Total count');

In [None]:
# Share of accidents per collision description, bars ordered by the label.
desc_share = car_accidents_2010_2020['Collision Type Description'].value_counts(normalize=True).sort_index()
desc_share_ax = desc_share.plot.bar()
desc_share_ax.grid()
desc_share_ax.set_title('Collision Type Description')
desc_share_ax.set_xlabel('Collision Type Description')
desc_share_ax.set_ylabel('Fraction');

In [None]:
# Raw number of accidents per collision description, most common first.
desc_counts = car_accidents_2010_2020['Collision Type Description'].value_counts()
desc_count_ax = desc_counts.plot.bar()
desc_count_ax.grid()
desc_count_ax.set_title('Collision Type Description')
desc_count_ax.set_xlabel('Collision Type Description')
desc_count_ax.set_ylabel('Total count');

In [None]:
# Share of accidents falling on each day of the week.
# DatetimeIndex.weekday_name was removed in pandas 1.0; day_name() is the
# replacement and returns the same English day names.
car_accidents_2010_2020['Weekday'] = car_accidents_2010_2020.index.day_name()
weekday = car_accidents_2010_2020.groupby('Weekday')['Accident Number'].count()
weekday = weekday / weekday.sum()

# Plot in calendar order; reindex (rather than label lookup) keeps a day with
# no accidents as a zero-height bar instead of raising KeyError.
dayOfWeek = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekday.reindex(dayOfWeek, fill_value=0).plot.bar()
plt.title('Accidents by Weekday')
plt.xlabel('Weekday')
# The series was divided by its sum above, so the bars are fractions.
plt.ylabel('Fraction of accidents');

In [None]:
# One bar chart per year showing the share of accidents on each weekday.
# Expects the 'Weekday' column and a DatetimeIndex on car_accidents_2010_2020.
years = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019']
dayOfWeek = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# The subplot count follows the year list instead of a hard-coded 10.
fig, axes = plt.subplots(1, len(years), figsize=(21, 7), sharex='all', sharey='all')
fig.suptitle('Accidents by Weekday for Different Years')

for ax, year in zip(axes, years):
    # .loc with a year string selects every row of that year from the index.
    weekday = car_accidents_2010_2020.loc[year].groupby('Weekday')['Accident Number'].count()
    weekday = weekday / weekday.sum()
    # reindex keeps calendar order and shows a missing day as zero.
    weekday.reindex(dayOfWeek, fill_value=0).plot.bar(ax=ax)
    ax.set_title(year)
    # Label each panel explicitly; plt.xlabel before the loop only reached the
    # axes that happened to be current.
    ax.set_xlabel('Weekday')

# The y-axis is shared, so one label on the left-most panel is enough.
axes[0].set_ylabel('Fraction of accidents')

### Create scatterplots

In [None]:
# Scatter of internet-use percentage against GDP per capita for 2004.
# NOTE(review): gdp_and_internet_use_2004 is not defined in the visible cells.
plt.figure(figsize=(15, 10))
sns.scatterplot(
    data=gdp_and_internet_use_2004,
    x='Internet_Users_Pct',
    y='GDP_Per_Capita',
    alpha=0.9,
)
plt.xlabel('Percentage of Internet Users', fontsize=12)
plt.title('2004 Percent Using the Internet vs GDP Per Capita', fontsize=15)
plt.ylabel('GDP Per Capita', fontsize=12)

In [None]:
# Distribution of GDP per capita.
# The histogram styling keyword is `hist_kws`; `hist_kw` is not accepted by
# distplot and raises a TypeError.
# NOTE(review): distplot is deprecated in recent seaborn releases — consider
# histplot/displot; gdp_and_internet_use is not defined in the visible cells.
ax = sns.distplot(gdp_and_internet_use.gdp_percapita,
                  bins=25,
                  hist_kws={'alpha': .2})
ax.set(xlabel='GDP per Capita', ylabel='Frequency')

In [None]:
# Distribution of the internet-user percentage.
# The histogram styling keyword is `hist_kws`; `hist_kw` is not accepted by
# distplot and raises a TypeError.
# NOTE(review): distplot is deprecated in recent seaborn releases — consider
# histplot/displot; gdp_and_internet_use is not defined in the visible cells.
ax = sns.distplot(gdp_and_internet_use.Internet_Users_Pct,
                  bins=25,
                  color='green',
                  hist_kws={'alpha': .4})
ax.set(xlabel='Internet User Pct', ylabel='Frequency')

In [None]:
# One line chart per year showing the number of accidents in each month.
# Expects a DatetimeIndex on car_accidents_2010_2020.
years = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019']

# The subplot count follows the year list instead of a hard-coded 10.
fig, axes = plt.subplots(1, len(years), figsize=(44, 8), sharex='all', sharey='all')
fig.suptitle('Accidents by month for Different Years')

for ax, year in zip(axes, years):
    # Select the year's rows, then count accident numbers per calendar month.
    sample = car_accidents_2010_2020.loc[year]['Accident Number'].resample('M').count()
    sample.plot(ax=ax)
    ax.set_ylim(0, 8000)
    # The title is formatted with the loop's year; the name used before
    # (`text`) was never defined and raised NameError.
    ax.set_title('Accidents, {} count'.format(year))
    ax.set_xlabel('Year')
    ax.set_ylabel('Accident Count')

In [None]:
# Write the frame to a CSV file under ../data.
# NOTE(review): '___df.csv' looks like a placeholder name and `df` is not
# defined in the visible cells — confirm both before running.
df.to_csv(path_or_buf='../data/___df.csv')