In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt


In [2]:
# Load the raw accident records into a DataFrame.
# Data source: https://data-avl.opendata.arcgis.com/datasets/05af48ba4caf43499a37f348abb98a66_0/explore
AshevilleTraffic = pd.read_csv(
    '../data/APDAccidents.csv',
    low_memory=False,  # read the file in one pass so column dtypes are inferred consistently
)

In [3]:
# Summarize column dtypes and non-null counts of the raw table.
AshevilleTraffic.info()  # 114918 rows; accident_id is the only column with missing values

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114918 entries, 0 to 114917
Data columns (total 8 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   X              114918 non-null  float64
 1   Y              114918 non-null  float64
 2   accident_date  114918 non-null  object 
 3   accident_id    114578 non-null  object 
 4   accident_time  114918 non-null  int64  
 5   agency         114918 non-null  object 
 6   case_id        114918 non-null  int64  
 7   OBJECTID       114918 non-null  int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 7.0+ MB


In [9]:
# Sort newest-first to see the date span of the data (we have data for 2006 - 2020).
# At this point accident_date still holds 'MM/DD/YYYY' text, which would sort
# month-first as plain strings, so parse it inside the sort key to get a true
# chronological ordering. The key is a no-op if the column is already datetime.
AshevilleTraffic.sort_values('accident_date', ascending=False, key=pd.to_datetime)

Unnamed: 0,X,Y,accident_date,accident_id,accident_time,agency,case_id,OBJECTID
112835,-85.568296,33.568161,12/31/2020,20031801,1222,APD,20031801,112836
112841,-85.568296,33.568161,12/31/2020,20031828,1805,APD,20031828,112842
112832,-85.568296,33.568161,12/31/2020,20031789,944,APD,20031789,112833
112834,-85.568296,33.568161,12/31/2020,20031796,1145,APD,20031796,112835
112836,-82.517255,35.592587,12/31/2020,20031803,1149,APD,20031803,112837
...,...,...,...,...,...,...,...,...
5062,-82.557464,35.586495,01/01/2006,06000038,1213,APD,6000038,5063
5063,-85.568296,33.568161,01/01/2006,06000041,1259,APD,6000041,5064
5065,-82.518772,35.543758,01/01/2006,06000046,1357,APD,6000046,5066
5066,-85.568296,33.568161,01/01/2006,06000048,1444,APD,6000048,5067


In [10]:
## Trim to the relevant years: Brood VI emerged in 2017.  Keeping 2015-2019
# First convert accident_date from 'MM/DD/YYYY' text to datetime so it can be
# compared against date boundaries. The format is given explicitly so the parse
# is month-first by construction rather than by inference, and so any malformed
# value raises instead of being silently misread.
AshevilleTraffic['accident_date'] = pd.to_datetime(
    AshevilleTraffic['accident_date'],
    format='%m/%d/%Y',
)

In [13]:
# Trim to the study window: every accident dated 2015-01-01 through 2019-12-31.
# The lower bound is inclusive (>=) so accidents on New Year's Day 2015 are kept;
# the upper bound is exclusive so nothing dated 2020 slips in.
# Requires accident_date to already be datetime.
AshTrim = AshevilleTraffic[
    (AshevilleTraffic['accident_date'] >= dt.datetime(2015, 1, 1))
    & (AshevilleTraffic['accident_date'] < dt.datetime(2020, 1, 1))
]

In [23]:
## Cicada months: keep only May and June records for each of 2015-2019.

# 2015: rows dated strictly after 30 April and strictly before 1 July.
AshTrim_2015 = AshevilleTraffic.loc[
    lambda records: (records['accident_date'] > dt.datetime(2015, 4, 30))
    & (records['accident_date'] < dt.datetime(2015, 7, 1))
]

In [24]:
# Eyeball the 2015 slice oldest-first to confirm it starts in May and ends in June.
AshTrim_2015.sort_values(by='accident_date')

Unnamed: 0,X,Y,accident_date,accident_id,accident_time,agency,case_id,OBJECTID
68738,-85.568296,33.568161,2015-05-01,15010633,1534,APD,15010633,68739
68748,-82.553068,35.605575,2015-05-01,15010670,2225,APD,15010670,68749
68747,-82.550871,35.591144,2015-05-01,15010666,2146,APD,15010666,68748
68746,-82.562201,35.592967,2015-05-01,15010659,2036,APD,15010659,68747
68745,-82.552772,35.629111,2015-05-01,1510658,1957,APD,15010658,68746
...,...,...,...,...,...,...,...,...
69989,-82.565332,35.584603,2015-06-30,15016521,1200,APD,15016521,69990
69988,-82.575332,35.590495,2015-06-30,15016494,813,APD,15016494,69989
69987,-85.568296,33.568161,2015-06-30,15016491,704,APD,15016491,69988
69995,-82.551182,35.582292,2015-06-30,15016561,1633,APD,15016561,69996


In [25]:
# 2016: rows dated strictly after 30 April and strictly before 1 July.
AshTrim_2016 = AshevilleTraffic.loc[
    lambda records: (records['accident_date'] > dt.datetime(2016, 4, 30))
    & (records['accident_date'] < dt.datetime(2016, 7, 1))
]

In [26]:
# 2017: rows dated strictly after 30 April and strictly before 1 July.
AshTrim_2017 = AshevilleTraffic.loc[
    lambda records: (records['accident_date'] > dt.datetime(2017, 4, 30))
    & (records['accident_date'] < dt.datetime(2017, 7, 1))
]

In [27]:
# 2018: rows dated strictly after 30 April and strictly before 1 July.
AshTrim_2018 = AshevilleTraffic.loc[
    lambda records: (records['accident_date'] > dt.datetime(2018, 4, 30))
    & (records['accident_date'] < dt.datetime(2018, 7, 1))
]

In [28]:
# 2019: rows dated strictly after 30 April and strictly before 1 July.
AshTrim_2019 = AshevilleTraffic.loc[
    lambda records: (records['accident_date'] > dt.datetime(2019, 4, 30))
    & (records['accident_date'] < dt.datetime(2019, 7, 1))
]

## Bring it together and Export it


In [29]:
## Stack the five per-year May/June slices into one dataframe, in year order.
## Each slice keeps its original row labels; nothing is re-indexed here.

years = [
    AshTrim_2015,
    AshTrim_2016,
    AshTrim_2017,
    AshTrim_2018,
    AshTrim_2019,
]
Ash_Traffic_final = pd.concat(objs=years)

In [30]:
# Write the combined frame to a CSV in the working directory.
# The row index is dropped; the column header row is written (the to_csv default).
Ash_Traffic_final.to_csv('Ash_Traffic_final.csv', index=False)