In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
import json
import re

In [4]:
#Load the county-by-month traffic fatality table from the data folder
traffic = pd.read_csv(
    '../data/traffic_fatalities_county_month.csv',
    low_memory=False,
)

In [5]:
#Print the column names, non-null counts and dtypes of the loaded frame
traffic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10544 entries, 0 to 10543
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   May     10544 non-null  int64 
 1   June    10544 non-null  int64 
 2   Total   10544 non-null  int64 
 3   County  10544 non-null  object
 4   State   10544 non-null  object
 5   Year    10544 non-null  int64 
dtypes: int64(4), object(2)
memory usage: 494.4+ KB


### Plan:
1. create lists of counties with cicada sightings, by state
2. use lists to create a new dataframe of fatalities by state for relevant/cicada counties, with a column for year
3. Export for further analysis

#One way to verify that the output matches a county list is to compare lengths with len()

## Creating dataframes for each emergence year

In [164]:
#Data for 2015 emergences of Brood XXIII and Brood IV.
#Every *2015 name is a per-state slice of `traffic`; they are stacked row-wise
#and written to traffic2015.csv without the index column.
traffic2015 = pd.concat(
    [
        #Brood XXIII
        AL2015, AR2015, IL2015, IN2015, KY2015, LA2015, MS2015, MO2015_B23, TN2015,
        #Brood IV -- OK2015 is built with the other Brood IV slices but was
        #previously missing from this list, so Oklahoma never reached the export
        IA2015, KS2015, NE2015, OK2015, MO2015_B04, TX2015,
    ],
    axis=0,
)
#NOTE(review): the earlier "320 rows" check predates adding Oklahoma -- re-check len(traffic2015)
traffic2015.to_csv('traffic2015.csv', index=False, header=True)

In [165]:
#Brood V emerged in 2016: stack the six per-state slices row-wise and export
traffic2016 = pd.concat(
    [WV2016, MD2016, PA2016, VA2016, OH2016, NY2016],
    axis=0,
)   #102 rows when originally run
traffic2016.to_csv('traffic2016.csv', header=True, index=False)

In [169]:
#Brood VI emerged in 2017: stack the three per-state slices row-wise and export
traffic2017 = pd.concat(
    [GA2017, SC2017, NC2017],
    axis=0,
)   #31 rows when originally run
traffic2017.to_csv('traffic2017.csv', header=True, index=False)

In [170]:
#Brood VII emerged in 2018. Only the New York slice is involved, so the
#concat is not strictly needed; it is kept for symmetry with the other years.
traffic2018 = pd.concat(
    [NY2018],
    axis=0,
)   #7 rows when originally run

#Noticed: counties with zero fatalities in May/June do not get a row.
traffic2018.to_csv('traffic2018.csv', header=True, index=False)

In [171]:
#Brood VIII emerged in 2019: stack the three per-state slices row-wise and export
traffic2019 = pd.concat(
    [PA2019, OH2019, WV2019],
    axis=0,
)
#Author's note from the original run: no WV fatalities showed up for May/June

traffic2019.to_csv('traffic2019.csv', header=True, index=False)

# Using the county lists to get data for relevant counties and years

In [41]:
#The same county name can occur in more than one state,
#so every filter has to match on State as well as County.
#Each slice also needs the cicada year plus the previous and following year.

In [157]:
#Year windows used to slice `traffic` around each brood's emergence year.
#Each brood gets its own list object, even where the years coincide.

#2015 emergences
Brood23years = list(range(2014, 2018))     #2014-2017
BroodIVyears = list(range(2014, 2018))     #2014-2017

#2016 emergence
BroodVyears = list(range(2014, 2019))      #2014-2018

#2017 emergence
BroodVIyears = list(range(2015, 2020))     #2015-2019

#2018 emergence
BroodVIIyears = list(range(2015, 2020))    #2015-2019

#2019 emergence
#no 2020 data available (not sure it would be useful anyway because traffic was not normal)
BroodVIIIyears = list(range(2015, 2020))   #2015-2019

In [158]:
##Brood XXIII / 2015 emergence
#One slice of `traffic` per state: rows whose County is in that state's
#list, whose State matches, and whose Year falls in Brood23years.

#Year condition shared by every slice in this cell
_in_b23_years = traffic['Year'].isin(Brood23years)

#Alabama
AL2015 = traffic.loc[
    traffic['County'].isin(AL_counties) & traffic['State'].eq('Alabama') & _in_b23_years
]

#Arkansas
AR2015 = traffic.loc[
    traffic['County'].isin(AR_counties) & traffic['State'].eq('Arkansas') & _in_b23_years
]

#Illinois
IL2015 = traffic.loc[
    traffic['County'].isin(IL_counties) & traffic['State'].eq('Illinois') & _in_b23_years
]

#Indiana
IN2015 = traffic.loc[
    traffic['County'].isin(IN_counties) & traffic['State'].eq('Indiana') & _in_b23_years
]

#Kentucky
KY2015 = traffic.loc[
    traffic['County'].isin(KY_counties) & traffic['State'].eq('Kentucky') & _in_b23_years
]

#Louisiana
LA2015 = traffic.loc[
    traffic['County'].isin(LA_counties) & traffic['State'].eq('Louisiana') & _in_b23_years
]

#Mississippi
MS2015 = traffic.loc[
    traffic['County'].isin(MS_counties) & traffic['State'].eq('Mississippi') & _in_b23_years
]

#Missouri -- kept separate from the Brood IV Missouri slice in case the
#two broods need to be told apart later
MO2015_B23 = traffic.loc[
    traffic['County'].isin(MO_counties_B23) & traffic['State'].eq('Missouri') & _in_b23_years
]

#Tennessee
TN2015 = traffic.loc[
    traffic['County'].isin(TN_counties) & traffic['State'].eq('Tennessee') & _in_b23_years
]

In [159]:
##Brood IV / 2015 emergence
#One slice of `traffic` per state: rows whose County is in that state's
#list, whose State matches, and whose Year falls in BroodIVyears.

#Year condition shared by every slice in this cell
_in_b4_years = traffic['Year'].isin(BroodIVyears)

#Iowa
IA2015 = traffic.loc[
    traffic['County'].isin(IA_counties) & traffic['State'].eq('Iowa') & _in_b4_years
]

#Kansas
KS2015 = traffic.loc[
    traffic['County'].isin(KS_counties) & traffic['State'].eq('Kansas') & _in_b4_years
]

#Nebraska
NE2015 = traffic.loc[
    traffic['County'].isin(NE_counties) & traffic['State'].eq('Nebraska') & _in_b4_years
]

#Oklahoma
OK2015 = traffic.loc[
    traffic['County'].isin(OK_counties) & traffic['State'].eq('Oklahoma') & _in_b4_years
]

#Missouri -- kept separate from the Brood XXIII Missouri slice in case the
#two broods need to be told apart later
MO2015_B04 = traffic.loc[
    traffic['County'].isin(MO_counties_B4) & traffic['State'].eq('Missouri') & _in_b4_years
]

#Texas
TX2015 = traffic.loc[
    traffic['County'].isin(TX_counties) & traffic['State'].eq('Texas') & _in_b4_years
]


In [160]:
##Brood V / 2016 emergence
#One slice of `traffic` per state: rows whose County is in that state's
#list, whose State matches, and whose Year falls in BroodVyears.
#The State comparison is an exact string match, so the state names below
#must be spelled exactly as they appear in traffic['State'].

#West Virginia (was misspelled 'West Virgina', which would match no rows
#unless the data itself carries that misspelling)
WV2016 = traffic[(traffic['County'].isin(WV_counties_2016)) &
                 (traffic['State'] == 'West Virginia') & (traffic['Year'].isin(BroodVyears))]

#Maryland
MD2016 = traffic[(traffic['County'].isin(MD_counties)) &
                 (traffic['State'] == 'Maryland') & (traffic['Year'].isin(BroodVyears))]

#Virginia (was misspelled 'Virgina')
VA2016 = traffic[(traffic['County'].isin(VA_counties)) &
                 (traffic['State'] == 'Virginia') & (traffic['Year'].isin(BroodVyears))]

#Pennsylvania
PA2016 = traffic[(traffic['County'].isin(PA_counties_2016)) &
                 (traffic['State'] == 'Pennsylvania') & (traffic['Year'].isin(BroodVyears))]

#Ohio
OH2016 = traffic[(traffic['County'].isin(OH_counties_2016)) &
                 (traffic['State'] == 'Ohio') & (traffic['Year'].isin(BroodVyears))]

#New York
NY2016 = traffic[(traffic['County'].isin(NY_counties_2016)) &
                 (traffic['State'] == 'New York') & (traffic['Year'].isin(BroodVyears))]

In [161]:
##Brood VI / 2017 emergence
#One slice of `traffic` per state: County in the state's list, State
#matching, and Year within BroodVIyears.

#Year condition shared by every slice in this cell
_in_b6_years = traffic['Year'].isin(BroodVIyears)

#Georgia
GA2017 = traffic.loc[
    traffic['County'].isin(GA_counties) & traffic['State'].eq('Georgia') & _in_b6_years
]

#South Carolina
SC2017 = traffic.loc[
    traffic['County'].isin(SC_counties) & traffic['State'].eq('South Carolina') & _in_b6_years
]

#North Carolina
NC2017 = traffic.loc[
    traffic['County'].isin(NC_counties) & traffic['State'].eq('North Carolina') & _in_b6_years
]

In [162]:
##Brood VII / 2018 emergence

#New York: County in NY_counties_2018, State matching, Year within BroodVIIyears
NY2018 = traffic.loc[
    traffic['County'].isin(NY_counties_2018)
    & traffic['State'].eq('New York')
    & traffic['Year'].isin(BroodVIIyears)
]

In [163]:
##Brood VIII / 2019 emergence
#One slice of `traffic` per state: County in the state's list, State
#matching, and Year within BroodVIIIyears. The State comparison is an exact
#string match, so names must be spelled as they appear in traffic['State'].

#Pennsylvania
PA2019 = traffic[(traffic['County'].isin(PA_counties_2019)) &
                 (traffic['State'] == 'Pennsylvania') & (traffic['Year'].isin(BroodVIIIyears))]
#Ohio
OH2019 = traffic[(traffic['County'].isin(OH_counties_2019)) &
                 (traffic['State'] == 'Ohio') & (traffic['Year'].isin(BroodVIIIyears))]

#West Virginia (was misspelled 'West Virgina', which would leave this slice
#empty unless the data itself carries that misspelling)
WV2019 = traffic[(traffic['County'].isin(WV_counties_2019)) &
                 (traffic['State'] == 'West Virginia') & (traffic['Year'].isin(BroodVIIIyears))]

# Creating lists of States and Cicada Counties

### 2015: Brood XXIII

In [7]:
#Alabama counties with Brood XXIII sightings (2015 emergence)
AL_counties = [
    'Colbert',
    'Lauderdale',
]

In [35]:
#Arkansas (Brood XXIII in 2015)
#Names are matched exactly against traffic['County'], so spelling matters:
#'Pulaski' was previously misspelled 'Pulaksi'.
AR_counties = [
    'Cross', 'Grant', 'Lonoke',
    'Monroe', 'Poinsett', 'Prairie',
    'Pulaski', 'Saline', 'St. Francis',
]

In [36]:
#Illinois counties with Brood XXIII sightings (2015 emergence)
#NOTE(review): 'St Clair' is written without a period while the Arkansas list
#uses 'St. Francis' -- confirm which form traffic['County'] uses
IL_counties = [
    'Massac', 'Alexander', 'Union', 'Johnson',
    'Pulaski', 'Williamson', 'Jackson', 'Franklin',
    'Perry', 'Randolph', 'Monroe', 'St Clair',
    'Washington', 'Clinton', 'De Witt', 'Crawford',
    'Lawrence', 'Wabash', 'White',
]

In [44]:
#Indiana counties with Brood XXIII sightings (2015 emergence)
IN_counties = [
    'Clay', 'Gibson', 'Knox',
    'Pike', 'Posey', 'Spencer',
    'Sullivan', 'Vanderburgh', 'Warrick',
]

In [75]:
#Iowa counties with Brood IV sightings (2015 emergence)
IA_counties = [
    'Adair', 'Fremont', 'Mills',
    'Montgomery', 'Page', 'Pottawattamie',
]

In [58]:
#Kansas (Brood IV in 2015)
#Names are matched exactly against traffic['County'], so spelling matters:
#'Butler' and 'Osage' were previously misspelled 'Bulter' and 'Oasge'.
KS_counties = [
    'Anderson', 'Bourbon', 'Brown', 'Butler',
    'Chase', 'Chautauqua', 'Cherokee', 'Cowley',
    'Crawford', 'Dickinson', 'Douglas', 'Ellsworth',
    'Geary', 'Greenwood', 'Harper', 'Harvey',
    'Johnson', 'Labette', 'Leavenworth', 'Linn',
    'Lyon', 'Marion', 'Marshall', 'McPherson',
    'Miami', 'Montgomery', 'Morris', 'Nemaha',
    'Neosho', 'Osage', 'Ottawa', 'Pottawatomie',
    'Riley', 'Saline', 'Sedgwick', 'Shawnee',
    'Sumner', 'Wabaunsee', 'Wilson', 'Wyandotte',
]

In [59]:
#Kentucky (Brood XXIII in 2015)
#Names are matched exactly against traffic['County'], so spelling matters:
#'McCracken' was previously misspelled 'McCraken'.
KY_counties = [
    'Ballard', 'Calloway', 'Carlisle', 'Daviess', 'Graves',
    'Henderson', 'Livingston', 'Marshall', 'McCracken', 'Trigg',
]

In [60]:
#Louisiana entries with Brood XXIII sightings (2015 emergence)
LA_counties = [
    'Caldwell', 'Grant', 'Madison', 'Morehouse',
    'Ouachita', 'Rapides', 'Richland',
]

In [78]:
#Mississippi counties with Brood XXIII sightings (2015 emergence)
MS_counties = [
    'Alcorn', 'Amite', 'Attala', 'Benton',
    'Calhoun', 'Carroll', 'Chickasaw', 'Choctaw',
    'Copiah', 'Franklin', 'Grenada', 'Hinds',
    'Holmes', 'Itawamba', 'Lafayette', 'Leake',
    'Lee', 'Lincoln', 'Madison', 'Montgomery',
    'Panola', 'Pike', 'Pontotoc', 'Prentiss',
    'Rankin', 'Scott', 'Simpson', 'Tippah',
    'Tishomingo', 'Webster', 'Yalobusha',
]

In [77]:
#Missouri counties with Brood IV sightings (2015 emergence)
#NOTE(review): 'Dekalb' and 'St Clair' must match the spelling used in
#traffic['County'] exactly -- confirm capitalisation and punctuation
MO_counties_B4 = [
    'Andrew', 'Atchison', 'Bates', 'Buchanan',
    'Caldwell', 'Cass', 'Clay', 'Clinton',
    'Daviess', 'Dekalb', 'Gentry', 'Grundy',
    'Harrison', 'Henry', 'Holt', 'Jackson',
    'Johnson', 'Lafayette', 'Nodaway', 'Platte',
    'St Clair', 'Vernon', 'Worth',
]

In [79]:
#Missouri counties with Brood XXIII sightings (2015 emergence)
MO_counties_B23 = [
    'Bollinger', 'Cape Girardeau', 'Mississippi',
    'Perry', 'Stoddard',
]

In [80]:
#Nebraska counties with Brood IV sightings (2015 emergence)
NE_counties = [
    'Cass', 'Douglas', 'Nemaha',
    'Richardson', 'Sarpy', 'Washington',
]

In [65]:
#Oklahoma counties with Brood IV sightings (2015 emergence)
OK_counties = [
    'Bryan', 'Canadian', 'Carter', 'Choctaw',
    'Comanche', 'Craig', 'Creek', 'Garfield',
    'Grant', 'Jefferson', 'Kay', 'Love',
    'Marshall', 'Mayes', 'McCurtain', 'Noble',
    'Nowata', 'Osage', 'Payne', 'Rogers',
    'Stephens', 'Washington',
]

In [66]:
#Tennessee (Brood XXIII in 2015)
#'Henry' was listed twice; the duplicate is removed so len(TN_counties)
#equals the number of distinct counties (membership tests are unaffected).
#NOTE(review): 'McKenzie' does not appear to be a Tennessee county name --
#confirm against the sightings source and traffic['County']
TN_counties = [
    'Benton', 'Carroll', 'Fayette', 'Gibson',
    'Hardin', 'Haywood', 'Henderson', 'Henry',
    'Lauderdale', 'Lewis', 'Madison', 'McKenzie',
    'Shelby', 'Tipton', 'Wayne',
]

In [67]:
#Texas counties with Brood IV sightings (2015 emergence)
TX_counties = [
    'Delta', 'Denton', 'Fannin',
    'Grayson', 'Hunt', 'Lamar',
    'Montague', 'Red River', 'Wise',
]

### 2016: Brood V

In [91]:
#West Virginia (Brood V in 2016)
#'Preston' was listed twice; the duplicate is removed so
#len(WV_counties_2016) equals the number of distinct counties
#(membership tests are unaffected).
WV_counties_2016 = [
    'Barbour', 'Braxton', 'Brooke', 'Calhoun',
    'Clay', 'Doddridge', 'Grant', 'Hancock',
    'Hardy', 'Harrison', 'Jackson', 'Lewis',
    'Marion', 'Monongalia', 'Nicholas', 'Pleasants',
    'Preston', 'Roane', 'Taylor', 'Tyler',
    'Upshur', 'Webster', 'Wetzel', 'Wood',
]

In [81]:
#Maryland counties with Brood V sightings (2016 emergence)
MD_counties = [
    'Garrett',
]

In [82]:
#Virginia counties with Brood V sightings (2016 emergence)
VA_counties = [
    'Bath',
    'Alleghany',
]

In [83]:
#Pennsylvania counties with Brood V sightings (2016 emergence)
PA_counties_2016 = [
    'Allegheny', 'Carbon', 'Fayette', 'Greene',
    'Somerset', 'Washington', 'Westmoreland',
]

In [84]:
#Ohio counties with Brood V sightings (2016 emergence)
OH_counties_2016 = [
    'Ashland', 'Ashtabula', 'Athens', 'Belmont',
    'Columbiana', 'Coshocton', 'Cuyahoga', 'Fairfield',
    'Geauga', 'Guernsey', 'Harrison', 'Hocking',
    'Huron', 'Jackson', 'Jefferson', 'Knox',
    'Lake', 'Licking', 'Lorain', 'Medina',
    'Meigs', 'Morgan', 'Muskingum', 'Perry',
    'Pickaway', 'Pike', 'Richland', 'Ross',
    'Scioto', 'Seneca', 'Summit', 'Tuscarawas',
    'Vinton', 'Washington',
]

In [90]:
#New York counties with Brood V sightings (2016 emergence)
NY_counties_2016 = [
    'Suffolk',
]

### 2017: Brood VI

In [97]:
#Georgia counties with Brood VI sightings (2017 emergence)
GA_counties = [
    'Rabun',
]

In [98]:
#South Carolina counties with Brood VI sightings (2017 emergence)
SC_counties = [
    'Pickens',
    'Oconee',
]

In [134]:
#North Carolina counties with Brood VI sightings (2017 emergence)
NC_counties = [
    'Buncombe', 'Burke', 'Caldwell',
    'Catawba', 'Henderson', 'McDowell',
    'Polk', 'Rutherford', 'Wilkes',
]

### 2018: Brood VII

In [135]:
#New York counties with Brood VII sightings (2018 emergence)
NY_counties_2018 = [
    'Cayuga', 'Onondaga', 'Livingston',
]

### 2019: Brood VIII

In [128]:
#Pennsylvania (Brood VIII in 2019)
#Names are matched exactly against traffic['County'], so spelling matters:
#'Butler' was previously misspelled 'Bulter'.
PA_counties_2019 = [
    'Allegheny', 'Armstrong', 'Beaver', 'Butler', 'Indiana',
    'Lawrence', 'Mercer', 'Venango', 'Washington', 'Westmoreland',
]

In [149]:
#Ohio counties with Brood VIII sightings (2019 emergence)
OH_counties_2019 = [
    'Carroll', 'Columbiana', 'Jefferson',
    'Mahoning', 'Trumbull',
]

In [130]:
#West Virginia counties with Brood VIII sightings (2019 emergence)
WV_counties_2019 = [
    'Hancock',
    'Brooke',
]