In [1]:
import pandas as pd
import numpy as np
pd.options.mode.copy_on_write = True

1999-2020 Homicide data from: https://wonder.cdc.gov/ucd-icd10.html

In [2]:
hom_1999_2010 = pd.read_csv('Underlying Cause of Death, 1999-2010.txt',sep='\t', dtype = {'County Code': str}, usecols=['Year', 'County','County Code','Deaths','Population','Crude Rate'])
hom_1999_2010 = hom_1999_2010.dropna()
hom_1999_2010.Year = hom_1999_2010.Year.astype('int64')
hom_1999_2010

Unnamed: 0,Year,County,County Code,Deaths,Population,Crude Rate
0,1999,"Autauga County, AL",01001,Suppressed,42963,Suppressed
1,1999,"Baldwin County, AL",01003,Suppressed,137555,Suppressed
2,1999,"Barbour County, AL",01005,Suppressed,28866,Suppressed
3,1999,"Bibb County, AL",01007,Suppressed,20560,Suppressed
4,1999,"Blount County, AL",01009,Suppressed,50237,Suppressed
...,...,...,...,...,...,...
37770,2010,"Sweetwater County, WY",56037,Suppressed,43806,Suppressed
37771,2010,"Teton County, WY",56039,0,21294,Unreliable
37772,2010,"Uinta County, WY",56041,0,21118,Unreliable
37773,2010,"Washakie County, WY",56043,0,8533,Unreliable


In [3]:
hom_2011_2020 = pd.read_csv('Underlying Cause of Death, 2011-2020.txt',sep='\t', dtype = {'County Code': str}, usecols=['Year', 'County','County Code','Deaths','Population','Crude Rate'])
hom_2011_2020 = hom_2011_2020.dropna()
hom_2011_2020.Year = hom_2011_2020.Year.astype('int64')
hom_2011_2020

Unnamed: 0,Year,County,County Code,Deaths,Population,Crude Rate
0,2011,"Autauga County, AL",01001,Suppressed,55267,Suppressed
1,2011,"Baldwin County, AL",01003,Suppressed,186717,Suppressed
2,2011,"Barbour County, AL",01005,Suppressed,27119,Suppressed
3,2011,"Bibb County, AL",01007,Suppressed,22766,Suppressed
4,2011,"Blount County, AL",01009,Suppressed,57677,Suppressed
...,...,...,...,...,...,...
31474,2020,"Sweetwater County, WY",56037,Suppressed,42673,Suppressed
31475,2020,"Teton County, WY",56039,0,23497,Unreliable
31476,2020,"Uinta County, WY",56041,0,20215,Unreliable
31477,2020,"Washakie County, WY",56043,0,7760,Unreliable


2021 homicide data from: https://wonder.cdc.gov/ucd-icd10-expanded.html

In [4]:
hom_2021 = pd.read_csv('Underlying Cause of Death, 2021 county.txt',sep='\t', dtype = {'County Code': str}, usecols=['County','County Code','Deaths','Population','Crude Rate'])
hom_2021 = hom_2021.dropna()
hom_2021.insert(0, 'Year', 2021)
hom_2021

Unnamed: 0,Year,County,County Code,Deaths,Population,Crude Rate
0,2021,"Autauga County, AL",01001,Suppressed,59095.0,Suppressed
1,2021,"Baldwin County, AL",01003,10,239294.0,Unreliable
2,2021,"Barbour County, AL",01005,Suppressed,24964.0,Suppressed
3,2021,"Bibb County, AL",01007,Suppressed,22477.0,Suppressed
4,2021,"Blount County, AL",01009,Suppressed,59041.0,Suppressed
...,...,...,...,...,...,...
3137,2021,"Sweetwater County, WY",56037,0,41614.0,Unreliable
3138,2021,"Teton County, WY",56039,0,23575.0,Unreliable
3139,2021,"Uinta County, WY",56041,0,20635.0,Unreliable
3140,2021,"Washakie County, WY",56043,0,7705.0,Unreliable


In [5]:
df = pd.concat([hom_1999_2010,hom_2011_2020,hom_2021])
df

Unnamed: 0,Year,County,County Code,Deaths,Population,Crude Rate
0,1999,"Autauga County, AL",01001,Suppressed,42963,Suppressed
1,1999,"Baldwin County, AL",01003,Suppressed,137555,Suppressed
2,1999,"Barbour County, AL",01005,Suppressed,28866,Suppressed
3,1999,"Bibb County, AL",01007,Suppressed,20560,Suppressed
4,1999,"Blount County, AL",01009,Suppressed,50237,Suppressed
...,...,...,...,...,...,...
3137,2021,"Sweetwater County, WY",56037,0,41614.0,Unreliable
3138,2021,"Teton County, WY",56039,0,23575.0,Unreliable
3139,2021,"Uinta County, WY",56041,0,20635.0,Unreliable
3140,2021,"Washakie County, WY",56043,0,7705.0,Unreliable


In [6]:
#Rename columns
df.columns = ['date','county_state','FIPS','homicides','population','homicides_per_100k']
#Split county_state to have access to both county name and state name separately
df[['county', 'state_abbr']] = df['county_state'].str.rsplit(', ', n=1, expand=True)
df.head()

Unnamed: 0,date,county_state,FIPS,homicides,population,homicides_per_100k,county,state_abbr
0,1999,"Autauga County, AL",1001,Suppressed,42963,Suppressed,Autauga County,AL
1,1999,"Baldwin County, AL",1003,Suppressed,137555,Suppressed,Baldwin County,AL
2,1999,"Barbour County, AL",1005,Suppressed,28866,Suppressed,Barbour County,AL
3,1999,"Bibb County, AL",1007,Suppressed,20560,Suppressed,Bibb County,AL
4,1999,"Blount County, AL",1009,Suppressed,50237,Suppressed,Blount County,AL


In [7]:
#Remove rows with no homicide data
df = df.loc[~(df.homicides.str.contains('Missing'))]

In [8]:
#Homicide values 1-10 are suppressed by the CDC, so I replace them with 0 here to allow the data type to be changed
df.homicides = df.homicides.replace('Suppressed',0)
df.homicides = df.homicides.astype('int64')
df.population = df.population.astype('int64')
df

Unnamed: 0,date,county_state,FIPS,homicides,population,homicides_per_100k,county,state_abbr
0,1999,"Autauga County, AL",01001,0,42963,Suppressed,Autauga County,AL
1,1999,"Baldwin County, AL",01003,0,137555,Suppressed,Baldwin County,AL
2,1999,"Barbour County, AL",01005,0,28866,Suppressed,Barbour County,AL
3,1999,"Bibb County, AL",01007,0,20560,Suppressed,Bibb County,AL
4,1999,"Blount County, AL",01009,0,50237,Suppressed,Blount County,AL
...,...,...,...,...,...,...,...,...
3137,2021,"Sweetwater County, WY",56037,0,41614,Unreliable,Sweetwater County,WY
3138,2021,"Teton County, WY",56039,0,23575,Unreliable,Teton County,WY
3139,2021,"Uinta County, WY",56041,0,20635,Unreliable,Uinta County,WY
3140,2021,"Washakie County, WY",56043,0,7705,Unreliable,Washakie County,WY


In [9]:
#Dictionary for updating FIPS and county names so they are recognized by Tableau.
#See here for details: https://github.com/spearitual/Updated-FIPS-codes-for-Tableau 
FIPS_swapper = {'51540': '51003',
 '51580': '51005',
 '51820': '51015',
 '51680': '51031',
 '51640': '51035',
 '51730': '51053',
 '51610': '51059',
 '51840': '51069',
 '51595': '51081',
 '51690': '51089',
 '51830': '51095',
 '15005': '15009',
 '51750': '51121',
 '51590': '51143',
 '51670': '51149',
 '51685': '51153',
 '51775': '51161',
 '51678': '51163',
 '51660': '51165',
 '51620': '51175',
 '51630': '51177',
 '51520': '51191',
 '51720': '51195',
 '51735': '51199',
 '51790': '51015',
 '51570': '51053',
 '51600': '51059',
 '51683': '51153',
 '51530': '51163',
 '09013': '09170',
 '09001': '09190',
 '02010': '02016',
 '02201': '02198',
 '02231': '02105',
 '02232': '02105',
 '02280': '02195',
 '02901': '02016',
 '02903': '02105',
 '02904': '02185',
 '02905': '02050',
 '02907': '02164',
 '02908': '02066',
 '02910': '02100',
 '02912': '02122',
 '02916': '02290',
 '02919': '02130',
 '02920': '02198',
 '02921': '02122',
 '02922': '02220',
 '02923': '02282',
 '02924': '02240',
 '02925': '02290',
 '02926': '02063',
 '02928': '02275',
 '02929': '02290',
 '15901': '15009',
 '51901': '51003',
 '51903': '51005',
 '51560': '51005',
 '51907': '51015',
 '51911': '51031',
 '51913': '51035',
 '51918': '51053',
 '51919': '51059',
 '51921': '51069',
 '51923': '51081',
 '51929': '51089',
 '51931': '51095',
 '51933': '51121',
 '51939': '51143',
 '51941': '51149',
 '51942': '51153',
 '51944': '51161',
 '51945': '51163',
 '51947': '51165',
 '51949': '51175',
 '51951': '51177',
 '51953': '51191',
 '51955': '51195',
 '51958': '51199',
 '55901': '55115',
 '02270': '02158',
 '46113': '46102',
 '51515': '51019'}
name_swapper = {'Charlottesville city': 'Albemarle + Charlottesville',
 'Covington city': 'Alleghany + Covington',
 'Waynesboro city': 'Augusta, Staunton + Waynesboro',
 'Lynchburg city': 'Campbell + Lynchburg',
 'Galax city': 'Carroll + Galax',
 'Petersburg city': 'Dinwiddie, Colonial Heights + Petersburg',
 'Falls Church city': 'Fairfax, Fairfax City + Falls Church',
 'Winchester city': 'Frederick + Winchester',
 'Emporia city': 'Greensville + Emporia',
 'Martinsville city': 'Henry + Martinsville',
 'Williamsburg city': 'James City + Williamsburg',
 'Kalawao County': 'Maui + Kalawao',
 'Radford city': 'Montgomery + Radford',
 'Danville city': 'Pittsylvania + Danville',
 'Hopewell city': 'Prince George + Hopewell',
 'Manassas Park city': 'Prince William, Manassas + Manassas Park',
 'Salem city': 'Roanoke + Salem',
 'Lexington city': 'Rockbridge, Buena Vista + Lexington',
 'Harrisonburg city': 'Rockingham + Harrisonburg',
 'Franklin city': 'Southampton + Franklin',
 'Fredericksburg city': 'Spotsylvania + Fredericksburg',
 'Bristol city': 'Washington + Bristol',
 'Norton city': 'Wise + Norton',
 'Poquoson city': 'York + Poquoson',
 'Staunton city': 'Augusta, Staunton + Waynesboro',
 'Colonial Heights city': 'Dinwiddie, Colonial Heights + Petersburg',
 'Fairfax city': 'Fairfax, Fairfax City + Falls Church',
 'Manassas city': 'Prince William, Manassas + Manassas Park',
 'Buena Vista city': 'Rockbridge, Buena Vista + Lexington',
 'Clifton Forge city':'Alleghany + Covington',
 'Albemarle County': 'Albemarle + Charlottesville',
 'Alleghany County': 'Alleghany + Covington',
 'Augusta County': 'Augusta, Staunton + Waynesboro',
 'Campbell County': 'Campbell + Lynchburg',
 'Carroll County': 'Carroll + Galax',
 'Dinwiddie County': 'Dinwiddie, Colonial Heights + Petersburg',
 'Fairfax County': 'Fairfax, Fairfax City + Falls Church',
 'Frederick County': 'Frederick + Winchester',
 'Greensville County': 'Greensville + Emporia',
 'Henry County': 'Henry + Martinsville',
 'James City County': 'James City + Williamsburg',
 'Maui County': 'Maui + Kalawao',
 'Montgomery County': 'Montgomery + Radford',
 'Pittsylvania County': 'Pittsylvania + Danville',
 'Prince George County': 'Prince George + Hopewell',
 'Prince William County': 'Prince William, Manassas + Manassas Park',
 'Roanoke County': 'Roanoke + Salem',
 'Rockbridge County': 'Rockbridge, Buena Vista + Lexington',
 'Rockingham County': 'Rockingham + Harrisonburg',
 'Southampton County': 'Southampton + Franklin',
 'Spotsylvania County': 'Spotsylvania + Fredericksburg',
 'Washington County': 'Washington + Bristol',
 'Wise County': 'Wise + Norton',
 'York County': 'York + Poquoson',
 'Yukon-Koyukuk Division':'Yukon-Koyukuk Census Area',
 'Upper Yukon Division':'Yukon-Koyukuk Census Area',
 'Kuskokwim Division':'Yukon-Koyukuk Census Area',
 'Kenai-Cook Inlet Division':'Kenai Peninsula Borough',
 'Seward Division':'Kenai Peninsula Borough',
 'Bedford city':'Bedford County'   ,
 'Skagway-Hoonah-Angoon Census Area':'Hoonah-Angoon Census Area',
 'Wrangell-Petersburg Census Area':'Petersburg Borough/Census Area',
 'Prince of Wales-Outer Ketchikan Census Area':'Prince of Wales-Hyder Census Area',
 'Wade Hampton Census Area':'Kusilvak Census Area'
           }

In [10]:
#Update FIPS
for old_FIPS, new_FIPS in FIPS_swapper.items():
    df['FIPS'] = df.FIPS.replace(old_FIPS, new_FIPS)
for old_name, new_name in name_swapper.items():
    df['county'] = df.county.replace(old_name, new_name)
df.head()

Unnamed: 0,date,county_state,FIPS,homicides,population,homicides_per_100k,county,state_abbr
0,1999,"Autauga County, AL",1001,0,42963,Suppressed,Autauga County,AL
1,1999,"Baldwin County, AL",1003,0,137555,Suppressed,Baldwin County,AL
2,1999,"Barbour County, AL",1005,0,28866,Suppressed,Barbour County,AL
3,1999,"Bibb County, AL",1007,0,20560,Suppressed,Bibb County,AL
4,1999,"Blount County, AL",1009,0,50237,Suppressed,Blount County,AL


In [11]:
#Combine data for combined counties
df = df.groupby(['FIPS','date','county','state_abbr']).agg('sum').reset_index()
df.head()

Unnamed: 0,FIPS,date,county,state_abbr,county_state,homicides,population,homicides_per_100k
0,1001,1999,Autauga County,AL,"Autauga County, AL",0,42963,Suppressed
1,1001,2000,Autauga County,AL,"Autauga County, AL",0,43671,Suppressed
2,1001,2001,Autauga County,AL,"Autauga County, AL",0,44889,Suppressed
3,1001,2002,Autauga County,AL,"Autauga County, AL",0,45909,Suppressed
4,1001,2003,Autauga County,AL,"Autauga County, AL",0,46800,Suppressed


In [12]:
#Update CT fips only after 2020
FIPS_swapper = {'09005':'09160',
               '09009':'09140',
               '09003':'09110',
               '09007':'09130',
               '09015':'09150',
               '09011':'09180'}
for old_FIPS, new_FIPS in FIPS_swapper.items():
    df.loc[(df['date'] >= 2020) & (df['FIPS'] == old_FIPS), "FIPS"] = new_FIPS

In [13]:
#Recreate county_state, so new names are included correctly
df['county_state'] = df.county + ', ' + df.state_abbr
df.head()

Unnamed: 0,FIPS,date,county,state_abbr,county_state,homicides,population,homicides_per_100k
0,1001,1999,Autauga County,AL,"Autauga County, AL",0,42963,Suppressed
1,1001,2000,Autauga County,AL,"Autauga County, AL",0,43671,Suppressed
2,1001,2001,Autauga County,AL,"Autauga County, AL",0,44889,Suppressed
3,1001,2002,Autauga County,AL,"Autauga County, AL",0,45909,Suppressed
4,1001,2003,Autauga County,AL,"Autauga County, AL",0,46800,Suppressed


In [14]:
#Replace suppressed and unreliable rates with rates based on homicide
df['homicides_per_100k'] = round(df.homicides/(df.population/100000),1)
#Made sure any rate based on a value 20 or less is not reported
df.loc[(df.homicides <= 20), "homicides_per_100k"] = np.nan
df.head()

Unnamed: 0,FIPS,date,county,state_abbr,county_state,homicides,population,homicides_per_100k
0,1001,1999,Autauga County,AL,"Autauga County, AL",0,42963,
1,1001,2000,Autauga County,AL,"Autauga County, AL",0,43671,
2,1001,2001,Autauga County,AL,"Autauga County, AL",0,44889,
3,1001,2002,Autauga County,AL,"Autauga County, AL",0,45909,
4,1001,2003,Autauga County,AL,"Autauga County, AL",0,46800,


In [15]:
#Make sure no homicide value below 10 is included in the dataset, as required by law
df.loc[(df.homicides <= 9), "homicides"] = np.nan
df.head()

Unnamed: 0,FIPS,date,county,state_abbr,county_state,homicides,population,homicides_per_100k
0,1001,1999,Autauga County,AL,"Autauga County, AL",,42963,
1,1001,2000,Autauga County,AL,"Autauga County, AL",,43671,
2,1001,2001,Autauga County,AL,"Autauga County, AL",,44889,
3,1001,2002,Autauga County,AL,"Autauga County, AL",,45909,
4,1001,2003,Autauga County,AL,"Autauga County, AL",,46800,


In [16]:
#Make date consistent with other datasets
df.date = df.date.astype(str)+'-07-01'
df.head()

Unnamed: 0,FIPS,date,county,state_abbr,county_state,homicides,population,homicides_per_100k
0,01001,1999-07-01,Autauga County,AL,"Autauga County, AL",,42963,
1,01001,2000-07-01,Autauga County,AL,"Autauga County, AL",,43671,
2,01001,2001-07-01,Autauga County,AL,"Autauga County, AL",,44889,
3,01001,2002-07-01,Autauga County,AL,"Autauga County, AL",,45909,
4,01001,2003-07-01,Autauga County,AL,"Autauga County, AL",,46800,
...,...,...,...,...,...,...,...,...
71556,56045,2017-07-01,Weston County,WY,"Weston County, WY",,6927,
71557,56045,2018-07-01,Weston County,WY,"Weston County, WY",,6967,
71558,56045,2019-07-01,Weston County,WY,"Weston County, WY",,6927,
71559,56045,2020-07-01,Weston County,WY,"Weston County, WY",,6743,


In [17]:
#Export to csv
#df.to_csv('county_homicides_1999-2021_annual.csv', index=False)