In [1]:
import pandas as pd
import numpy as np

Homicide data for 1999-2020, downloaded from CDC WONDER (Underlying Cause of Death): https://wonder.cdc.gov/ucd-icd10.html

In [2]:
# Load the tab-separated county totals, keeping only the five columns used
# below. 'County Code' is read as text so leading zeros are preserved.
# Any row with a missing value in the kept columns is dropped.
df = pd.read_csv(
    'Underlying Cause of Death, 1999-2020, county totals.txt',
    sep='\t',
    usecols=['County', 'County Code', 'Deaths', 'Population', 'Crude Rate'],
    dtype={'County Code': str},
).dropna()
df

Unnamed: 0,County,County Code,Deaths,Population,Crude Rate
0,"Autauga County, AL",01001,60,1143294.0,5.2
1,"Baldwin County, AL",01003,151,3987384.0,3.8
2,"Barbour County, AL",01005,53,599031.0,8.8
3,"Bibb County, AL",01007,42,487078.0,8.6
4,"Blount County, AL",01009,67,1229334.0,5.5
...,...,...,...,...,...
3142,"Sweetwater County, WY",56037,26,915894.0,2.8
3143,"Teton County, WY",56039,Suppressed,463622.0,Suppressed
3144,"Uinta County, WY",56041,Suppressed,446562.0,Suppressed
3145,"Washakie County, WY",56043,Suppressed,179874.0,Suppressed


In [3]:
# Give the columns short analysis-friendly names (positional, same order as loaded)
df = df.set_axis(
    ['county_state', 'FIPS', 'homicides', 'population', 'homicides_per_100k'],
    axis=1,
)
# Split "<county>, <state>" on the LAST ", " so the county and the state
# abbreviation can be referenced independently
county_and_state = df['county_state'].str.rsplit(', ', n=1, expand=True)
df['county'] = county_and_state[0]
df['state_abbr'] = county_and_state[1]
df.head()

Unnamed: 0,county_state,FIPS,homicides,population,homicides_per_100k,county,state_abbr
0,"Autauga County, AL",01001,60,1143294.0,5.2,Autauga County,AL
1,"Baldwin County, AL",01003,151,3987384.0,3.8,Baldwin County,AL
2,"Barbour County, AL",01005,53,599031.0,8.8,Barbour County,AL
3,"Bibb County, AL",01007,42,487078.0,8.6,Bibb County,AL
4,"Blount County, AL",01009,67,1229334.0,5.5,Blount County,AL
...,...,...,...,...,...,...,...
3142,"Sweetwater County, WY",56037,26,915894.0,2.8,Sweetwater County,WY
3143,"Teton County, WY",56039,Suppressed,463622.0,Suppressed,Teton County,WY
3144,"Uinta County, WY",56041,Suppressed,446562.0,Suppressed,Uinta County,WY
3145,"Washakie County, WY",56043,Suppressed,179874.0,Suppressed,Washakie County,WY


In [4]:
#Remove rows with "Missing" as these are placeholders for counties that did not exist at the time
is_missing = df['homicides'].str.contains('Missing')
# .copy() makes the filtered frame independent of the original, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
df = df.loc[~is_missing].copy()

In [5]:
# The CDC reports small homicide counts as the string 'Suppressed'; swap those
# for 0 so the column can be converted to integers.
# NOTE(review): these zeros are later summed when counties are merged, so a
# merged total may undercount by the suppressed amount - confirm this is acceptable.
suppressed_as_zero = df['homicides'].replace('Suppressed', 0)
df['homicides'] = suppressed_as_zero.astype('int64')
# Population was loaded as float; store it as whole numbers
df['population'] = df['population'].astype('int64')
df

Unnamed: 0,county_state,FIPS,homicides,population,homicides_per_100k,county,state_abbr
0,"Autauga County, AL",01001,60,1143294,5.2,Autauga County,AL
1,"Baldwin County, AL",01003,151,3987384,3.8,Baldwin County,AL
2,"Barbour County, AL",01005,53,599031,8.8,Barbour County,AL
3,"Bibb County, AL",01007,42,487078,8.6,Bibb County,AL
4,"Blount County, AL",01009,67,1229334,5.5,Blount County,AL
...,...,...,...,...,...,...,...
3142,"Sweetwater County, WY",56037,26,915894,2.8,Sweetwater County,WY
3143,"Teton County, WY",56039,0,463622,Suppressed,Teton County,WY
3144,"Uinta County, WY",56041,0,446562,Suppressed,Uinta County,WY
3145,"Washakie County, WY",56043,0,179874,Suppressed,Washakie County,WY


In [6]:
#Dictionary for updating FIPS codes so they are recognized by Tableau.
#See here for details: https://github.com/spearitual/Updated-FIPS-codes-for-Tableau
#Keys are FIPS codes as they appear in the data; values are the FIPS codes they are replaced with.
#Several keys share one value (e.g. '51820' and '51790' both map to '51015'), so
#rows that end up with the same FIPS can be combined later.
#No value in this dictionary is also a key, so the order in which the
#replacements are applied does not affect the result.
FIPS_swapper = {'51540': '51003',
 '51580': '51005',
 '51820': '51015',
 '51680': '51031',
 '51640': '51035',
 '51730': '51053',
 '51610': '51059',
 '51840': '51069',
 '51595': '51081',
 '51690': '51089',
 '51830': '51095',
 '15005': '15009',
 '51750': '51121',
 '51590': '51143',
 '51670': '51149',
 '51685': '51153',
 '51775': '51161',
 '51678': '51163',
 '51660': '51165',
 '51620': '51175',
 '51630': '51177',
 '51520': '51191',
 '51720': '51195',
 '51735': '51199',
 '51790': '51015',
 '51570': '51053',
 '51600': '51059',
 '51683': '51153',
 '51530': '51163',
 '09013': '09170',
 '09001': '09190',
 '02010': '02016',
 '02201': '02198',
 '02231': '02105',
 '02232': '02105',
 '02280': '02195',
 '02901': '02016',
 '02903': '02105',
 '02904': '02185',
 '02905': '02050',
 '02907': '02164',
 '02908': '02066',
 '02910': '02100',
 '02912': '02122',
 '02916': '02290',
 '02919': '02130',
 '02920': '02198',
 '02921': '02122',
 '02922': '02220',
 '02923': '02282',
 '02924': '02240',
 '02925': '02290',
 '02926': '02063',
 '02928': '02275',
 '02929': '02290',
 '15901': '15009',
 '51901': '51003',
 '51903': '51005',
 '51560': '51005',
 '51907': '51015',
 '51911': '51031',
 '51913': '51035',
 '51918': '51053',
 '51919': '51059',
 '51921': '51069',
 '51923': '51081',
 '51929': '51089',
 '51931': '51095',
 '51933': '51121',
 '51939': '51143',
 '51941': '51149',
 '51942': '51153',
 '51944': '51161',
 '51945': '51163',
 '51947': '51165',
 '51949': '51175',
 '51951': '51177',
 '51953': '51191',
 '51955': '51195',
 '51958': '51199',
 '55901': '55115',
 '02270': '02158',
 '46113': '46102',
 '51515': '51019'}
#Dictionary for updating county names: keys are names as they appear in the
#data, values are the names they are replaced with. Names that map to the same
#value (e.g. 'Charlottesville city' and 'Albemarle County') end up with one shared label.
#No value in this dictionary is also a key.
#NOTE(review): keys are bare names with no state, and several (e.g.
#'Washington County', 'Montgomery County', 'York County') look like names that
#occur in more than one state - confirm that the code applying this mapping
#restricts it to the intended rows.
name_swapper = {'Charlottesville city': 'Albemarle + Charlottesville',
 'Covington city': 'Alleghany + Covington',
 'Waynesboro city': 'Augusta, Staunton + Waynesboro',
 'Lynchburg city': 'Campbell + Lynchburg',
 'Galax city': 'Carroll + Galax',
 'Petersburg city': 'Dinwiddie, Colonial Heights + Petersburg',
 'Falls Church city': 'Fairfax, Fairfax City + Falls Church',
 'Winchester city': 'Frederick + Winchester',
 'Emporia city': 'Greensville + Emporia',
 'Martinsville city': 'Henry + Martinsville',
 'Williamsburg city': 'James City + Williamsburg',
 'Kalawao County': 'Maui + Kalawao',
 'Radford city': 'Montgomery + Radford',
 'Danville city': 'Pittsylvania + Danville',
 'Hopewell city': 'Prince George + Hopewell',
 'Manassas Park city': 'Prince William, Manassas + Manassas Park',
 'Salem city': 'Roanoke + Salem',
 'Lexington city': 'Rockbridge, Buena Vista + Lexington',
 'Harrisonburg city': 'Rockingham + Harrisonburg',
 'Franklin city': 'Southampton + Franklin',
 'Fredericksburg city': 'Spotsylvania + Fredericksburg',
 'Bristol city': 'Washington + Bristol',
 'Norton city': 'Wise + Norton',
 'Poquoson city': 'York + Poquoson',
 'Staunton city': 'Augusta, Staunton + Waynesboro',
 'Colonial Heights city': 'Dinwiddie, Colonial Heights + Petersburg',
 'Fairfax city': 'Fairfax, Fairfax City + Falls Church',
 'Manassas city': 'Prince William, Manassas + Manassas Park',
 'Buena Vista city': 'Rockbridge, Buena Vista + Lexington',
 'Clifton Forge city':'Alleghany + Covington',
 'Albemarle County': 'Albemarle + Charlottesville',
 'Alleghany County': 'Alleghany + Covington',
 'Augusta County': 'Augusta, Staunton + Waynesboro',
 'Campbell County': 'Campbell + Lynchburg',
 'Carroll County': 'Carroll + Galax',
 'Dinwiddie County': 'Dinwiddie, Colonial Heights + Petersburg',
 'Fairfax County': 'Fairfax, Fairfax City + Falls Church',
 'Frederick County': 'Frederick + Winchester',
 'Greensville County': 'Greensville + Emporia',
 'Henry County': 'Henry + Martinsville',
 'James City County': 'James City + Williamsburg',
 'Maui County': 'Maui + Kalawao',
 'Montgomery County': 'Montgomery + Radford',
 'Pittsylvania County': 'Pittsylvania + Danville',
 'Prince George County': 'Prince George + Hopewell',
 'Prince William County': 'Prince William, Manassas + Manassas Park',
 'Roanoke County': 'Roanoke + Salem',
 'Rockbridge County': 'Rockbridge, Buena Vista + Lexington',
 'Rockingham County': 'Rockingham + Harrisonburg',
 'Southampton County': 'Southampton + Franklin',
 'Spotsylvania County': 'Spotsylvania + Fredericksburg',
 'Washington County': 'Washington + Bristol',
 'Wise County': 'Wise + Norton',
 'York County': 'York + Poquoson',
 'Yukon-Koyukuk Division':'Yukon-Koyukuk Census Area',
 'Upper Yukon Division':'Yukon-Koyukuk Census Area',
 'Kuskokwim Division':'Yukon-Koyukuk Census Area',
 'Kenai-Cook Inlet Division':'Kenai Peninsula Borough',
 'Seward Division':'Kenai Peninsula Borough',
 'Bedford city':'Bedford County',
 'Skagway-Hoonah-Angoon Census Area':'Hoonah-Angoon Census Area',
 'Wrangell-Petersburg Census Area':'Petersburg Borough/Census Area',
 'Prince of Wales-Outer Ketchikan Census Area':'Prince of Wales-Hyder Census Area',
 'Wade Hampton Census Area':'Kusilvak Census Area'
           }

In [7]:
#Update FIPS
# One dict-based replace does every exact-match substitution at once. assign()
# returns a new frame, so nothing is written into a slice of an earlier frame.
df = df.assign(FIPS=df['FIPS'].replace(FIPS_swapper))

#Update county names, but only for rows that belong to a merged area.
# name_swapper is keyed by bare county name with no state, so applying it to
# every row would also relabel same-named counties in other states (every
# 'Washington County' would become 'Washington + Bristol'). Each row that is
# meant to be renamed either had its FIPS swapped above or already carries the
# FIPS that others were swapped to, so its updated FIPS is one of
# FIPS_swapper's values; FIPS codes include the state, which scopes the rename.
merged_fips = set(FIPS_swapper.values())
in_merged_area = df['FIPS'].isin(merged_fips)
df.loc[in_merged_area, 'county'] = df.loc[in_merged_area, 'county'].replace(name_swapper)
df.head()

Unnamed: 0,county_state,FIPS,homicides,population,homicides_per_100k,county,state_abbr
0,"Autauga County, AL",01001,60,1143294,5.2,Autauga County,AL
1,"Baldwin County, AL",01003,151,3987384,3.8,Baldwin County,AL
2,"Barbour County, AL",01005,53,599031,8.8,Barbour County,AL
3,"Bibb County, AL",01007,42,487078,8.6,Bibb County,AL
4,"Blount County, AL",01009,67,1229334,5.5,Blount County,AL
...,...,...,...,...,...,...,...
3142,"Sweetwater County, WY",56037,26,915894,2.8,Sweetwater County,WY
3143,"Teton County, WY",56039,0,463622,Suppressed,Teton County,WY
3144,"Uinta County, WY",56041,0,446562,Suppressed,Uinta County,WY
3145,"Washakie County, WY",56043,0,179874,Suppressed,Washakie County,WY


In [8]:
#Combine data for combined counties
# Only the numeric columns are summed. A blanket agg('sum') would also "sum"
# the text columns, concatenating the county_state and rate strings of every
# row in a merged group. Those two columns are kept in their original position
# using the group's first value, purely as placeholders.
df = (
    df.groupby(['FIPS', 'county', 'state_abbr'], as_index=False)
      .agg(
          county_state=('county_state', 'first'),
          homicides=('homicides', 'sum'),
          population=('population', 'sum'),
          homicides_per_100k=('homicides_per_100k', 'first'),
      )
)
df.head()

Unnamed: 0,FIPS,county,state_abbr,county_state,homicides,population,homicides_per_100k
0,01001,Autauga County,AL,"Autauga County, AL",60,1143294,5.2
1,01003,Baldwin County,AL,"Baldwin County, AL",151,3987384,3.8
2,01005,Barbour County,AL,"Barbour County, AL",53,599031,8.8
3,01007,Bibb County,AL,"Bibb County, AL",42,487078,8.6
4,01009,Blount County,AL,"Blount County, AL",67,1229334,5.5
...,...,...,...,...,...,...,...
3108,56037,Sweetwater County,WY,"Sweetwater County, WY",26,915894,2.8
3109,56039,Teton County,WY,"Teton County, WY",0,463622,Suppressed
3110,56041,Uinta County,WY,"Uinta County, WY",0,446562,Suppressed
3111,56043,Washakie County,WY,"Washakie County, WY",0,179874,Suppressed


In [10]:
# Rebuild the "<county>, <state>" label from the current county names so that
# renamed counties are labelled correctly
county_state_label = df['county'].str.cat(df['state_abbr'], sep=', ')
df['county_state'] = county_state_label
df

Unnamed: 0,FIPS,county,state_abbr,county_state,homicides,population,homicides_per_100k
0,01001,Autauga County,AL,"Autauga County, AL",60,1143294,5.2
1,01003,Baldwin County,AL,"Baldwin County, AL",151,3987384,3.8
2,01005,Barbour County,AL,"Barbour County, AL",53,599031,8.8
3,01007,Bibb County,AL,"Bibb County, AL",42,487078,8.6
4,01009,Blount County,AL,"Blount County, AL",67,1229334,5.5
...,...,...,...,...,...,...,...
3108,56037,Sweetwater County,WY,"Sweetwater County, WY",26,915894,2.8
3109,56039,Teton County,WY,"Teton County, WY",0,463622,Suppressed
3110,56041,Uinta County,WY,"Uinta County, WY",0,446562,Suppressed
3111,56043,Washakie County,WY,"Washakie County, WY",0,179874,Suppressed


In [11]:
# Recompute every rate from the homicide and population columns
# (homicides per 100,000 population, rounded to one decimal place)
population_in_100k = df['population'].div(100000)
df['homicides_per_100k'] = df['homicides'].div(population_in_100k).round(1)
# Do not report a rate when it is based on 20 or fewer homicides
too_few_for_rate = df['homicides'].le(20)
df.loc[too_few_for_rate, 'homicides_per_100k'] = np.nan
df.head()

Unnamed: 0,FIPS,county,state_abbr,county_state,homicides,population,homicides_per_100k
0,01001,Autauga County,AL,"Autauga County, AL",60,1143294,5.2
1,01003,Baldwin County,AL,"Baldwin County, AL",151,3987384,3.8
2,01005,Barbour County,AL,"Barbour County, AL",53,599031,8.8
3,01007,Bibb County,AL,"Bibb County, AL",42,487078,8.6
4,01009,Blount County,AL,"Blount County, AL",67,1229334,5.5
...,...,...,...,...,...,...,...
3108,56037,Sweetwater County,WY,"Sweetwater County, WY",26,915894,2.8
3109,56039,Teton County,WY,"Teton County, WY",0,463622,0.0
3110,56041,Uinta County,WY,"Uinta County, WY",0,446562,0.0
3111,56043,Washakie County,WY,"Washakie County, WY",0,179874,0.0


In [13]:
#Make sure no homicide value below 10 is included in the dataset, as required by law
# Series.mask blanks the matching rows and upcasts the integer column to float
# itself. Writing np.nan into an int64 column through .loc relies on implicit
# upcasting, which newer pandas versions deprecate (FutureWarning).
df['homicides'] = df['homicides'].mask(df['homicides'] <= 9)
df

Unnamed: 0,FIPS,county,state_abbr,county_state,homicides,population,homicides_per_100k
0,01001,Autauga County,AL,"Autauga County, AL",60.0,1143294,5.2
1,01003,Baldwin County,AL,"Baldwin County, AL",151.0,3987384,3.8
2,01005,Barbour County,AL,"Barbour County, AL",53.0,599031,8.8
3,01007,Bibb County,AL,"Bibb County, AL",42.0,487078,8.6
4,01009,Blount County,AL,"Blount County, AL",67.0,1229334,5.5
...,...,...,...,...,...,...,...
3108,56037,Sweetwater County,WY,"Sweetwater County, WY",26.0,915894,2.8
3109,56039,Teton County,WY,"Teton County, WY",,463622,
3110,56041,Uinta County,WY,"Uinta County, WY",,446562,
3111,56043,Washakie County,WY,"Washakie County, WY",,179874,


In [21]:
#Export to csv
#NOTE(review): the export below is commented out, so running this notebook
#writes no file; uncomment the line to produce the CSV.
#df.to_csv('county_homicides_1999-2020_avg.csv', index=False)