In [1]:
# Import dependencies
import pandas as pd
from sqlalchemy import create_engine

# Sexually Transmitted Disease Table

In [2]:
# Load sexually transmitted disease (STD) data into DataFrame
STD_fp = "Data/Atlas_STD.csv"
STD_df = pd.read_csv(STD_fp, skiprows=6, thousands=',')
STD_df.head()

Unnamed: 0,Indicator,Year,Geography,FIPS,Cases,Rate per 100000
0,Primary and Secondary Syphilis,2020 (COVID-19 Pandemic),Alabama,1,529,10.8
1,Primary and Secondary Syphilis,2019,Alabama,1,618,12.6
2,Primary and Secondary Syphilis,2018,Alabama,1,477,9.8
3,Primary and Secondary Syphilis,2017,Alabama,1,424,8.7
4,Primary and Secondary Syphilis,2016,Alabama,1,376,7.7


In [3]:
STD_df.dtypes

Indicator           object
Year                object
Geography           object
FIPS                 int64
Cases                int64
Rate per 100000    float64
dtype: object

In [4]:
# Remove unneeded columns (i.e., FIPS)
STD_df = STD_df.drop(columns=['FIPS'])
STD_df.head()

Unnamed: 0,Indicator,Year,Geography,Cases,Rate per 100000
0,Primary and Secondary Syphilis,2020 (COVID-19 Pandemic),Alabama,529,10.8
1,Primary and Secondary Syphilis,2019,Alabama,618,12.6
2,Primary and Secondary Syphilis,2018,Alabama,477,9.8
3,Primary and Secondary Syphilis,2017,Alabama,424,8.7
4,Primary and Secondary Syphilis,2016,Alabama,376,7.7


In [5]:
# Remove " (COVID-19 Pandemic)" from 2020 value
STD_df['Year'] = STD_df['Year'].replace('2020 (COVID-19 Pandemic)', 2020)
STD_df.head()

Unnamed: 0,Indicator,Year,Geography,Cases,Rate per 100000
0,Primary and Secondary Syphilis,2020,Alabama,529,10.8
1,Primary and Secondary Syphilis,2019,Alabama,618,12.6
2,Primary and Secondary Syphilis,2018,Alabama,477,9.8
3,Primary and Secondary Syphilis,2017,Alabama,424,8.7
4,Primary and Secondary Syphilis,2016,Alabama,376,7.7


In [6]:
# Convert Year to an integer
STD_df['Year'] = pd.to_numeric(STD_df['Year'], downcast='integer')

In [7]:
# Keep years before 2017
STD_df = STD_df.loc[STD_df['Year'] < 2017, :]
STD_df.reset_index()
STD_df

Unnamed: 0,Indicator,Year,Geography,Cases,Rate per 100000
4,Primary and Secondary Syphilis,2016,Alabama,376,7.7
5,Primary and Secondary Syphilis,2015,Alabama,280,5.8
6,Primary and Secondary Syphilis,2014,Alabama,161,3.3
7,Primary and Secondary Syphilis,2013,Alabama,183,3.8
8,Primary and Secondary Syphilis,2012,Alabama,216,4.5
...,...,...,...,...,...
5350,Chlamydia,2004,Wyoming,1082,215.2
5351,Chlamydia,2003,Wyoming,960,192.4
5352,Chlamydia,2002,Wyoming,944,190.0
5353,Chlamydia,2001,Wyoming,839,170.2


In [16]:
# Rename columns (Cases: # of STD Cases; Rate per 100000: STD Rate per 100000)
STD_df = STD_df.rename(columns={"Cases": "# of STD Cases", "Rate per 100000": "STD Rate per 100000"})
STD_df.head(25)

Unnamed: 0,Indicator,Year,Geography,# of STD Cases,STD Rate per 100000
4,Primary and Secondary Syphilis,2016,Alabama,376,7.7
5,Primary and Secondary Syphilis,2015,Alabama,280,5.8
6,Primary and Secondary Syphilis,2014,Alabama,161,3.3
7,Primary and Secondary Syphilis,2013,Alabama,183,3.8
8,Primary and Secondary Syphilis,2012,Alabama,216,4.5
9,Primary and Secondary Syphilis,2011,Alabama,228,4.8
10,Primary and Secondary Syphilis,2010,Alabama,260,5.4
11,Primary and Secondary Syphilis,2009,Alabama,417,8.9
12,Primary and Secondary Syphilis,2008,Alabama,449,9.6
13,Primary and Secondary Syphilis,2007,Alabama,380,8.2


# Drug Poisoning Mortality (DPM)

In [9]:
# Load drug poisoning mortality (DPM) data into DataFrame
DPM_fp = "Data/Drug_Poisoning_Mortality_by_State.csv"
DPM_df = pd.read_csv(DPM_fp)
DPM_df.head()

Unnamed: 0,State,Year,Sex,Age Group,Race and Hispanic Origin,Deaths,Population,Crude Death Rate,Standard Error for Crude Rate,Lower Confidence Limit for Crude Rate,Upper Confidence Limit for Crude Rate,Age-adjusted Rate,Standard Error for Age-adjusted Rate,Lower Confidence Limit for Age-adjusted Rate,Upper Confidence Limit for Age-adjusted Rate,State Crude Rate in Range,US Crude Rate,US Age-adjusted Rate,Unit
0,Alabama,1999,Both Sexes,All Ages,All Races-All Origins,169,4430143,3.8148,0.29344,3.2396,4.3899,3.8521,0.29657,3.2708,4.4334,1.8–7.1,6.0382,6.057,"per 100,000 population"
1,Alabama,2000,Both Sexes,All Ages,All Races-All Origins,197,4447100,4.4299,0.31561,3.8112,5.0485,4.4857,0.31985,3.8588,5.1126,1.8–7.1,6.1882,6.1749,"per 100,000 population"
2,Alabama,2001,Both Sexes,All Ages,All Races-All Origins,216,4467634,4.8348,0.32896,4.19,5.4795,4.8915,0.33329,4.2382,5.5447,1.8–7.1,6.8057,6.7922,"per 100,000 population"
3,Alabama,2002,Both Sexes,All Ages,All Races-All Origins,211,4480089,4.7097,0.32423,4.0742,5.3452,4.7619,0.32868,4.1177,5.4062,1.8–7.1,8.1766,8.1957,"per 100,000 population"
4,Alabama,2003,Both Sexes,All Ages,All Races-All Origins,197,4503491,4.3744,0.31166,3.7635,4.9852,4.4333,0.31701,3.812,5.0547,1.8–7.1,8.8881,8.8765,"per 100,000 population"


In [10]:
DPM_df.groupby(['State']).count()

Unnamed: 0_level_0,Year,Sex,Age Group,Race and Hispanic Origin,Deaths,Population,Crude Death Rate,Standard Error for Crude Rate,Lower Confidence Limit for Crude Rate,Upper Confidence Limit for Crude Rate,Age-adjusted Rate,Standard Error for Age-adjusted Rate,Lower Confidence Limit for Age-adjusted Rate,Upper Confidence Limit for Age-adjusted Rate,State Crude Rate in Range,US Crude Rate,US Age-adjusted Rate,Unit
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Alabama,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18
Alaska,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18
Arizona,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18
Arkansas,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18
California,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18
Colorado,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18
Connecticut,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18
Delaware,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18
District of Columbia,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18
Florida,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18


In [11]:
# Clean up DataFrames
DPM_df['Year'].min()

1999

In [12]:
# Remove unneeded columns

# Remove years before 2000 (i.e., 1999)

In [13]:
# Load DataFrames into Postgres


In [14]:
# Connect to Postgres
