In [1]:
# Import Dependencies
from urllib.parse import quote_plus

import pandas as pd
import psycopg2
from sqlalchemy import create_engine, inspect


In [2]:
# Load the per-state overdose data from CSV and preview the first rows.
overdose_df = pd.read_csv(filepath_or_buffer='overdoses.csv')
overdose_df.head(n=5)

Unnamed: 0,State,Population,Deaths,Abbrev
0,Alabama,4833722,723,AL
1,Alaska,735132,124,AK
2,Arizona,6626624,1211,AZ
3,Arkansas,2959373,356,AR
4,California,38332521,4521,CA


In [3]:
# Load the cleaned prescriber records from CSV and preview the first rows.
prescriber_df = pd.read_csv(filepath_or_buffer='prescriber_clean.csv')
prescriber_df.head(n=5)

Unnamed: 0,State,Gender,Specialty
0,TX,M,Dentist
1,AL,F,General Surgery
2,NY,F,General Practice
3,AZ,M,Internal Medicine
4,NV,M,Hematology/Oncology


In [4]:
# Load the age-adjusted overdose death rates from CSV and preview the first rows.
# NOTE: the variable name keeps its original spelling because later cells refer to it.
age_ajusted_df = pd.read_csv(filepath_or_buffer='raw_data.csv')
age_ajusted_df.head(n=5)

Unnamed: 0,Location,Opioid Overdose Death Rate (Age-Adjusted),All Drug Overdose Death Rate (Age-Adjusted),Percent Change in Opioid Overdose Death Rate from Prior Year,Percent Change in All Drug Overdose Death Rate from Prior Year
0,United States,9.0,14.7,0.14,0.07
1,Alabama,5.6,15.2,0.6,0.2
2,Alaska,10.6,16.8,0.15,0.17
3,Arizona,8.8,18.2,0.07,-0.03
4,Arkansas,6.3,12.6,0.13,0.14


In [5]:
# Build a new frame holding only the State, Gender and Specialty columns.
prescriber_info_file_df = prescriber_df.loc[:, ['State', 'Gender', 'Specialty']]
prescriber_info_file_df.head(n=5)

Unnamed: 0,State,Gender,Specialty
0,TX,M,Dentist
1,AL,F,General Surgery
2,NY,F,General Practice
3,AZ,M,Internal Medicine
4,NV,M,Hematology/Oncology


In [6]:
# Skip the first row (the nationwide "United States" summary shown in the
# preview above) so that only the per-location rows remain.
age_adjusted_rate_df = age_ajusted_df.iloc[1:, :]
age_adjusted_rate_df.head(n=5)

Unnamed: 0,Location,Opioid Overdose Death Rate (Age-Adjusted),All Drug Overdose Death Rate (Age-Adjusted),Percent Change in Opioid Overdose Death Rate from Prior Year,Percent Change in All Drug Overdose Death Rate from Prior Year
1,Alabama,5.6,15.2,0.6,0.2
2,Alaska,10.6,16.8,0.15,0.17
3,Arizona,8.8,18.2,0.07,-0.03
4,Arkansas,6.3,12.6,0.13,0.14
5,California,5.0,11.1,0.02,0.0


In [7]:
# Show the (rows, columns) dimensions of the overdose DataFrame.
overdose_df.shape

(50, 4)

In [8]:
# Show the (rows, columns) dimensions of the prescriber DataFrame.
prescriber_df.shape

(25000, 3)

In [9]:
# Remove fully duplicated rows, keeping the first occurrence of each
# (these are the defaults of drop_duplicates).
overdose_df = overdose_df.drop_duplicates()

In [10]:
# True if any cell of the overdose DataFrame is missing, False otherwise.
overdose_df.isna().values.any()

False

In [11]:
# True if any cell of the prescriber DataFrame is missing, False otherwise.
prescriber_df.isna().values.any()

False

In [12]:
# Preview the overdose DataFrame again after the duplicate and null checks.
overdose_df.head(n=5)

Unnamed: 0,State,Population,Deaths,Abbrev
0,Alabama,4833722,723,AL
1,Alaska,735132,124,AK
2,Arizona,6626624,1211,AZ
3,Arkansas,2959373,356,AR
4,California,38332521,4521,CA


In [13]:
# Rename the CSV headers to lowercase snake_case column names.
overdose_column_names = {
    'State': 'state_name',
    'Population': 'population',
    'Deaths': 'deaths',
    'Abbrev': 'abbrev',
}
overdose_df_renamed = overdose_df.rename(columns=overdose_column_names)
overdose_df_renamed.head(n=5)

Unnamed: 0,state_name,population,deaths,abbrev
0,Alabama,4833722,723,AL
1,Alaska,735132,124,AK
2,Arizona,6626624,1211,AZ
3,Arkansas,2959373,356,AR
4,California,38332521,4521,CA


In [14]:
# Rename the CSV headers to lowercase snake_case column names.
prescriber_column_names = {
    'State': 'state_name',
    'Gender': 'gender',
    'Specialty': 'specialty',
}
prescriber_df_renamed = prescriber_df.rename(columns=prescriber_column_names)
prescriber_df_renamed.head(n=5)

Unnamed: 0,state_name,gender,specialty
0,TX,M,Dentist
1,AL,F,General Surgery
2,NY,F,General Practice
3,AZ,M,Internal Medicine
4,NV,M,Hematology/Oncology


In [29]:
# Replace the long descriptive CSV headers with short snake_case column names.
age_adjusted_column_names = {
    'Location': 'location',
    'Opioid Overdose Death Rate (Age-Adjusted)': 'opioid_death_rate',
    'All Drug Overdose Death Rate (Age-Adjusted)': 'all_overdose_rate',
    'Percent Change in Opioid Overdose Death Rate from Prior Year': 'opioid_percent_change',
    'Percent Change in All Drug Overdose Death Rate from Prior Year': 'overdose_percent_change',
}
age_adjusted_rate_df_renamed = age_adjusted_rate_df.rename(columns=age_adjusted_column_names)
age_adjusted_rate_df_renamed.head(n=5)

Unnamed: 0,location,opioid_death_rate,all_overdose_rate,opioid_percent_change,overdose_percent_change
1,Alabama,5.6,15.2,0.6,0.2
2,Alaska,10.6,16.8,0.15,0.17
3,Arizona,8.8,18.2,0.07,-0.03
4,Arkansas,6.3,12.6,0.13,0.14
5,California,5.0,11.1,0.02,0.0


In [16]:
from confi import pos_password

# Build the connection string for the local PostgreSQL database "opiods".
# Use your own username and password in the following code accordingly:
#   "<insert user name>:<insert password>@localhost:5432/<insert database name>"
#
# The password is percent-encoded before being placed in the URL so that
# reserved characters in it (such as "@", "/" or ":") cannot corrupt the
# connection URL.
# NOTE(review): assumes confi.pos_password holds the raw password, not an
# already percent-encoded one - confirm.
rds_connection_string = f"postgres:{quote_plus(str(pos_password))}@localhost:5432/opiods"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [17]:
# List the tables present in the connected database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API is the supported way to get the same list.
inspect(engine).get_table_names()

['states', 'overdose', 'prescriber', 'age_ajusted_rate']

In [27]:
# Drop rows that contain missing values before loading into the database.
# The renamed frame is the one written by to_sql further down, and it was
# built from age_adjusted_rate_df in an earlier cell, so it must be cleaned
# here as well; otherwise the dropna has no effect on the uploaded data when
# the notebook is run top to bottom.
age_adjusted_rate_df = age_adjusted_rate_df.dropna()
age_adjusted_rate_df_renamed = age_adjusted_rate_df_renamed.dropna()

In [22]:
# Append the overdose rows to the existing 'overdose' table; the DataFrame
# index is not written. Re-running this cell appends the same rows again.
overdose_df_renamed.to_sql(
    'overdose',
    engine,
    index=False,
    if_exists='append',
)

In [18]:
# Append the prescriber rows to the existing 'prescriber' table; the DataFrame
# index is not written. Re-running this cell appends the same rows again.
prescriber_df_renamed.to_sql(
    'prescriber',
    engine,
    index=False,
    if_exists='append',
)

In [30]:
# Append the age-adjusted rate rows to the existing 'age_ajusted_rate' table
# (the table name is spelled this way in the database); the DataFrame index
# is not written. Re-running this cell appends the same rows again.
age_adjusted_rate_df_renamed.to_sql(
    'age_ajusted_rate',
    engine,
    index=False,
    if_exists='append',
)