In [1]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
import psycopg2
from sqlalchemy import create_engine, text

### Loading the CSV into a DataFrame

In [6]:
# Read the combined AQI export into a single DataFrame
# (the path is relative to the notebook's working directory)
csv_file_Full = "Resources/AQI_data.csv"
AQI_data_df = pd.read_csv(filepath_or_buffer=csv_file_Full)

### Splitting the data into one DataFrame per database table

In [3]:
# Frame backing state_data_table: reset_index() turns the row number into an
# 'index' column, which serves as the table's unique key
state_columns = ['Date', 'State', 'Latitude', 'Longitude']
state_data_table_df = AQI_data_df.loc[:, state_columns].reset_index()
state_data_table_df.head()

Unnamed: 0,index,Date,State,Latitude,Longitude
0,0,2020-11-27 05:00:00,AK,63.588753,-154.493062
1,1,2020-11-27 06:00:00,AK,63.588753,-154.493062
2,2,2020-11-27 07:00:00,AK,63.588753,-154.493062
3,3,2020-11-27 08:00:00,AK,63.588753,-154.493062
4,4,2020-11-27 09:00:00,AK,63.588753,-154.493062


In [4]:
# Frame backing the AQI table: reset_index() turns the row number into an
# 'index' column, which serves as the table's unique key
aqi_columns = ['Date', 'State', 'AQI']
AQI_table_df = AQI_data_df.loc[:, aqi_columns].reset_index()
AQI_table_df.head()

Unnamed: 0,index,Date,State,AQI
0,0,2020-11-27 05:00:00,AK,1
1,1,2020-11-27 06:00:00,AK,1
2,2,2020-11-27 07:00:00,AK,1
3,3,2020-11-27 08:00:00,AK,1
4,4,2020-11-27 09:00:00,AK,1


In [5]:
# Frame backing pollutants_data_table: the timestamp and state plus one column
# per pollutant; reset_index() turns the row number into an 'index' column,
# which serves as the table's unique key
pollutant_columns = ['Date', 'State', 'CO', 'NO', 'NO2', 'O3', 'SO2', 'PM2_5', 'PM10', 'NH3']
pollutants_data_table_df = AQI_data_df.loc[:, pollutant_columns].reset_index()
pollutants_data_table_df.head()

Unnamed: 0,index,Date,State,CO,NO,NO2,O3,SO2,PM2_5,PM10,NH3
0,0,2020-11-27 05:00:00,AK,195.27,0.0,0.03,54.36,0.06,0.5,0.54,0.0
1,1,2020-11-27 06:00:00,AK,196.93,0.0,0.03,56.51,0.06,0.5,0.54,0.0
2,2,2020-11-27 07:00:00,AK,198.6,0.0,0.03,57.94,0.07,0.5,0.54,0.0
3,3,2020-11-27 08:00:00,AK,198.6,0.0,0.03,58.65,0.07,0.5,0.54,0.0
4,4,2020-11-27 09:00:00,AK,198.6,0.0,0.03,58.65,0.07,0.5,0.54,0.0


### Connecting to AWS database

In [None]:
# Creating the SQLAlchemy engine for the PostgreSQL database hosted on AWS RDS.
# The password must never be stored in the notebook: it is read from the
# AQI_DB_PASSWORD environment variable, or prompted for when that is unset.
# NOTE(review): a password was previously hardcoded in this cell and should be
# rotated, since it remains in the file's history.
db_password = os.environ.get('AQI_DB_PASSWORD') or getpass('PostgreSQL password: ')
db_host = os.environ.get('AQI_DB_HOST', 'database-1.cr0qbwwhutcx.us-east-2.rds.amazonaws.com')
# quote_plus escapes characters such as '%' or '@' that would otherwise be
# misread as part of the URL syntax
engine = create_engine(f'postgresql://postgres:{quote_plus(db_password)}@{db_host}:5432/postgres')

In [None]:
# Creating schema & state_data_table
engine.execute("""CREATE SCHEMA state_data_table_schema
CREATE TABLE state_data_table (
index INTEGER PRIMARY KEY,
Date TEXT,
State TEXT,
Latitude DOUBLE PRECISION,
Longitude DOUBLE PRECISION)""")

In [None]:
# Populating the state_data_table in pgAdmin
state_data_table_df.to_sql(name='state_data_table',schema='state_data_table_schema', con=engine, chunksize=10, if_exists='replace')

In [None]:
# Creating schema & aqi_data_table
engine.execute("""CREATE SCHEMA aqi_data_table_schema
CREATE TABLE aqi_data_table (
index INTEGER PRIMARY KEY,
Date DATE,
State TEXT,
AQI INTEGER)""")

In [None]:
# Populating the aqi_data_table in pgAdmin
AQI_table_df.to_sql(name='aqi_data_table',schema='aqi_data_table_schema', con=engine, chunksize=10, if_exists='replace')

In [None]:
# Creating schema & pollutants_data_table
engine.execute("""CREATE SCHEMA pollutants_data_table_schema
CREATE TABLE pollutants_data_table (
index INTEGER PRIMARY KEY,
Date DATE,
State TEXT,
CO INTEGER,
NO INTEGER,
NO2 INTEGER,
O3 INTEGER,
SO2 INTEGER,
PM2_5 INTEGER,
PM10 INTEGER,
NH3 INTEGER)""")

In [None]:
# Populating the pollutants_data_table in pgAdmin
pollutants_data_table_df.to_sql(name='pollutants_data_table',schema='pollutants_data_table_schema', con=engine, chunksize=10, if_exists='replace')