# ETL Project

In [12]:
# Import Dependencies

import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import psycopg2

# Get the password from config file
from config import password

## Extract

1. Quality Of Life by State (CSV file)
   Data source: https://worldpopulationreview.com/state-rankings/quality-of-life-by-state

In [2]:
# File path
ranking_csv = 'Resources/Raw_data/state_ranking.csv'

In [3]:
# Read csv and store in to Pandas Dataframe
ranking_df = pd.read_csv(ranking_csv)
ranking_df.head()

Unnamed: 0,State,lifeQualityRank,healthCareRank,educationRank,economyRank
0,Washington,1,4,4,3
1,New Hampshire,2,16,5,13
2,Minnesota,3,10,17,18
3,Utah,4,9,10,2
4,Vermont,5,11,8,29


2. Public School Data (CSV file) Data Source: https://www.kaggle.com/carlosaguayo/usa-public-schools

In [4]:
# File path
school_csv = 'Resources/Raw_data/public_schools.csv'

In [5]:
# Read csv and store in to Pandas DataFrame
schools_df = pd.read_csv(school_csv)
schools_df.head()

Unnamed: 0,X,Y,OBJECTID,NCESID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,...,VAL_METHOD,VAL_DATE,WEBSITE,LEVEL_,ENROLLMENT,ST_GRADE,END_GRADE,DISTRICTID,FT_TEACHER,SHELTER_ID
0,-81.050895,29.022271,2002,120192008041,SAMSULA ACADEMY,248 N SAMSULA DR,NEW SMYRNA,FL,32168,8762,...,IMAGERY,2014-05-20T00:00:00.000Z,http://nces.ed.gov/GLOBALLOCATOR/sch_info_popu...,1,222,KG,5,1201920,13,NOT AVAILABLE
1,-92.507288,31.180659,2003,220129002344,CAROLINE DORMON JUNIOR HIGH SCHOOL,8906 HWY 165 SOUTH,WOODWORTH,LA,71485,NOT AVAILABLE,...,IMAGERY/OTHER,2015-06-19T00:00:00.000Z,http://nces.ed.gov/GLOBALLOCATOR/sch_info_popu...,1,312,PK,8,2201290,21,NOT AVAILABLE
2,-69.97188,43.908147,2004,230378023129,HARRIET BEECHER STOWE ELEMENTARY,44 MCKEEN STREET,BRUNSWICK,ME,4011,NOT AVAILABLE,...,IMAGERY,2014-05-07T00:00:00.000Z,http://nces.ed.gov/GLOBALLOCATOR/sch_info_popu...,1,739,02,5,2303780,52,NOT AVAILABLE
3,-89.542799,32.728496,2005,280252001118,LEAKE CENTRAL ELEMENTARY SCHOOL,603 HWY. 16 WEST,CARTHAGE,MS,39051,NOT AVAILABLE,...,IMAGERY/OTHER,2010-07-06T00:00:00.000Z,http://nces.ed.gov/GLOBALLOCATOR/sch_info_popu...,1,1159,PK,5,2802520,63,NOT AVAILABLE
4,-94.361775,39.364359,2006,291645000891,KEARNEY ELEM.,902 S JEFFERSON,KEARNEY,MO,64060,8518,...,IMAGERY/OTHER,2016-07-18T00:00:00.000Z,http://nces.ed.gov/GLOBALLOCATOR/sch_info_popu...,1,274,KG,5,2916450,22,NOT AVAILABLE


## Transform

1. Quality Of Life by State Data Cleanup

In [6]:
# Cleaning up the DataFrame
# Selecting the columns to be displayed

state_rancking_df = ranking_df[['State', 'lifeQualityRank']]
state_rancking_df.head()

Unnamed: 0,State,lifeQualityRank
0,Washington,1
1,New Hampshire,2
2,Minnesota,3
3,Utah,4
4,Vermont,5


In [7]:
# Export Data
state_rancking_df.to_csv(r'Resources/Clean_data/clean_state_ranking.csv', index = False)

2. Public School Data Cleanup

In [8]:
# Cleaning up the DataFrame
# Selecting the columns to be displayed
clean_schools_df = schools_df[['STATE', 'CITY', 'NAME', 'ADDRESS', 'ZIP', 'X', 'Y', 'DISTRICTID', 'ST_GRADE', 'END_GRADE']]
clean_schools_df

Unnamed: 0,STATE,CITY,NAME,ADDRESS,ZIP,X,Y,DISTRICTID,ST_GRADE,END_GRADE
0,FL,NEW SMYRNA,SAMSULA ACADEMY,248 N SAMSULA DR,32168,-81.050895,29.022271,1201920,KG,05
1,LA,WOODWORTH,CAROLINE DORMON JUNIOR HIGH SCHOOL,8906 HWY 165 SOUTH,71485,-92.507288,31.180659,2201290,PK,08
2,ME,BRUNSWICK,HARRIET BEECHER STOWE ELEMENTARY,44 MCKEEN STREET,4011,-69.971880,43.908147,2303780,02,05
3,MS,CARTHAGE,LEAKE CENTRAL ELEMENTARY SCHOOL,603 HWY. 16 WEST,39051,-89.542799,32.728496,2802520,PK,05
4,MO,KEARNEY,KEARNEY ELEM.,902 S JEFFERSON,64060,-94.361775,39.364359,2916450,KG,05
...,...,...,...,...,...,...,...,...,...,...
102365,MI,DETROIT,COVENANT HOUSE ACADEMY DETROIT - SOUTHWEST SITE,1450 25TH ST,48216,-83.085229,42.320632,2600322,09,12
102366,MI,SOUTH ROCKWOOD,FRED W. RITTER ELEMENTARY SCHOOL,5650 CARLETON ROCKWOOD RD,48179,-83.272599,42.062038,2601980,KG,04
102367,MS,DIBERVILLE,DIBERVILLE ELEM,4540 BRODIE ROAD,39540,-88.914089,30.436478,2801770,KG,03
102368,MO,KANSAS CITY,DAVIDSON ELEM.,5100 N HIGHLAND,64118,-94.558365,39.187941,2922800,PK,05


In [9]:
# Dropping null values
clean_schools_df = clean_schools_df.dropna()
clean_schools_df

Unnamed: 0,STATE,CITY,NAME,ADDRESS,ZIP,X,Y,DISTRICTID,ST_GRADE,END_GRADE
0,FL,NEW SMYRNA,SAMSULA ACADEMY,248 N SAMSULA DR,32168,-81.050895,29.022271,1201920,KG,05
1,LA,WOODWORTH,CAROLINE DORMON JUNIOR HIGH SCHOOL,8906 HWY 165 SOUTH,71485,-92.507288,31.180659,2201290,PK,08
2,ME,BRUNSWICK,HARRIET BEECHER STOWE ELEMENTARY,44 MCKEEN STREET,4011,-69.971880,43.908147,2303780,02,05
3,MS,CARTHAGE,LEAKE CENTRAL ELEMENTARY SCHOOL,603 HWY. 16 WEST,39051,-89.542799,32.728496,2802520,PK,05
4,MO,KEARNEY,KEARNEY ELEM.,902 S JEFFERSON,64060,-94.361775,39.364359,2916450,KG,05
...,...,...,...,...,...,...,...,...,...,...
102365,MI,DETROIT,COVENANT HOUSE ACADEMY DETROIT - SOUTHWEST SITE,1450 25TH ST,48216,-83.085229,42.320632,2600322,09,12
102366,MI,SOUTH ROCKWOOD,FRED W. RITTER ELEMENTARY SCHOOL,5650 CARLETON ROCKWOOD RD,48179,-83.272599,42.062038,2601980,KG,04
102367,MS,DIBERVILLE,DIBERVILLE ELEM,4540 BRODIE ROAD,39540,-88.914089,30.436478,2801770,KG,03
102368,MO,KANSAS CITY,DAVIDSON ELEM.,5100 N HIGHLAND,64118,-94.558365,39.187941,2922800,PK,05


In [10]:
# Renaiming Columns
clean_schools_df = clean_schools_df.rename(columns={'STATE':'State', 'CITY':'City', 'NAME':'SchoolName', 'ADDRESS':'Address',
                                                    'ZIP':'Zip', 'DISTRICTID':'DistricID', 'ST_GRADE':'StartGrade', 'END_GRADE':'EndGrade'})
clean_schools_df.head()

Unnamed: 0,State,City,SchoolName,Address,Zip,X,Y,DistricID,StartGrade,EndGrade
0,FL,NEW SMYRNA,SAMSULA ACADEMY,248 N SAMSULA DR,32168,-81.050895,29.022271,1201920,KG,5
1,LA,WOODWORTH,CAROLINE DORMON JUNIOR HIGH SCHOOL,8906 HWY 165 SOUTH,71485,-92.507288,31.180659,2201290,PK,8
2,ME,BRUNSWICK,HARRIET BEECHER STOWE ELEMENTARY,44 MCKEEN STREET,4011,-69.97188,43.908147,2303780,02,5
3,MS,CARTHAGE,LEAKE CENTRAL ELEMENTARY SCHOOL,603 HWY. 16 WEST,39051,-89.542799,32.728496,2802520,PK,5
4,MO,KEARNEY,KEARNEY ELEM.,902 S JEFFERSON,64060,-94.361775,39.364359,2916450,KG,5


In [11]:
# Export Data
clean_schools_df.to_csv(r'Resources/Clean_data/clean_public_schools.csv', index = False)

## Load

In [13]:
# Create engine and connection to  db.

engine = create_engine(f'postgresql://postgres:{password}@localhost:5432/US_quality_db')
connection = engine.connect()

1. Quality Of Life by State Data Postgresql Database table

In [14]:
state_rancking_df.to_sql(name='US_Ranking_State', con=engine, if_exists='append', index=False)

2. Public School Data Postgresql Database table

In [16]:
clean_schools_df.to_sql(name='Public_Schools', con=engine, if_exists='append', index=False)

In [17]:
connection.close()