In [8]:
import getpass
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine, inspect

### Store CSV into DataFrame

In [9]:
# Path (relative to the working directory) of the 2017-18 public school locations CSV.
csv_file = "Resources/Public_School_Locations_201718.csv"

# Parse the file with pandas' default settings, then preview the first five rows.
schools_df = pd.read_csv(filepath_or_buffer=csv_file)
schools_df.head(n=5)

Unnamed: 0,NCESSCH,NAME,STREET,CITY,STATE,ZIP,NMCNTY,LAT,LON,SCHOOLYEAR
0,10237000000.0,Pillans Middle School,2051 Military Rd,Mobile,AL,36605,Mobile County,30.628471,-88.08544,2017-2018
1,10237000000.0,Phillips Preparatory Middle School,3255 Old Shell Rd,Mobile,AL,36607,Mobile County,30.696872,-88.12032,2017-2018
2,10237000000.0,WD Robbins Elementary School,2416 W Main St,Prichard,AL,36610,Mobile County,30.744798,-88.098192,2017-2018
3,10237000000.0,CL Scarborough Model Middle School,1800 Phillips Ln,Mobile,AL,36618,Mobile County,30.726541,-88.14327,2017-2018
4,10237000000.0,Semmes Middle School,4566 Ed George Road,Semmes,AL,36575,Mobile County,30.786316,-88.290427,2017-2018


### Create a new DataFrame with selected columns

In [10]:
# Columns carried forward from the raw school-locations frame.
schools_cols = ['NAME', 'STREET', 'CITY', 'STATE', 'ZIP']

# Build the trimmed frame in one chain:
#   1. keep only the columns listed above,
#   2. rename each header to its lower-case form (NAME -> name, ..., ZIP -> zip),
#   3. drop rows that are exact duplicates across all five columns.
# The row index inherited from schools_df is left untouched (it is not reset here).
schools_transformed = (
    schools_df[schools_cols]
    .rename(columns={header: header.lower() for header in schools_cols})
    .drop_duplicates()
)
schools_transformed.head()

Unnamed: 0,name,street,city,state,zip
0,Pillans Middle School,2051 Military Rd,Mobile,AL,36605
1,Phillips Preparatory Middle School,3255 Old Shell Rd,Mobile,AL,36607
2,WD Robbins Elementary School,2416 W Main St,Prichard,AL,36610
3,CL Scarborough Model Middle School,1800 Phillips Ln,Mobile,AL,36618
4,Semmes Middle School,4566 Ed George Road,Semmes,AL,36575


### Clean DataFrame

In [11]:
# Keep data from the state of GA
schools_transformed_ga = schools_transformed[schools_transformed['state'] == 'GA']
# Sort by Zip and Reset index
schools_transformed_ga.sort_values('zip', inplace=True)
schools_transformed_ga.reset_index(drop=True, inplace=True)
schools_transformed_ga.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,name,street,city,state,zip
0,DeKalb School of the Arts,1192 Clarendon Ave,Avondale Estates,GA,30002
1,Museum School Avondale Estates,3191 Covington Hwy,Avondale Estates,GA,30002
2,DeKalb Elementary School of the Arts,3131 Old Rockbridge Rd,Avondale Estates,GA,30002
3,Avondale Elementary School,8 Lakeshore Dr,Avondale Estates,GA,30002
4,Manning Oaks Elementary School,405 Cumming St,Alpharetta,GA,30004


### Connect to local database

In [12]:
# Database credentials are taken from the environment so that no secret is
# stored in the notebook. PGUSER / PGPASSWORD are looked up first; if no
# password is set, the user is prompted for one without echoing it.
dbuser = os.environ.get("PGUSER", "postgres")
dbpassword = os.environ.get("PGPASSWORD") or getpass.getpass(f"PostgreSQL password for {dbuser}: ")

# Percent-encode the user and password so characters such as '@', ':', '/' or
# '$' cannot be misread as URL delimiters when the connection string is parsed.
rds_connection_string = (
    f"{quote(dbuser, safe='')}:{quote(dbpassword, safe='')}"
    "@localhost:5432/garealestate_db"
)
engine = create_engine(f'postgresql://{rds_connection_string}')

### Check for tables

In [13]:
# List the tables visible through this engine. Engine.table_names() is
# deprecated in SQLAlchemy 1.4 and removed in 2.0; the Inspector API returns
# the same list of names and works on both old and new releases.
inspect(engine).get_table_names()

['ga_schools']

### Use pandas to load the CSV-derived DataFrame into the database

In [14]:
# Insert the Georgia rows into the ga_schools table.
# if_exists='append' adds to the table rather than recreating it, so every run
# of this cell inserts the rows again. index=False keeps the DataFrame index
# out of the table.
schools_transformed_ga.to_sql(
    'ga_schools',
    con=engine,
    index=False,
    if_exists='append',
)

### Confirm data has been added by querying the ga_schools table
* NOTE: can also check using pgAdmin

In [15]:
# Read the ga_schools table back through the same engine and preview the
# first five rows to confirm the load.
pd.read_sql_query(
    sql='select * from ga_schools',
    con=engine,
).head(n=5)

Unnamed: 0,id,name,street,city,state,zip
0,1,DeKalb School of the Arts,1192 Clarendon Ave,Avondale Estates,GA,30002
1,2,Museum School Avondale Estates,3191 Covington Hwy,Avondale Estates,GA,30002
2,3,DeKalb Elementary School of the Arts,3131 Old Rockbridge Rd,Avondale Estates,GA,30002
3,4,Avondale Elementary School,8 Lakeshore Dr,Avondale Estates,GA,30002
4,5,Manning Oaks Elementary School,405 Cumming St,Alpharetta,GA,30004
