In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

# EXTRACTION 

In [3]:
# Extract: read the customer CSV from the Resources folder into a DataFrame.
csv_file = "./Resources/customer_data.csv"
customer_data_df = pd.read_csv(filepath_or_buffer=csv_file)
# Bare last expression so the notebook renders the frame as a table.
customer_data_df

Unnamed: 0,id,first_name,last_name,email,gender,car
0,1,Benetta,Cancott,bcancott0@studiopress.com,Female,Scion
1,2,Lilyan,Cherry,lcherry1@deliciousdays.com,Female,Chrysler
2,3,Ezekiel,Benasik,ebenasik2@wikia.com,Male,Mercedes-Benz
3,4,Kennedy,Atlay,katlay3@so-net.ne.jp,Male,Buick
4,5,Sanford,Salmen,ssalmen4@reuters.com,Male,Lincoln
...,...,...,...,...,...,...
995,996,Clare,Freshwater,cfreshwaterrn@fema.gov,Female,Nissan
996,997,Viole,Letrange,vletrangero@hao123.com,Female,Chevrolet
997,998,Tim,Colvill,tcolvillrp@storify.com,Female,GMC
998,999,Benny,Wafer,bwaferrq@tinypic.com,Female,Citroën


In [6]:
# Keep only the id and name columns. The .copy() gives an independent frame,
# so later changes to it cannot touch customer_data_df.
new_customer_data_df = customer_data_df.loc[
    :, ["id", "first_name", "last_name"]
].copy()
new_customer_data_df

Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen
...,...,...,...
995,996,Clare,Freshwater
996,997,Viole,Letrange
997,998,Tim,Colvill
998,999,Benny,Wafer


# TRANSFORMATIONS

In [7]:
# Extract: read the customer location JSON file into a DataFrame and preview
# the first rows.
json_file = "./Resources/customer_location.json"
customer_location_df = pd.read_json(path_or_buf=json_file)
customer_location_df.head()


Unnamed: 0,id,address,longitude,latitude,us_state
0,1,043 Mockingbird Place,-86.5186,39.1682,Indiana
1,2,4 Prentice Point,-85.0707,41.0938,Indiana
2,3,46 Derek Junction,-96.7776,32.7673,Texas
3,4,11966 Old Shore Place,-94.3567,39.035,Missouri
4,5,5 Evergreen Circle,-73.9772,40.7808,New York


In [9]:
# Clean: drop longitude/latitude by selecting only the id, address and state
# columns. The .copy() detaches the result from customer_location_df.
new_customer_location_df = customer_location_df.loc[
    :, ["id", "address", "us_state"]
].copy()
new_customer_location_df.head()

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York


#### Create the schema where the data will be loaded (this is the SQL part: run it in the `etl` database before connecting)

```sql
-- Target table for the id / first_name / last_name columns taken from the CSV.
CREATE TABLE customer_name (
    id         INTEGER PRIMARY KEY,
    first_name TEXT,
    last_name  TEXT
);

-- Target table for the id / address / us_state columns taken from the JSON.
CREATE TABLE customer_location (
    id       INTEGER PRIMARY KEY,
    address  TEXT,
    us_state TEXT
);

-- Inspect the contents of both tables.
SELECT * FROM customer_name;
SELECT * FROM customer_location;
```

In [10]:
# Connect to the db
# Credentials are read from the standard libpq environment variables so that no
# password is stored in the notebook. Only PGPASSWORD is mandatory; the other
# settings fall back to the values this notebook has always used.
db_user = quote_plus(os.environ.get("PGUSER", "postgres"))
# os.environ[...] raises KeyError when PGPASSWORD is not set, which fails fast
# instead of attempting a connection with an empty password.
# quote_plus escapes characters such as '@', ':' or '/' that would otherwise
# corrupt the URL.
db_password = quote_plus(os.environ["PGPASSWORD"])
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_name = os.environ.get("PGDATABASE", "etl")

connection_string = f"{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
engine = create_engine(f'postgresql://{connection_string}')

In [11]:
# Check the tables
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0.
# The Inspector API is the supported way to list the table names.
inspect(engine).get_table_names()

['customer_name', 'customer_location']

# LOADING 

In [13]:
# Load: append the rows of the CSV-derived DataFrame to the customer_name table.
# index=False keeps the DataFrame index from being written as a column.
# NOTE(review): if the table was created with id as PRIMARY KEY, re-running this
# cell presumably fails on duplicate ids because of if_exists='append' — confirm.
new_customer_data_df.to_sql(
    name="customer_name",
    con=engine,
    if_exists='append',
    index=False,
)

In [14]:
# Load: append the rows of the JSON-derived DataFrame to the customer_location
# table. index=False keeps the DataFrame index from being written as a column.
# NOTE(review): if the table was created with id as PRIMARY KEY, re-running this
# cell presumably fails on duplicate ids because of if_exists="append" — confirm.
new_customer_location_df.to_sql(
    name='customer_location',
    con=engine,
    if_exists="append",
    index=False,
)

In [15]:

# Read the customer_name table back from the database to verify the load.
pd.read_sql_query(sql='select * from customer_name', con=engine)


Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen
...,...,...,...
995,996,Clare,Freshwater
996,997,Viole,Letrange
997,998,Tim,Colvill
998,999,Benny,Wafer


In [16]:
# Read the customer_location table back from the database to verify the load.
pd.read_sql_query(sql='select * from customer_location', con=engine)

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York
...,...,...,...
995,996,6546 Waxwing Circle,Alabama
996,997,604 Parkside Street,Utah
997,998,44 Northwestern Court,New Jersey
998,999,33 Bunting Point,Nevada
