In [None]:
# Import dependencies
import os

import pandas as pd
from sqlalchemy import create_engine, inspect

### Store CSV into DataFrame

In [None]:
# Load the athlete-events CSV from the Resources folder into a DataFrame
csv_events = "Resources/athlete_events.csv"
events_df = pd.read_csv(filepath_or_buffer=csv_events)

# Preview the first rows
events_df.head()

In [None]:
# Drop the columns that are not loaded into the database.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell re-runnable: a second execution no longer raises KeyError because the
# columns were already removed by the first one.
events_df = events_df.drop(columns=['Team', 'Games'], errors='ignore')
events_df.head()

In [None]:
# Replace null values.
# A single DataFrame-level fillna with a per-column mapping is used instead of
# `events_df[col].fillna(..., inplace=True)`: the latter is chained in-place
# assignment, which warns on recent pandas and does not modify `events_df`
# at all once copy-on-write is enabled.
# Note that 'None' here is the literal string, not Python's None.
events_df = events_df.fillna({'Medal': 'None', 'Height': 0, 'Weight': 0, 'Age': 0})

# Remove fully duplicated rows, then report how many remain
events_df = events_df.drop_duplicates()
print(len(events_df))
events_df.head()

### Create a new DataFrame with selected columns

In [None]:
# Columns carried over from the cleaned events frame, in output order
events_cols = ["ID", "Name", "Sex", "Age", "Height", "Weight", "NOC", "Year", "Season", "City", "Sport", "Event", "Medal"]

# Source header -> renamed column header
column_renames = {
    "ID": "id",
    "Name": "athlete_name",
    "Sex": "athlete_sex",
    "Age": "athlete_age",
    "Height": "athlete_height",
    "Weight": "athlete_weight",
    "NOC": "noc",
    "Year": "year",
    "Season": "season",
    "City": "city_of_games",
    "Sport": "sport",
    "Event": "event",
    "Medal": "medal",
}

# Select, rename and index by "id" in one chain; every step returns a new
# frame, so `events_df` itself is left untouched
events_transformed = (
    events_df[events_cols]
    .rename(columns=column_renames)
    .set_index("id")
)

events_transformed.head()

In [None]:
# Load the NOC regions CSV from the Resources folder into a DataFrame
csv_noc = "Resources/noc_regions.csv"
noc_df = pd.read_csv(filepath_or_buffer=csv_noc)

# Preview the first rows
noc_df.head()

### Store JSON data into a DataFrame

In [None]:
# Load the customer-location JSON file into a DataFrame.
# NOTE(review): this path starts with "../Resources" while the CSV cells read
# from "Resources" — confirm which directory the notebook is meant to run from.
json_file = "../Resources/customer_location.json"
customer_location_df = pd.read_json(path_or_buf=json_file)

# Preview the first rows
customer_location_df.head()

### Clean DataFrame

In [None]:
# Keep only the three columns written to the database; .copy() makes the
# result independent of customer_location_df
new_customer_location_df = customer_location_df.loc[:, ["id", "address", "us_state"]].copy()
new_customer_location_df.head()

### Connect to local database

In [None]:
# Build the connection string for the olympics database.
# OLYMPICS_DB_CONNECTION (format: user:password@host:port/dbname) overrides the
# local development default, so real credentials can be supplied from the
# environment instead of being committed with the notebook.
rds_connection_string = os.environ.get(
    "OLYMPICS_DB_CONNECTION",
    "postgres:postgres@localhost:5432/olympics_db",
)
engine = create_engine(f'postgresql://{rds_connection_string}')

### Check for tables

In [None]:
# List the tables visible through the engine.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API is the supported replacement and returns the same list.
inspect(engine).get_table_names()

### Use pandas to load the CSV-converted DataFrame into the database

In [None]:
# Append the transformed events to the "events" table; index=True also writes
# the DataFrame index as a column
events_transformed.to_sql(
    name='events',
    con=engine,
    if_exists='append',
    index=True,
)

### Use pandas to load the JSON-converted DataFrame into the database

In [None]:
# Append the trimmed customer-location rows to the "customer_location" table;
# index=False means the DataFrame index is not written
new_customer_location_df.to_sql(
    name='customer_location',
    con=engine,
    if_exists='append',
    index=False,
)

### Confirm data has been added by querying the events table
* NOTE: can also check using pgAdmin

In [None]:
# Read back a few rows from "events", the table the CSV-derived DataFrame was
# written to. The previous query targeted "customer_name", a table this
# notebook never creates or loads.
pd.read_sql_query('select * from events', con=engine).head()

### Confirm data has been added by querying the customer_location table

In [None]:
# Read back the first rows of "customer_location" to confirm the load
pd.read_sql_query(
    sql='select * from customer_location',
    con=engine,
).head()