In [None]:
import os

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

### Store CSV into DataFrame

In [None]:
# Location of the athlete events CSV, relative to this notebook
csv_file = "../Resources/athlete_events.csv"

# Read the file into a DataFrame and preview its first five rows
athlete_df = pd.read_csv(filepath_or_buffer=csv_file)
athlete_df.head(n=5)


### Load region data, merge, and create new data with select columns

In [None]:
# Location of the NOC regions CSV, relative to this notebook
csv_file2 = "../Resources/noc_regions.csv"

# Read the file into a DataFrame and preview its first five rows
noc_df = pd.read_csv(filepath_or_buffer=csv_file2)
noc_df.head(n=5)


In [None]:
# Left-join noc_df onto athlete_df using the shared "NOC" column, so every
# row of athlete_df is kept even when noc_df has no matching NOC value
merged_df = athlete_df.merge(noc_df, how="left", on="NOC")
merged_df.head(n=5)

In [None]:
# Columns carried over from the merged data into the athlete table
athlete_cols = ["ID", "Name", "NOC"]

# Select the columns, switch to lowercase headers (the NOC value is stored
# under "country"), keep the first row seen for each id, and index by id.
# Written as a single chain so the result is a fresh DataFrame each run.
athlete_transformed = (
    merged_df[athlete_cols]
    .rename(columns={"ID": "id", "Name": "name", "NOC": "country"})
    .drop_duplicates(subset="id")
    .set_index("id")
)

athlete_transformed.head(n=5)

### Clean DataFrame

In [None]:
# Columns carried over from the merged data into the olympic table
olympic_cols = ["ID", "Year", "Season", "City"]

# Select the columns, switch to lowercase headers, keep the first row seen
# for each id, and index by id.
# NOTE(review): de-duplicating on "id" alone keeps only one
# year/season/city combination per id; any further rows sharing that id are
# dropped -- confirm this is the intended shape of the olympic table.
olympic_transformed = (
    merged_df[olympic_cols]
    .rename(
        columns={"ID": "id", "Year": "year", "Season": "season", "City": "city"}
    )
    .drop_duplicates(subset="id")
    .set_index("id")
)

olympic_transformed.head(n=5)

In [None]:
# Columns carried over from the merged data into the region table
region_cols = ["ID", "Sport", "Medal"]

# Select the columns, switch to lowercase headers, keep the first row seen
# for each id, and index by id.
# NOTE(review): de-duplicating on "id" alone keeps only one sport/medal
# pair per id; any further rows sharing that id are dropped -- confirm this
# is the intended shape of the region table.
region_transformed = (
    merged_df[region_cols]
    .rename(columns={"ID": "id", "Sport": "sport", "Medal": "medal"})
    .drop_duplicates(subset="id")
    .set_index("id")
)

region_transformed.head(n=5)

### Connect to local database

In [None]:
# Read "user:password@host:port/database" from the OLYMPIC_DB_CONNECTION
# environment variable so real credentials need not be committed with the
# notebook; when the variable is unset, fall back to the local defaults.
connection_string = os.environ.get(
    "OLYMPIC_DB_CONNECTION", "postgres:postgres@localhost:5432/olympic_db"
)

# Engine used by every database read and write below
engine = create_engine(f'postgresql://{connection_string}')

### Check for tables

In [None]:
# List the tables visible through the engine. Engine.table_names() was
# deprecated in SQLAlchemy 1.4 and removed in 2.0; the inspector API
# provides the same list on both older and newer versions.
inspect(engine).get_table_names()

### Use pandas to load the CSV-derived DataFrames into the database

In [None]:
# Write olympic_transformed (index included) into the "olympic" table.
# if_exists="append" adds to an existing table instead of replacing it, so
# re-running this cell will try to insert the same rows again.
olympic_transformed.to_sql("olympic", engine, index=True, if_exists="append")

In [None]:
# Write athlete_transformed (index included) into the "athlete" table.
# if_exists="append" adds to an existing table instead of replacing it, so
# re-running this cell will try to insert the same rows again.
athlete_transformed.to_sql("athlete", engine, index=True, if_exists="append")

In [None]:
# Write region_transformed (index included) into the "region" table.
# if_exists="append" adds to an existing table instead of replacing it, so
# re-running this cell will try to insert the same rows again.
region_transformed.to_sql("region", engine, index=True, if_exists="append")

### Confirm data has been added by querying the athlete table
* NOTE: can also check using pgAdmin

In [None]:
# Read the athlete table back through the engine and preview the first rows
pd.read_sql_query(sql="select * from athlete", con=engine).head(n=5)

### Confirm data has been added by querying the remaining tables

In [None]:
# Read the olympic table back through the engine and preview the first rows,
# confirming the olympic load (the athlete table has its own check cell)
pd.read_sql_query('select * from olympic', con=engine).head()

In [None]:
# Read the region table back through the engine and preview the first rows
pd.read_sql_query(sql="select * from region", con=engine).head(n=5)