In [21]:
%config Completer.use_jedi = False

import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

## Extract Excel Files into DataFrames

In [22]:
# Load the 'Details' sheet of the athletes workbook into a DataFrame and
# preview the first rows.
athletes_file = "./Resources/Athletes.xlsx"
athletes_df = pd.read_excel(
    athletes_file,
    engine='openpyxl',
    sheet_name='Details',
)
athletes_df.head()

Unnamed: 0,Name,NOC,Discipline
0,AALERUD Katrine,Norway,Cycling Road
1,ABAD Nestor,Spain,Artistic Gymnastics
2,ABAGNALE Giovanni,Italy,Rowing
3,ABALDE Alberto,Spain,Basketball
4,ABALDE Tamara,Spain,Basketball


In [23]:
# Load the 'Details' sheet of the coaches workbook into a DataFrame and
# preview the first rows.
coaches_file = "./Resources/Coaches.xlsx"
coaches_df = pd.read_excel(
    coaches_file,
    engine='openpyxl',
    sheet_name='Details',
)
coaches_df.head()

  warn("Workbook contains no default style, apply openpyxl's default")


Unnamed: 0,Name,NOC,Discipline,Event
0,ABDELMAGID Wael,Egypt,Football,
1,ABE Junya,Japan,Volleyball,
2,ABE Katsuhiko,Japan,Basketball,
3,ADAMA Cherif,Côte d'Ivoire,Football,
4,AGEBA Yuya,Japan,Volleyball,


In [24]:
# Load the 'Details' sheet of the teams workbook into a DataFrame and
# preview the first rows.
teams_file = "./Resources/Teams.xlsx"
teams_df = pd.read_excel(
    teams_file,
    engine='openpyxl',
    sheet_name='Details',
)
teams_df.head()

Unnamed: 0,Name,Discipline,NOC,Event
0,Belgium,3x3 Basketball,Belgium,Men
1,China,3x3 Basketball,People's Republic of China,Men
2,China,3x3 Basketball,People's Republic of China,Women
3,France,3x3 Basketball,France,Women
4,Italy,3x3 Basketball,Italy,Women


## Transform country DataFrame

In [79]:
# Source columns picked from the raw frames by the transform cells.
country_cols = ['NOC']
discipline_cols = ['Discipline']
# A person row is the name followed by the country and discipline columns.
athletes_cols = ['Name'] + country_cols + discipline_cols
# Coaches use the same three columns; keep it a separate list object.
coaches_cols = list(athletes_cols)

In [80]:
# Build the country lookup from every source frame that carries an NOC, not
# only from teams_df: the athlete and coach cells inner-join on country_name,
# so an NOC missing from this lookup would silently drop those people.
# teams_df rows are concatenated first and ignore_index=True renumbers rows
# positionally, so every country that appears in teams_df keeps the same
# country_id (its first row position in teams_df) as before; countries found
# only in the athletes/coaches frames get ids after the teams rows.
country_transformed = (
    pd.concat(
        [
            teams_df[country_cols],
            athletes_df[country_cols],
            coaches_df[country_cols],
        ],
        ignore_index=True,
    )
    .drop_duplicates()
    # The surviving row position becomes the surrogate key.
    .reset_index()
    .rename(columns={'index': 'country_id', 'NOC': 'country_name'})
)
country_transformed.head()

Unnamed: 0,country_id,country_name
0,0,Belgium
1,1,People's Republic of China
2,3,France
3,4,Italy
4,5,Japan


## Transform discipline DataFrame

In [81]:
# Discipline lookup: one row per distinct discipline found in the athletes
# frame. The row position of the first occurrence is kept as discipline_id.
discipline_transformed = (
    athletes_df[discipline_cols]
    .drop_duplicates()
    .reset_index()
    .rename(columns={'Discipline': 'discipline', 'index': 'discipline_id'})
)
discipline_transformed.head()

Unnamed: 0,discipline_id,discipline
0,0,Cycling Road
1,1,Artistic Gymnastics
2,2,Rowing
3,3,Basketball
4,5,Handball


## Transform Athletes & Coaches DataFrames

In [103]:
# De-duplicate athletes on (name, country, discipline) and expose the original
# row position as athelete_id; columns are renamed to the names used by the
# later merge cells.
athelete_column_names = {
    'index': 'athelete_id',
    'Name': 'athelete_name',
    'NOC': 'country_name',
    'Discipline': 'discipline',
}
atheletes_transformed = (
    athletes_df[athletes_cols]
    .drop_duplicates()
    .reset_index()
    .rename(columns=athelete_column_names)
)
atheletes_transformed.head()

Unnamed: 0,athelete_id,athelete_name,country_name,discipline
0,0,AALERUD Katrine,Norway,Cycling Road
1,1,ABAD Nestor,Spain,Artistic Gymnastics
2,2,ABAGNALE Giovanni,Italy,Rowing
3,3,ABALDE Alberto,Spain,Basketball
4,4,ABALDE Tamara,Spain,Basketball


In [105]:
# De-duplicate coaches on (name, country, discipline) and expose the original
# row position as coache_id; columns are renamed to the names used by the
# later merge cells.
coache_column_names = {
    'index': 'coache_id',
    'Name': 'coache_name',
    'NOC': 'country_name',
    'Discipline': 'discipline',
}
coaches_transformed = (
    coaches_df[coaches_cols]
    .drop_duplicates()
    .reset_index()
    .rename(columns=coache_column_names)
)
coaches_transformed.head()

Unnamed: 0,coache_id,coache_name,country_name,discipline
0,0,ABDELMAGID Wael,Egypt,Football
1,1,ABE Junya,Japan,Volleyball
2,2,ABE Katsuhiko,Japan,Basketball
3,3,ADAMA Cherif,Côte d'Ivoire,Football
4,4,AGEBA Yuya,Japan,Volleyball


In [115]:
# Replace each coach's country and discipline names with the matching lookup
# ids. Both joins are inner joins, so a coach whose country_name or discipline
# has no match in the lookup frames is left out of the result.
coaches_transformed_with_country_id = pd.merge(
    coaches_transformed,
    country_transformed,
    on="country_name",
    how="inner",
).rename(columns={'country_id': 'coache_country_id'})

coaches_transformed_with_discipline_id = (
    pd.merge(
        coaches_transformed_with_country_id,
        discipline_transformed,
        on="discipline",
        how="inner",
    )
    .rename(columns={'discipline_id': 'coache_discipline_id'})
    # The text columns are no longer needed once the ids are attached.
    .drop(columns=['country_name', 'discipline'])
)
coaches_transformed_with_discipline_id.head()

Unnamed: 0,coache_id,coache_name,coache_country_id,coache_discipline_id
0,0,ABDELMAGID Wael,29,25
1,12,ALY Kamal,29,25
2,316,SALAM Mohamed,29,25
3,329,SHAWKY Gharib,29,25
4,330,SIAM Zaghloul,29,25


In [119]:
# Replace each athlete's country and discipline names with the matching lookup
# ids. Both joins are inner joins, so an athlete whose country_name or
# discipline has no match in the lookup frames is left out of the result.
atheletes_transformed_with_country_id = pd.merge(
    atheletes_transformed,
    country_transformed,
    on="country_name",
    how="inner",
).rename(columns={'country_id': 'athelete_country_id'})

atheletes_transformed_with_discipline_id = (
    pd.merge(
        atheletes_transformed_with_country_id,
        discipline_transformed,
        on="discipline",
        how="inner",
    )
    .rename(columns={'discipline_id': 'athelete_discipline_id'})
    # The text columns are no longer needed once the ids are attached.
    .drop(columns=['country_name', 'discipline'])
)
atheletes_transformed_with_discipline_id.head()

Unnamed: 0,athelete_id,athelete_name,athelete_country_id,athelete_discipline_id
0,0,AALERUD Katrine,272,0
1,1153,BORGLI Stine,272,0
2,2979,FOSS Tobias S.,272,0
3,3982,HOELGAARD Markus,272,0
4,4446,JOHANNESSEN Tobias Halland,272,0


## Create Database Connection

In [125]:
# Database credentials must not be stored in the notebook: read them from the
# environment, and fall back to an interactive prompt for the password when
# POSTGRES_PASSWORD is not set. The user defaults to 'postgres'.
db_user = os.environ.get("POSTGRES_USER", "postgres")
db_password = os.environ.get("POSTGRES_PASSWORD") or getpass("Postgres password: ")

# URL-quote both parts so characters such as '@', ':' or '/' in a credential
# cannot corrupt the connection URL.
connection_string = "{}:{}@localhost:5432/customer_db".format(
    quote_plus(db_user), quote_plus(db_password)
)
engine = create_engine(f'postgresql://{connection_string}')

## Load DataFrames into Database

In [126]:
# Append the country lookup rows to the 'countries' table. index=True also
# writes the DataFrame index as an extra column.
country_transformed.to_sql(
    name='countries',
    con=engine,
    index=True,
    if_exists='append',
)

In [127]:
# Append the discipline lookup rows to the 'disciplines' table. index=True
# also writes the DataFrame index as an extra column.
discipline_transformed.to_sql(
    name='disciplines',
    con=engine,
    index=True,
    if_exists='append',
)

In [128]:
# Append the athlete rows (with country/discipline ids) to the 'atheletes'
# table. index=True also writes the DataFrame index as an extra column.
atheletes_transformed_with_discipline_id.to_sql(
    name='atheletes',
    con=engine,
    index=True,
    if_exists='append',
)

In [129]:
# Append the coach rows (with country/discipline ids) to the 'coaches' table.
# index=True also writes the DataFrame index as an extra column.
coaches_transformed_with_discipline_id.to_sql(
    name='coaches',
    con=engine,
    index=True,
    if_exists='append',
)