In [5]:
%config Completer.use_jedi = False

import pandas as pd
from sqlalchemy import create_engine

## Extract ExcelFiles into DataFrames

In [35]:
athletes_file = "./Resources/Athletes.xlsx"
athletes_df = pd.read_excel(athletes_file, sheet_name='Details', engine='openpyxl')
athletes_df.head()

Unnamed: 0,Name,NOC,Discipline
0,AALERUD Katrine,Norway,Cycling Road
1,ABAD Nestor,Spain,Artistic Gymnastics
2,ABAGNALE Giovanni,Italy,Rowing
3,ABALDE Alberto,Spain,Basketball
4,ABALDE Tamara,Spain,Basketball


In [36]:
coaches_file = "./Resources/Coaches.xlsx"
coaches_df = pd.read_excel(coaches_file, sheet_name='Details', engine='openpyxl')
coaches_df.head()

  warn("Workbook contains no default style, apply openpyxl's default")


Unnamed: 0,Name,NOC,Discipline,Event
0,ABDELMAGID Wael,Egypt,Football,
1,ABE Junya,Japan,Volleyball,
2,ABE Katsuhiko,Japan,Basketball,
3,ADAMA Cherif,Côte d'Ivoire,Football,
4,AGEBA Yuya,Japan,Volleyball,


In [37]:
teams_file = "./Resources/Teams.xlsx"
teams_df = pd.read_excel(teams_file, sheet_name='Details', engine='openpyxl')
teams_df.head()

Unnamed: 0,Name,Discipline,NOC,Event
0,Belgium,3x3 Basketball,Belgium,Men
1,China,3x3 Basketball,People's Republic of China,Men
2,China,3x3 Basketball,People's Republic of China,Women
3,France,3x3 Basketball,France,Women
4,Italy,3x3 Basketball,Italy,Women


## Transform country DataFrame

In [38]:
country_cols = ['NOC']
discipline_cols = ['Discipline']
athletes_cols = ['Name', 'NOC', 'Discipline']
coaches_cols = ['Name', 'NOC', 'Discipline']

In [39]:
country_transformed = teams_df[country_cols].copy().drop_duplicates().reset_index().rename(columns={'index': 'country_id','NOC': 'country_name'})
country_transformed

Unnamed: 0,country_id,country_name
0,0,Belgium
1,1,People's Republic of China
2,3,France
3,4,Italy
4,5,Japan
...,...,...
79,501,Fiji
80,645,Croatia
81,673,Singapore
82,674,Slovakia


## Transform discipline DataFrame

In [42]:
discipline_transformed = teams_df[discipline_cols].copy().drop_duplicates().reset_index().rename(columns={'index': 'discipline_id','Discipline': 'discipline'})
discipline_transformed

Unnamed: 0,discipline_id,discipline
0,0,3x3 Basketball
1,16,Archery
2,69,Artistic Gymnastics
3,93,Artistic Swimming
4,125,Athletics
5,204,Baseball/Softball
6,216,Basketball
7,240,Beach Volleyball
8,288,Cycling Track
9,352,Fencing


## Transform Atheletes & Coaches DataFrame

In [26]:
atheletes_transformed = athletes_df[athletes_cols].copy().drop_duplicates().reset_index().rename(columns={'index': 'athelete_id','Name': 'athelete_name', 'NOC': 'country_name', 'Discipline': 'discipline'})
atheletes_transformed

Unnamed: 0,athelete_id,athelete_name,country_name,discipline
0,0,AALERUD Katrine,Norway,Cycling Road
1,1,ABAD Nestor,Spain,Artistic Gymnastics
2,2,ABAGNALE Giovanni,Italy,Rowing
3,3,ABALDE Alberto,Spain,Basketball
4,4,ABALDE Tamara,Spain,Basketball
...,...,...,...,...
11079,11080,ZWICKER Martin Detlef,Germany,Hockey
11080,11081,ZWOLINSKA Klaudia,Poland,Canoe Slalom
11081,11082,ZYKOVA Yulia,ROC,Shooting
11082,11083,ZYUZINA Ekaterina,ROC,Sailing


In [27]:
coaches_transformed = coaches_df[coaches_cols].copy().drop_duplicates().reset_index().rename(columns={'index': 'coache_id','NOC': 'country_name','Name': 'coache_name', 'Discipline':'discipline'})
coaches_transformed

Unnamed: 0,coache_id,coache_name,country_name,discipline
0,0,ABDELMAGID Wael,Egypt,Football
1,1,ABE Junya,Japan,Volleyball
2,2,ABE Katsuhiko,Japan,Basketball
3,3,ADAMA Cherif,Côte d'Ivoire,Football
4,4,AGEBA Yuya,Japan,Volleyball
...,...,...,...,...
376,389,ZAMORA PEDREIRA Javier,Spain,Basketball
377,390,ZAMPIERI Francesca,Liechtenstein,Artistic Swimming
378,391,ZHANG Xiaohuan,People's Republic of China,Artistic Swimming
379,392,ZIJP Simon,Netherlands,Hockey


In [28]:
coaches_transformed_with_country_id = coaches_transformed.merge(country_transformed, how="inner", on=["country_name"]).rename(columns={'country_id': 'coache_country_id'})
coaches_transformed_with_discipline_id = coaches_transformed_with_country_id.merge(discipline_transformed, how="inner", on=["discipline"]).rename(columns={'discipline_id': 'coache_discipline_id'}).drop(columns=['country_name', 'discipline'])
coaches_transformed_with_discipline_id

Unnamed: 0,coache_id,coache_name,coache_country_id,coache_discipline_id
0,0,ABDELMAGID Wael,29,404
1,12,ALY Kamal,29,404
2,316,SALAM Mohamed,29,404
3,329,SHAWKY Gharib,29,404
4,330,SIAM Zaghloul,29,404
...,...,...,...,...
373,199,KNEZEVIC Nemanja,46,721
374,235,MARTIN Paul,119,721
375,247,MENTOOR Delaine,119,721
376,131,GOJKOVIC Vladimir,445,721


In [25]:
atheletes_transformed_with_country_id = atheletes_transformed.merge(country_transformed, how="outer", on=["country_name"]).rename(columns={'country_id': 'athelete_country_id'})
atheletes_transformed_with_discipline_id = atheletes_transformed_with_country_id.merge(discipline_transformed, how="inner", on=["discipline"]).rename(columns={'discipline_id': 'athelete_discipline_id'}).drop(columns=['country_name', 'discipline'])
atheletes_transformed_with_discipline_id

Unnamed: 0,athelete_id,athelete_name,athelete_country_id,athelete_discipline_id
0,50,ABELVIK ROED Magnus,272.0,432
1,941,BERGERUD Torbjoern,272.0,432
2,1044,BJOERNSEN Kristian,272.0,432
3,1244,BRATTSET DALE Kari,272.0,432
4,1253,BREDAL OFTEDAL Stine,272.0,432
...,...,...,...,...
6828,9906,TUIVUAKA Asaeli,501.0,494
6829,9929,TUWAI Jerry,501.0,494
6830,9965,ULUNISAU Reapi,501.0,494
6831,10013,VAKURUNABILI Josua,501.0,494


## Create Database Connection

In [66]:
connection_string = "{}:{}@localhost:5432/olympics".format(username, password)
engine = create_engine(f'postgresql://{connection_string}')

In [44]:
# Confirm tables
engine.table_names()

  


['disciplines', 'atheletes', 'countries', 'coaches']

## Load DataFrames into Database

In [52]:
country_transformed.to_sql(name='countries', con=engine, if_exists='replace', index=False)

InternalError: (psycopg2.errors.DependentObjectsStillExist) cannot drop table countries because other objects depend on it
DETAIL:  constraint atheletes_athelete_country_id_fkey on table atheletes depends on table countries
constraint coaches_coache_country_id_fkey on table coaches depends on table countries
HINT:  Use DROP ... CASCADE to drop the dependent objects too.

[SQL: 
DROP TABLE countries]
(Background on this error at: https://sqlalche.me/e/14/2j85)

In [61]:
try: 
    discipline_transformed.to_sql(name='disciplines', con=engine, if_exists='fail', index=False)
except:
    pass

In [62]:
try: 
    atheletes_transformed_with_discipline_id.to_sql(name='atheletes', con=engine, if_exists='replace', index=False)
except:
    pass

In [63]:
try:
    coaches_transformed_with_discipline_id.to_sql(name='coaches', con=engine, if_exists='replace', index=False)
except:
    pass

In [193]:
# confirm the insertion
country_table=pd.read_sql_query('select * from countries',con=engine)
country_table

Unnamed: 0,country_id,country_name
0,0,Belgium
1,1,People's Republic of China
2,3,France
3,4,Italy
4,5,Japan
...,...,...
79,501,Fiji
80,645,Croatia
81,673,Singapore
82,674,Slovakia


In [195]:
disciplines=pd.read_sql_query('select * from disciplines',con=engine)
disciplines

Unnamed: 0,discipline_id,discipline
0,0,3x3 Basketball
1,16,Archery
2,69,Artistic Gymnastics
3,93,Artistic Swimming
4,125,Athletics
5,204,Baseball/Softball
6,216,Basketball
7,240,Beach Volleyball
8,288,Cycling Track
9,352,Fencing


In [196]:
atheletes=pd.read_sql_query('select * from atheletes',con=engine)
atheletes

Unnamed: 0,athelete_id,athelete_name,athelete_discipline_id,athelete_country_id
0,50,ABELVIK ROED Magnus,432,272
1,941,BERGERUD Torbjoern,432,272
2,1044,BJOERNSEN Kristian,432,272
3,1244,BRATTSET DALE Kari,432,272
4,1253,BREDAL OFTEDAL Stine,432,272
...,...,...,...,...
6336,9906,TUIVUAKA Asaeli,494,501
6337,9929,TUWAI Jerry,494,501
6338,9965,ULUNISAU Reapi,494,501
6339,10013,VAKURUNABILI Josua,494,501


In [197]:
coaches=pd.read_sql_query('select * from coaches',con=engine)
coaches

Unnamed: 0,coache_id,coache_name,coache_discipline_id,coache_country_id
0,0,ABDELMAGID Wael,404,29
1,12,ALY Kamal,404,29
2,316,SALAM Mohamed,404,29
3,329,SHAWKY Gharib,404,29
4,330,SIAM Zaghloul,404,29
...,...,...,...,...
373,199,KNEZEVIC Nemanja,721,46
374,235,MARTIN Paul,721,119
375,247,MENTOOR Delaine,721,119
376,131,GOJKOVIC Vladimir,721,445
