In [1]:
import psycopg2

In [2]:
def create_connection(db_name, db_user, db_password, db_host, db_port):
    connection = None
    try:
        connection = psycopg2.connect(
            database=db_name,
            user=db_user,
            password=db_password,
            host=db_host,
            port=db_port,
        )
        print("Connection to PostgreSQL DB successful")
    except OperationalError as e:
        print(f"The error '{e}' occurred")
    return connection

In [3]:
db_name = "Project1"
db_user = "postgres"
db_password = "postgres"  
db_host = "localhost"  
db_port = "5432"

In [4]:
connection = create_connection(db_name, db_user, db_password, db_host, db_port)

Connection to PostgreSQL DB successful


In [5]:
cursor = connection.cursor()

In [6]:
import pandas as pd

In [7]:
from sqlalchemy import create_engine

In [8]:
db_server = "dw_2024"

In [9]:
connection_string = f"postgresql+psycopg2://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
engine = create_engine(connection_string)

In [10]:
print(engine)

Engine(postgresql+psycopg2://postgres:***@localhost:5432/Project1)


In [11]:
from pathlib import Path
script_path = Path.cwd().parent

In [12]:
data_path = script_path / "data" / "Project1"
countrycsv = data_path / "list-of-countries_areas-by-continent-2024.csv"

In [13]:
countrydf = pd.read_csv("./list-of-countries_areas-by-continent-2024.csv", header=None)

In [14]:
countrydf.columns = ["country", "continent"]

In [15]:
countrydf

Unnamed: 0,country,continent
0,country,region
1,India,Asia
2,China,Asia
3,United States,North America
4,Indonesia,Asia
...,...,...
230,Montserrat,North America
231,Falkland Islands,South America
232,Niue,Oceania
233,Tokelau,Oceania


In [16]:
cursor.execute("CREATE TABLE olympicHosts (hostID VARCHAR(255) PRIMARY KEY, endDate TIMESTAMP WITH TIME ZONE, startDate TIMESTAMP WITH TIME ZONE, location VARCHAR(255), name VARCHAR(255), season VARCHAR(255), year INT);")

In [17]:
connection.commit()

In [25]:
olympic_hosts = pd.read_csv("./olympic_hosts.csv", header=0)

In [30]:
olympic_hosts['game_end_date'] = pd.to_datetime(olympic_hosts['game_end_date'], format='%Y-%m-%dT%H:%M:%SZ')

In [32]:
olympic_hosts['game_start_date'] = pd.to_datetime(olympic_hosts['game_start_date'], format='%Y-%m-%dT%H:%M:%SZ')

In [38]:
olympic_hosts['game_slug'] = olympic_hosts['game_slug'].astype(str)

In [42]:
olympic_hosts['game_year'] = pd.to_numeric(olympic_hosts['game_year'])

In [43]:
print(olympic_hosts.dtypes)

game_slug                  object
game_end_date      datetime64[ns]
game_start_date    datetime64[ns]
game_location              object
game_name                  object
game_season                object
game_year                   int64
dtype: object


In [46]:
olympic_hosts

Unnamed: 0,hostID,endDate,startDate,location,name,season,year
0,beijing-2022,2022-02-20 12:00:00,2022-02-04 15:00:00,China,Beijing 2022,Winter,2022
1,tokyo-2020,2021-08-08 14:00:00,2021-07-23 11:00:00,Japan,Tokyo 2020,Summer,2020
2,pyeongchang-2018,2018-02-25 08:00:00,2018-02-08 23:00:00,Republic of Korea,PyeongChang 2018,Winter,2018
3,rio-2016,2016-08-21 21:00:00,2016-08-05 12:00:00,Brazil,Rio 2016,Summer,2016
4,sochi-2014,2014-02-23 16:00:00,2014-02-07 04:00:00,Russian Federation,Sochi 2014,Winter,2014
5,london-2012,2012-08-12 19:00:00,2012-07-27 07:00:00,Great Britain,London 2012,Summer,2012
6,vancouver-2010,2010-02-28 04:00:00,2010-02-12 16:00:00,Canada,Vancouver 2010,Winter,2010
7,beijing-2008,2008-08-24 12:00:00,2008-08-08 00:00:00,China,Beijing 2008,Summer,2008
8,turin-2006,2006-02-26 19:00:00,2006-02-10 07:00:00,Italy,Turin 2006,Winter,2006
9,athens-2004,2004-08-29 18:00:00,2004-08-13 06:00:00,Greece,Athens 2004,Summer,2004


In [48]:
olympic_hosts.columns = ["hostid", "enddate", "startdate","location","name","season","year"]

In [49]:
olympic_hosts.to_sql("olympichosts", con=engine, if_exists="append", index=False)

53