In [2]:
import pandas as pd
from sqlalchemy import create_engine, types
from sqlalchemy import text # to be able to pass string

In [3]:
# Let's load values from the .env file
from dotenv import dotenv_values

# Read all key/value pairs from the .env file into `config`
config = dotenv_values()

# Pull out the PostgreSQL login settings in one pass.
# The key labels below must match the ones used in your .env file!
pg_user, pg_host, pg_port, pg_db, pg_schema, pg_pass = (
    config[key]
    for key in (
        'POSTGRES_USER',
        'POSTGRES_HOST',
        'POSTGRES_PORT',
        'POSTGRES_DB',
        'POSTGRES_SCHEMA',
        'POSTGRES_PASS',
    )
)

In [4]:
# Build the connection URL from the values read from the .env file.
# User name and password are percent-encoded so that characters such as
# '@', ':', '/' or '%' inside the credentials cannot break URL parsing.
from urllib.parse import quote

url = (
    f'postgresql://{quote(pg_user, safe="")}:{quote(pg_pass, safe="")}'
    f'@{pg_host}:{pg_port}/{pg_db}'
)

# Show the connection target with the password masked, so the secret is
# never written into the notebook's saved output.
f'postgresql://{pg_user}:***@{pg_host}:{pg_port}/{pg_db}'

'postgresql://matthias:***@data-analytics-course-2.c8g8r1deus2v.eu-central-1.rds.amazonaws.com:5432/postgres'

In [5]:
# Create the SQLAlchemy engine for the connection URL;
# echo=False keeps SQL statement logging switched off.
engine = create_engine(
    url,
    echo=False,
)

In [7]:
# Statement that points unqualified table names at the schema from the .env file
set_schema_stmt = text(f'SET search_path TO {pg_schema};')

# engine.begin() opens a transaction and commits it when the block exits.
# NOTE(review): the setting is applied on the connection used here; later
# statements presumably rely on the pool reusing it - confirm.
with engine.begin() as conn:
    result = conn.execute(set_schema_stmt)

In [8]:
# Load the flights CSV extract into a DataFrame
flights = pd.read_csv(
    filepath_or_buffer='data/flights_from_2021_7_until_2021_9.csv',
)

# Preview the first five rows
flights.head(n=5)

Unnamed: 0,flight_date,dep_time,sched_dep_time,dep_delay,arr_time,sched_arr_time,arr_delay,airline,tail_number,flight_number,origin,dest,air_time,actual_elapsed_time,distance,cancelled,diverted
0,2021-07-01,136.0,14,82.0,729.0,614,75.0,F9,N312FR,1490,ONT,ORD,193.0,233.0,1700,0,0
1,2021-07-01,17.0,15,2.0,656.0,652,4.0,DL,N917DU,929,FAI,SLC,267.0,279.0,2183,0,0
2,2021-07-01,130.0,16,74.0,654.0,556,58.0,F9,N706FR,678,DEN,MCO,183.0,204.0,1546,0,0
3,2021-07-01,28.0,24,4.0,738.0,734,4.0,F9,N381FR,1286,PHX,MCO,228.0,250.0,1849,0,0
4,2021-07-01,22.0,25,-3.0,703.0,720,-17.0,F9,N233FR,2022,LAS,CVG,193.0,221.0,1678,0,0


In [9]:
# Column name -> SQLAlchemy type used when creating the database table
flights_dtypes = dict(
    # date of the flight
    flight_date=types.DateTime,
    # departure / arrival columns
    dep_time=types.Integer,
    sched_dep_time=types.Integer,
    dep_delay=types.Integer,
    arr_time=types.Integer,
    sched_arr_time=types.Integer,
    arr_delay=types.Integer,
    # carrier and aircraft identifiers
    airline=types.String,
    tail_number=types.String,
    flight_number=types.Integer,
    # route
    origin=types.String,
    dest=types.String,
    # duration and distance columns
    air_time=types.Integer,
    actual_elapsed_time=types.Integer,
    distance=types.Integer,
    # flag columns
    cancelled=types.Integer,
    diverted=types.Integer,
)

In [None]:
# Write the flights DataFrame to the database, replacing an existing
# 'flights' table if there is one, using the column types defined in
# flights_dtypes.
# The target schema is passed explicitly rather than relying on a
# search_path that was set on one earlier connection: the pool is not
# guaranteed to hand that same connection back for this write.
flights.to_sql(
    'flights',
    engine,
    schema=pg_schema,
    if_exists='replace',
    dtype=flights_dtypes,
)