In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
from sqlalchemy import create_engine
engine=create_engine(f'postgresql://{os.environ.get("username")}:{os.environ.get("password")}@localhost:5432')

In [3]:
# You cannot use engine.execute() however, because postgres does not allow you to create databases inside 
# transactions, and sqlalchemy always tries to run queries in a transaction. To get around this, get the 
# underlying connection from the engine
# But the connection will still be inside a transaction, so you have to end the open transaction with a commit
with engine.connect() as conn: 
    conn.execute('commit')
    conn.execute('create database pagila')

ProgrammingError: (psycopg2.ProgrammingError) database "pagila" already exists

[SQL: create database pagila]
(Background on this error at: http://sqlalche.me/e/13/f405)

In [4]:
import pandas as pd

In [5]:
tables_dir={each_table.split('.csv')[0]: each_table for each_table in os.listdir('data/') if each_table.split('.')[-1]=='csv'}

In [6]:
engine=create_engine(f'postgresql://{os.environ.get("username")}:{os.environ.get("password")}@localhost:5432/pagila')
for each_table in tables_dir.keys(): 
    pd.read_csv(f'data/{tables_dir[each_table]}').to_sql(each_table, con=engine, index=False, if_exists='replace')

In [15]:
for each_table in tables_dir.keys(): 
    print(f'Schema for {each_table}: ')
    query_string=f"select column_name, data_type from information_schema.columns\
                   where table_name='{each_table}'"
    display(engine.execute(query_string).fetchall())

Schema for staff: 


[('staff_id', 'bigint'),
 ('first_name', 'text'),
 ('last_name', 'text'),
 ('address_id', 'bigint'),
 ('email', 'text'),
 ('store_id', 'bigint'),
 ('active', 'boolean'),
 ('username', 'text'),
 ('password', 'text'),
 ('last_update', 'text'),
 ('picture', 'double precision')]

Schema for actor: 


[('actor_id', 'bigint'),
 ('first_name', 'text'),
 ('last_name', 'text'),
 ('last_update', 'text')]

Schema for film: 


[('film_id', 'bigint'),
 ('title', 'text'),
 ('description', 'text'),
 ('release_year', 'bigint'),
 ('language_id', 'bigint'),
 ('original_language_id', 'double precision'),
 ('rental_duration', 'bigint'),
 ('rental_rate', 'double precision'),
 ('length', 'bigint'),
 ('replacement_cost', 'double precision'),
 ('rating', 'text'),
 ('last_update', 'text'),
 ('special_features', 'text'),
 ('fulltext', 'text')]

Schema for customer: 


[('customer_id', 'bigint'),
 ('store_id', 'bigint'),
 ('first_name', 'text'),
 ('last_name', 'text'),
 ('email', 'text'),
 ('address_id', 'bigint'),
 ('activebool', 'boolean'),
 ('create_date', 'text'),
 ('last_update', 'text'),
 ('active', 'bigint')]

Schema for film_actor: 


[('actor_id', 'bigint'), ('film_id', 'bigint'), ('last_update', 'text')]

Schema for address: 


[('address_id', 'bigint'),
 ('address', 'text'),
 ('address2', 'double precision'),
 ('district', 'text'),
 ('city_id', 'bigint'),
 ('postal_code', 'double precision'),
 ('phone', 'double precision'),
 ('last_update', 'text')]

Schema for city: 


[('city_id', 'bigint'),
 ('city', 'text'),
 ('country_id', 'bigint'),
 ('last_update', 'text')]

Schema for country: 


[('country_id', 'bigint'), ('country', 'text'), ('last_update', 'text')]

Schema for payment: 


[('payment_id', 'bigint'),
 ('customer_id', 'bigint'),
 ('staff_id', 'bigint'),
 ('rental_id', 'bigint'),
 ('amount', 'double precision'),
 ('payment_date', 'text')]

Schema for customer_list: 


[('id', 'bigint'),
 ('name', 'text'),
 ('address', 'text'),
 ('zip code', 'bigint'),
 ('phone', 'bigint'),
 ('city', 'text'),
 ('country', 'text'),
 ('notes', 'text'),
 ('sid', 'bigint')]

Schema for rental: 


[('rental_id', 'bigint'),
 ('rental_date', 'text'),
 ('inventory_id', 'bigint'),
 ('customer_id', 'bigint'),
 ('return_date', 'text'),
 ('staff_id', 'bigint'),
 ('last_update', 'text')]

Schema for store: 


[('store_id', 'bigint'),
 ('manager_staff_id', 'bigint'),
 ('address_id', 'bigint'),
 ('last_update', 'date')]

Schema for inventory: 


[('inventory_id', 'bigint'),
 ('film_id', 'bigint'),
 ('store_id', 'bigint'),
 ('last_update', 'text')]

In [7]:
query_string='select * from store'
pd.read_sql(query_string, con=engine)

Unnamed: 0,store_id,manager_staff_id,address_id,last_update
0,1,1,1,2006-02-15 09:57:12
1,2,2,2,2006-02-15 09:57:12


In [11]:
pd.read_sql(query_string, con=engine)

Unnamed: 0,store_id,manager_staff_id,address_id,last_update
0,1,1,1,2006-02-15
1,2,2,2,2006-02-15


In [12]:
# engine.execute("alter table store alter column last_update type date using to_date(last_update, 'YYYY-MM-DD')")
engine.execute("alter table actor alter column last_update type date using to_date(last_update, 'YYYY-MM-DD')")

ProgrammingError: (psycopg2.ProgrammingError) function to_date(date, unknown) does not exist
LINE 1: ...le store alter column last_update type date using to_date(la...
                                                             ^
HINT:  No function matches the given name and argument types. You might need to add explicit type casts.

[SQL: alter table store alter column last_update type date using to_date(last_update, 'YYYY-MM-DD')]
(Background on this error at: http://sqlalche.me/e/13/f405)