## Extract Dataset

In [2]:
import json
import os
import urllib.parse
import urllib.request

import pandas as pd
import psycopg2
import requests
from sqlalchemy import create_engine

In [2]:
# Remote locations of the inputs that do not come from the database:
# two CSV files plus the JSON document with the required table schemas
# (the JSON one is fetched in a later cell).
city_url = 'https://raw.githubusercontent.com/rahilpacmann/case-data-wrangling-api/main/city.csv'
country_url = 'https://raw.githubusercontent.com/rahilpacmann/case-data-wrangling-api/main/country.csv'
requirements_table_url = 'https://rahilpacmann.github.io/case-data-wrangling-api/requirements_table.json'

# Read both CSV files into DataFrames (city first, then country).
city_raw, country_raw = map(pd.read_csv, (city_url, country_url))

In [3]:
# Download the requirements document and parse its JSON payload; the response
# is closed automatically when the `with` block exits.
with urllib.request.urlopen(requirements_table_url) as url:
    requirements_table = json.loads(url.read())

# Last expression of the cell: display the parsed structure.
requirements_table

{'actor': [{'column_name': 'actor_id', 'data_type': 'int64'},
  {'column_name': 'last_update', 'data_type': 'datetime64[ns]'},
  {'column_name': 'first_name', 'data_type': 'object'},
  {'column_name': 'last_name', 'data_type': 'object'}],
 'store': [{'column_name': 'store_id', 'data_type': 'int64'},
  {'column_name': 'manager_staff_id', 'data_type': 'int64'},
  {'column_name': 'address_id', 'data_type': 'int64'},
  {'column_name': 'last_update', 'data_type': 'datetime64[ns]'}],
 'address': [{'column_name': 'last_update', 'data_type': 'datetime64[ns]'},
  {'column_name': 'city_id', 'data_type': 'int64'},
  {'column_name': 'address_id', 'data_type': 'int64'},
  {'column_name': 'district', 'data_type': 'object'},
  {'column_name': 'phone', 'data_type': 'object'},
  {'column_name': 'postal_code', 'data_type': 'object'},
  {'column_name': 'address', 'data_type': 'object'},
  {'column_name': 'address2', 'data_type': 'object'}],
 'category': [{'column_name': 'category_id', 'data_type': 'int64

In [5]:
# Connection settings for the PostgreSQL database used by this notebook.
# Every value can be overridden through an environment variable so that real
# credentials never have to be written into the notebook; the fallbacks
# reproduce the original local setup.
dbname = os.environ.get("DVDRENTAL_DB_NAME", "dvdrental")
user = os.environ.get("DVDRENTAL_DB_USER", "postgres")
password = os.environ.get("DVDRENTAL_DB_PASSWORD", "password123")  # local fallback only
host = os.environ.get("DVDRENTAL_DB_HOST", "localhost")
port = os.environ.get("DVDRENTAL_DB_PORT", "5439")

# Percent-encode user and password: characters such as "@", ":" or "/" would
# otherwise corrupt the URL. With the fallback values the resulting string is
# identical to the previously hard-coded one.
engine_str = (
    f"postgresql://{urllib.parse.quote_plus(user)}:{urllib.parse.quote_plus(password)}"
    f"@{host}:{port}/{dbname}"
)
engine = create_engine(engine_str)

In [6]:
def get_table_data(table_name, engine, raise_on_error=False):
    """Read every row of one table into a DataFrame.

    Parameters
    ----------
    table_name : str
        Name of the table to read. It is interpolated directly into the SQL
        text (no quoting or escaping), so it must come from trusted code,
        never from user input.
    engine
        Connection object handed to ``pd.read_sql`` unchanged.
    raise_on_error : bool, default False
        When False (the default, matching the previous behaviour) any
        failure is printed and an empty DataFrame is returned. When True the
        original exception propagates, so a failed load cannot be mistaken
        for an empty table.

    Returns
    -------
    pandas.DataFrame
        The query result, or an empty DataFrame if the read failed and
        ``raise_on_error`` is False.
    """
    try:
        query = f"SELECT * FROM {table_name}"
        return pd.read_sql(query, engine)
    except Exception as e:
        if raise_on_error:
            # Caller opted into fail-fast: re-raise with the original traceback.
            raise
        # Best-effort mode: report the problem and keep the notebook running.
        print(f"Error: {e}")
        return pd.DataFrame()

In [7]:
# Load each database table into its own DataFrame. The variable names on the
# left and the table names on the right are listed in the same order, so each
# DataFrame is bound to the table it was read from.
(
    actor_df,
    store_df,
    address_df,
    category_df,
    customer_df,
    film_actor_df,
    film_category_df,
    inventory_df,
    language_df,
    rental_df,
    staff_df,
    payment_df,
    film_df,
) = [
    get_table_data(table, engine)
    for table in (
        'actor',
        'store',
        'address',
        'category',
        'customer',
        'film_actor',
        'film_category',
        'inventory',
        'language',
        'rental',
        'staff',
        'payment',
        'film',
    )
]

In [11]:
# Lookup from table name to the DataFrame loaded for that table.
table_dict = dict(
    actor=actor_df,
    store=store_df,
    address=address_df,
    category=category_df,
    customer=customer_df,
    film_actor=film_actor_df,
    film_category=film_category_df,
    inventory=inventory_df,
    language=language_df,
    rental=rental_df,
    staff=staff_df,
    payment=payment_df,
    film=film_df,
)

## Read Data

In [27]:
# Preview the first rows of the city data loaded from CSV.
city_raw.head()

Unnamed: 0,city_id,city,country
0,1,A Corua (La Corua),Spain
1,2,Abha,Saudi Arabia
2,3,Abu Dhabi,United Arab Emirates
3,4,Acua,Mexico
4,5,Adana,Turkey


In [28]:
# Preview the first rows of the country data loaded from CSV.
country_raw.head()

Unnamed: 0,country,last_update
0,Afghanistan,2006-02-15 09:44:00
1,Algeria,2006-02-15 09:44:00
2,American Samoa,2006-02-15 09:44:00
3,Angola,2006-02-15 09:44:00
4,Anguilla,2006-02-15 09:44:00


In [10]:
# Preview the first rows of the actor table read from the database.
actor_df.head()

Unnamed: 0,actor_id,first_name,last_name,last_update
0,1,Penelope,Guiness,2013-05-26 14:47:57.620
1,2,Nick,Wahlberg,2013-05-26 14:47:57.620
2,3,Ed,Chase,2013-05-26 14:47:57.620
3,4,Jennifer,Davis,2013-05-26 14:47:57.620
4,5,Johnny,Lollobrigida,2013-05-26 14:47:57.620
