# Read relational database
- pandas connects to the database through SQLAlchemy
- Connection string: dialect+driver://username:password@host:port/database
- https://docs.sqlalchemy.org/en/13/core/engines.html
- Start the sample database: `cd postgres_sample_pagila && docker-compose up`
- Use any sql client of your choice - dbeaver, dbvisualizer, pgAdmin etc.

![PAGILA_ER](./postgres_sample_pagila/pagila_er.png)

In [8]:
# Constants
import os
from urllib.parse import quote

# Credentials are read from the environment; both fall back to 'postgres'.
DB_USER = os.getenv('DB_USER','postgres')
DB_PASSWORD = os.getenv('DB_PASSWORD','postgres')

# Percent-encode the user and password before embedding them in the URL.
# Characters such as '@', '/', ':' or a space would otherwise be read as URL
# delimiters and produce a connection string that points somewhere else.
# safe="" makes quote() escape '/' as well (it is left alone by default).
CONNECTION_STRING = "postgresql://{0}:{1}@localhost/pagila".format(
    quote(DB_USER, safe=""),
    quote(DB_PASSWORD, safe=""),
)

# read_sql_table
- Given a table name and a SQLAlchemy connectable, returns a DataFrame. 
- This function does not support DBAPI connections

In [None]:
import pandas as pd

# Load every column of the public.film table into a DataFrame.
# The connection is given as a database URL string.
df_film = pd.read_sql_table(
    table_name='film',
    schema='public',
    con=CONNECTION_STRING,
)

# Last expression in the cell: the notebook renders the frame.
df_film

# read_sql_query
- Returns a DataFrame corresponding to the result set of the query string.

In [None]:
import pandas as pd

# Execute the SELECT and return its result set as a DataFrame.
df_film = pd.read_sql_query(
    sql="select * from film",
    con=CONNECTION_STRING,
)
# Last expression in the cell: the notebook renders the frame.
df_film

# read_sql
- This function is a convenience wrapper around read_sql_table and read_sql_query (for backward compatibility). 
- It will delegate to the specific function depending on the provided input. 
- A SQL query will be routed to read_sql_query, while a database table name will be routed to read_sql_table.

In [None]:
import pandas as pd

# read_sql receives a query string here, so it is handled like read_sql_query.
df_film = pd.read_sql(
    sql="select * from film",
    con=CONNECTION_STRING,
)
# Last expression in the cell: the notebook renders the frame.
df_film

# Select specific columns

In [None]:
import pandas as pd

# Read only the listed columns of public.film instead of the whole table.
df_film = pd.read_sql_table(
    table_name='film',
    schema='public',
    columns=['title','description','release_year'],
    con=CONNECTION_STRING,
)

# Last expression in the cell: the notebook renders the frame.
df_film

# Using chunksize

In [None]:
# rental has 16k rows, so read it in chunks. When chunksize is given,
# read_sql_table returns an iterator of DataFrames instead of one DataFrame;
# chunksize is the number of rows to include in each chunk.
import pandas as pd
df_rental_iterator = pd.read_sql_table('rental',
                            con=CONNECTION_STRING,
                            schema='public', chunksize=5000)

# Collect the chunks and combine them once at the end.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending inside the loop re-copies all previously read rows on every pass.
# The idea of chunking is to accommodate system resource constraints, not
# necessarily batching: any per-chunk processing (filtering, aggregating,
# writing out) belongs inside this loop, so that only the reduced result is
# kept in memory.
rental_chunks = []
for df in df_rental_iterator:
    rental_chunks.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the iterator yielded nothing. ignore_index=True renumbers the rows
# 0..n-1 so that labels are unique across chunks.
df_rental = pd.concat(rental_chunks, ignore_index=True) if rental_chunks else pd.DataFrame()

# Last expression in the cell: the notebook renders the frame.
df_rental