In [40]:
# CHANGE INFO TO RUN ON YOUR LOCAL
host = "127.0.0.1"
port = "5432"
# database = "MSDS691"
database = 'msds691'
user = "postgres"


In [41]:
import psycopg2
import pandas as pd
import numpy as np

conn = psycopg2.connect(host=host, port=port, database=database, user=user)
cur = conn.cursor()


In [42]:
def select_query(query):
    df = pd.read_sql_query(query, conn, coerce_float=False)
    df.replace([None], np.nan, inplace=True)

    return df


In [43]:
from IPython.core.display import display, HTML


def display_side_by_side(dfs: list, captions: list):
    """Display tables side by side to save vertical space
    Input:
        dfs: list of pandas.DataFrame
        captions: list of table captions
    """

    output = ""
    combined = dict(zip(captions, dfs))
    for caption, df in combined.items():
        output += df.style.set_table_attributes("style='display:inline'")\
                    .set_caption(caption)._repr_html_()
        output += "\xa0\xa0\xa0"
    display(HTML(output))


In [44]:
cur.execute(f'''DROP TABLE IF EXISTS names;''')
cur.execute(f'''DROP TABLE IF EXISTS transactions;''')


In [45]:
create = f'''

DROP TABLE IF EXISTS names;

CREATE TABLE names
(id INTEGER,
name VARCHAR,
PRIMARY KEY (id))
;

INSERT INTO names
VALUES
(1, 'Jon Smith'),
(2, 'Sarah Adams'),
(3, 'Maria Lopez')
;'''

cur.execute(create)
conn.commit()

create = f'''

DROP TABLE IF EXISTS transactions;

CREATE TABLE transactions
(id INTEGER,
 amount NUMERIC,
 PRIMARY KEY (id))
;

INSERT INTO transactions
VALUES
(1, 10),
(3, 20),
(7, 50)
;'''

cur.execute(create)
conn.commit()


In [46]:
query = f'''

SELECT *
FROM names

;'''
df_names = select_query(query)
df_names


Unnamed: 0,id,name
0,1,Jon Smith
1,2,Sarah Adams
2,3,Maria Lopez


In [47]:
query = f'''

SELECT *
FROM transactions

;'''
df_transactions = select_query(query)
df_transactions


Unnamed: 0,id,amount
0,1,10
1,3,20
2,7,50


### Inner Join

In [11]:
query = f'''

SELECT  names.id, names.name, transactions.amount
FROM names INNER JOIN transactions
    ON (names.id = transactions.id)

;'''
df_inner_join = select_query(query)
display_side_by_side([df_names, df_transactions, df_inner_join],
                     ['Left Table', 'Right Table', 'Inner Join Output'])


Unnamed: 0,id,name
0,1,Jon Smith
1,2,Sarah Adams
2,3,Maria Lopez

Unnamed: 0,id,amount
0,1,10
1,3,20
2,7,50

Unnamed: 0,id,name,amount
0,1,Jon Smith,10
1,3,Maria Lopez,20


### Left Join

In [12]:
query = f'''

SELECT  names.id, names.name, transactions.amount
FROM names LEFT JOIN transactions
    ON (names.id = transactions.id)

;'''
df_left_join = select_query(query)
display_side_by_side([df_names, df_transactions, df_left_join],
                     ['Left Table', 'Right Table', 'Left Join Output'])


Unnamed: 0,id,name
0,1,Jon Smith
1,2,Sarah Adams
2,3,Maria Lopez

Unnamed: 0,id,amount
0,1,10
1,3,20
2,7,50

Unnamed: 0,id,name,amount
0,1,Jon Smith,10.0
1,2,Sarah Adams,
2,3,Maria Lopez,20.0


### Right Join

In [14]:
query = f'''

SELECT  transactions.id, names.name, transactions.amount
FROM names RIGHT JOIN transactions
    ON (names.id = transactions.id)

;'''
df_right_join = select_query(query)
display_side_by_side([df_names, df_transactions, df_right_join],
                     ['Left Table', 'Right Table', 'Right Join Output'])


Unnamed: 0,id,name
0,1,Jon Smith
1,2,Sarah Adams
2,3,Maria Lopez

Unnamed: 0,id,amount
0,1,10
1,3,20
2,7,50

Unnamed: 0,id,name,amount
0,1,Jon Smith,10
1,3,Maria Lopez,20
2,7,,50


### Full Join

In [15]:
query = f'''

SELECT COALESCE(names.id,transactions.id) as id,
       names.name, transactions.amount
FROM names FULL JOIN transactions
    ON (names.id = transactions.id)

;'''
df_full_join = select_query(query)
display_side_by_side([df_names, df_transactions, df_full_join],
                     ['Left Table', 'Right Table', 'Full Join Output'])


Unnamed: 0,id,name
0,1,Jon Smith
1,2,Sarah Adams
2,3,Maria Lopez

Unnamed: 0,id,amount
0,1,10
1,3,20
2,7,50

Unnamed: 0,id,name,amount
0,1,Jon Smith,10.0
1,2,Sarah Adams,
2,3,Maria Lopez,20.0
3,7,,50.0


### Cross Join

In [29]:
import pandas.io.sql as sqlio

In [32]:
query = f'''

SELECT *
FROM names CROSS JOIN transactions

;'''
# df_cross_join = select_query(query)
# display_side_by_side([df_names, df_transactions, df_cross_join],
# ['Left Table', 'Right Table', 'Cross Join Output'])
dat = sqlio.read_sql_query(query, conn)
dat


Unnamed: 0,id,name,id.1,amount
0,1,Jon Smith,1,10.0
1,1,Jon Smith,3,20.0
2,1,Jon Smith,7,50.0
3,2,Sarah Adams,1,10.0
4,2,Sarah Adams,3,20.0
5,2,Sarah Adams,7,50.0
6,3,Maria Lopez,1,10.0
7,3,Maria Lopez,3,20.0
8,3,Maria Lopez,7,50.0


### Add dob_table

In [33]:
create = f'''
DROP TABLE IF EXISTS dob_table;


CREATE TABLE dob_table
(id INTEGER,
 dob date,
 PRIMARY KEY (id));

INSERT INTO dob_table
VALUES
(1, '1982-09-29'),
(3, '1996-02-16')
;'''

cur.execute(create)
conn.commit()


### Select all dob_table

In [34]:
query = f'''

SELECT *
FROM dob_table

;'''
df_dob = select_query(query)
df_dob


Unnamed: 0,id,dob
0,1,1982-09-29
1,3,1996-02-16


### Join 3 tables

In [35]:
query = f'''

SELECT  names.id, names.name, transactions.amount, dob_table.dob
FROM names
    INNER JOIN transactions
        ON (names.id = transactions.id)
    INNER JOIN dob_table
        ON (names.id = dob_table.id)

;'''
df_three_tables = select_query(query)
display_side_by_side([df_names, df_transactions, df_dob, df_three_tables],
                     ['names', 'transactions', 'dob', 'all three tables'])


Unnamed: 0,id,name
0,1,Jon Smith
1,2,Sarah Adams
2,3,Maria Lopez

Unnamed: 0,id,amount
0,1,10
1,3,20
2,7,50

Unnamed: 0,id,dob
0,1,1982-09-29
1,3,1996-02-16

Unnamed: 0,id,name,amount,dob
0,1,Jon Smith,10,1982-09-29
1,3,Maria Lopez,20,1996-02-16


### One-to-many Join

In [36]:
create = f'''

DROP TABLE IF EXISTS long_transactions;

CREATE TABLE long_transactions
(id INTEGER,
amount NUMERIC);

INSERT INTO long_transactions
VALUES
(1, 10),
(1, 50),
(2, 45),
(3, 20),
(3, 70),
(7, 50)
;'''
cur.execute(create)
conn.commit()


### Select all from long_transactions

In [37]:
query = f'''

SELECT  *
FROM long_transactions

;'''
df_long_tran = select_query(query)
df_long_tran


Unnamed: 0,id,amount
0,1,10
1,1,50
2,2,45
3,3,20
4,3,70
5,7,50


### One-to-many

In [38]:
query = f'''

SELECT  names.id, names.name, long_transactions.amount
FROM names LEFT JOIN long_transactions
    ON (names.id = long_transactions.id)

;'''
df_one_many = select_query(query)
display_side_by_side([df_names, df_long_tran, df_one_many],
                     ['names', 'transactions', 'One-to-many'])


Unnamed: 0,id,name
0,1,Jon Smith
1,2,Sarah Adams
2,3,Maria Lopez

Unnamed: 0,id,amount
0,1,10
1,1,50
2,2,45
3,3,20
4,3,70
5,7,50

Unnamed: 0,id,name,amount
0,1,Jon Smith,10
1,1,Jon Smith,50
2,2,Sarah Adams,45
3,3,Maria Lopez,20
4,3,Maria Lopez,70


In [48]:
cur.close()
conn.close()
