## Get Denormalized tables from 3NF normalized
The basics of modeling data from normalized to denormalized form

In [1]:
# Importing library
import psycopg2

In [2]:
# Connection
# Open a connection to the local `denormdb` database. The DSN is hard-coded
# for this demo.
try:
    conn = psycopg2.connect("host=127.0.0.1 dbname=denormdb user=myuser password=password")
except psycopg2.Error as e:
    print("Error: Could not connect to database denormdb")
    print(e)
    # Without a connection `conn` is never bound, so carrying on would only
    # fail later with an unrelated NameError. Surface the real cause instead.
    raise

# Cursor
# The cursor is the handle used to execute SQL on the connection.
try:
    cur = conn.cursor()
except psycopg2.Error as e:
    print("Error: Could not creat the cursor")
    print(e)
    # Same reasoning as above: nothing after this point can work without `cur`.
    raise

# Autocommit
# Each statement is committed immediately, so no explicit conn.commit() calls
# are needed.
conn.set_session(autocommit=True)

#### Let's start with our normalized (3NF) database set of tables 

`Table Name: transactions2 
column 0: transaction Id
column 1: Customer Name
column 2: Cashier Id
column 3: Year `

`Table Name: albums_sold
column 0: Album Id
column 1: Transaction Id
column 2: Album Name` 

`Table Name: employees
column 0: Employee Id
column 1: Employee Name `

`Table Name: sales
column 0: Transaction Id
column 1: Amount Spent`

<img src="images/table2.png" width="350" height="350"> <img src="images/table3.png" width="180" height="180">

In [6]:
# Creating tables
# One CREATE statement per normalized (3NF) table. IF NOT EXISTS means an
# already-existing table does not raise when this cell is executed again.
create_statements = (
    "CREATE TABLE IF NOT EXISTS transactions2("
    "transaction_id INT, customer_name VARCHAR, cashier_id INT, year INT)",
    "CREATE TABLE IF NOT EXISTS albums_sold("
    "album_id INT, transaction_id INT, album_name VARCHAR)",
    "CREATE TABLE IF NOT EXISTS employees("
    "employee_id INT, employee_name VARCHAR)",
    "CREATE TABLE IF NOT EXISTS sales("
    "transaction_id INT, amount_spent INT)",
)

try:
    for statement in create_statements:
        cur.execute(statement)
except psycopg2.Error as e:
    # These statements create tables, not a database, so report that.
    print("Error: Could not create tables")
    print(e)

In [7]:
# Inserting values
# Seed rows for the four normalized tables. Each entry pairs a parameterized
# INSERT statement with the rows to insert, in the order they are executed.
seed_data = (
    (
        "INSERT INTO transactions2(transaction_id, customer_name, cashier_id, year) "
        "    VALUES (%s, %s, %s, %s)",
        (
            (1, "Amanda", 1, 2000),
            (2, "Toby", 1, 2000),
            (3, "Max", 2, 2018),
        ),
    ),
    (
        "INSERT INTO albums_sold(album_id, transaction_id, album_name) "
        "    VALUES (%s, %s, %s)",
        (
            (1, 1, "Rubber_Soul"),
            (2, 1, "Let It Be"),
            (3, 2, "My Generation"),
            (4, 3, "Meet the Beatles"),
            (5, 3, "Help!"),
        ),
    ),
    (
        "INSERT INTO employees(employee_id, employee_name) "
        "    VALUES (%s, %s)",
        (
            (1, "Sam"),
            (2, "Bob"),
        ),
    ),
    (
        "INSERT INTO sales(transaction_id, amount_spent) "
        "    VALUES (%s, %s)",
        (
            (1, 40),
            (2, 19),
            (3, 45),
        ),
    ),
)

try:
    for insert_sql, table_rows in seed_data:
        for values in table_rows:
            # Values are passed separately so the driver handles quoting.
            cur.execute(insert_sql, values)
except psycopg2.Error as e:
    print("Error: Could not insert values")
    print(e)

In [11]:
# Printing tables
# Dump every row of each normalized table under a header naming the table.
for table_name in ("transactions2", "albums_sold", "employees", "sales"):
    print("\n---- Table " + table_name + " ----")
    cur.execute("SELECT * FROM " + table_name)
    # Pull rows one at a time until the cursor reports no more.
    record = cur.fetchone()
    while record:
        print(record)
        record = cur.fetchone()


---- Table transactions2 ----
(1, 'Amanda', 1, 2000)
(2, 'Toby', 1, 2000)
(3, 'Max', 2, 2018)

---- Table albums_sold ----
(1, 1, 'Rubber_Soul')
(2, 1, 'Let It Be')
(3, 2, 'My Generation')
(4, 3, 'Meet the Beatles')
(5, 3, 'Help!')

---- Table employees ----
(1, 'Sam')
(2, 'Bob')

---- Table sales ----
(1, 40)
(2, 19)
(3, 45)


### Let's say we need a query that gives:

`transaction_id
 customer_name
 cashier name
 year 
 albums sold
 amount spent` 

In [28]:
# Combine the four normalized tables: albums_sold and sales are matched on
# transaction_id, and employees is matched on the transaction's cashier_id to
# obtain the cashier's name.
try:
    cur.execute(
        "SELECT transactions2.transaction_id, customer_name, employee_name, year, album_name, amount_spent "
        "FROM ((transactions2 JOIN albums_sold ON transactions2.transaction_id = albums_sold.transaction_id) "
        "JOIN employees ON employees.employee_id = transactions2.cashier_id) "
        "JOIN sales ON sales.transaction_id = transactions2.transaction_id;"
    )
except psycopg2.Error as e:
    print("Error: Could not execute the query")
    print(e)
else:
    # Fetch only when the query succeeded. After a failed execute there is no
    # result set to read, and fetching would raise a second error.
    row = cur.fetchone()
    while row:
        print(row)
        row = cur.fetchone()

(1, 'Amanda', 'Sam', 2000, 'Rubber_Soul', 40)
(1, 'Amanda', 'Sam', 2000, 'Let It Be', 40)
(2, 'Toby', 'Sam', 2000, 'My Generation', 19)
(3, 'Max', 'Bob', 2018, 'Meet the Beatles', 45)
(3, 'Max', 'Bob', 2018, 'Help!', 45)


We had to perform a three-way `JOIN` to get there. While it's great to have that flexibility, we need to remember that `JOIN`s are slow, and if we have a read-heavy workload that requires low-latency queries, we want to reduce the number of `JOIN`s. Let's think about denormalizing our normalized tables.

### Let's denormalize 

###  Query 1: `select transaction_id, customer_name, amount_spent FROM <min number of tables>`

One way to do this would be to do a JOIN on the `sales` and `transactions2` tables, but we want to minimize the use of `JOINS`.  

To reduce the number of tables, first create a new `transactions` table that holds the columns of `transactions2` plus `amount_spent`, so that you will not need to do a JOIN at all. 

`Table Name: transactions 
column 0: transaction Id
column 1: Customer Name
column 2: Cashier Id
column 3: Year
column 4: amount_spent`

<img src="images/table4.png" width="350" height="350">


In [30]:
# Creating table
# Denormalized table: the transactions2 columns plus amount_spent.
create_transactions_sql = (
    "CREATE TABLE IF NOT EXISTS transactions("
    "    transaction_id INT, customer_name VARCHAR, cashier_id INT, year INT, amount_spent INT)"
)

try:
    cur.execute(create_transactions_sql)
except psycopg2.Error as e:
    print("Error: Could not create transactions TABLE")
    print(e)

In [32]:
# Inserting values
# One row per transaction, with the amount spent stored alongside it.
insert_transaction_sql = (
    "INSERT INTO transactions(transaction_id, customer_name, cashier_id, year, amount_spent) "
    "    VALUES (%s, %s, %s, %s, %s)"
)
transaction_rows = (
    (1, "Amanda", 1, 2000, 40),
    (2, "Toby", 1, 2000, 19),
    (3, "Max", 2, 2018, 45),
)

try:
    for values in transaction_rows:
        cur.execute(insert_transaction_sql, values)
except psycopg2.Error as e:
    print("Error: Could not insert values in transactions TABLE")
    print(e)

In [33]:
# Query 1 against the denormalized table: a single-table SELECT, no JOIN.
try:
    cur.execute("SELECT transaction_id, customer_name, amount_spent FROM transactions")
except psycopg2.Error as e:
    print("Error: Could not execute SELECT")
    print(e)
else:
    # Fetch only when the SELECT succeeded. After a failed execute there is
    # no result set to read, and fetching would raise a second error.
    row = cur.fetchone()
    while row:
        print(row)
        row = cur.fetchone()

(1, 'Amanda', 40)
(2, 'Toby', 19)
(3, 'Max', 45)


### Query 2: `select cashier_name, SUM(amount_spent) FROM <min number of tables> GROUP BY cashier_name` 

To avoid using any `JOINS`, first create a new table with just the information we need. 

`Table Name: cashier_sales
col: Transaction Id
Col: Cashier Name
Col: Cashier Id
col: Amount_Spent
`

<img src="images/table5.png" width="350" height="350">

In [35]:
# Creating tables
# Denormalized table holding, per transaction, the cashier and the amount spent.
create_cashier_sales_sql = (
    "CREATE TABLE IF NOT EXISTS cashier_sales("
    "    transaction_id INT, cashier_name VARCHAR, cashier_id INT, amount_spent INT);"
)

try:
    cur.execute(create_cashier_sales_sql)
except psycopg2.Error as e:
    print("Error: Could not create cashier_sales TABLE")
    print(e)

In [36]:
# Inserting values
# One row per transaction, carrying the cashier's name and id with the amount.
insert_cashier_sale_sql = (
    "INSERT INTO cashier_sales(transaction_id, cashier_name, cashier_id, amount_spent) "
    "    VALUES (%s, %s, %s, %s)"
)
cashier_sale_rows = (
    (1, "Sam", 1, 40),
    (2, "Sam", 1, 19),
    (3, "Bob", 2, 45),
)

try:
    for values in cashier_sale_rows:
        cur.execute(insert_cashier_sale_sql, values)
except psycopg2.Error as e:
    print("Error: Could not insert values")
    print(e)

In [37]:
# Run the query
# Query 2: total amount spent per cashier, read from the single denormalized
# cashier_sales table (no JOIN).
try:
    cur.execute(
        "SELECT cashier_name, SUM(amount_spent) FROM cashier_sales "
        "GROUP BY cashier_name;"
    )
except psycopg2.Error as e:
    print("Error: Could not run the query")
    print(e)
else:
    # Fetch only when the query succeeded. After a failed execute there is no
    # result set to read, and fetching would raise a second error.
    row = cur.fetchone()
    while row:
        print(row)
        row = cur.fetchone()

('Sam', 59)
('Bob', 45)


### Drop the tables

In [38]:
# Remove every table this notebook created. IF EXISTS keeps the cleanup
# idempotent: a table that is already gone no longer raises and prevents the
# remaining tables from being dropped.
tables_to_drop = (
    "transactions2",
    "albums_sold",
    "employees",
    "sales",
    "transactions",
    "cashier_sales",
)

try:
    for table_name in tables_to_drop:
        # Names come from the fixed tuple above, never from user input, so
        # formatting them into the statement is safe.
        cur.execute("DROP TABLE IF EXISTS {};".format(table_name))
except psycopg2.Error as e:
    print("Error: Could not delete tables")
    print(e)

### Close cursor and connection

In [39]:
# Release the database handles: the cursor first, then the connection.
for handle in (cur, conn):
    handle.close()