## Import libraries

In [1]:
import psycopg2

## Create a connection to the database and get a cursor

In [2]:
# Open the connection. The credentials below are hardcoded for the local demo
# database only; do not reuse this pattern for a real deployment.
try:
    conn = psycopg2.connect("host=localhost dbname=udacity_db user=root password=root")
    # Autocommit: every statement takes effect immediately, so the notebook
    # never needs an explicit conn.commit().
    conn.autocommit = True
except psycopg2.Error as e:
    print("Error: Could not make connection to the Postgres database")
    print(e)
    # Without a connection nothing below can work; surface the real error
    # instead of a NameError on `conn` in the next statement.
    raise

# A cursor can only be obtained from a live connection.
try:
    cur = conn.cursor()
except psycopg2.Error as e:
    print("Error: Could not get cursor to the Database")
    print(e)
    # Every later cell needs `cur`, so stop here with the original error.
    raise

## Music store library

Let's imagine we have a table called music_store. 

Table Name: music_store  
column 0: transaction_id  
column 1: customer_name  
column 2: cashier_name  
column 3: year  
column 4: albums_purchased

In [3]:
# Un-normalized starting point: albums_purchased stores a list of albums per row.
create_table_query = """
CREATE TABLE IF NOT EXISTS music_store (
    transaction_id INT,
    customer_name VARCHAR,
    cashier_name VARCHAR,
    year INT,
    albums_purchased VARCHAR[]
);
"""

try:
    cur.execute(create_table_query)
except psycopg2.Error as e:
    print("Error: Issue creating table")
    print(e)

insert_query = """
INSERT INTO music_store (transaction_id, customer_name, cashier_name, year, albums_purchased)
VALUES (%s, %s, %s, %s, %s)
"""

# One tuple per transaction, in the column order of insert_query.
music_store_rows = [
    (1, "Amanda", "Sam", 2000, ["Rubber Soul", "Let it Be"]),
    (2, "Toby", "Sam", 2000, ["My Generation"]),
    (3, "Max", "Bob", 2018, ["Meet the Beatles", "Help!"]),
]

# Each row gets its own try block so one failing insert does not skip the rest.
for music_store_row in music_store_rows:
    try:
        cur.execute(insert_query, music_store_row)
    except psycopg2.Error as e:
        print("Error: Inserting Rows")
        print(e)

try:
    cur.execute("SELECT * FROM music_store;")
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Only read rows when the SELECT succeeded; after a failed execute there
    # is no result set to fetch from.
    for row in cur.fetchall():
        print(row)

(1, 'Amanda', 'Sam', 2000, ['Rubber Soul', 'Let it Be'])
(2, 'Toby', 'Sam', 2000, ['My Generation'])
(3, 'Max', 'Bob', 2018, ['Meet the Beatles', 'Help!'])


## Moving to 1st Normal Form (1NF)

This data has not been normalized. To get this data into 1st normal form, we need to remove any collections or lists of data. We need to break up the list of albums into individual rows.  

Table Name: music_store2  
column 0: transaction_id  
column 1: customer_name  
column 2: cashier_name  
column 3: year  
column 4: album_purchased

In [6]:
# 1NF version of the table: one album per row instead of an array column.
create_music_store2_query = """
CREATE TABLE IF NOT EXISTS music_store2 (
    transaction_id INT,
    customer_name VARCHAR,
    cashier_name VARCHAR,
    year INT,
    album_purchased VARCHAR
);
"""

try:
    cur.execute(create_music_store2_query)
except psycopg2.Error as e:
    print("Error: Issue creating table")
    print(e)

insert_music_store2 = """
INSERT INTO music_store2 (transaction_id, customer_name, cashier_name, year, album_purchased)
VALUES (%s, %s, %s, %s, %s)
"""

# The transaction details are repeated once for every album purchased.
music_store2_rows = [
    (1, "Amanda", "Sam", 2000, "Rubber Soul"),
    (1, "Amanda", "Sam", 2000, "Let it Be"),
    (2, "Toby", "Sam", 2000, "My Generation"),
    (3, "Max", "Bob", 2018, "Meet the Beatles"),
    (3, "Max", "Bob", 2018, "Help!"),
]

# Each row gets its own try block so one failing insert does not skip the rest.
for music_store2_row in music_store2_rows:
    try:
        cur.execute(insert_music_store2, music_store2_row)
    except psycopg2.Error as e:
        print("Error: Inserting Rows")
        print(e)

try:
    cur.execute("SELECT * FROM music_store2;")
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Only read rows when the SELECT succeeded; after a failed execute there
    # is no result set to fetch from.
    for row in cur.fetchall():
        print(row)

(1, 'Amanda', 'Sam', 2000, 'Rubber Soul')
(1, 'Amanda', 'Sam', 2000, 'Let it Be')
(2, 'Toby', 'Sam', 2000, 'My Generation')
(3, 'Max', 'Bob', 2018, 'Meet the Beatles')
(3, 'Max', 'Bob', 2018, 'Help!')


## Moving to 2nd Normal Form (2NF)

We have moved our data into 1NF, which is the first step in moving to 2nd Normal Form. Our table is not yet in 2nd Normal Form: while each record in the table is unique, the intended primary key (transaction_id) is not unique across rows. We need to break this up into two tables, transactions and albums_sold. 

Table Name: transactions  
column 0: transaction_id  
column 1: customer_name  
column 2: cashier_name  
column 3: year  

Table Name: albums_sold  
column 0: album_id  
column 1: album_name  
column 2: transaction_id 

In [7]:
# 2NF split: transaction details in one table, purchased albums in another,
# linked through transaction_id.
create_transactions_query = """
CREATE TABLE IF NOT EXISTS transactions (
    transaction_id INT,
    customer_name VARCHAR,
    cashier_name VARCHAR,
    year INT
);
"""

create_albums_sold_query = """
CREATE TABLE IF NOT EXISTS albums_sold (
    album_id INT,
    album_name VARCHAR,
    transaction_id INT
);
"""

for create_query in (create_transactions_query, create_albums_sold_query):
    try:
        cur.execute(create_query)
    except psycopg2.Error as e:
        print("Error: Issue creating table")
        print(e)

insert_transactions_query = """
INSERT INTO transactions (transaction_id, customer_name, cashier_name, year)
VALUES (%s, %s, %s, %s)
"""

insert_albums_sold_query = """
INSERT INTO albums_sold (album_id, album_name, transaction_id)
VALUES (%s, %s, %s)
"""

transactions_rows = [
    (1, "Amanda", "Sam", 2000),
    (2, "Toby", "Sam", 2000),
    (3, "Max", "Bob", 2018),
]

# The last element of each tuple is the transaction_id the album belongs to.
albums_sold_rows = [
    (1, "Rubber Soul", 1),
    (2, "Let it Be", 1),
    (3, "My Generation", 2),
    (4, "Meet the Beatles", 3),
    (5, "Help!", 3),
]

# Transactions are inserted before albums, and every row gets its own try
# block so one failing insert does not skip the rest.
for insert_statement, table_rows in (
    (insert_transactions_query, transactions_rows),
    (insert_albums_sold_query, albums_sold_rows),
):
    for table_row in table_rows:
        try:
            cur.execute(insert_statement, table_row)
        except psycopg2.Error as e:
            print("Error: Inserting Rows")
            print(e)

# Show the contents of both tables, each under its own heading.
for heading, select_statement in (
    ("Table: transactions\n", "SELECT * FROM transactions;"),
    ("\nTable: albums_sold\n", "SELECT * FROM albums_sold;"),
):
    print(heading)
    try:
        cur.execute(select_statement)
    except psycopg2.Error as e:
        print("Error: select *")
        print(e)
    else:
        # Only read rows when the SELECT succeeded; after a failed execute
        # there is no result set to fetch from.
        for row in cur.fetchall():
            print(row)

Table: transactions

(1, 'Amanda', 'Sam', 2000)
(2, 'Toby', 'Sam', 2000)
(3, 'Max', 'Bob', 2018)

Table: albums_sold

(1, 'Rubber Soul', 1)
(2, 'Let it Be', 1)
(3, 'My Generation', 2)
(4, 'Meet the Beatles', 3)
(5, 'Help!', 3)


Let's do a JOIN on these tables so we can get all the information we had in our first table. 

In [8]:
# Rebuild the original wide view by joining each album to its transaction.
try:
    cur.execute("""SELECT * FROM transactions t JOIN albums_sold a 
    ON t.transaction_id = a.transaction_id ;
    """)
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Only read rows when the query succeeded; after a failed execute there
    # is no result set to fetch from.
    for row in cur.fetchall():
        print(row)

(1, 'Amanda', 'Sam', 2000, 1, 'Rubber Soul', 1)
(1, 'Amanda', 'Sam', 2000, 2, 'Let it Be', 1)
(2, 'Toby', 'Sam', 2000, 3, 'My Generation', 2)
(3, 'Max', 'Bob', 2018, 4, 'Meet the Beatles', 3)
(3, 'Max', 'Bob', 2018, 5, 'Help!', 3)


## Moving to 3rd Normal Form (3NF)

Let's check our tables for any transitive dependencies. In the transactions table, cashier_name can be moved into its own table, called employees, which leaves us with 3 tables.  


Table Name: transactions2  
column 0: transaction_id  
column 1: customer_name  
column 2: cashier_id  
column 3: year  

Table Name: albums_sold  
column 0: album_id  
column 1: album_name  
column 2: transaction_id

Table Name: employees  
column 0: employee_id  
column 1: employee_name

In [9]:
# 3NF split: the cashier's name moves to an employees table and transactions2
# keeps only a cashier_id that refers to it.
create_transactions2_query = """
CREATE TABLE IF NOT EXISTS transactions2 (
    transaction_id INT,
    customer_name VARCHAR,
    cashier_id INT,
    year INT
);
"""

create_employees_query = """
CREATE TABLE IF NOT EXISTS employees (
    employee_id INT,
    employee_name VARCHAR
);
"""

for create_query in (create_transactions2_query, create_employees_query):
    try:
        cur.execute(create_query)
    except psycopg2.Error as e:
        print("Error: Issue creating table")
        print(e)


insert_transactions2_query = """
INSERT INTO transactions2 (transaction_id, customer_name, cashier_id, year)
VALUES (%s, %s, %s, %s)
"""

insert_employees_query = """
INSERT INTO employees (employee_id, employee_name)
VALUES (%s, %s)
"""

# The third element of each tuple is the cashier's employee_id.
transactions2_rows = [
    (1, "Amanda", 1, 2000),
    (2, "Toby", 1, 2000),
    (3, "Max", 2, 2018),
]

employees_rows = [
    (1, "Sam"),
    (2, "Bob"),
]

# Transactions are inserted before employees, and every row gets its own try
# block so one failing insert does not skip the rest.
for insert_statement, table_rows in (
    (insert_transactions2_query, transactions2_rows),
    (insert_employees_query, employees_rows),
):
    for table_row in table_rows:
        try:
            cur.execute(insert_statement, table_row)
        except psycopg2.Error as e:
            print("Error: Inserting Rows")
            print(e)

# Show the contents of all three tables, each under its own heading.
for heading, select_statement in (
    ("Table: transactions2\n", "SELECT * FROM transactions2;"),
    ("\nTable: albums_sold\n", "SELECT * FROM albums_sold;"),
    ("\nTable: employees\n", "SELECT * FROM employees;"),
):
    print(heading)
    try:
        cur.execute(select_statement)
    except psycopg2.Error as e:
        print("Error: select *")
        print(e)
    else:
        # Only read rows when the SELECT succeeded; after a failed execute
        # there is no result set to fetch from.
        for row in cur.fetchall():
            print(row)

Table: transactions2

(1, 'Amanda', 1, 2000)
(2, 'Toby', 1, 2000)
(3, 'Max', 2, 2018)

Table: albums_sold

(1, 'Rubber Soul', 1)
(2, 'Let it Be', 1)
(3, 'My Generation', 2)
(4, 'Meet the Beatles', 3)
(5, 'Help!', 3)

Table: employees

(1, 'Sam')
(2, 'Bob')


Let's do two JOINs on these 3 tables so we can get all the information we had in our first table.

In [10]:
# Rebuild the original wide view: transactions joined to their cashier
# (employees) and then to the albums sold in each transaction.
try:
    cur.execute("SELECT * FROM (transactions2 t JOIN employees e ON t.cashier_id = e.employee_id) \
                JOIN albums_sold a ON t.transaction_id = a.transaction_id;")
except psycopg2.Error as e:
    print("Error: select *")
    print(e)
else:
    # Only read rows when the query succeeded; after a failed execute there
    # is no result set to fetch from.
    for row in cur.fetchall():
        print(row)

(1, 'Amanda', 1, 2000, 1, 'Sam', 1, 'Rubber Soul', 1)
(1, 'Amanda', 1, 2000, 1, 'Sam', 2, 'Let it Be', 1)
(2, 'Toby', 1, 2000, 1, 'Sam', 3, 'My Generation', 2)
(3, 'Max', 2, 2018, 2, 'Bob', 4, 'Meet the Beatles', 3)
(3, 'Max', 2, 2018, 2, 'Bob', 5, 'Help!', 3)


## DONE! We have Normalized our dataset!

## Drop the tables

In [11]:
# Clean up every table this notebook created. Each DROP runs in its own try
# block, so a failure on one table is reported and the remaining tables are
# still attempted.
drop_statements = (
    "DROP table music_store",
    "DROP table music_store2",
    "DROP table albums_sold",
    "DROP table employees",
    "DROP table transactions",
    "DROP table transactions2",
)

for drop_statement in drop_statements:
    try:
        cur.execute(drop_statement)
    except psycopg2.Error as e:
        print("Error: Dropping table")
        print(e)

## Close cursor and connection

In [12]:
# Release the cursor first, then the connection it was created from.
for db_handle in (cur, conn):
    db_handle.close()