In [1]:
import csv
from DATA225utils import make_connection
import pandas as pd
import os
from configparser import ConfigParser
from mysql.connector import MySQLConnection

In [2]:
def read_config(config_file = 'config.ini', section = 'mysql'):
    """
    Read a configuration file config_file and the given section. 
    If successful, return the configuration as a dictionary,
    else raise an exception. 
    """
    parser = ConfigParser()
    
    # Does the configuration file exist?
    if os.path.isfile(config_file):
        parser.read(config_file)
    else:
        raise Exception(f"Configuration file '{config_file}' "
                        "doesn't exist.")
    
    config = {}
    
    # Does it have the right section?
    if parser.has_section(section):
        
        # Parse the configuration file.
        items = parser.items(section)
        
        # Construct the parameter dictionary.
        for item in items:
            config[item[0]] = item[1]
            
    else:
        raise Exception(f"Section '{section}' missing "
                        f"in configuration file '{config_file}'.")
    
    return config

In [3]:
db_config = read_config('zagi.ini')
db_config

{'host': 'localhost',
 'database': 'zig_sales',
 'user': 'root',
 'password': 'seekrit1'}

In [4]:
def make_connection(config_file = 'config.ini', section = 'mysql'):
    """
    Make a connection to a database with the configuration file
    config_file and the given section. If successful, return 
    the connection, else raise an exception.
    """
    try:
        db_config = read_config(config_file, section)            
        conn = MySQLConnection(**db_config)
        
        if conn.is_connected():
            return conn
                
    except Error as e:
        raise Exception(f'Connection failed.\n{e}')

In [5]:
conn = make_connection('zagi.ini')
conn

<mysql.connector.connection.MySQLConnection at 0x7fb8a0c8c0a0>

In [6]:
cursor = conn.cursor()
cursor

<mysql.connector.cursor.MySQLCursor at 0x7fb8a0c8cd00>

# Tables

In [7]:
cursor.execute('DROP TABLE IF EXISTS Movie_Metadata')

sql = ( """
        CREATE TABLE Movie_Metadata
        (
          ReleaseDate DATE NOT NULL,
          IMDB_ID VARCHAR(255) NOT NULL,
          Original_Title VARCHAR(255) NOT NULL,
          Homepage VARCHAR(255),
          Budget INT NOT NULL,
          PRIMARY KEY (IMDB_ID)
        )
        """
      )

cursor.execute(sql)

DatabaseError: 3730 (HY000): Cannot drop table 'movie_metadata' referenced by a foreign key constraint 'movie_metadata_genres_ibfk_1' on table 'Movie_Metadata_Genres'.

In [8]:
cursor.execute('DROP TABLE IF EXISTS Users')

sql = ( """
        CREATE TABLE Users
        (
          User_ID INT NOT NULL,
          First_Name CHAR(255) NOT NULL,
          Last_Name CHAR(255) NOT NULL,
          User_Name VARCHAR(255) NOT NULL,
          PRIMARY KEY (First_Name, Last_Name, User_Name)
        )
        """
      )

cursor.execute(sql)

DatabaseError: 3730 (HY000): Cannot drop table 'users' referenced by a foreign key constraint 'rates_ibfk_2' on table 'Rates'.

In [9]:
cursor.execute('DROP TABLE IF EXISTS Movie_Metadata_Genres')

sql = ( """
        CREATE TABLE Movie_Metadata_Genres
        (
          Genres CHAR(255) NOT NULL,
          IMDB_ID VARCHAR(255) NOT NULL,
          PRIMARY KEY (Genres, IMDB_ID),
          FOREIGN KEY (IMDB_ID) REFERENCES Movie_Metadata(IMDB_ID)
        )
        """
      )

cursor.execute(sql)

In [10]:
cursor.execute('DROP TABLE IF EXISTS Movie_Links')

sql = ( """
        CREATE TABLE Movie_Links
        (
          Movie_ID INT NOT NULL,
          TMDB_ID INT NOT NULL,
          IMDB_ID VARCHAR(255) NOT NULL,
          PRIMARY KEY (Movie_ID),
          FOREIGN KEY (IMDB_ID) REFERENCES Movie_Metadata(IMDB_ID)
        )
        """
      )

cursor.execute(sql)


DatabaseError: 3730 (HY000): Cannot drop table 'movie_links' referenced by a foreign key constraint 'movie_ratings_ibfk_1' on table 'Movie_Ratings'.

In [11]:
cursor.execute('DROP TABLE IF EXISTS Movie_Ratings')

sql = ( """
        CREATE TABLE Movie_Ratings
        (
          Timestamp INT NOT NULL,
          Ratings FLOAT NOT NULL,
          User_ID INT NOT NULL,
          Receipt INT NOT NULL,
          Movie_ID INT NOT NULL,
          PRIMARY KEY (Receipt),
          FOREIGN KEY (Movie_ID) REFERENCES Movie_Links(Movie_ID)
        )
        """
      )

cursor.execute(sql)

DatabaseError: 3730 (HY000): Cannot drop table 'movie_ratings' referenced by a foreign key constraint 'rates_ibfk_1' on table 'Rates'.

In [12]:
cursor.execute('DROP TABLE IF EXISTS Rates')

sql = ( """
        CREATE TABLE Rates
        (
          Receipt INT NOT NULL,
          First_Name CHAR(255) NOT NULL,
          Last_Name CHAR(255) NOT NULL,
          User_Name VARCHAR(255) NOT NULL,
          PRIMARY KEY (Receipt, First_Name, Last_Name, User_Name),
          FOREIGN KEY (Receipt) REFERENCES Movie_Ratings(Receipt),
          FOREIGN KEY (First_Name, Last_Name, User_Name) REFERENCES Users(First_Name, Last_Name, User_Name)
        );
        """
      )

cursor.execute(sql)

# Question 1

In [31]:
#Create and load a database table. Then write one or more INSERT INTO command
#with an embedded SELECT to create one or more tables from the first table. Use CASE
#with the SELECT to perform data transformation(s). Display the contents of the first table
#and the newly created table(s).

In [36]:
sql_MovieMetadata = (   """
              INSERT INTO Movie_Metadata
              VALUES
                    ('1995-10-30', 'tt0114709','Toy Story','http://toystory.disney.com/toy-story',30000000),
                    ('1995-11-16', 'tt0113189', 'GoldenEye', 'http://www.mgm.com/view/movie/757/Goldeneye/', 58000000),
                    ('1995-10-27', 'tt0113627', 'Leaving Las Vegas', 'http://www.mgm.com/title_title.do?title_star=LEAVINGL', 3600000),
                    ('1995-09-22', 'tt0114369', 'Se7en', 'http://www.sevenmovie.com/', 33000000),
                    ('1995-07-19', 'tt0114814', 'The Usual Suspects', 'http://www.mgm.com/#/our-titles/2083/The-Usual-Suspects', 6000000);
              """
          )
cursor.execute(sql_MovieMetadata)
conn.commit()

InternalError: Unread result found

In [37]:
cursor.execute('DROP TABLE IF EXISTS MM')

sql = ( """
        CREATE TABLE MM
        (
          ReleaseDate DATE NOT NULL,
          IMDB_ID VARCHAR(255) NOT NULL,
          Original_Title VARCHAR(255) NOT NULL,
          Budget CHAR(255) NOT NULL,
          PRIMARY KEY (IMDB_ID)
        )
        """
      )

cursor.execute(sql)

InternalError: Unread result found

In [38]:
sql = ("""
    INSERT INTO MM
    SELECT ReleaseDate, IMDB_ID, Original_Title, 
        CASE 
            WHEN Budget < 6000000 THEN 'Low'
            WHEN Budget > 6000000 THEN 'High'
            ELSE 'Right-Amount'
        END AS Budget
    FROM Movie_Metadata
    WHERE ReleaseDate > '1995-07-18'

    """)

cursor.execute(sql)
conn.commit()

InternalError: Unread result found

In [17]:
# Here we classified budget as Low: if less than 6000000 dollars, High: if more than 6000000 dollars, and Right-Amount: exactly 6000000 dollars.

In [35]:
select_vendor = ("""
                 SELECT * FROM MM
                 """
                  )
cursor.execute(select_vendor)
df1 = pd.DataFrame(cursor.fetchall(), columns = ['Release_Date', 'IMDB_ID', 'Original_Title', 'Budget'])
display(df1)

InternalError: Unread result found

# Question 2

In [19]:
#Use one or more aggregate functions with GROUP BY. Explain in a sentence or two what
#the nested query is supposed to do and display the result.

In [34]:
sql2 =  ("""
        SELECT Budget
        FROM MM
        """)

cursor.execute(sql2)

InternalError: Unread result found

In [25]:
select_vendor = ("""
                 SELECT * FROM MM
                 """
                  )
cursor.execute(select_vendor)
df1 = pd.DataFrame(cursor.fetchall(), columns = ['ReleaseDate', 'IMDB_ID', 'Original_Title', 'Budget'])
display(df1)

InternalError: Unread result found

# Question 3

In [None]:
#Use one or more aggregate functions with GROUP BY HAVING. Explain in a sentence or
#two what the nested query is supposed to do and display the result.

In [None]:
select_vendor = ("""
                 SELECT p.category_id, SUM(so.no_of_items)
                 FROM zig_sales.product as p
                 JOIN zig_sales.sold_via as so
                 ON p.product_id = so.product_id
                 GROUP BY p.category_id
                 """
                  )
cursor.execute(select_vendor)
df1 = pd.DataFrame(cursor.fetchall(), columns = ['Category_id', 'Total Number of items'])
display(df1)
conn.commit()

# Question 4

In [None]:
#Write a SELECT query with a nested SELECT. Explain in a sentence or two what the
#nested query is supposed to do and display the result.

In [None]:
select_vendor = ("""
                 SELECT tid, SUM(no_of_items)
                 FROM zig_sales.sold_via
                 GROUP BY tid
                 HAVING SUM(no_of_items) > 5
                 """
                  )
cursor.execute(select_vendor)
df1 = pd.DataFrame(cursor.fetchall(), columns = ['tid', 'Total number_of_items'])
display(df1)
conn.commit()

# Question 5

In [None]:
# Perform a left outer join between two tables and display the result.

In [None]:
select_vendor = ("""
                 SELECT p.product_id, p.product_name
                 FROM zig_sales.product as p
                 WHERE p.product_price = (SELECT MIN(p.product_price) 
                                          FROM zig_sales.product as p)
                 
                 """
                  )
cursor.execute(select_vendor)
df1 = pd.DataFrame(cursor.fetchall(), columns = ['product_id', 'product_name'])
display(df1)
conn.commit()

# Question 6

In [None]:
# Perform a right outer join between two tables and display the result.

In [None]:
select_vendor = ("""
                 SELECT p.product_id
                 FROM zig_sales.product as p
                 JOIN zig_sales.sold_via as s
                 ON p.product_id = s.product_id
                 WHERE s.no_of_items = (SELECT MAX(s.no_of_items) 
                                          FROM zig_sales.sold_via as s)
                 
                 """
                  )
cursor.execute(select_vendor)
df1 = pd.DataFrame(cursor.fetchall(), columns = ['product_id'])
display(df1)
conn.commit()

# Question 7

In [None]:
# Perform a full outer join between two tables and display the result.

In [None]:
# given query
select_vendor = ("""
                SELECT product_id, product_name, product_price
                FROM product
                WHERE product_id IN (SELECT product_id
                                     FROM sold_via
                                     GROUP BY product_id
                                     HAVING SUM(no_of_items) > 3)
                 
                 """
                  )
cursor.execute(select_vendor)
df1 = pd.DataFrame(cursor.fetchall())
display(df1)
conn.commit()

# Question 8

In [None]:
# Create a view and display its contents. Use the view in a join with other table(s) and display the result.

In [None]:
# given query
select_vendor = ("""
                SELECT product_id, product_name, product_price
                FROM product
                WHERE product_id IN (SELECT product_id
                                     FROM sold_via
                                     GROUP BY product_id
                                     HAVING COUNT(*) > 1)
                 
                 """
                  )
cursor.execute(select_vendor)
df1 = pd.DataFrame(cursor.fetchall())
display(df1)
conn.commit()