# Assignment #9

In [1]:
from pandas import DataFrame
from DATA225utils import make_connection, dataframe_query

In [2]:
def make_table(table, sql):
    """Drop ``table`` if it exists, then create it by executing ``sql``.

    Both statements are executed on the notebook's module-level ``cursor``
    (the cursor obtained from ``conn.cursor()``), which must exist before
    this function is called.

    Args:
        table: Name of the table to drop.  It is interpolated directly into
            the DROP statement, so pass only trusted table names.
        sql: The complete CREATE TABLE statement to execute after the drop.
    """
    cursor.execute(f"DROP TABLE IF EXISTS {table}")
    cursor.execute(sql)

In [3]:
def display_table(table, order_by=''):
    """Return all rows of ``table`` as the dataframe from ``dataframe_query``.

    The query is run through ``dataframe_query`` on the notebook's
    module-level ``conn``, which must exist before this function is called.

    Args:
        table: Name of the table to select from.  It is interpolated
            directly into the SQL text, so pass only trusted names.
        order_by: Optional ORDER BY expression (for example ``"Budget DESC"``).
            An empty string or ``None`` means no ordering clause is added.

    Returns:
        The second element of the pair returned by ``dataframe_query``.
    """
    sql = f"SELECT * FROM {table}"

    # Truthiness covers both '' (the default) and None.
    if order_by:
        sql += f" ORDER BY {order_by}"

    _, df = dataframe_query(conn, sql)
    return df

## The dimensional model (data warehouse)

In [4]:
# Connect using the movie-warehouse.ini config file and open the cursor
# that the remaining cells use to execute their SQL.
conn = make_connection(config_file='movie-warehouse.ini')
cursor = conn.cursor()

# movie dimension

In [5]:
# Drop any earlier copy first so this cell can be re-run.
cursor.execute("DROP TABLE IF EXISTS movie")

# Movie dimension: one row per movie, keyed by Movie_ID.
sql = """
        CREATE TABLE movie
        (
            Movie_ID INT, 
            Movie_Title VARCHAR(255), 
            Budget INT,
            RunTime INT,
            PRIMARY KEY (Movie_ID)
        )
        """
cursor.execute(sql)

In [6]:
# Five sample rows for the movie dimension
# (Movie_ID, Movie_Title, Budget, RunTime).
sql_Movie = """
              INSERT INTO movie VALUES
              (862, 'Toy Story', 30000000, 81.0), 
              (8844, 'Jumanji', 65000000, 104.0), 
              (949, 'Heat', 60000000, 170.0), 
              (710, 'GoldenEye', 58000000, 130.0),
              (1408, 'Cutthroat Island', 98000000, 119.0)
              """
cursor.execute(sql_Movie)
conn.commit()  # commit the inserted rows

# users dimension

In [7]:
# Drop any earlier copy first so this cell can be re-run.
cursor.execute("DROP TABLE IF EXISTS users")

# Users dimension: one row per user, keyed by User_ID (stored as text).
sql = """
        CREATE TABLE users
        (
            User_ID VARCHAR(255), 
            First_Name CHAR(255),  
            Last_Name CHAR(255),      
            User_Name VARCHAR(255),
            PRIMARY KEY (User_ID)
        )
        """

cursor.execute(sql)

In [8]:
# Five sample rows for the users dimension
# (User_ID, First_Name, Last_Name, User_Name).
sql_Users = """
              INSERT INTO users VALUES
              ('111','Joseph','Chang','joseph123'),
              ('222','Shrey','Jain','shrey01'),
              ('333','Satyaprakash','Mishra','sp17mishra'),
              ('444','Justin','Wang','justinw002'),
              ('555','Ron','Mak','ronmak145')
              """
cursor.execute(sql_Users)
conn.commit()  # commit the inserted rows

# production_location dimension

In [9]:
# Drop any earlier copy first so this cell can be re-run.
cursor.execute("DROP TABLE IF EXISTS production_location")

# Production-location dimension: one row per Country_Code.
sql = """
        CREATE TABLE production_location
        (
            Country_Code CHAR(255), 
            Continent CHAR(255), 
            Country CHAR(255), 
            City CHAR(255),
            PRIMARY KEY (Country_Code)
        )
        """

cursor.execute(sql)

In [10]:
# Four sample rows for the production_location dimension
# (Country_Code, Continent, Country, City).
sql_PL = """
              INSERT INTO production_location VALUES
              ('USA', 'North_America', 'United_States', 'New York'),
              ('UK', 'Europe', 'Great Britain', 'London'),
              ('CHI', 'Asia', 'China', 'Beijing'),
              ('FRA', 'Europe', 'France', 'Paris')
              
              """
cursor.execute(sql_PL)
conn.commit()  # commit the inserted rows

# date dimension

In [11]:
# Drop any earlier copy first so this cell can be re-run.
cursor.execute("DROP TABLE IF EXISTS date")

# Date dimension: one row per calendar date, with the year, month and
# day also stored as separate integer columns.
sql = """
        CREATE TABLE date
        (
            FullDate DATE, 
            Year INT, 
            Month INT, 
            Day INT,
            PRIMARY KEY (FullDate)
        )
        """

cursor.execute(sql)

In [12]:
# Five sample rows for the date dimension (FullDate, Year, Month, Day).
sql_date = """
              INSERT INTO date VALUES
              ('2001-12-06', 2001, 12, 06),
              ('1994-10-13', 1994, 10, 13),
              ('2020-11-10', 2020, 11, 10),
              ('2021-01-01', 2021, 01, 01),
              ('2000-08-26', 2000, 08, 26)
              
              """
cursor.execute(sql_date)
conn.commit()  # commit the inserted rows

# movie_fact Table

In [13]:
# Drop any earlier copy first so this cell can be re-run.
cursor.execute("DROP TABLE IF EXISTS movie_fact")

# Movie fact table: Revenue for each (Movie_ID, Country_Code, FullDate)
# combination; the three key columns together form the primary key.
sql = """
        CREATE TABLE movie_fact
        (
            Movie_ID INT, 
            Country_Code CHAR(255), 
            FullDate DATE, 
            Revenue INT,
            PRIMARY KEY(Movie_ID, Country_Code, FullDate)
        )
        """

cursor.execute(sql)

In [14]:
# Three sample facts (Movie_ID, Country_Code, FullDate, Revenue).
sql_movie_fact = """
              INSERT INTO movie_fact VALUES
              (710, 'CHI', '2000-08-26', 4000000), 
              (862, 'USA', '1994-10-13', 20000000),
              (8844, 'FRA', '2001-12-06', 1000000000)
              
              """
cursor.execute(sql_movie_fact)
conn.commit()  # commit the inserted rows

# rating_fact Table

In [15]:
# Drop any earlier copy first so this cell can be re-run.
cursor.execute('DROP TABLE IF EXISTS rating_fact')

# Rating fact table: one Rating per (User_ID, Movie_ID, FullDate).
# User_ID is VARCHAR(255) so that it has the same type as the key of the
# users dimension (users.User_ID is VARCHAR(255)); the notebook's inserts
# and queries also supply user ids as string literals.  With matching
# types the join on User_ID is a plain string comparison rather than an
# implicit string-to-number conversion.
sql = ( """
        CREATE TABLE rating_fact
        (
            User_ID VARCHAR(255),
            Movie_ID INT,
            FullDate DATE,
            Rating FLOAT,
            PRIMARY KEY(User_ID, Movie_ID, FullDate)
        )
        """
      )

cursor.execute(sql)

In [16]:
# Three sample ratings (User_ID, Movie_ID, FullDate, Rating).
# NOTE(review): some keys here have no matching row in the dimension
# inserts of this notebook (User_ID '12', FullDate '2023-11-10', and
# Movie_IDs 13, 2333 and 1) -- confirm whether that is intentional.
sql_RF = """
              INSERT INTO rating_fact VALUES
              ('12', 13, '2023-11-10', 5.6), 
              ('111', 2333, '2021-01-01', 3.2), 
              ('333', 1, '2000-08-26', 9.2)
              
              """
cursor.execute(sql_RF)
conn.commit()  # commit the inserted rows

# Queries for movie_fact

## Query 1

In [17]:
# Budget, revenue and profit (Revenue - Budget) for the movie facts
# dated 1994-10-13, joining movie_fact to the movie and date dimensions.
sql = """
        SELECT mm.Movie_ID, mm.Movie_Title, mm.Budget, mw.Revenue, mw.Revenue - mm.Budget AS Profit  
        FROM movie mm, movie_fact mw, date md
        WHERE mm.Movie_ID = mw.Movie_ID
        AND   mw.FullDate = md.FullDate
        AND   md.FullDate = "1994-10-13";
        """

cursor.execute(sql)
df1 = DataFrame(
    cursor.fetchall(),
    columns=['Movie_ID', 'Movie_Title', 'Budget', 'Revenue', 'Profit'],
)
display(df1)


Unnamed: 0,Movie_ID,Movie_Title,Budget,Revenue,Profit
0,862,Toy Story,30000000,20000000,-10000000


In [18]:
# This query finds the budget, revenue, and profit for a movie on October 13th, 1994.
# We see that Movie ID 862 is shown.

## Query 2

In [19]:
# Production location (country code and city) of the movie GoldenEye,
# joining movie_fact to the movie and production_location dimensions.
# The title literal is spelled exactly as it is stored ('GoldenEye') so
# the filter does not depend on a case-insensitive collation, and it is
# single-quoted like the string literals in the notebook's INSERTs.
sql = ( """
        SELECT mm.Movie_ID, mm.Movie_Title, mp.Country_Code, mp.City
        FROM movie mm, movie_fact mw, production_location mp
        WHERE mw.Country_Code = mp.Country_Code
        AND mw.Movie_ID = mm.Movie_ID
        AND mm.Movie_Title = 'GoldenEye'
        """
      )

cursor.execute(sql)
# The Country_Code column is labelled 'Country' in the resulting frame.
df2 = DataFrame(cursor.fetchall(), columns = ["Movie_ID", 'Movie_Title', 'Country', 'City'])
display(df2)


Unnamed: 0,Movie_ID,Movie_Title,Country,City
0,710,GoldenEye,CHI,Beijing


In [20]:
# This query finds the production location for the movie GoldenEye. We see it was filmed in China. 
# We also see that GoldenEye is Movie_ID 710.

# Queries for rating_fact

## Query 1

In [21]:
# Ratings recorded on 2021-01-01, joining rating_fact to the users and
# date dimensions.
sql = """
        SELECT rw.User_ID, rw.Movie_ID, rw.Rating, md.FullDate
        FROM users mu, rating_fact rw, date md
        WHERE mu.User_ID = rw.User_ID
        AND   rw.FullDate = md.FullDate
        AND   rw.FullDate = "2021-01-01"
        """

cursor.execute(sql)
# The FullDate column is labelled 'Date' in the resulting frame.
df3 = DataFrame(
    cursor.fetchall(),
    columns=['User_ID', 'Movie_ID', 'Rating', 'Date'],
)
display(df3)

Unnamed: 0,User_ID,Movie_ID,Rating,Date
0,111,2333,3.2,2021-01-01


In [22]:
# This query finds the rating given on a specific date. In this case, the date is January 1st, 2021. 
# User_ID 111 gave this rating to Movie_ID 2333.

## Query 2

In [23]:
# Every rating given by user "111", joining rating_fact to users.
sql = """
        SELECT rw.User_ID, rw.Movie_ID, rw.Rating
        FROM users mu, rating_fact rw
        WHERE mu.User_ID = rw.User_ID
        AND mu.User_ID = "111"
        """

cursor.execute(sql)
df4 = DataFrame(
    cursor.fetchall(),
    columns=['User_ID', 'Movie_ID', 'Rating'],
)
display(df4)

Unnamed: 0,User_ID,Movie_ID,Rating
0,111,2333,3.2


In [24]:
# This query finds the movies rated by user 111. We see the user rated Movie_ID 2333 as 3.2.