In [1]:
import csv
import pandas as pd
from DATA225utils import make_connection

In [2]:
# Open the database connection using the movies.ini config file and create
# the cursor that the remaining cells use to run their SQL statements.
conn = make_connection(config_file='movies.ini')
cursor = conn.cursor()

# Tables

In [3]:
# (Re)create Movie_Metadata, the parent table of the movie schema.
#
# Movie_Metadata_Genres and Movie_Links hold foreign keys to this table,
# Movie_Ratings references Movie_Links, and Rates references Movie_Ratings.
# When the notebook is re-run those tables already exist, and a table that is
# still the target of a foreign key cannot be dropped (assumes MySQL/InnoDB
# with foreign-key checks enabled -- TODO confirm the backend). Drop the
# dependents first, children before parents; the later table cells recreate
# them.
for table_name in ('Rates', 'Movie_Ratings', 'Movie_Links',
                   'Movie_Metadata_Genres', 'Movie_Metadata'):
    cursor.execute('DROP TABLE IF EXISTS ' + table_name)

# One row per movie, keyed by its IMDB identifier; Homepage is the only
# nullable column.
sql = ( """
        CREATE TABLE Movie_Metadata
        (
          ReleaseDate DATE NOT NULL,
          IMDB_ID VARCHAR(255) NOT NULL,
          Original_Title VARCHAR(255) NOT NULL,
          Homepage VARCHAR(255),
          Budget INT NOT NULL,
          PRIMARY KEY (IMDB_ID)
        )
        """
      )

cursor.execute(sql)

In [4]:
# (Re)create Users.
#
# Rates declares a composite foreign key onto
# Users(First_Name, Last_Name, User_Name). On a re-run Rates still exists, and
# a table that is the target of a foreign key cannot be dropped (assumes
# MySQL/InnoDB with foreign-key checks enabled -- TODO confirm the backend),
# so Rates is dropped first; its own cell recreates it later.
for table_name in ('Rates', 'Users'):
    cursor.execute('DROP TABLE IF EXISTS ' + table_name)

# NOTE(review): the primary key is the (First_Name, Last_Name, User_Name)
# triple, not User_ID; User_ID is NOT NULL but is not declared unique.
sql = ( """
        CREATE TABLE Users
        (
          User_ID INT NOT NULL,
          First_Name CHAR(255) NOT NULL,
          Last_Name CHAR(255) NOT NULL,
          User_Name VARCHAR(255) NOT NULL,
          PRIMARY KEY (First_Name, Last_Name, User_Name)
        )
        """
      )

cursor.execute(sql)

In [5]:
# Rebuild Movie_Metadata_Genres from scratch: one row per (genre, movie) pair,
# where the movie is identified by its IMDB_ID in Movie_Metadata.
cursor.execute('DROP TABLE IF EXISTS Movie_Metadata_Genres')

sql = """
        CREATE TABLE Movie_Metadata_Genres
        (
          Genres CHAR(255) NOT NULL,
          IMDB_ID VARCHAR(255) NOT NULL,
          PRIMARY KEY (Genres, IMDB_ID),
          FOREIGN KEY (IMDB_ID) REFERENCES Movie_Metadata(IMDB_ID)
        )
        """
cursor.execute(sql)

In [6]:
# (Re)create Movie_Links, which maps the integer Movie_ID used by ratings to
# a movie's TMDB_ID and to its IMDB_ID in Movie_Metadata.
#
# Movie_Ratings has a foreign key onto Movie_Links, and Rates has one onto
# Movie_Ratings. On a re-run both still exist, and a table that is the target
# of a foreign key cannot be dropped (assumes MySQL/InnoDB with foreign-key
# checks enabled -- TODO confirm the backend). Drop the dependents first,
# children before parents; their own cells recreate them later.
for table_name in ('Rates', 'Movie_Ratings', 'Movie_Links'):
    cursor.execute('DROP TABLE IF EXISTS ' + table_name)

sql = ( """
        CREATE TABLE Movie_Links
        (
          Movie_ID INT NOT NULL,
          TMDB_ID INT NOT NULL,
          IMDB_ID VARCHAR(255) NOT NULL,
          PRIMARY KEY (Movie_ID),
          FOREIGN KEY (IMDB_ID) REFERENCES Movie_Metadata(IMDB_ID)
        )
        """
      )

cursor.execute(sql)

In [7]:
# (Re)create Movie_Ratings: one row per rating, keyed by Receipt and tied to
# a movie through Movie_Links(Movie_ID).
#
# Rates has a foreign key onto Movie_Ratings(Receipt). On a re-run Rates still
# exists, and a table that is the target of a foreign key cannot be dropped
# (assumes MySQL/InnoDB with foreign-key checks enabled -- TODO confirm the
# backend), so Rates is dropped first; its own cell recreates it later.
for table_name in ('Rates', 'Movie_Ratings'):
    cursor.execute('DROP TABLE IF EXISTS ' + table_name)

# NOTE(review): User_ID is stored here without a foreign key to Users.
sql = ( """
        CREATE TABLE Movie_Ratings
        (
          Timestamp INT NOT NULL,
          Ratings FLOAT NOT NULL,
          User_ID INT NOT NULL,
          Receipt INT NOT NULL,
          Movie_ID INT NOT NULL,
          PRIMARY KEY (Receipt),
          FOREIGN KEY (Movie_ID) REFERENCES Movie_Links(Movie_ID)
        )
        """
      )

cursor.execute(sql)


In [8]:
# Rebuild Rates, the link table between a rating (Movie_Ratings.Receipt) and
# the user who made it (the composite key of Users). No table shown in this
# notebook references Rates, so it is dropped directly.
cursor.execute('DROP TABLE IF EXISTS Rates')

sql = """
        CREATE TABLE Rates
        (
          Receipt INT NOT NULL,
          First_Name CHAR(255) NOT NULL,
          Last_Name CHAR(255) NOT NULL,
          User_Name VARCHAR(255) NOT NULL,
          PRIMARY KEY (Receipt, First_Name, Last_Name, User_Name),
          FOREIGN KEY (Receipt) REFERENCES Movie_Ratings(Receipt),
          FOREIGN KEY (First_Name, Last_Name, User_Name) REFERENCES Users(First_Name, Last_Name, User_Name)
        );
        """
cursor.execute(sql)

# Insertions

In [9]:
# Seed Movie_Metadata with five movies. The values are positional, so they
# follow the column order of the CREATE TABLE statement:
# ReleaseDate, IMDB_ID, Original_Title, Homepage, Budget.
sql_MovieMetadata = """
              INSERT INTO Movie_Metadata
              VALUES
                    ('1995-10-30', 'tt0114709','Toy Story','http://toystory.disney.com/toy-story',30000000),
                    ('1995-11-16', 'tt0113189', 'GoldenEye', 'http://www.mgm.com/view/movie/757/Goldeneye/', 58000000),
                    ('1995-10-27', 'tt0113627', 'Leaving Las Vegas', 'http://www.mgm.com/title_title.do?title_star=LEAVINGL', 3600000),
                    ('1995-09-22', 'tt0114369', 'Se7en', 'http://www.sevenmovie.com/', 33000000),
                    ('1995-07-19', 'tt0114814', 'The Usual Suspects', 'http://www.mgm.com/#/our-titles/2083/The-Usual-Suspects', 6000000);
              """
cursor.execute(sql_MovieMetadata)
conn.commit()  # persist the inserted rows

In [10]:
# Seed Movie_Metadata_Genres with one genre per movie. Values are positional
# (Genres, IMDB_ID); every IMDB_ID used here is one of the ids inserted into
# Movie_Metadata by the earlier insertion cell.
sql_genre = """
            INSERT INTO Movie_Metadata_Genres
            VALUES
                ('Comedy', 'tt0114709'),
                ('Adventure', 'tt0113189'),
                ('Drama', 'tt0113627'),
                ('Crime', 'tt0114369'),
                ('Drama', 'tt0114814')
            """
cursor.execute(sql_genre)
conn.commit()  # persist the inserted rows

In [11]:
# Seed Users with five users.
#
# The columns are named explicitly so the statement does not depend on the
# physical column order of the table, and User_ID (an INT column) is given as
# a numeric literal instead of a quoted string that the server would have to
# convert implicitly. The stored rows are the same as before.
sql_user = (   """
              INSERT INTO Users (User_ID, First_Name, Last_Name, User_Name)
              VALUES
                    (111, 'Joseph', 'Chang', 'joseph123'),
                    (222, 'Shrey', 'Jain', 'shrey01'),
                    (333, 'Satyaprakash', 'Mishra', 'sp17mishra'),
                    (444, 'Justin', 'Wang', 'justinw002'),
                    (555, 'Ron', 'Mak', 'ronmak145');
              """
          )
cursor.execute(sql_user)
conn.commit()  # persist the inserted rows

In [16]:
# Seed Movie_Links: positional values are (Movie_ID, TMDB_ID, IMDB_ID), one
# row for each movie already present in Movie_Metadata.
sql_MovieLinks = """
              INSERT INTO Movie_Links VALUES
              (1, 862, 'tt0114709'),
              (10, 710, 'tt0113189'),
              (25, 451, 'tt0113627' ),
              (47, 807, 'tt0114369'),
              (50, 629, 'tt0114814')
              """
cursor.execute(sql_MovieLinks)
conn.commit()  # persist the inserted rows

In [19]:
# Seed Movie_Ratings with one rating per movie.
#
# Every column of Movie_Ratings is numeric (INT or FLOAT), so the values are
# written as numeric literals rather than quoted strings such as '001' or
# '6.5' that the server would have to convert implicitly. The columns are
# named explicitly so the statement does not depend on the table's physical
# column order. The stored rows are the same as before (receipts 1-5).
sql_movieratings = (   """
              INSERT INTO Movie_Ratings (Timestamp, Ratings, User_ID, Receipt, Movie_ID)
              VALUES
                    (1425941529, 6.5, 111, 1, 1),
                    (1425942435, 5.5, 222, 2, 10),
                    (1425941300, 6.0, 333, 3, 25),
                    (1425942007, 8.0, 444, 4, 47),
                    (1425942139, 9.5, 555, 5, 50);
              """
          )
cursor.execute(sql_movieratings)
conn.commit()  # persist the inserted rows

In [20]:
# Seed Rates, pairing each receipt with the user who made that rating.
# Positional values are (Receipt, First_Name, Last_Name, User_Name); the
# receipts are written as integer literals with leading zeros.
sql_rates = """
              INSERT INTO Rates
              VALUES
                    (001,'Joseph','Chang','joseph123'),
                    (002, 'Shrey','Jain','shrey01'),
                    (003,'Satyaprakash','Mishra','sp17mishra'),
                    (004,'Justin','Wang','justinw002'),
                    (005, 'Ron','Mak','ronmak145');
              """
cursor.execute(sql_rates)
conn.commit()  # persist the inserted rows

# Alter Table w/ Named Constraint

In [22]:
# Add a foreign key named genreID from Movie_Metadata_Genres.IMDB_ID to
# Movie_Metadata.IMDB_ID.
# NOTE(review): the CREATE TABLE for Movie_Metadata_Genres already declares an
# unnamed foreign key on the same column, so this adds a second, named
# constraint alongside it -- confirm that the duplication is intended.
sql_alter = """
            ALTER TABLE Movie_Metadata_Genres
            ADD CONSTRAINT genreID
            FOREIGN KEY (IMDB_ID) REFERENCES Movie_Metadata(IMDB_ID)
            """
cursor.execute(sql_alter)
conn.commit()

# Dataframes

In [25]:
# Fetch every Movie_Metadata row and show it as a DataFrame with
# human-readable column labels (given in the table's column order).
select_m = """
            SELECT * FROM Movie_Metadata
            """
cursor.execute(select_m)
rows = cursor.fetchall()
labels = ["Release Date", "IMDB ID", "Title", "Homepage", "Budget"]
df2 = pd.DataFrame(rows, columns=labels)
display(df2)
conn.commit()

Unnamed: 0,Release Date,IMDB ID,Title,Homepage,Budget
0,1995-11-16,tt0113189,GoldenEye,http://www.mgm.com/view/movie/757/Goldeneye/,58000000
1,1995-10-27,tt0113627,Leaving Las Vegas,http://www.mgm.com/title_title.do?title_star=L...,3600000
2,1995-09-22,tt0114369,Se7en,http://www.sevenmovie.com/,33000000
3,1995-10-30,tt0114709,Toy Story,http://toystory.disney.com/toy-story,30000000
4,1995-07-19,tt0114814,The Usual Suspects,http://www.mgm.com/#/our-titles/2083/The-Usual...,6000000


In [26]:
# Fetch every Users row and show it as a DataFrame with human-readable
# column labels (given in the table's column order).
select_m = """
            SELECT * FROM Users
            """
cursor.execute(select_m)
rows = cursor.fetchall()
labels = ["UserID", "First Name", "Last Name", "User Name"]
df2 = pd.DataFrame(rows, columns=labels)
display(df2)
conn.commit()

Unnamed: 0,UserID,First Name,Last Name,User Name
0,111,Joseph,Chang,joseph123
1,444,Justin,Wang,justinw002
2,555,Ron,Mak,ronmak145
3,333,Satyaprakash,Mishra,sp17mishra
4,222,Shrey,Jain,shrey01


In [27]:
# Fetch every Movie_Metadata_Genres row and show it as a two-column
# DataFrame (genre, IMDB id).
select_m = """
            SELECT * FROM Movie_Metadata_Genres
            """
cursor.execute(select_m)
rows = cursor.fetchall()
labels = ["Genre", "ID"]
df2 = pd.DataFrame(rows, columns=labels)
display(df2)
conn.commit()

Unnamed: 0,Genre,ID
0,Adventure,tt0113189
1,Drama,tt0113627
2,Crime,tt0114369
3,Comedy,tt0114709
4,Drama,tt0114814


In [28]:
# Fetch every Movie_Links row and show it as a DataFrame with
# human-readable column labels (given in the table's column order).
select_m = """
            SELECT * FROM Movie_Links
            """
cursor.execute(select_m)
rows = cursor.fetchall()
labels = ["MovieID", "TMDB ID", "IMDB ID"]
df2 = pd.DataFrame(rows, columns=labels)
display(df2)
conn.commit()

Unnamed: 0,MovieID,TMDB ID,IMDB ID
0,1,862,tt0114709
1,10,710,tt0113189
2,25,451,tt0113627
3,47,807,tt0114369
4,50,629,tt0114814


In [29]:
# Fetch every Movie_Ratings row and show it as a DataFrame with
# human-readable column labels (given in the table's column order).
select_m = """
            SELECT * FROM Movie_Ratings
            """
cursor.execute(select_m)
rows = cursor.fetchall()
labels = ["Timestamp", "Rating", "User ID", "Receipt", "Movie ID"]
df2 = pd.DataFrame(rows, columns=labels)
display(df2)
conn.commit()

Unnamed: 0,Timestamp,Rating,User ID,Receipt,Movie ID
0,1425941529,6.5,111,1,1
1,1425942435,5.5,222,2,10
2,1425941300,6.0,333,3,25
3,1425942007,8.0,444,4,47
4,1425942139,9.5,555,5,50


In [30]:
# Fetch every Rates row and show it as a DataFrame with human-readable
# column labels (given in the table's column order).
select_m = """
            SELECT * FROM Rates
            """
cursor.execute(select_m)
rows = cursor.fetchall()
labels = ["Receipt", "First Name", "Last Name", "User Name"]
df2 = pd.DataFrame(rows, columns=labels)
display(df2)
conn.commit()

Unnamed: 0,Receipt,First Name,Last Name,User Name
0,1,Joseph,Chang,joseph123
1,4,Justin,Wang,justinw002
2,5,Ron,Mak,ronmak145
3,3,Satyaprakash,Mishra,sp17mishra
4,2,Shrey,Jain,shrey01
