In [35]:
import csv
import os
import pandas as pd
from configparser import ConfigParser
from mysql.connector import MySQLConnection, Error

# Utils Functions

In [36]:
def read_config(config_file = 'config.ini', section = 'mysql'):
    """
    Read the configuration file config_file with the given section.
    If successful, return the configuration as a dictionary,
    else raise an exception.
    """
    parser = ConfigParser()
    
    # Does the configuration file exist?
    if os.path.isfile(config_file):
        parser.read(config_file)
    else:
        raise Exception(f"Configuration file '{config_file}' "
                        "doesn't exist.")
    
    config = {}
    
    if parser.has_section(section):
        # Parse the configuration file.
        items = parser.items(section)
        
        # Construct the parameter dictionary.
        for item in items:
            config[item[0]] = item[1]
            
    else:
        raise Exception(f'Section [{section}] missing ' + \
                        f'in config file {config_file}')
    
    return config

In [37]:
def make_connection(config_file = 'config.ini', section = 'mysql'):
    """
    Make a database connection with the configuration file config_file
    with the given section. If successful, return the connection,
    else raise an exception.
    """
    try:
        db_config = read_config(config_file, section)
        conn = MySQLConnection(**db_config)

        if conn.is_connected():
            return conn

    except Error as e:
        raise Exception(f'Connection failed: {e}')


In [38]:
def dataframe_query(conn, sql):
    """
    Use the database connection conn to execute
    the SQL code. Return the resulting row count
    and the rows as a dataframe or (0, None) 
    if there were no rows. If the query failed,
    raise an exception.
    """
    try:
        cursor = conn.cursor()
        cursor.execute(sql)

        rows  = cursor.fetchall()
        count = cursor.rowcount

        if count > 0:

            # Get the names of the columns.
            columns = cursor.description
            column_names = [column_info[0] 
                            for column_info in columns]

            # Return the query results in a dataframe.
            df = DataFrame(rows)
            df.columns = column_names
            cursor.close()
            return count, df

        else:
            cursor.close()
            return 0, None
        
    except Error as e:
        raise Exception(f'Query failed: {e}')

# Making Connection

In [41]:
conn = make_connection(config_file='movies.ini')
cursor = conn.cursor()

# Creating Tables

#### We will use three of the csv files and convert to tables. Those three are links_small, ratings, and movies_metadata.

In [42]:
cursor.execute('DROP TABLE IF EXISTS links_small')

sql = ( """
        CREATE TABLE links_small
        (
            movieId    int,
            imdbId   int,
            tmdbId    int,
            PRIMARY KEY(movieId)
        )
        """
      )

cursor.execute(sql)

The Primary key for this table is movieId column. The foreign key is the ...

In [43]:
# added an AUTO_INCREMENT column (also known as index) to be set as a primary key since userId has duplicates

cursor.execute('DROP TABLE IF EXISTS ratings')

sql = ( """
        CREATE TABLE ratings
        (
            ID_column int AUTO_INCREMENT,
            userId    int,
            movieId   int,
            rating    double,
            timestamp double,
            PRIMARY KEY(ID_column)
        )
        """
      )

cursor.execute(sql)

The Primary key for this table is ID_Column, which is also the index. The foreign key is the ....

In [44]:
# Could use.
cursor.execute('DROP TABLE IF EXISTS movies_metadata')
sql = ( """
        CREATE TABLE movies_metadata
        (
            budget int,
            id int,
            imdb_id varchar(255),
            original_language char(255),
            original_title varchar(255),
            PRIMARY KEY(id)
        )
        """
      )
cursor.execute(sql)

The Primary key for this table is the id column. The foreign key is the ...

# Insert Commands

In [45]:
sql_links_small = ("""
                   INSERT INTO links_small
                   VALUES (%s,%s,%s)
                   """
                  )
sql_ratings = ("""
                INSERT INTO ratings (userId, movieId, rating, timestamp)
                VALUES (%s,%s,%s,%s)
                """
              )
sql_metadata = ("""
                INSERT INTO movies_metadata
                VALUES (%s,%s,%s,%s,%s)
                """
               )

# Inserting Data into the Tables

In [48]:
count = 0
first = True
with open('links_small.csv', newline='') as csv_file:
    data = csv.reader(csv_file,delimiter = ',', quotechar='"')
    for row in data:
        if not first:
            count+=1
            if count > 100:
                break
            cursor.execute(sql_links_small, row)
        first = False
conn.commit()

In [49]:
count = 0
first = True
with open('ratings.csv', newline='') as csv_file:
    data = csv.reader(csv_file,delimiter = ',', quotechar='"')
    for row in data:
        if not first:
            count+=1
            if count > 100:
                break
            cursor.execute(sql_ratings, row)
        first = False
conn.commit()

In [50]:
# Only want speciic columns?; Could -perform ETL and convert the language to its full name.
count = 0
first = True
testLis = [2, 5,6,7,8]
with open('movies_metadata.csv', newline='', encoding="utf-8") as csv_file:
    data = csv.reader(csv_file,delimiter = ',', quotechar='"')
    for row in data:
        if not first:
            test = [row[i] for i in testLis]
            count+=1
            if count > 100:
                break
            cursor.execute(sql_metadata, test)
        first = False
conn.commit()

# Display Tables in a DataFrame

In [70]:
select_links_small = ("""
                   SELECT * FROM links_small LIMIT 25
                   """
                  )
cursor.execute(select_links_small)
df1 = pd.DataFrame(cursor.fetchall(), columns = ["movieId","imdbId","tmdbId"])
display(df1)
conn.commit()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862
1,2,113497,8844
2,3,113228,15602
3,4,114885,31357
4,5,113041,11862
5,6,113277,949
6,7,114319,11860
7,8,112302,45325
8,9,114576,9091
9,10,113189,710


In [69]:
select_ratings = ("""
                   SELECT * FROM ratings LIMIT 25
                   """
                  )
cursor.execute(select_ratings)
df2 = pd.DataFrame(cursor.fetchall(), columns = ["Index", "userId", "movieId", "rating", "timestamp"])
display(df2)
conn.commit()

Unnamed: 0,Index,userId,movieId,rating,timestamp
0,1,1,110,1.0,1425942000.0
1,2,1,147,4.5,1425942000.0
2,3,1,858,5.0,1425942000.0
3,4,1,1221,5.0,1425942000.0
4,5,1,1246,5.0,1425942000.0
5,6,1,1968,4.0,1425942000.0
6,7,1,2762,4.5,1425941000.0
7,8,1,2918,5.0,1425942000.0
8,9,1,2959,4.0,1425942000.0
9,10,1,4226,4.0,1425942000.0


In [68]:
select_metadata = ("""
                   SELECT * FROM movies_metadata LIMIT 25
                   """
                  )
cursor.execute(select_metadata)
df3 = pd.DataFrame(cursor.fetchall(), columns = ["Budget", "id", "imdb_id", "original_language", "original_title"])
display(df3)
conn.commit()

Unnamed: 0,Budget,id,imdb_id,original_language,original_title
0,4000000,5,tt0113101,en,Four Rooms
1,29500000,63,tt0114746,en,Twelve Monkeys
2,8000000,400,tt0114660,en,Things to Do in Denver When You're Dead
3,3000000,406,tt0113247,fr,La Haine
4,3600000,451,tt0113627,en,Leaving Las Vegas
5,52000000,524,tt0112641,en,Casino
6,20000000,577,tt0114681,en,To Die For
7,6000000,629,tt0114814,en,The Usual Suspects
8,11000000,687,tt0112818,en,Dead Man Walking
9,58000000,710,tt0113189,en,GoldenEye


# One to One

In [66]:
# query to show the id and original title 
oto1 = (
        """
        SELECT m.id, m.original_title from movies_metadata as m, links_small as l
        WHERE m.id = l.movieId
        
        """
)
cursor.execute(oto1)
results = cursor.fetchall()
print(results)
conn.commit()

[(5, 'Four Rooms'), (63, 'Twelve Monkeys')]


# One to Many

In [67]:
# one to many
# This one might not work within the first 100 in the table.
otm1 = ("""
        SELECT l.movieId, r.rating from links_small as l, ratings as r
        WHERE l.movieId = r.movieId
        """)
cursor.execute(otm1)
results = cursor.fetchall()
print(results)
conn.commit()

[(5, 3.0), (25, 3.0), (32, 2.0), (58, 3.0), (64, 4.0), (79, 4.0)]


# Many to Many

# Closing Connection

In [31]:
cursor.close()
conn.close()

In [76]:

df_test = pd.read_csv('movies_metadata.csv')


df_test

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45461,False,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 10751, 'n...",http://www.imdb.com/title/tt6209470/,439050,tt6209470,fa,رگ خواب,Rising and falling between a man and woman.,...,,0.0,90.0,"[{'iso_639_1': 'fa', 'name': 'فارسی'}]",Released,Rising and falling between a man and woman,Subdue,False,4.0,1.0
45462,False,,0,"[{'id': 18, 'name': 'Drama'}]",,111109,tt2028550,tl,Siglo ng Pagluluwal,An artist struggles to finish his work while a...,...,2011-11-17,0.0,360.0,"[{'iso_639_1': 'tl', 'name': ''}]",Released,,Century of Birthing,False,9.0,3.0
45463,False,,0,"[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...",,67758,tt0303758,en,Betrayal,"When one of her hits goes wrong, a professiona...",...,2003-08-01,0.0,90.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,A deadly game of wits.,Betrayal,False,3.8,6.0
45464,False,,0,[],,227506,tt0008536,en,Satana likuyushchiy,"In a small town live two brothers, one a minis...",...,1917-10-21,0.0,87.0,[],Released,,Satan Triumphant,False,0.0,0.0
