# Connecting to a MySQL server and querying the data

## Import and Settings

In [1]:
# Imports
import os

# Ignore warnings
# NOTE: filterwarnings("ignore") is called without a category or module
# filter, so every warning raised after this point is suppressed until the
# warning filters are changed again.
import warnings
warnings.filterwarnings("ignore")

# Function to check whether a db-connection is still open
def check_conn(conn):
    """Tell whether ``conn`` is still able to hand out a cursor.

    The connection is probed by requesting a cursor from it; the cursor
    itself is not used.

    Parameters
    ----------
    conn : object
        Any object exposing a ``cursor()`` method (e.g. a DB-API connection).

    Returns
    -------
    bool
        True if ``conn.cursor()`` succeeded, False if it raised any
        ``Exception``.
    """
    try:
        conn.cursor()
    except Exception:
        # Any failure to create a cursor is treated as "connection not usable"
        return False
    else:
        return True

# Show current working directory
# (the CSV file is later read via a path relative to this directory)
print(os.getcwd())

e:\ZHAW\data_analytics\data_analytics_project\db


## Connect to database

In [2]:
# Imports
import getpass
import os

import pandas as pd
import mysql.connector

# Database credentials are taken from the environment so that no password is
# stored in the notebook. Set MYSQL_USER / MYSQL_PASSWORD before starting
# Jupyter; if MYSQL_PASSWORD is missing, the password is requested interactively.
db_user = os.environ.get('MYSQL_USER', 'root')
db_password = os.environ.get('MYSQL_PASSWORD')
if db_password is None:
    db_password = getpass.getpass('MySQL password: ')

# Connect to database
conn = mysql.connector.connect(
    user = db_user,
    password = db_password,
    database = 'movie_database')
cursor = conn.cursor()

# Read data from file (semicolon-separated, path relative to the working directory)
df = pd.read_csv('../IMDB_Merged_Movies.csv', sep=';', encoding='utf-8')

# Rename the runtime column so its name contains no parentheses.
# The rows are later inserted by position, so the DataFrame column names do
# not have to match the SQL column names.
df = df.rename(columns={'runtime_(mins)': 'runtime'})

## Write data to table in db

In [3]:
# Recreate the table from scratch: dropping it first means that re-running
# this cell does not append the same rows a second time
cursor.execute('''DROP TABLE IF EXISTS movies_table''')
cursor.execute('''CREATE TABLE IF NOT EXISTS movies_table (
    title VARCHAR(255),
    genres VARCHAR(255),
    runtime_mins INT,
    age_rating INT,
    director VARCHAR(255),
    movie_rating FLOAT,
    votes_count INT,
    worldwide_lifetime_gross BIGINT,
    domestic_lifetime_gross BIGINT,
    foreign_lifetime_gross FLOAT,
    release_year INT
)''')

# Insert data into the SQL table.
# Values are bound by position, so the DataFrame must provide its 11 columns
# in the same order as the table definition above.
insert_query = '''
INSERT INTO movies_table
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
'''

# Build plain tuples for the driver; missing values (NaN/None) are sent as
# None so that they are stored as SQL NULL instead of failing the insert
rows = [
    tuple(None if pd.isna(value) else value for value in record)
    for record in df.itertuples(index=False, name=None)
]

# One executemany call instead of one execute call per row
if rows:
    cursor.executemany(insert_query, rows)

# Commit the transaction
conn.commit()

## Query the table

In [4]:
# Query the SQL-table: first five movies whose genre list contains 'Crime'
cursor.execute('''SELECT * from movies_table where genres like '%Crime%' limit 5''')

results = cursor.fetchall()

# Convert the result into a pandas dataframe.
# The column names are passed to the constructor (instead of being assigned
# afterwards) so that an empty result still yields a frame with the expected
# columns rather than raising a length-mismatch error.
df_mysql = pd.DataFrame(results, columns=df.columns)

# Print the result
df_mysql

Unnamed: 0,title,genres,runtime,age_rating,director,movie_rating,votes_count,worldwide_lifetime_gross,domestic_lifetime_gross,foreign_lifetime_gross,release_year
0,the dark knight,"Action, Crime, Drama, Thriller",152,16,Christopher Nolan,9.0,2950798,1009053678,534987076,0.47,2008
1,pulp fiction,"Crime, Drama",154,16,Quentin Tarantino,8.9,2279743,213928762,107928762,0.496,1994
2,joker,"Crime, Drama, Thriller",122,16,Todd Phillips,8.4,1563871,1078958629,335477657,0.689,2019
3,the silence of the lambs,"Crime, Drama, Thriller",118,16,Jonathan Demme,8.6,1594731,272742922,130742922,0.521,1991
4,the green mile,"Crime, Drama, Fantasy, Mystery",189,12,Frank Darabont,8.6,1448376,286801374,136801374,0.523,1999


## Close db connection (if open)

In [5]:
# Close db connection (if open)
try:
    if check_conn(conn):
        conn.close()
except Exception as ex:
    # Closing is best effort, but a failure is reported instead of being
    # silently discarded; KeyboardInterrupt/SystemExit are no longer swallowed
    print('Could not close the db connection:', ex)

# Status (True = open, False = closed)
print(check_conn(conn))

False


### Jupyter notebook --footer info-- (please always provide this at the end of each submitted notebook)

In [6]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Footer: operating system, current timestamp and Python version,
# framed by two separator lines
rule = '-----------------------------------'
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

print(rule)
print(os.name.upper())
print(platform.system(), '|', platform.release())
print('Datetime:', timestamp)
print('Python Version:', python_version())
print(rule)

-----------------------------------
NT
Windows | 10
Datetime: 2025-01-10 12:06:24
Python Version: 3.12.4
-----------------------------------
