In [406]:
# Import dependencies
import pandas as pd
import numpy as np
import json
import re
import pymysql
from sqlalchemy import create_engine
from pprint import pprint

import warnings
warnings.filterwarnings("ignore")

In [405]:
!pip install pymysql

Collecting pymysql
  Downloading PyMySQL-0.10.1-py2.py3-none-any.whl (47 kB)
Installing collected packages: pymysql
Successfully installed pymysql-0.10.1


# 1. Extract

In [340]:
# Reading the movies_ratings csv file
df_ratings = pd.read_csv(r'Resources/movies_ratings.csv')
df_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [341]:
# Reading the movies_metadata csv file
df_movies = pd.read_csv(r'Resources\movies_metadata.csv')
df_movies.sample(2)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
3313,False,"{'id': 14377, 'name': 'Death Wish Collection',...",0,"[{'id': 53, 'name': 'Thriller'}, {'id': 28, 'n...",,26263,tt0092857,en,Death Wish 4: The Crackdown,After the death of his girlfriend's daughter f...,...,1987-06-11,0.0,99.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,This time it's war!,Death Wish 4: The Crackdown,False,5.6,51.0
15147,False,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 53, 'name...",,41597,tt0032842,en,Night Train to Munich,"When the Germans march into Prague, armour-pla...",...,1940-08-31,0.0,95.0,"[{'iso_639_1': 'de', 'name': 'Deutsch'}, {'iso...",Released,,Night Train to Munich,False,7.1,21.0


In [342]:
# Reading the wikipedia-movies json file
with open(r'Resources/wikipedia-movies.json',mode='r') as file:
    movies_wiki_raw = json.load(file)
len(movies_wiki_raw)

7311

# 2. Tranform

In [343]:
df_ratings.shape

(100004, 4)

In [344]:
df_ratings.isnull().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [345]:
df_ratings['timestamp'] = pd.to_datetime(df_ratings['timestamp'], unit = 's')

In [346]:
df_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,2009-12-14 02:52:24
1,1,1029,3.0,2009-12-14 02:52:59
2,1,1061,3.0,2009-12-14 02:53:02
3,1,1129,2.0,2009-12-14 02:53:05
4,1,1172,4.0,2009-12-14 02:53:25


In [347]:
df_movies.sample(2)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
25891,False,"{'id': 326795, 'name': 'Freche Mädchen Filmrei...",0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",http://www.frechemaedchen.film.de/,11752,tt1224151,de,Freche Mädchen,Three teen-aged girlfriends navigate multiple ...,...,2008-07-17,0.0,97.0,"[{'iso_639_1': 'de', 'name': 'Deutsch'}, {'iso...",Released,,Cheeky Girls,False,5.5,8.0
7319,False,,0,"[{'id': 99, 'name': 'Documentary'}]",,238952,tt0051561,fr,Du côté de la côte,"Tongue-in-cheek look at the French Riviera, es...",...,1958-01-01,0.0,25.0,"[{'iso_639_1': 'fr', 'name': 'Français'}]",Released,A humorous travelogue of the French Riviera.,Along the Coast,False,9.0,4.0


In [348]:
df_movies.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count'],
      dtype='object')

In [349]:
df_movies.shape

(45466, 24)

In [350]:
# Checking null values percentage
percent = df_movies.isnull().sum()*100/len(df_movies)
total = df_movies.isnull().sum()
movies_null = pd.DataFrame({'Total':total, 'Percent':percent})
movies_null

Unnamed: 0,Total,Percent
adult,0,0.0
belongs_to_collection,40972,90.115691
budget,0,0.0
genres,0,0.0
homepage,37684,82.883913
id,0,0.0
imdb_id,17,0.037391
original_language,11,0.024194
original_title,0,0.0
overview,954,2.098271


In [351]:
# Keeping columns with null values less than 10% only
drop_list = movies_null[movies_null['Percent']>10].index.tolist()
df_movies.drop(drop_list, axis = 1, inplace = True)
df_movies.columns

Index(['adult', 'budget', 'genres', 'id', 'imdb_id', 'original_language',
       'original_title', 'overview', 'popularity', 'poster_path',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'title', 'video',
       'vote_average', 'vote_count'],
      dtype='object')

In [352]:
df_movies.dtypes

adult                    object
budget                   object
genres                   object
id                       object
imdb_id                  object
original_language        object
original_title           object
overview                 object
popularity               object
poster_path              object
production_companies     object
production_countries     object
release_date             object
revenue                 float64
runtime                 float64
spoken_languages         object
status                   object
title                    object
video                    object
vote_average            float64
vote_count              float64
dtype: object

In [353]:
df_movies.adult.value_counts()

False                                                                                                                             45454
True                                                                                                                                  9
 Avalanche Sharks tells the story of a bikini contest that turns into a horrifying affair when it is hit by a shark avalanche.        1
 - Written by Ørnås                                                                                                                   1
 Rune Balot goes to a casino connected to the October corporation to try to wrap up her case once and for all.                        1
Name: adult, dtype: int64

In [354]:
# Keeping rows where adult column is false and dropping adult column
df_movies = df_movies[df_movies.adult == 'False'].drop('adult', axis = 'columns')

In [355]:
df_movies.video.value_counts()

False    45358
True        93
Name: video, dtype: int64

In [356]:
# Keeping rows with video is false
df_movies.video = df_movies.video == 'True'
df_movies.video.value_counts()

False    45454
Name: video, dtype: int64

In [357]:
# Converting columns to numeric types
df_movies.budget = df_movies.budget.astype(int)
df_movies.id = pd.to_numeric(df_movies.id, errors = 'raise')
df_movies.popularity = pd.to_numeric(df_movies.popularity, errors = 'raise')

In [358]:
# Converting column to datetime datatype
df_movies['release_date'] = pd.to_datetime(df_movies['release_date'])

In [359]:
df_movies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 45454 entries, 0 to 45465
Data columns (total 20 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   budget                45454 non-null  int32         
 1   genres                45454 non-null  object        
 2   id                    45454 non-null  int64         
 3   imdb_id               45437 non-null  object        
 4   original_language     45443 non-null  object        
 5   original_title        45454 non-null  object        
 6   overview              44500 non-null  object        
 7   popularity            45451 non-null  float64       
 8   poster_path           45068 non-null  object        
 9   production_companies  45451 non-null  object        
 10  production_countries  45451 non-null  object        
 11  release_date          45368 non-null  datetime64[ns]
 12  revenue               45451 non-null  float64       
 13  runtime         

In [360]:
movies_wiki_raw[0]

{'url': 'https://en.wikipedia.org/wiki/The_Adventures_of_Ford_Fairlane',
 'year': 1990,
 'imdb_link': 'https://www.imdb.com/title/tt0098987/',
 'title': 'The Adventures of Ford Fairlane',
 'Directed by': 'Renny Harlin',
 'Produced by': ['Steve Perry', 'Joel Silver'],
 'Screenplay by': ['David Arnott', 'James Cappe', 'Daniel Waters'],
 'Story by': ['David Arnott', 'James Cappe'],
 'Based on': ['Characters', 'by Rex Weiner'],
 'Starring': ['Andrew Dice Clay',
  'Wayne Newton',
  'Priscilla Presley',
  'Lauren Holly',
  'Morris Day',
  'Robert Englund',
  "Ed O'Neill"],
 'Narrated by': 'Andrew "Dice" Clay',
 'Music by': ['Cliff Eidelman', 'Yello'],
 'Cinematography': 'Oliver Wood',
 'Edited by': 'Michael Tronick',
 'Productioncompany ': 'Silver Pictures',
 'Distributed by': '20th Century Fox',
 'Release date': ['July 11, 1990', '(', '1990-07-11', ')'],
 'Running time': '102 minutes',
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$20 million',
 'Box office': '$21.4 milli

In [361]:

def clean_movie(movie):
    """This function will take the each movie will combine alternate titles and change column names."""
    movie = dict(movie)
    alt_titles = {}
#     Combine alternate titles into one list alt_titles    
    for key in ['Also known as','Arabic','Cantonese','Chinese','French',
                    'Hangul','Hebrew','Hepburn','Japanese','Literally',
                    'Mandarin','McCune-Reischauer','Original title','Polish',
                    'Revised Romanization','Romanized','Russian',
                    'Simplified','Traditional','Yiddish']:
        if key in movie:
            alt_titles[key] = movie[key]
            movie.pop(key)
    if len(alt_titles) > 0:
        movie['alt_titles'] = alt_titles

    def change_column_name(old_name, new_name):
        """This function will replace the values of old name in new name and pop the old name from list"""
        if old_name in movie:
            movie[new_name] = movie.pop(old_name)

    change_column_name('Adaptation by', 'Writer(s)')
    change_column_name('Country of origin', 'Country')
    change_column_name('Directed by', 'Director')
    change_column_name('Distributed by', 'Distributor')
    change_column_name('Edited by', 'Editor(s)')
    change_column_name('Length', 'Running time')
    change_column_name('Original release', 'Release date')
    change_column_name('Music by', 'Composer(s)')
    change_column_name('Produced by', 'Producer(s)')
    change_column_name('Producer', 'Producer(s)')
    change_column_name('Productioncompanies ', 'Production company(s)')
    change_column_name('Productioncompany ', 'Production company(s)')
    change_column_name('Released', 'Release Date')
    change_column_name('Release Date', 'Release date')
    change_column_name('Screen story by', 'Writer(s)')
    change_column_name('Screenplay by', 'Writer(s)')
    change_column_name('Story by', 'Writer(s)')
    change_column_name('Theme music composer', 'Composer(s)')
    change_column_name('Written by', 'Writer(s)')

    return movie

In [362]:

def clean_movies_json(movies_wiki_json):
    # Using list comprehension to filter out Tv shows from Movies
    movies_wiki = [movie for movie in movies_wiki_raw
                   if ('Director' in movie or 'Directed by') in movie 
                   and 'imdb_link' in movie   
                   and 'No. of episodes' not in movie]
    
    # Using a list comprehension to clean each movie with function clean_movie
    clean_movies = [clean_movie(movie) for movie in movies_wiki]
    
    movies_wiki_df = pd.DataFrame(clean_movies)
    
    # Using regexp to extract id from link and drop duplicate rows from id
    movies_wiki_df['imdb_id'] = movies_wiki_df['imdb_link'].str.extract(r'(tt\d{7})')
    movies_wiki_df.drop_duplicates('imdb_id', inplace = True)
    
    # Using list comprehension to keep only columns with null values less than 10%
    wiki_columns_keeping = [column for column in movies_wiki_df.columns 
                            if movies_wiki_df[column].isnull().sum() < len(movies_wiki_df) * 0.9]
    movies_wiki_df = movies_wiki_df[wiki_columns_keeping]
    
    box_office = movies_wiki_df['Box office'].dropna()
    
    # Convert box_office to string values using lambda function
    box_office = box_office.apply(lambda x: ' '.join(x) if type(x) ==  list else x)
    
    # Using regular expressions to match two types of patterns in box office data
    type_one = r'\$\d+\.?\d*\s*[mb]illion'
    type_two = r'\$\d{1,3}(?:,\d{3})+'
    
    def parse_dollars(s):
        """ This function will parse the box_office column using regular expressions."""
        if type(s) != str:
            return np.nan
        # Matches only the input of type $###.# million
        if re.match(r'\$\s*\d+\.?\d*\s*milli?on', s, flags=re.IGNORECASE):
            s = re.sub('\$|\s|[a-zA-Z]', '', s)  # Removes the $ sign and ' million'
            value = float(s) * 10 ** 6  # Convert string to float and multiply by a million
            return value
        # Matches only the input of type $###.# billion
        elif re.match(r'\$\s*\d+\.?\d*\s*billi?on', s, flags=re.IGNORECASE):
            s = re.sub('\$|\s|[a-zA-Z]', '', s)  # Removes the $ sign and ' billion'
            value = float(s) * 10 ** 9  # Convert string to float and multiply by a billion
            return value
        # Matches only the input of type $###.# billion
        elif re.match(r'\$\s*\d{1,3}(?:[,\.]\d{3})+(?!\s[mb]illion)', s, flags=re.IGNORECASE):
            s = re.sub('\$|,', '', s)  # Removes $ dollar sign and commas
            value = float(s)  # Convert string to float
            return value
        else:
            return np.nan
    # Cleaning the box_office column using the parse_dollar function
    movies_wiki_df['box_office'] = box_office.str.extract(f'({type_one}|{type_two})', flags=re.IGNORECASE)[0].apply(parse_dollars)
    movies_wiki_df.drop("Box office", axis=1, inplace = True)
    
    # Cleaning the release_date column using regular expressions
    release_date = movies_wiki_df['Release date'].dropna()
    release_date = release_date.apply(lambda x: ' '.join(x) if type(x) == list else x)
    date_type_one = r'(?:January|February|March|April|May|June|July|August|September|October|November|December)\s[123]\d,\s\d{4}'
    date_type_two = r'\d{4}.[01]\d.[123]\d'
    date_type_three = r'(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{4}'
    date_type_four = r'\d{4}'
    movies_wiki_df['release_date'] = pd.to_datetime(release_date.str.extract(f'({date_type_one}|{date_type_two}|{date_type_three}|{date_type_four})')[0], infer_datetime_format=True)
    movies_wiki_df.drop('Release date', axis=1, inplace= True)
    
    # Cleaning the budget column using regular expressions and parse_dollars funtion with patterns used in box_office cleaning
    budget = movies_wiki_df['Budget'].dropna()
    budget = budget.apply(lambda x: ' '.join(x) if type(x) == list else x)
    budget = budget.str.replace(r'\$.*[-—–](?![a-z])', '$', regex=True)
    movies_wiki_df['budget'] = budget.str.extract(f'({type_one}|{type_two})', flags=re.IGNORECASE)[0].apply(parse_dollars)
    movies_wiki_df.drop('Budget', axis=1, inplace=True)
    
    # Cleaning running time column using regular expresssions
    running_time = movies_wiki_df['Running time'].dropna()
    running_time = running_time.apply(lambda x: ' '.join(x) if type(x) == list else x)
    running_time_extract = running_time.str.extract(r'(\d+)\s*ho?u?r?s?\s*(\d*)|(\d+)|s*m')
    running_time_extract = running_time_extract.apply(lambda x: pd.to_numeric(x, errors='coerce')).fillna(0)
    movies_wiki_df['running_time'] = running_time_extract.apply(lambda x: x[0]*60 + x[1] if x[2] == 0 else x[2], axis=1)
    movies_wiki_df.drop('Running time', axis=1, inplace = True)
    
    return movies_wiki_df

In [363]:
movies_wiki_df = clean_movies_json(movies_wiki_raw)

In [364]:
movies_wiki_df.columns

Index(['url', 'year', 'imdb_link', 'title', 'Based on', 'Starring',
       'Cinematography', 'Country', 'Language', 'Director', 'Distributor',
       'Editor(s)', 'Composer(s)', 'Producer(s)', 'Production company(s)',
       'Writer(s)', 'imdb_id', 'box_office', 'release_date', 'budget',
       'running_time'],
      dtype='object')

In [365]:
df_movies = pd.merge(df_movies, movies_wiki_df, on='imdb_id', suffixes=['_wiki','_movies.met'])
df_movies.head()

Unnamed: 0,budget_wiki,genres,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,...,Distributor,Editor(s),Composer(s),Producer(s),Production company(s),Writer(s),box_office,release_date_movies.met,budget_movies.met,running_time
0,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]",...,Buena Vista Pictures Distribution,"[Robert Gordon, Lee Unkrich]",Randy Newman,"[Ralph Guggenheim, Bonnie Arnold]","[Walt Disney Pictures, Pixar Animation Studios]","[John Lasseter, Pete Docter, Andrew Stanton, J...",373600000.0,1995-11-19,30000000.0,81.0
1,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...",...,TriStar Pictures,Robert Dalva,James Horner,"[Scott Kroopf, William Teitler]","[Interscope Communications, Teitler Film]","[Greg Taylor, Jim Strain, Chris Van Allsburg]",262800000.0,1995-12-15,65000000.0,104.0
2,0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...",...,Warner Bros. Pictures,"[Billy Weber, Seth Flaum, Maryann Brandon]",Alan Silvestri,"[John Davis, Richard C. Berman]",,Mark Steven Johnson,71500000.0,1995-12-22,25000000.0,101.0
3,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,...,20th Century Fox,Richard Chew,"Kenneth ""Babyface"" Edmonds","[Terry McMillan, Ronald Bass, Deborah Schindle...",,"[Terry McMillan, Ronald Bass]",82000000.0,1995-12-22,16000000.0,124.0
4,0,"[{'id': 35, 'name': 'Comedy'}]",11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...",...,Buena Vista Pictures,"[Adam Bernardi, Stephen A. Rotter]",Alan Silvestri,Nancy Meyers,Touchstone Pictures,"[Nancy Meyers, Charles Shyer]",76600000.0,1995-01-01,30000000.0,106.0


In [366]:
df_movies = df_movies.drop(df_movies[(df_movies['release_date_wiki'] > '1996-01-01') & (df_movies['release_date_movies.met'] < '1965-01-01')].index)

In [367]:
df_movies['Language'].apply(lambda x: tuple(x) if type(x) == list else x).value_counts(dropna=False)

English                                  5480
NaN                                       134
(English, Spanish)                         68
(English, French)                          35
(English, Japanese)                        25
                                         ... 
(English, Jamaican Patois, French)          1
(English, Italian, Swedish)                 1
(English, Yiddish, Hebrew)                  1
(English, Mende, Spanish, Portuguese)       1
(English, Italian, French)                  1
Name: Language, Length: 198, dtype: int64

In [368]:
df_movies.drop(['title_wiki','release_date_wiki','Language','Production company(s)'], inplace=True, axis = 1)

In [369]:
def merge_common_data(df, movies_met_col, wiki_col):
    """This function merges the data from two common columns
    and drops the redundant column."""
    df[movies_met_col] = df.apply(lambda x: x[wiki_col] if x[movies_met_col] == 0 else x[movies_met_col], axis=1)
    df.drop(wiki_col, inplace=True, axis = 1)

In [370]:
merge_common_data(df_movies, 'runtime', 'running_time')
merge_common_data(df_movies, 'budget_movies.met', 'budget_wiki')
merge_common_data(df_movies, 'revenue', 'box_office')

In [380]:
df_movies.status.value_counts()

/kHaBqrrozaG7rj6GJg3sUCiM29B.jpg    2
/xGhDPrBz9mJN8CsIjA23jQSd3sc.jpg    2
/o3Im9nPLAgtlw1j2LtpMebAotSe.jpg    2
/qHznx2AVQKgS7W3ykLgd0x9l1TE.jpg    1
/3dvaBYcT76fH81QesDc5S7l1yan.jpg    1
                                   ..
/8BSVXtlIXveRrOT1lADaycs3KJW.jpg    1
/hsuD7X5sf430D5A1iSmDtRTA84p.jpg    1
/twn8J0uDkZXmSx702nkGJNVX23n.jpg    1
/bmRrkQPeCLuiiB6LmqiCreTAZUm.jpg    1
/3RcoBGv5DwDKP01GonTufZ8tWCn.jpg    1
Name: poster_path, Length: 6047, dtype: int64

In [374]:
df_movies = df_movies.loc[:, ['id', 'imdb_id', 'original_language', 'original_title',
       'overview', 'popularity', 'production_companies',
       'production_countries', 'revenue', 'runtime', 'spoken_languages',
        'vote_average', 'vote_count', 'url', 'year',
       'imdb_link', 'title_movies.met', 'Based on', 'Starring',
       'Cinematography', 'Country', 'Director', 'Distributor', 'Editor(s)',
       'Composer(s)', 'Producer(s)', 'Writer(s)', 'release_date_movies.met',
       'budget_movies.met']]

In [375]:
df_movies.rename({'title_movies.met':'title',
                  'id':'movie_id',
                  'url':'wikipedia_url',
                  'budget_movies.met':'budget',
                  'release_date_movies.met':'release_date'},
                   axis=1, inplace=True)

In [376]:
rating_counts = (df_ratings.groupby(['movieId','rating'], as_index=False).count() 
                .rename({'userId':'count'}, axis=1) 
                .pivot(index='movieId',columns='rating', values='count'))
rating_counts.columns = ['rating_' + str(col) for col in rating_counts.columns] 

In [377]:
df_movies = pd.merge(df_movies, rating_counts, left_on='movie_id', right_index=True, how='left')
df_movies[rating_counts.columns] = df_movies[rating_counts.columns].fillna(0)

In [379]:
df_movies.sample(2)

Unnamed: 0,movie_id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,revenue,...,rating_0.5,rating_1.0,rating_1.5,rating_2.0,rating_2.5,rating_3.0,rating_3.5,rating_4.0,rating_4.5,rating_5.0
74,40154,tt0113403,en,A Midwinter's Tale,Out of work actor Joe volunteers to help try a...,0.513391,/6u2N0gn6uUWF4cSdkXmVB8E8q2X.jpg,"[{'name': 'Castle Rock Entertainment', 'id': 9...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]",469571.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4423,62522,tt1652287,en,35 and Ticking,"Centers around the lives of Victoria, Zenobia,...",1.658429,/c6dYyuaIDtgJpcx0XF2wxn1kUBK.jpg,[],[],113794.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# 3. Load

In [414]:

# except Error as e:
#     print(e)

In [408]:
try:
    engine = create_engine("mysql+pymysql://{user}:{pw}@localhost/{db}"
                       .format(user="root",
                               pw="password",
                               db="movies_db"))
    print('Connected to MySQL Database')
except:
    print('Connection Not Established!')

Connected to MySQL Database


In [409]:
cur = conn.cursor()

In [410]:
cur.execute("CREATE DATABASE IF NOT EXISTS movies_db;")

In [411]:
cur.execute("SHOW DATABASES;")
pprint(cur.fetchall())

[('employees_db',),
 ('information_schema',),
 ('movies_db',),
 ('mydb',),
 ('mysql',),
 ('performance_schema',),
 ('sql_hr',),
 ('sql_inventory',),
 ('sql_invoicing',),
 ('sql_store',),
 ('sys',)]


In [413]:
df_movies.to_sql(name = 'movies_data', con = engine, if_exists = 'replace', index= False)

OperationalError: (pymysql.err.OperationalError) (1241, 'Operand should contain 1 column(s)')
[SQL: INSERT INTO movies_data (movie_id, imdb_id, original_language, original_title, overview, popularity, poster_path, production_companies, production_countries, revenue, runtime, spoken_languages, vote_average, vote_count, wikipedia_url, year, imdb_link, title, `Based on`, `Starring`, `Cinematography`, `Country`, `Director`, `Distributor`, `Editor(s)`, `Composer(s)`, `Producer(s)`, `Writer(s)`, release_date, budget, `rating_0.5`, `rating_1.0`, `rating_1.5`, `rating_2.0`, `rating_2.5`, `rating_3.0`, `rating_3.5`, `rating_4.0`, `rating_4.5`, `rating_5.0`) VALUES (%(movie_id)s, %(imdb_id)s, %(original_language)s, %(original_title)s, %(overview)s, %(popularity)s, %(poster_path)s, %(production_companies)s, %(production_countries)s, %(revenue)s, %(runtime)s, %(spoken_languages)s, %(vote_average)s, %(vote_count)s, %(wikipedia_url)s, %(year)s, %(imdb_link)s, %(title)s, %(Based on)s, %(Starring)s, %(Cinematography)s, %(Country)s, %(Director)s, %(Distributor)s, %(Editor(s))s, %(Composer(s))s, %(Producer(s))s, %(Writer(s))s, %(release_date)s, %(budget)s, %(rating_0.5)s, %(rating_1.0)s, %(rating_1.5)s, %(rating_2.0)s, %(rating_2.5)s, %(rating_3.0)s, %(rating_3.5)s, %(rating_4.0)s, %(rating_4.5)s, %(rating_5.0)s)]
[parameters: ({'movie_id': 862, 'imdb_id': 'tt0114709', 'original_language': 'en', 'original_title': 'Toy Story', 'overview': "Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's h ... (5 characters truncated) ...  Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.", 'popularity': 21.946943, 'poster_path': '/rhIRbceoE9lR4veEXuwCC2wARtG.jpg', 'production_companies': "[{'name': 'Pixar Animation Studios', 'id': 3}]", 'production_countries': "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 'revenue': 373554033.0, 'runtime': 81.0, 'spoken_languages': "[{'iso_639_1': 'en', 'name': 'English'}]", 'vote_average': 7.7, 'vote_count': 5415.0, 'wikipedia_url': 'https://en.wikipedia.org/wiki/Toy_Story', 'year': 1995, 'imdb_link': 'https://www.imdb.com/title/tt0114709/', 'title': 'Toy Story', 'Based on': None, 'Starring': ['Tom Hanks', 'Tim Allen', 'Don Rickles', 'Jim Varney', 'Wallace Shawn', 'John Ratzenberger', 'Annie Potts', 'John Morris', 'Erik von Detten'], 'Cinematography': None, 'Country': 'United States', 'Director': 'John Lasseter', 'Distributor': 'Buena Vista Pictures Distribution', 'Editor(s)': ['Robert Gordon', 'Lee Unkrich'], 'Composer(s)': 'Randy Newman', 'Producer(s)': ['Ralph Guggenheim', 'Bonnie Arnold'], 'Writer(s)': ['John Lasseter', 'Pete Docter', 'Andrew Stanton', 'Joe Ranft'], 'release_date': datetime.datetime(1995, 11, 19, 0, 0), 'budget': 30000000.0, 'rating_0.5': 0.0, 'rating_1.0': 0.0, 'rating_1.5': 0.0, 'rating_2.0': 0.0, 'rating_2.5': 0.0, 'rating_3.0': 0.0, 'rating_3.5': 0.0, 'rating_4.0': 0.0, 'rating_4.5': 0.0, 'rating_5.0': 0.0}, {'movie_id': 8844, 'imdb_id': 'tt0113497', 'original_language': 'en', 'original_title': 'Jumanji', 'overview': "When siblings Judy and Peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite Alan -- an adult who's  ... (97 characters truncated) ...  is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures.", 'popularity': 17.015539, 'poster_path': '/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg', 'production_companies': "[{'name': 'TriStar Pictures', 'id': 559}, {'name': 'Teitler Film', 'id': 2550}, {'name': 'Interscope Communications', 'id': 10201}]", 'production_countries': "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 'revenue': 262797249.0, 'runtime': 104.0, 'spoken_languages': "[{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'fr', 'name': 'Français'}]", 'vote_average': 6.9, 'vote_count': 2413.0, 'wikipedia_url': 'https://en.wikipedia.org/wiki/Jumanji_(film)', 'year': 1995, 'imdb_link': 'https://www.imdb.com/title/tt0113497/', 'title': 'Jumanji', 'Based on': ['Jumanji', 'by Chris Van Allsburg'], 'Starring': ['Robin Williams', 'Kirsten Dunst', 'David Alan Grier', 'Bonnie Hunt', 'Jonathan Hyde', 'Bebe Neuwirth'], 'Cinematography': 'Thomas E. Ackerman', 'Country': 'United States', 'Director': 'Joe Johnston', 'Distributor': 'TriStar Pictures', 'Editor(s)': 'Robert Dalva', 'Composer(s)': 'James Horner', 'Producer(s)': ['Scott Kroopf', 'William Teitler'], 'Writer(s)': ['Greg Taylor', 'Jim Strain', 'Chris Van Allsburg'], 'release_date': datetime.datetime(1995, 12, 15, 0, 0), 'budget': 65000000.0, 'rating_0.5': 0.0, 'rating_1.0': 0.0, 'rating_1.5': 0.0, 'rating_2.0': 0.0, 'rating_2.5': 0.0, 'rating_3.0': 0.0, 'rating_3.5': 0.0, 'rating_4.0': 0.0, 'rating_4.5': 0.0, 'rating_5.0': 0.0}, {'movie_id': 15602, 'imdb_id': 'tt0113228', 'original_language': 'en', 'original_title': 'Grumpier Old Men', 'overview': "A family wedding reignites the ancient feud between next-door neighbors and fishing buddies John and Max. Meanwhile, a sultry Italian divorcée opens  ... (29 characters truncated) ... t shop, alarming the locals who worry she'll scare the fish away. But she's less interested in seafood than she is in cooking up a hot time with Max.", 'popularity': 11.7129, 'poster_path': '/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg', 'production_companies': "[{'name': 'Warner Bros.', 'id': 6194}, {'name': 'Lancaster Gate', 'id': 19464}]", 'production_countries': "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 'revenue': 71500000.0, 'runtime': 101.0, 'spoken_languages': "[{'iso_639_1': 'en', 'name': 'English'}]", 'vote_average': 6.5, 'vote_count': 92.0, 'wikipedia_url': 'https://en.wikipedia.org/wiki/Grumpier_Old_Men', 'year': 1995, 'imdb_link': 'https://www.imdb.com/title/tt0113228/', 'title': 'Grumpier Old Men', 'Based on': None, 'Starring': ['Jack Lemmon', 'Walter Matthau', 'Ann-Margret', 'Sophia Loren', 'Kevin Pollak', 'Daryl Hannah'], 'Cinematography': 'Tak Fujimoto', 'Country': 'United States', 'Director': 'Howard Deutch', 'Distributor': 'Warner Bros. Pictures', 'Editor(s)': ['Billy Weber', 'Seth Flaum', 'Maryann Brandon'], 'Composer(s)': 'Alan Silvestri', 'Producer(s)': ['John Davis', 'Richard C. Berman'], 'Writer(s)': 'Mark Steven Johnson', 'release_date': datetime.datetime(1995, 12, 22, 0, 0), 'budget': 25000000.0, 'rating_0.5': 0.0, 'rating_1.0': 0.0, 'rating_1.5': 0.0, 'rating_2.0': 0.0, 'rating_2.5': 0.0, 'rating_3.0': 0.0, 'rating_3.5': 0.0, 'rating_4.0': 0.0, 'rating_4.5': 0.0, 'rating_5.0': 0.0}, {'movie_id': 31357, 'imdb_id': 'tt0114885', 'original_language': 'en', 'original_title': 'Waiting to Exhale', 'overview': 'Cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive "good man" to break a string of less-than-stellar lovers. Friends and confidants Vannah, Bernie, Glo and Robin talk it all out, determined to find a better way to breathe.', 'popularity': 3.859495, 'poster_path': '/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg', 'production_companies': "[{'name': 'Twentieth Century Fox Film Corporation', 'id': 306}]", 'production_countries': "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 'revenue': 81452156.0, 'runtime': 127.0, 'spoken_languages': "[{'iso_639_1': 'en', 'name': 'English'}]", 'vote_average': 6.1, 'vote_count': 34.0, 'wikipedia_url': 'https://en.wikipedia.org/wiki/Waiting_to_Exhale', 'year': 1995, 'imdb_link': 'https://www.imdb.com/title/tt0114885/', 'title': 'Waiting to Exhale', 'Based on': ['Waiting for Exhale', 'by Terry McMillan'], 'Starring': ['Whitney Houston', 'Angela Bassett', 'Loretta Devine', 'Lela Rochon'], 'Cinematography': 'Toyomichi Kurita', 'Country': 'United States', 'Director': 'Forest Whitaker', 'Distributor': '20th Century Fox', 'Editor(s)': 'Richard Chew', 'Composer(s)': 'Kenneth "Babyface" Edmonds', 'Producer(s)': ['Terry McMillan', 'Ronald Bass', 'Deborah Schindler', 'Ezra Swerdlow'], 'Writer(s)': ['Terry McMillan', 'Ronald Bass'], 'release_date': datetime.datetime(1995, 12, 22, 0, 0), 'budget': 16000000.0, 'rating_0.5': 0.0, 'rating_1.0': 0.0, 'rating_1.5': 0.0, 'rating_2.0': 0.0, 'rating_2.5': 0.0, 'rating_3.0': 0.0, 'rating_3.5': 0.0, 'rating_4.0': 0.0, 'rating_4.5': 0.0, 'rating_5.0': 0.0}, {'movie_id': 11862, 'imdb_id': 'tt0113041', 'original_language': 'en', 'original_title': 'Father of the Bride Part II', 'overview': "Just when George Banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that George's wife, Nina, is expec ... (20 characters truncated) ... nning on selling their home, but that's a plan that -- like George -- will have to change with the arrival of both a grandchild and a kid of his own.", 'popularity': 8.387519000000001, 'poster_path': '/e64sOI48hQXyru7naBFyssKFxVd.jpg', 'production_companies': "[{'name': 'Sandollar Productions', 'id': 5842}, {'name': 'Touchstone Pictures', 'id': 9195}]", 'production_countries': "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 'revenue': 76578911.0, 'runtime': 106.0, 'spoken_languages': "[{'iso_639_1': 'en', 'name': 'English'}]", 'vote_average': 5.7, 'vote_count': 173.0, 'wikipedia_url': 'https://en.wikipedia.org/wiki/Father_of_the_Bride_Part_II', 'year': 1995, 'imdb_link': 'https://www.imdb.com/title/tt0113041/', 'title': 'Father of the Bride Part II', 'Based on': ["Father's Little Dividend", 'by', 'Albert Hackett', 'Frances Goodrich'], 'Starring': ['Steve Martin', 'Diane Keaton', 'Martin Short', 'Kimberly Williams', 'George Newbern', 'Kieran Culkin'], 'Cinematography': ['Elliot Davis', 'William A. Fraker'], 'Country': 'United States', 'Director': 'Charles Shyer', 'Distributor': 'Buena Vista Pictures', 'Editor(s)': ['Adam Bernardi', 'Stephen A. Rotter'], 'Composer(s)': 'Alan Silvestri', 'Producer(s)': 'Nancy Meyers', 'Writer(s)': ['Nancy Meyers', 'Charles Shyer'], 'release_date': datetime.datetime(1995, 1, 1, 0, 0), 'budget': 30000000.0, 'rating_0.5': 0.0, 'rating_1.0': 0.0, 'rating_1.5': 0.0, 'rating_2.0': 0.0, 'rating_2.5': 0.0, 'rating_3.0': 0.0, 'rating_3.5': 0.0, 'rating_4.0': 0.0, 'rating_4.5': 0.0, 'rating_5.0': 0.0}, {'movie_id': 949, 'imdb_id': 'tt0113277', 'original_language': 'en', 'original_title': 'Heat', 'overview': 'Obsessive master thief, Neil McCauley leads a top-notch crew on various insane heists throughout Los Angeles while a mentally unstable detective, Vin ... (37 characters truncated) ... Each man recognizes and respects the ability and the dedication of the other even though they are aware their cat-and-mouse game may end in violence.', 'popularity': 17.924927, 'poster_path': '/zMyfPUelumio3tiDKPffaUpsQTD.jpg', 'production_companies': "[{'name': 'Regency Enterprises', 'id': 508}, {'name': 'Forward Pass', 'id': 675}, {'name': 'Warner Bros.', 'id': 6194}]", 'production_countries': "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 'revenue': 187436818.0, 'runtime': 170.0, 'spoken_languages': "[{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'es', 'name': 'Español'}]", 'vote_average': 7.7, 'vote_count': 1886.0, 'wikipedia_url': 'https://en.wikipedia.org/wiki/Heat_(1995_film)', 'year': 1995, 'imdb_link': 'https://www.imdb.com/title/tt0113277/', 'title': 'Heat', 'Based on': None, 'Starring': ['Al Pacino', 'Robert De Niro', 'Val Kilmer', 'Tom Sizemore', 'Diane Venora', 'Amy Brenneman', 'Dennis Haysbert', 'Ashley Judd', 'Mykelti Williamson', 'Wes Studi', 'Ted Levine', 'William Fichtner', 'Natalie Portman', 'Tom Noonan', 'Jon Voight'], 'Cinematography': 'Dante Spinotti', 'Country': 'United States', 'Director': 'Michael Mann', 'Distributor': 'Warner Bros.', 'Editor(s)': ['Dov Hoenig', 'Pasquale Buba', 'William Goldenberg', 'Tom Rolf'], 'Composer(s)': 'Elliot Goldenthal', 'Producer(s)': ['Michael Mann', 'Art Linson'], 'Writer(s)': 'Michael Mann', 'release_date': datetime.datetime(1995, 12, 15, 0, 0), 'budget': 60000000.0, 'rating_0.5': 0.0, 'rating_1.0': 0.0, 'rating_1.5': 0.0, 'rating_2.0': 2.0, 'rating_2.5': 0.0, 'rating_3.0': 4.0, 'rating_3.5': 2.0, 'rating_4.0': 5.0, 'rating_4.5': 1.0, 'rating_5.0': 2.0}, {'movie_id': 11860, 'imdb_id': 'tt0114319', 'original_language': 'en', 'original_title': 'Sabrina', 'overview': 'An ugly duckling having undergone a remarkable change, still harbors feelings for her crush: a carefree playboy, but not before his business-focused brother has something to say about it.', 'popularity': 6.677277, 'poster_path': '/jQh15y5YB7bWz1NtffNZmRw0s9D.jpg', 'production_companies': "[{'name': 'Paramount Pictures', 'id': 4}, {'name': 'Scott Rudin Productions', 'id': 258}, {'name': 'Mirage Enterprises', 'id': 932}, {'name': 'Sandol ... (27 characters truncated) ... 2}, {'name': 'Constellation Entertainment', 'id': 14941}, {'name': 'Worldwide', 'id': 55873}, {'name': 'Mont Blanc Entertainment GmbH', 'id': 58079}]", 'production_countries': "[{'iso_3166_1': 'DE', 'name': 'Germany'}, {'iso_3166_1': 'US', 'name': 'United States of America'}]", 'revenue': 87100000.0, 'runtime': 127.0, 'spoken_languages': "[{'iso_639_1': 'fr', 'name': 'Français'}, {'iso_639_1': 'en', 'name': 'English'}]", 'vote_average': 6.2, 'vote_count': 141.0, 'wikipedia_url': 'https://en.wikipedia.org/wiki/Sabrina_(1995_film)', 'year': 1995, 'imdb_link': 'https://www.imdb.com/title/tt0114319/', 'title': 'Sabrina', 'Based on': None, 'Starring': ['Harrison Ford', 'Julia Ormond', 'Greg Kinnear', 'Nancy Marchand', 'John Wood'], 'Cinematography': 'Giuseppe Rotunno', 'Country': 'United States', 'Director': 'Sydney Pollack', 'Distributor': 'Paramount Pictures', 'Editor(s)': 'Fredric Steinkamp', 'Composer(s)': 'John Williams', 'Producer(s)': ['Sydney Pollack', 'Scott Rudin'], 'Writer(s)': ['Barbara Benedek', 'David Rayfiel'], 'release_date': datetime.datetime(1995, 12, 15, 0, 0), 'budget': 50000000.0, 'rating_0.5': 0.0, 'rating_1.0': 0.0, 'rating_1.5': 0.0, 'rating_2.0': 0.0, 'rating_2.5': 0.0, 'rating_3.0': 0.0, 'rating_3.5': 0.0, 'rating_4.0': 0.0, 'rating_4.5': 0.0, 'rating_5.0': 0.0}, {'movie_id': 45325, 'imdb_id': 'tt0112302', 'original_language': 'en', 'original_title': 'Tom and Huck', 'overview': 'A mischievous young boy, Tom Sawyer, witnesses a murder by the deadly Injun Joe. Tom becomes friends with Huckleberry Finn, a boy with no future and  ... (54 characters truncated) ... ship or honoring an oath because the town alcoholic is accused of the murder. Tom and Huck go through several adventures trying to retrieve evidence.', 'popularity': 2.5611610000000002, 'poster_path': '/sGO5Qa55p7wTu7FJcX4H4xIVKvS.jpg', 'production_companies': "[{'name': 'Walt Disney Pictures', 'id': 2}]", 'production_countries': "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 'revenue': 23900000.0, 'runtime': 97.0, 'spoken_languages': "[{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'de', 'name': 'Deutsch'}]", 'vote_average': 5.4, 'vote_count': 45.0, 'wikipedia_url': 'https://en.wikipedia.org/wiki/Tom_and_Huck', 'year': 1995, 'imdb_link': 'https://www.imdb.com/title/tt0112302/', 'title': 'Tom and Huck', 'Based on': None, 'Starring': ['Jonathan Taylor Thomas', 'Brad Renfro'], 'Cinematography': 'Bobby Bukowski', 'Country': 'United States', 'Director': 'Peter Hewitt', 'Distributor': 'Buena Vista Pictures', 'Editor(s)': 'David Freeman', 'Composer(s)': 'Stephen Endelman', 'Producer(s)': ['Laurence Mark', 'Stephen Sommers'], 'Writer(s)': ['The Adventures of Tom Sawyer', 'by', 'Mark Twain'], 'release_date': datetime.datetime(1995, 12, 22, 0, 0), 'budget': None, 'rating_0.5': 0.0, 'rating_1.0': 0.0, 'rating_1.5': 0.0, 'rating_2.0': 0.0, 'rating_2.5': 0.0, 'rating_3.0': 0.0, 'rating_3.5': 0.0, 'rating_4.0': 0.0, 'rating_4.5': 0.0, 'rating_5.0': 0.0}  ... displaying 10 of 6052 total bound parameter sets ...  {'movie_id': 43656, 'imdb_id': 'tt0276744', 'original_language': 'en', 'original_title': '13 Moons', 'overview': 'Can you solve the mystery of the 13 moons?', 'popularity': 2.4070009999999997, 'poster_path': '/k9jvqeXtc5AARclaBdvrcq7iSbk.jpg', 'production_companies': '[]', 'production_countries': '[]', 'revenue': None, 'runtime': 93.0, 'spoken_languages': "[{'iso_639_1': 'en', 'name': 'English'}]", 'vote_average': 5.8, 'vote_count': 4.0, 'wikipedia_url': 'https://en.wikipedia.org/wiki/13_Moons', 'year': 2002, 'imdb_link': 'https://www.imdb.com/title/tt0276744/', 'title': '13 Moons', 'Based on': None, 'Starring': ['Jennifer Beals', 'Elizabeth Bracco', 'Steve Buscemi', 'Peter Dinklage', 'Sam Rockwell', 'Peter Stormare'], 'Cinematography': 'Phil Parmet', 'Country': 'United States', 'Director': 'Alexandre Rockwell', 'Distributor': ['13 Moons Productions LLC', 'Lot 47 Films'], 'Editor(s)': 'John David Allen', 'Composer(s)': ['Brian Kelly', 'Kevin Salem'], 'Producer(s)': ['Brandon Cole', 'Michael Din', 'David Kronemeyer'], 'Writer(s)': ['Brandon Cole', 'Alexandre Rockwell'], 'release_date': datetime.datetime(2002, 1, 1, 0, 0), 'budget': None, 'rating_0.5': 0.0, 'rating_1.0': 0.0, 'rating_1.5': 0.0, 'rating_2.0': 0.0, 'rating_2.5': 0.0, 'rating_3.0': 0.0, 'rating_3.5': 0.0, 'rating_4.0': 0.0, 'rating_4.5': 0.0, 'rating_5.0': 0.0}, {'movie_id': 183088, 'imdb_id': 'tt0100130', 'original_language': 'en', 'original_title': 'Megaville', 'overview': 'In the future, national boundaries have been broken down and two giant super-states remain—the bleak, oppressive, and totalitarian "Hemisphere," and  ... (545 characters truncated) ... s in the Hemisphere and the people live in fear of the "CKS" (the secret police). All forms of media are illegal in the Hemisphere.  CC wikipedia.org', 'popularity': 0.9057620000000001, 'poster_path': '/k75xJfQYhF8UNuPmO57EW3xX1Pl.jpg', 'production_companies': '[]', 'production_countries': "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 'revenue': None, 'runtime': 95.0, 'spoken_languages': "[{'iso_639_1': 'en', 'name': 'English'}]", 'vote_average': 6.0, 'vote_count': 1.0, 'wikipedia_url': 'https://en.wikipedia.org/wiki/Megaville', 'year': 1990, 'imdb_link': 'https://www.imdb.com/title/tt0100130/', 'title': 'Megaville', 'Based on': None, 'Starring': ['Billy Zane', 'Kristen Cloke', 'Daniel J. Travanti', 'J.C. Quinn', 'Grace Zabriskie'], 'Cinematography': 'Zoltán David', 'Country': 'United States', 'Director': 'Peter Lehner', 'Distributor': 'Live Entertainment', 'Editor(s)': 'Pietro Scalia', 'Composer(s)': 'Stacy Widelitz', 'Producer(s)': ['Robert Michael Steloff', 'Peter Lehner', 'Cynthia Hill', 'Andres Pfäffli', 'Christina Schmidlin'], 'Writer(s)': ['Gordon Chavis', 'Peter Lehner'], 'release_date': datetime.datetime(1990, 1, 1, 0, 0), 'budget': None, 'rating_0.5': 0.0, 'rating_1.0': 0.0, 'rating_1.5': 0.0, 'rating_2.0': 0.0, 'rating_2.5': 0.0, 'rating_3.0': 0.0, 'rating_3.5': 0.0, 'rating_4.0': 0.0, 'rating_4.5': 0.0, 'rating_5.0': 0.0})]
(Background on this error at: http://sqlalche.me/e/13/e3q8)

In [385]:
df_movies.shape

(6052, 40)