In [1]:
from config import access_token_omdb3, access_token_tmdb
import time
import requests
import json
import pandas as pd
import ast
import numpy as np
from datetime import datetime
import re
import sqlite3

### Obtain List of Movie Titles and Prepare for API ###

In [2]:
# read Rotten Tomatoes csv file data to pandas dataframe
df = pd.read_csv('rotten_tomatoes.csv')

# pull the movie titles from the pandas dataframe into a list
movies_titles = df['Movie Title'].tolist()

In [3]:
# add '+' to movie titles for API query
movies = [movie.replace(' ','+') for movie in movies_titles]

In [4]:
# use regex to remove the content between the parantheses to create the final movie titles query list
movies_search = []
for movie in movies:
    movies_search.append(re.sub(r" ?\([^)]+\)", "", movie))

### Make API Calls ###

In [29]:
# test omdb call with one ping first
def omdb_call():
    
    url = 'https://www.omdbapi.com/'
    
    params = {'t': 'Chicken+Run',
                    'type': 'movie',
                    'plot': 'short',
                     'apikey': access_token_omdb1}
        
    # make api call to obtain data
    response = requests.get(url, params=params)
            
    return response.json()

In [30]:
omdb_call()

{'Title': 'Chicken Run',
 'Year': '2000',
 'Rated': 'G',
 'Released': '23 Jun 2000',
 'Runtime': '84 min',
 'Genre': 'Animation, Adventure, Comedy, Drama, Family',
 'Director': 'Peter Lord, Nick Park',
 'Writer': "Peter Lord (original story), Nick Park (original story), Karey Kirkpatrick (screenplay), Mark Burton (additional dialogue), John O'Farrell (additional dialogue)",
 'Actors': 'Phil Daniels, Lynn Ferguson, Mel Gibson, Tony Haygarth',
 'Plot': 'When a cockerel apparently flies into a chicken farm, the chickens see him as an opportunity to escape their evil owners.',
 'Language': 'English',
 'Country': 'UK, USA, France',
 'Awards': 'Nominated for 1 Golden Globe. Another 23 wins & 25 nominations.',
 'Poster': 'https://m.media-amazon.com/images/M/MV5BY2UyYjFkNzAtYzIyMC00MGI1LTlkNDktNzUyOGQ5NTI2ZGFjXkEyXkFqcGdeQXVyNTUyMzE4Mzg@._V1_SX300.jpg',
 'Ratings': [{'Source': 'Internet Movie Database', 'Value': '7.0/10'},
  {'Source': 'Rotten Tomatoes', 'Value': '97%'},
  {'Source': 'Metacrit

In [19]:
# function to make multiple calls to OMDB API by looping through list of movie titles
def omdb_calls():
    
    max_calls = 800
    counter = 0
    
    while counter <= max_calls:
        for movie in movies_search:
            try:
                if True:
                    url = 'https://www.omdbapi.com/'
                    params = {'t': movie,
                            'type': 'movie',
                             'apikey': access_token_omdb3}
                    # make api call to obtain data
                    response = requests.get(url, params=params)
                    counter+=1

                    # write information from api call response in json file as it comes in
                    with open('omdb_data.json', 'w') as f:
                        f.write(str(response.json()))
                        
                else:
                    print('Rate Limit Reached')
                    counter = 0
            except:
                pass
        
    return response.status_code

In [5]:
# function to make calls to The Movie DB API by looping through list of movie titles
def tmdb_calls():
    movie_info = []
    counter = 0

    for movie in movies_search:
        try:
            if counter <=40:
                URL = 'https://api.themoviedb.org/3/search/movie?api_key={}&query={}'.format(access_token_tmdb, movie)
                response = requests.get(URL)
                movie_info.append(response.json())
                counter += 1
            else:
                print("Rate Limit Reached")
                time.sleep(10)
                counter = 0
        except:
            pass
    return movie_info

In [6]:
# obtain list of data from TMDB API call response
movie_info = tmdb_calls()

Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached
Rate Limit Reached


### Work with API Response Data in Pandas DataFrame ###

In [145]:
# convert movie_info list to pandas dataframe
movies_df = pd.DataFrame(movie_info)

In [146]:
movies_df.head()

Unnamed: 0,page,results,status_code,status_message,total_pages,total_results
0,1.0,"[{'vote_count': 2438, 'id': 7443, 'video': Fal...",,,1.0,3.0
1,1.0,"[{'vote_count': 97, 'id': 10697, 'video': Fals...",,,1.0,1.0
2,1.0,"[{'vote_count': 6, 'id': 41276, 'video': False...",,,1.0,1.0
3,1.0,"[{'vote_count': 156, 'id': 25538, 'video': Fal...",,,38.0,749.0
4,1.0,"[{'vote_count': 136, 'id': 14295, 'video': Fal...",,,1.0,1.0


In [147]:
movies_df.tail()

Unnamed: 0,page,results,status_code,status_message,total_pages,total_results
1850,1.0,"[{'vote_count': 1855, 'id': 480530, 'video': F...",,,1.0,2.0
1851,1.0,"[{'vote_count': 5, 'id': 490780, 'video': Fals...",,,1.0,2.0
1852,1.0,"[{'vote_count': 4, 'id': 475220, 'video': Fals...",,,1.0,6.0
1853,1.0,"[{'vote_count': 267, 'id': 474395, 'video': Fa...",,,1.0,1.0
1854,1.0,"[{'vote_count': 17067, 'id': 118340, 'video': ...",,,2.0,35.0


In [148]:
movies_df.shape

(1855, 6)

In [149]:
# function to convert values to workable data
def de_string ():
    if not np.nan:
        df['results'].apply(lambda x : ast.literal_eval(x))
    else:
        pass
    return df['results']

In [150]:
# create a list of the values from the pandas series of results
mdf = movies_df['results'].get_values().tolist()

In [151]:
# loop through list to pull out dicitionaries into a new list
results1 = []
for i in range(len(mdf)):
    if type(mdf[i]) == list:
        for j in range(len(mdf[i])):
            results1.append(mdf[i][j])
    else:
        results1.append(mdf[i])

In [137]:
results1

[{'vote_count': 2438,
  'id': 7443,
  'video': False,
  'vote_average': 6.6,
  'title': 'Chicken Run',
  'popularity': 13.991,
  'poster_path': '/z0MafJgUnVyVbczicYMkPKKHkBi.jpg',
  'original_language': 'en',
  'original_title': 'Chicken Run',
  'genre_ids': [16, 35, 10751],
  'backdrop_path': '/b0Zlbk19oTLjKPvb807suPFKBtN.jpg',
  'adult': False,
  'overview': 'Having been hopelessly repressed and facing eventual certain death at the British chicken farm where they are held, Ginger the chicken along with the help of Rocky the American rooster decide to rebel and lead their fellow chickens in a great escape from the murderous farmers Mr. and Mrs. Tweedy and their farm of doom.',
  'release_date': '2000-06-21'},
 {'vote_count': 0,
  'id': 520758,
  'video': False,
  'vote_average': 0,
  'title': 'Chicken Run 2',
  'popularity': 1.462,
  'poster_path': '/wQJW29RIMFQFV46eTLLmTrorjDB.jpg',
  'original_language': 'en',
  'original_title': 'Chicken Run 2',
  'genre_ids': [16],
  'backdrop_pat

In [152]:
# create empty dataframe with column labels from the dictionary keys
movies_all = pd.DataFrame(columns=results1[0].keys())

In [153]:
# checking types of data in results1 - it's a mixed list of floats and dictionaries
types = []
for i in range(len(results1)):
    types.append(type(results1[i]))

In [154]:
# pull all float values into a list to review
floats = []
for result in results1:
    if type(result) == float:
        floats.append(result)
    else:
        pass

In [155]:
results2 = []
for result in results1:
    if type(result) == dict:
        results2.append(result)
    else:
        pass

In [142]:
results2

[{'vote_count': 2438,
  'id': 7443,
  'video': False,
  'vote_average': 6.6,
  'title': 'Chicken Run',
  'popularity': 13.991,
  'poster_path': '/z0MafJgUnVyVbczicYMkPKKHkBi.jpg',
  'original_language': 'en',
  'original_title': 'Chicken Run',
  'genre_ids': [16, 35, 10751],
  'backdrop_path': '/b0Zlbk19oTLjKPvb807suPFKBtN.jpg',
  'adult': False,
  'overview': 'Having been hopelessly repressed and facing eventual certain death at the British chicken farm where they are held, Ginger the chicken along with the help of Rocky the American rooster decide to rebel and lead their fellow chickens in a great escape from the murderous farmers Mr. and Mrs. Tweedy and their farm of doom.',
  'release_date': '2000-06-21'},
 {'vote_count': 0,
  'id': 520758,
  'video': False,
  'vote_average': 0,
  'title': 'Chicken Run 2',
  'popularity': 1.462,
  'poster_path': '/wQJW29RIMFQFV46eTLLmTrorjDB.jpg',
  'original_language': 'en',
  'original_title': 'Chicken Run 2',
  'genre_ids': [16],
  'backdrop_pat

In [156]:
# checking types of data in results2 
types1 = []
for i in range(len(results2)):
    types1.append(type(results2[i]))

In [157]:
types1

[dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,
 dict,

In [165]:
# create dataframe of API data
movies_all = pd.DataFrame(results2)

In [166]:
movies_all.head()

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,/b0Zlbk19oTLjKPvb807suPFKBtN.jpg,"[16, 35, 10751]",7443,en,Chicken Run,Having been hopelessly repressed and facing ev...,13.991,/z0MafJgUnVyVbczicYMkPKKHkBi.jpg,2000-06-21,Chicken Run,False,6.6,2438
1,False,,[16],520758,en,Chicken Run 2,"A sequel to the 2000 film, ""Chicken Run.""",1.462,/wQJW29RIMFQFV46eTLLmTrorjDB.jpg,2020-06-12,Chicken Run 2,False,0.0,0
2,False,,[],495607,en,Running Around Like A Chicken With Its Head Cu...,"Les Blank's first student film, made while at ...",0.6,/vNTe1R0zg9kEV8bBLEpdovb5Zvr.jpg,1960-01-01,Running Around Like A Chicken With Its Head Cu...,False,0.0,0
3,False,/evNvMSUYYjZBluXuZEV9JpqNUel.jpg,"[18, 35, 10749]",10697,fr,Le Goût des autres,Agnès Jaoui co-writes and directs this romanti...,5.907,/k9R6Omadvp57EoQ4x9nNneig1Nv.jpg,2000-03-01,The Taste of Others,False,6.7,97
4,False,/WeDJNcoIJDC6FvcC19dEHz09XR.jpg,"[99, 10751]",41276,en,The Life and Times of Hank Greenberg,The story of Baseball Hall-of-Famer Hank Green...,0.6,/q5L7XLJ4ylDAs0txMMREbwuoudD.jpg,2000-01-12,The Life and Times of Hank Greenberg,False,6.0,6


In [167]:
# remove unnecessary columns from dataframe
movies_all = movies_all.drop(['adult', 'backdrop_path', 'genre_ids', 'original_language', 'original_title', 'overview', 'poster_path', 'video'], axis=1)

In [168]:
# convert release date column into datetime and separate the year and month into their own columns
movies_all.release_date = pd.to_datetime(movies_all['release_date'])
movies_all['release_year'] = movies_all.release_date.dt.year
movies_all['release_month'] = movies_all.release_date.dt.month

### Move API DataFrame Into CSV File ###

In [169]:
# convert movies_all dataframe to csv
movies_all.to_csv('tmdb_data.csv')

In [170]:
movies_all.head()

Unnamed: 0,id,popularity,release_date,title,vote_average,vote_count,release_year,release_month
0,7443,13.991,2000-06-21,Chicken Run,6.6,2438,2000.0,6.0
1,520758,1.462,2020-06-12,Chicken Run 2,0.0,0,2020.0,6.0
2,495607,0.6,1960-01-01,Running Around Like A Chicken With Its Head Cu...,0.0,0,1960.0,1.0
3,10697,5.907,2000-03-01,The Taste of Others,6.7,97,2000.0,3.0
4,41276,0.6,2000-01-12,The Life and Times of Hank Greenberg,6.0,6,2000.0,1.0


### Merge API Data With Other Data ###

In [173]:
RT = pd.read_csv('RT_all.csv')

In [174]:
RT.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1202 entries, 0 to 1201
Data columns (total 21 columns):
Unnamed: 0              1202 non-null int64
Title                   1202 non-null object
Director                1202 non-null object
Writers                 0 non-null float64
Release Date            1202 non-null object
Revenue                 1202 non-null object
Actors                  0 non-null float64
Wrters                  1202 non-null object
Unnamed: 0.1            1099 non-null float64
Movie Title             1099 non-null object
Review                  1099 non-null object
Number of RT Reviews    1099 non-null float64
Year                    1099 non-null float64
movie_ID                717 non-null object
movie_title             717 non-null object
year                    717 non-null float64
genres                  717 non-null object
avg_rating              717 non-null float64
numVotes                717 non-null float64
directorID              717 non-null object

In [175]:
# remove duplicates based on title
RT = RT.drop_duplicates(subset=['Title'])

In [176]:
RT.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1202 entries, 0 to 1201
Data columns (total 21 columns):
Unnamed: 0              1202 non-null int64
Title                   1202 non-null object
Director                1202 non-null object
Writers                 0 non-null float64
Release Date            1202 non-null object
Revenue                 1202 non-null object
Actors                  0 non-null float64
Wrters                  1202 non-null object
Unnamed: 0.1            1099 non-null float64
Movie Title             1099 non-null object
Review                  1099 non-null object
Number of RT Reviews    1099 non-null float64
Year                    1099 non-null float64
movie_ID                717 non-null object
movie_title             717 non-null object
year                    717 non-null float64
genres                  717 non-null object
avg_rating              717 non-null float64
numVotes                717 non-null float64
directorID              717 non-null object

In [262]:
# merge dataframes
final_movies = pd.merge(RT, movies_all, left_on='Title', right_on='title', how='left', left_index=True)

In [263]:
final_movies.head()

Unnamed: 0.2,Unnamed: 0,Title,Director,Writers,Release Date,Revenue,Actors,Wrters,Unnamed: 0.1,Movie Title,...,directorID,director_name,id,popularity,release_date,title,vote_average,vote_count,release_year,release_month
0,0,Chicken Run,"['Peter Lord', 'Nick Park']",,"Jun 21, 2000","$106,604,314",,"['Jack Rosenthal', 'Karey Kirkpatrick']",0.0,Chicken Run,...,"nm0520485,nm0661910",,7443.0,13.991,2000-06-21,Chicken Run,6.6,2438.0,2000.0,6.0
3,2,The Taste of Others,['Agnès Jaoui'],,"Jan 1, 2000",\n\n 112 minute...,,"['Jean-Pierre Bacri', 'Agnès Jaoui']",,,...,,,10697.0,5.907,2000-03-01,The Taste of Others,6.7,97.0,2000.0,3.0
812,2,The Taste of Others,['Agnès Jaoui'],,"Jan 1, 2000",\n\n 112 minute...,,"['Jean-Pierre Bacri', 'Agnès Jaoui']",,,...,,,10697.0,5.907,2000-03-01,The Taste of Others,6.7,97.0,2000.0,3.0
4,3,The Life and Times of Hank Greenberg,['Aviva Kempner'],,"Jan 12, 2000",\n\n 89 minutes...,,['Aviva Kempner'],3.0,The Life and Times of Hank Greenberg,...,,,41276.0,0.6,2000-01-12,The Life and Times of Hank Greenberg,6.0,6.0,2000.0,1.0
5757,3,The Life and Times of Hank Greenberg,['Aviva Kempner'],,"Jan 12, 2000",\n\n 89 minutes...,,['Aviva Kempner'],3.0,The Life and Times of Hank Greenberg,...,,,41276.0,0.6,2000-01-12,The Life and Times of Hank Greenberg,6.0,6.0,2000.0,1.0


In [264]:
final_movies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1982 entries, 0 to 10928
Data columns (total 29 columns):
Unnamed: 0              1982 non-null int64
Title                   1982 non-null object
Director                1982 non-null object
Writers                 0 non-null float64
Release Date            1982 non-null object
Revenue                 1982 non-null object
Actors                  0 non-null float64
Wrters                  1982 non-null object
Unnamed: 0.1            1822 non-null float64
Movie Title             1822 non-null object
Review                  1822 non-null object
Number of RT Reviews    1822 non-null float64
Year                    1822 non-null float64
movie_ID                1272 non-null object
movie_title             1272 non-null object
year                    1272 non-null float64
genres                  1272 non-null object
avg_rating              1272 non-null float64
numVotes                1272 non-null float64
directorID              1272 non-nul

In [265]:
# drop unnecessary columns
final_movies = final_movies.drop(['Unnamed: 0', 'Writers', 'Actors', 'Unnamed: 0.1', 'Movie Title', 'movie_ID', 'movie_title', 'title', 'year', 'directorID', 'director_name', 'id', 'release_date', 'release_year'], axis=1)

In [266]:
final_movies.head()

Unnamed: 0,Title,Director,Release Date,Revenue,Wrters,Review,Number of RT Reviews,Year,genres,avg_rating,numVotes,popularity,vote_average,vote_count,release_month
0,Chicken Run,"['Peter Lord', 'Nick Park']","Jun 21, 2000","$106,604,314","['Jack Rosenthal', 'Karey Kirkpatrick']",97%,171.0,2000.0,"Adventure,Animation,Comedy",7.0,162266.0,13.991,6.6,2438.0,6.0
3,The Taste of Others,['Agnès Jaoui'],"Jan 1, 2000",\n\n 112 minute...,"['Jean-Pierre Bacri', 'Agnès Jaoui']",,,,,,,5.907,6.7,97.0,3.0
812,The Taste of Others,['Agnès Jaoui'],"Jan 1, 2000",\n\n 112 minute...,"['Jean-Pierre Bacri', 'Agnès Jaoui']",,,,,,,5.907,6.7,97.0,3.0
4,The Life and Times of Hank Greenberg,['Aviva Kempner'],"Jan 12, 2000",\n\n 89 minutes...,['Aviva Kempner'],97%,65.0,2000.0,,,,0.6,6.0,6.0,1.0
5757,The Life and Times of Hank Greenberg,['Aviva Kempner'],"Jan 12, 2000",\n\n 89 minutes...,['Aviva Kempner'],97%,65.0,2000.0,,,,0.6,6.0,6.0,1.0


In [267]:
final_movies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1982 entries, 0 to 10928
Data columns (total 15 columns):
Title                   1982 non-null object
Director                1982 non-null object
Release Date            1982 non-null object
Revenue                 1982 non-null object
Wrters                  1982 non-null object
Review                  1822 non-null object
Number of RT Reviews    1822 non-null float64
Year                    1822 non-null float64
genres                  1272 non-null object
avg_rating              1272 non-null float64
numVotes                1272 non-null float64
popularity              1830 non-null float64
vote_average            1830 non-null float64
vote_count              1830 non-null float64
release_month           1802 non-null float64
dtypes: float64(8), object(7)
memory usage: 247.8+ KB


In [268]:
# convert all values in dataframe from json strings
if not np.nan:
    final_moviesl.applymap(lambda x : ast.literal_eval(x))
else:
    pass

In [269]:
# remove duplicates based on title
final_movies = final_movies.drop_duplicates(subset=['Title'])

In [270]:
final_movies.head()

Unnamed: 0,Title,Director,Release Date,Revenue,Wrters,Review,Number of RT Reviews,Year,genres,avg_rating,numVotes,popularity,vote_average,vote_count,release_month
0,Chicken Run,"['Peter Lord', 'Nick Park']","Jun 21, 2000","$106,604,314","['Jack Rosenthal', 'Karey Kirkpatrick']",97%,171.0,2000.0,"Adventure,Animation,Comedy",7.0,162266.0,13.991,6.6,2438.0,6.0
3,The Taste of Others,['Agnès Jaoui'],"Jan 1, 2000",\n\n 112 minute...,"['Jean-Pierre Bacri', 'Agnès Jaoui']",,,,,,,5.907,6.7,97.0,3.0
4,The Life and Times of Hank Greenberg,['Aviva Kempner'],"Jan 12, 2000",\n\n 89 minutes...,['Aviva Kempner'],97%,65.0,2000.0,,,,0.6,6.0,6.0,1.0
5,Yi Yi,['Edward Yang'],"Oct 6, 2000",\n\n 173 minute...,['Edward Yang'],96%,83.0,2000.0,"Drama,Romance",8.1,16786.0,6.59,8.0,156.0,9.0
10956,You Can Count On Me,['Kenneth Lonergan'],"Nov 10, 2000","$8,409,329",['Kenneth Lonergan'],95%,103.0,2000.0,,,,,,,


In [271]:
final_movies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1202 entries, 0 to 10928
Data columns (total 15 columns):
Title                   1202 non-null object
Director                1202 non-null object
Release Date            1202 non-null object
Revenue                 1202 non-null object
Wrters                  1202 non-null object
Review                  1099 non-null object
Number of RT Reviews    1099 non-null float64
Year                    1099 non-null float64
genres                  717 non-null object
avg_rating              717 non-null float64
numVotes                717 non-null float64
popularity              1050 non-null float64
vote_average            1050 non-null float64
vote_count              1050 non-null float64
release_month           1048 non-null float64
dtypes: float64(8), object(7)
memory usage: 150.2+ KB


In [272]:
final_movies = final_movies.rename({'Wrters':'Writers', 'Review':'Rotten Tomatoes Review', 'genres':'Genres', 'avg_rating': 'Average IMDB Rating', 'numVotes': 'Number of IMDB Votes', 'popularity': 'Popularity', 'vote_average':'Average TMDB Rating', 'vote_count':'Number of TMDB Votes', 'release_month':'Release Month'}, axis=1)

In [273]:
final_movies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1202 entries, 0 to 10928
Data columns (total 15 columns):
Title                     1202 non-null object
Director                  1202 non-null object
Release Date              1202 non-null object
Revenue                   1202 non-null object
Writers                   1202 non-null object
Rotten Tomatoes Review    1099 non-null object
Number of RT Reviews      1099 non-null float64
Year                      1099 non-null float64
Genres                    717 non-null object
Average IMDB Rating       717 non-null float64
Number of IMDB Votes      717 non-null float64
Popularity                1050 non-null float64
Average TMDB Rating       1050 non-null float64
Number of TMDB Votes      1050 non-null float64
Release Month             1048 non-null float64
dtypes: float64(8), object(7)
memory usage: 150.2+ KB


In [274]:
# change dtypes of columns
final_movies['Number of RT Reviews'] = final_movies['Number of RT Reviews'].astype('int64', errors='ignore')
final_movies['Year'] = final_movies['Year'].astype('int64', errors='ignore')
final_movies['Number of TMDB Votes'] = final_movies['Number of TMDB Votes'].astype('int64', errors='ignore')
final_movies['Release Month'] = final_movies['Release Month'].astype('int64', errors='ignore')
final_movies['Number of IMDB Votes'] = final_movies['Number of IMDB Votes'].astype('int64', errors='ignore')

In [275]:
# move final overall merged movies dataframe to csv file
final_movies.to_csv('movies_final.csv')

### Move Data into SQL Database ###

In [276]:
# convert dataframe into sql database
connection = sqlite3.connect('movies_final.db')
cursor = connection.cursor()

In [277]:
# move pandas dataframe of overall merged movies data to sql database
final_movies.to_sql('movies_final', connection)

In [278]:
cursor.execute('''SELECT Title FROM movies_final;''').fetchall()

[('Chicken Run',),
 ('The Taste of Others',),
 ('The Life and Times of Hank Greenberg',),
 ('Yi Yi',),
 ('You Can Count On Me',),
 ('Memento',),
 ('Best in Show',),
 ('Croupier',),
 ('Dark Days',),
 ('Not One Less',),
 ('The Filth and the Fury',),
 ('Almost Famous',),
 ('Cast Away',),
 ('Sexy Beast',),
 ('Girlfight',),
 ('State and Main',),
 ('Meet the Parents',),
 ('Erin Brockovich',),
 ('Billy Elliot',),
 ('Nico and Dani',),
 ('Battle Royale',),
 ('Nurse Betty',),
 ('Thirteen Days',),
 ('Chunhyangdyun',),
 ('Shadow of the Vampire',),
 ('Wonder Boys',),
 ('George Washington',),
 ('Merci pour le chocolat',),
 ('Love and Basketball',),
 ('Pollock',),
 ('Fantasia 2000',),
 ('Lakeboat',),
 ('Requiem for a Dream',),
 ('Shanghai Noon',),
 ('The Vertical Ray of the Sun',),
 ('Gladiator',),
 ('Space Cowboys',),
 ('Quills',),
 ('Keep the River on Your Right',),
 ('Finding Forrester',),
 ('Remember the Titans',),
 ('Tigerland',),
 ('Sunshine',),
 ('My Dog Skip',),
 ('The Big Kahuna',),
 ('Code 

In [279]:
cursor.close()
connection.close()