In [1]:
# Importing essential tools
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load movie dataset
movies = pd.read_csv("movies_meta_data.csv")

In [3]:
# Check how many rows and columns 
movies.shape

(10449, 21)

In [4]:
movies['Trailer Link'].isna().sum()

3270

In [5]:
movies.dropna(subset=['Trailer Link'], inplace=True)

In [6]:
movies.shape

(7179, 21)

In [7]:
# Check null values
movies.isna().sum()

Title                   0
Id                      0
Trailer Link            0
Director               48
Cast                    0
genre_ids            7179
Genre                  68
Budget                  0
Revenue                 0
Overview                5
Homepage             3893
Year                    1
Runtime                 0
Popularity              0
Adult                   0
Release_Date            1
Original_Title          0
Original_Language       0
Tagline              1785
Vote_Average            0
Vote_Count              0
dtype: int64

In [8]:
# Select a subset of columns from the 'movies' DataFrame
movies = movies[['Id','Title','Director','Cast','Genre','Overview']]

In [9]:
# Check null values
movies.isna().sum()

Id           0
Title        0
Director    48
Cast         0
Genre       68
Overview     5
dtype: int64

In [10]:
# Check number of duplicated entries in the 'Title' column 
movies.duplicated(subset=['Id']).sum()

1588

In [12]:
# Drops the duplicated rows in the 'Title' column and modifies the 'movies' dataframe in place.
movies.drop_duplicates(subset=['Id'], inplace=True)

In [13]:
movies.shape

(5591, 6)

In [14]:
# Check null values
movies.isna().sum()

Id           0
Title        0
Director    45
Cast         0
Genre       66
Overview     5
dtype: int64

In [15]:
# Reset index
movies = movies.reset_index(drop=True)

### Collecting Reviews of 5591 movies

In [50]:
from tmdbv3api import TMDb
from tmdbv3api import Movie
import requests

# initialize TMDb object
tmdb = TMDb()

# enter your API key here
tmdb.api_key = 'db47f4d8f92f5ca02c7840f594a293ad'

# initialize Movie object
tmdb_movie = Movie()
def get_movie_info(title):
    search_response = tmdb_movie.search(title)
    if not search_response:
        return None
    movie_id = search_response[0].id
    movie = tmdb_movie.details(movie_id)
    # retrieve up to 10 pages of reviews
    page = 1
    reviews = []
    while True:
        response = requests.get(f'https://api.themoviedb.org/3/movie/{movie_id}/reviews?api_key={tmdb.api_key}&language=en-US&page={page}')
        results = response.json()['results']
        if not results:
            break
        reviews.extend([review['content'] for review in results])
        if page >= 6:
            break
        else:
            page += 1
            
    return { 'Id': movie.id, 'Reviews': ' '.join(reviews) if reviews else ''}


# create list of movie titles
Titles = []
movie_names = movies['Title'].map(lambda x: Titles.append(x))
Titles = Titles[2500:3000]


# retrieve data for each movie and concatenate into a single DataFrame
data_frames = [pd.DataFrame([get_movie_info(title)]) for title in Titles]

In [51]:
df6 = pd.concat(data_frames, ignore_index=True)

In [54]:
df = pd.concat([df1,df2,df3,df4,df5,df6,df7,df8,df9,df10,df11],ignore_index=True)

In [80]:
df

Unnamed: 0,Id,Reviews,0
0,391629.0,,
1,25918.0,,
2,1104040.0,,
3,157800.0,,
4,60579.0,,
...,...,...,...
5586,41515.0,,
5587,45324.0,"""The great American past-time has never been b...",
5588,27585.0,"**A frankly well-made film, but very painful t...",
5589,39451.0,,


In [81]:
movies

Unnamed: 0,Id,Title,Director,Cast,Genre,Overview
0,391629,Baaghi,Sabbir Khan,"['Tiger Shroff', 'Shraddha Kapoor', 'Sunil Gro...","Action, Thriller, Romance","Ronny is a rebellious man, who falls in love w..."
1,25918,Champion,Mark Robson,"['Kirk Douglas', 'Marilyn Maxwell', 'Arthur Ke...",Drama,An unscrupulous boxer fights his way to the to...
2,1104040,Gangs of Lagos,Jadesola Osiberu,"['Demi Banwo', 'Adesua Etomi-Wellington', 'Tob...",Crime,A group of friends who each have to navigate t...
3,157800,Har Dil Jo Pyar Karega,Raj Kanwar,"['Salman Khan', 'Rani Mukerji', 'Preity Zinta'...","Comedy, Drama",Raj is a struggling singer chasing his dreams ...
4,60579,Hey Ram,Kamal Haasan,"['Kamal Haasan', 'Shah Rukh Khan', 'Hema Malin...","History, Drama, Crime",Saketh Ram's wife is raped and killed during d...
...,...,...,...,...,...,...
5586,560204,Arkansas,Clark Duke,"['Liam Hemsworth', 'Clark Duke', 'Vince Vaughn...","Crime, Thriller",Kyle and Swin live by the orders of an Arkansa...
5587,19053,Valley Girl,Martha Coolidge,"['Nicolas Cage', 'Deborah Foreman', 'Elizabeth...","Comedy, Romance","Julie, a girl from the valley, meets Randy, a ..."
5588,429422,Capone,Josh Trank,"['Tom Hardy', 'Linda Cardellini', 'Matt Dillon...","Crime, Drama","The 47-year old Al Capone, after 10 years in p..."
5589,582596,The Wrong Missy,Tyler Spindel,"['David Spade', 'Lauren Lapkus', 'Candace Smit...","Comedy, Romance",A guy meets the woman of his dreams and invite...


### Concat reviews in movies dataset

In [82]:
result = pd.merge(movies, df, on='Id', how='inner')

In [83]:
result

Unnamed: 0,Id,Title,Director,Cast,Genre,Overview,Reviews,0
0,391629,Baaghi,Sabbir Khan,"['Tiger Shroff', 'Shraddha Kapoor', 'Sunil Gro...","Action, Thriller, Romance","Ronny is a rebellious man, who falls in love w...",,
1,25918,Champion,Mark Robson,"['Kirk Douglas', 'Marilyn Maxwell', 'Arthur Ke...",Drama,An unscrupulous boxer fights his way to the to...,,
2,1104040,Gangs of Lagos,Jadesola Osiberu,"['Demi Banwo', 'Adesua Etomi-Wellington', 'Tob...",Crime,A group of friends who each have to navigate t...,,
3,157800,Har Dil Jo Pyar Karega,Raj Kanwar,"['Salman Khan', 'Rani Mukerji', 'Preity Zinta'...","Comedy, Drama",Raj is a struggling singer chasing his dreams ...,,
4,60579,Hey Ram,Kamal Haasan,"['Kamal Haasan', 'Shah Rukh Khan', 'Hema Malin...","History, Drama, Crime",Saketh Ram's wife is raped and killed during d...,,
...,...,...,...,...,...,...,...,...
5553,560204,Arkansas,Clark Duke,"['Liam Hemsworth', 'Clark Duke', 'Vince Vaughn...","Crime, Thriller",Kyle and Swin live by the orders of an Arkansa...,A good but not great film that could've been b...,
5554,19053,Valley Girl,Martha Coolidge,"['Nicolas Cage', 'Deborah Foreman', 'Elizabeth...","Comedy, Romance","Julie, a girl from the valley, meets Randy, a ...",,
5555,429422,Capone,Josh Trank,"['Tom Hardy', 'Linda Cardellini', 'Matt Dillon...","Crime, Drama","The 47-year old Al Capone, after 10 years in p...",<em>'Capone'</em> disappoints.\r\n\r\nIt's not...,
5556,582596,The Wrong Missy,Tyler Spindel,"['David Spade', 'Lauren Lapkus', 'Candace Smit...","Comedy, Romance",A guy meets the woman of his dreams and invite...,,


#### Store data in csv as filter_data

In [85]:
result.to_csv("filter_data.csv",index=False)