### Import Required Libraries and Set Up Environment Variables

In [1]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json

In [2]:
# Set environment variables from the .env in the local environment
load_dotenv("mykeys.env")

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [123]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
query_url = f'{url}fq={filter_query}&begin_date={begin_date}&end_date={end_date}&fl={field_list}&api-key={nyt_api_key}'


In [6]:
# Create an empty list to store the reviews
reviews_list = []

# loop through pages 0-19
for i in range(20):
    # create query with a page number
    # API results show 10 articles at a time
    query_url_page = query_url + f'&page={i}'

    # Make a "GET" request and retrieve the JSON
    reviews = requests.get(query_url_page).json()
    
    # Add a twelve second interval between queries to stay within API query limits
    time.sleep(12)
    
    # Try and save the reviews to the reviews_list
    try:
        # loop through the reviews["response"]["docs"] and append each review to the list
        reviews_list.append(reviews["response"]["docs"])
        
        # Print the page that was just retrieved
        print(f"Checked page {i}")

        # Print the page number that had no results then break from the loop
    except:
        print(f"page {i} had no responses")


Checked page 0
Checked page 1
Checked page 2
Checked page 3
Checked page 4
Checked page 5
Checked page 6
Checked page 7
Checked page 8
Checked page 9
Checked page 10
Checked page 11
Checked page 12
Checked page 13
Checked page 14
Checked page 15
Checked page 16
Checked page 17
Checked page 18
Checked page 19


In [7]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
print(json.dumps(reviews_list,indent=4))

[
    [
        {
            "web_url": "https://www.nytimes.com/2018/01/18/movies/kangaroo-a-love-hate-story-review.html",
            "snippet": "The documentary looks at the mass killings of kangaroos for pet-food companies, leather processors and ranchers in Australia.",
            "source": "The New York Times",
            "headline": {
                "main": "Review: \u2018Kangaroo: A Love-Hate Story\u2019 Exposes a Wildlife Massacre",
                "kicker": null,
                "content_kicker": null,
                "print_headline": "Kangaroo: A Love-Hate Story",
                "name": null,
                "seo": null,
                "sub": null
            },
            "keywords": [
                {
                    "name": "creative_works",
                    "value": "Kangaroo: A Love-Hate Story (Movie)",
                    "rank": 1,
                    "major": "N"
                },
                {
                    "name": "subject",
             

In [8]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
reviews_df= pd.json_normalize([item for sublist in reviews_list for item in sublist])
reviews_df

# # alternate method
# flatten = []
# for sublist in reviews_list:
#     for item in sublist:
#         flatten.append(item)
    

# reviews_df = pd.DataFrame(pd.json_normalize(flatten))
# reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2018/01/18/movies/kang...,The documentary looks at the mass killings of ...,The New York Times,"[{'name': 'creative_works', 'value': 'Kangaroo...",2018-01-18T12:00:23+0000,263,Review: ‘Kangaroo: A Love-Hate Story’ Exposes ...,,,Kangaroo: A Love-Hate Story,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",
1,https://www.nytimes.com/2013/09/20/movies/jewt...,"In “Jewtopia,” a young man asks a childhood fr...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2013-09-19T23:33:15+0000,272,Love’s Eternal Masquerade,Movie Review,,Jewtopia,,,,By David DeWitt,"[{'firstname': 'David', 'middlename': None, 'l...",
2,https://www.nytimes.com/2019/05/21/movies/the-...,Blythe Danner and John Lithgow strain to eleva...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2019-05-21T11:00:01+0000,280,‘The Tomorrow Man’ Review: Love Among the Neur...,,,"They Whirl, They Twirl, They Tango",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
3,https://www.nytimes.com/2019/11/05/movies/marr...,Adam Driver and Scarlett Johansson self-consci...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2019-11-05T19:01:02+0000,1244,‘Marriage Story’ Review: Dance Me to the End o...,critic’s pick,,"Friendly Split, Shattering Break",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",
4,https://www.nytimes.com/2022/11/17/movies/bone...,Luca Guadagnino’s latest stars Timothée Chalam...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-11-17T12:00:06+0000,710,‘Bones and All’ Review: You Eat What You Are,,,"It’s Eat, Prey, Love on a Journey of Self-Disc...",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2016/12/27/movies/ocea...,In a 1993 gem from this revered Japanese anima...,The New York Times,"[{'name': 'creative_works', 'value': 'Ocean Wa...",2016-12-27T21:08:58+0000,263,"Review: ‘Ocean Waves,’ a Tale of Young Love, G...",,,"Teenage Romance, Flashbacks and Style",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
196,https://www.nytimes.com/2022/05/26/movies/dinn...,A rage-fueled rock singer on the run from the ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-05-26T11:00:04+0000,254,‘Dinner in America’ Review: A Punk-Rock Love S...,,,Dinner in America,,,,By Concepción de León,"[{'firstname': 'Concepción', 'middlename': Non...",
197,https://www.nytimes.com/2015/04/10/movies/revi...,"In this film, an orphaned girl’s friendship wi...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2015-04-09T23:37:10+0000,491,"Review: In ‘The Harvest,’ Love Is Wielded With...",,,"Love, Wielded With a Chill",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
198,https://www.nytimes.com/2022/11/17/movies/the-...,Elegance Bratton’s autobiographical first feat...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-11-17T12:00:07+0000,836,"‘The Inspection’ Review: Boot Camp, a Love Story",critic’s pick,,"A Few Good Men, Some With Secrets",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",


In [10]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early


# define function to identify text between unicode characters \u2018 and \u2019
def extract_text(text):
    start = text.find('\u2018')
    # look for endings with just unicode character or unicode character + Review.  Use one with review unless not present.
    end1 = text.find('\u2019', start)
    end2 = text.find('\u2019 Review', start)
    
    if end2 > end1:
        end = end2
    else:
        end = end1
    #print(start,end1, end2)
    if start != -1 and end != -1:
        return text[start + 1:end]
    # if nothing found between characters, then return original text
    return text


reviews_df['title']  = reviews_df['headline.main'].apply(extract_text)
reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2018/01/18/movies/kang...,The documentary looks at the mass killings of ...,The New York Times,"[{'name': 'creative_works', 'value': 'Kangaroo...",2018-01-18T12:00:23+0000,263,Review: ‘Kangaroo: A Love-Hate Story’ Exposes ...,,,Kangaroo: A Love-Hate Story,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",,Kangaroo: A Love-Hate Story
1,https://www.nytimes.com/2013/09/20/movies/jewt...,"In “Jewtopia,” a young man asks a childhood fr...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2013-09-19T23:33:15+0000,272,Love’s Eternal Masquerade,Movie Review,,Jewtopia,,,,By David DeWitt,"[{'firstname': 'David', 'middlename': None, 'l...",,Love’s Eternal Masquerade
2,https://www.nytimes.com/2019/05/21/movies/the-...,Blythe Danner and John Lithgow strain to eleva...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2019-05-21T11:00:01+0000,280,‘The Tomorrow Man’ Review: Love Among the Neur...,,,"They Whirl, They Twirl, They Tango",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Tomorrow Man
3,https://www.nytimes.com/2019/11/05/movies/marr...,Adam Driver and Scarlett Johansson self-consci...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2019-11-05T19:01:02+0000,1244,‘Marriage Story’ Review: Dance Me to the End o...,critic’s pick,,"Friendly Split, Shattering Break",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,Marriage Story
4,https://www.nytimes.com/2022/11/17/movies/bone...,Luca Guadagnino’s latest stars Timothée Chalam...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-11-17T12:00:06+0000,710,‘Bones and All’ Review: You Eat What You Are,,,"It’s Eat, Prey, Love on a Journey of Self-Disc...",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,Bones and All
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2016/12/27/movies/ocea...,In a 1993 gem from this revered Japanese anima...,The New York Times,"[{'name': 'creative_works', 'value': 'Ocean Wa...",2016-12-27T21:08:58+0000,263,"Review: ‘Ocean Waves,’ a Tale of Young Love, G...",,,"Teenage Romance, Flashbacks and Style",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",,"Ocean Waves,"
196,https://www.nytimes.com/2022/05/26/movies/dinn...,A rage-fueled rock singer on the run from the ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-05-26T11:00:04+0000,254,‘Dinner in America’ Review: A Punk-Rock Love S...,,,Dinner in America,,,,By Concepción de León,"[{'firstname': 'Concepción', 'middlename': Non...",,Dinner in America
197,https://www.nytimes.com/2015/04/10/movies/revi...,"In this film, an orphaned girl’s friendship wi...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2015-04-09T23:37:10+0000,491,"Review: In ‘The Harvest,’ Love Is Wielded With...",,,"Love, Wielded With a Chill",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,"The Harvest,"
198,https://www.nytimes.com/2022/11/17/movies/the-...,Elegance Bratton’s autobiographical first feat...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-11-17T12:00:07+0000,836,"‘The Inspection’ Review: Boot Camp, a Love Story",critic’s pick,,"A Few Good Men, Some With Secrets",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,The Inspection


In [12]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
reviews_df['keywords'] = reviews_df['keywords'].apply(extract_keywords)
reviews_df


Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2018/01/18/movies/kang...,The documentary looks at the mass killings of ...,The New York Times,creative_works: Kangaroo: A Love-Hate Story (M...,2018-01-18T12:00:23+0000,263,Review: ‘Kangaroo: A Love-Hate Story’ Exposes ...,,,Kangaroo: A Love-Hate Story,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",,Kangaroo: A Love-Hate Story
1,https://www.nytimes.com/2013/09/20/movies/jewt...,"In “Jewtopia,” a young man asks a childhood fr...",The New York Times,"subject: Movies;persons: Sergei, Ivan;creative...",2013-09-19T23:33:15+0000,272,Love’s Eternal Masquerade,Movie Review,,Jewtopia,,,,By David DeWitt,"[{'firstname': 'David', 'middlename': None, 'l...",,Love’s Eternal Masquerade
2,https://www.nytimes.com/2019/05/21/movies/the-...,Blythe Danner and John Lithgow strain to eleva...,The New York Times,subject: Movies;creative_works: The Tomorrow M...,2019-05-21T11:00:01+0000,280,‘The Tomorrow Man’ Review: Love Among the Neur...,,,"They Whirl, They Twirl, They Tango",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Tomorrow Man
3,https://www.nytimes.com/2019/11/05/movies/marr...,Adam Driver and Scarlett Johansson self-consci...,The New York Times,"subject: Movies;persons: Baumbach, Noah;creati...",2019-11-05T19:01:02+0000,1244,‘Marriage Story’ Review: Dance Me to the End o...,critic’s pick,,"Friendly Split, Shattering Break",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,Marriage Story
4,https://www.nytimes.com/2022/11/17/movies/bone...,Luca Guadagnino’s latest stars Timothée Chalam...,The New York Times,subject: Movies;creative_works: Bones and All ...,2022-11-17T12:00:06+0000,710,‘Bones and All’ Review: You Eat What You Are,,,"It’s Eat, Prey, Love on a Journey of Self-Disc...",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,Bones and All
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2016/12/27/movies/ocea...,In a 1993 gem from this revered Japanese anima...,The New York Times,creative_works: Ocean Waves (Movie);subject: A...,2016-12-27T21:08:58+0000,263,"Review: ‘Ocean Waves,’ a Tale of Young Love, G...",,,"Teenage Romance, Flashbacks and Style",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",,"Ocean Waves,"
196,https://www.nytimes.com/2022/05/26/movies/dinn...,A rage-fueled rock singer on the run from the ...,The New York Times,"subject: Movies;persons: Rehmeier, Adam;creati...",2022-05-26T11:00:04+0000,254,‘Dinner in America’ Review: A Punk-Rock Love S...,,,Dinner in America,,,,By Concepción de León,"[{'firstname': 'Concepción', 'middlename': Non...",,Dinner in America
197,https://www.nytimes.com/2015/04/10/movies/revi...,"In this film, an orphaned girl’s friendship wi...",The New York Times,"subject: Movies;persons: McNaughton, John;pers...",2015-04-09T23:37:10+0000,491,"Review: In ‘The Harvest,’ Love Is Wielded With...",,,"Love, Wielded With a Chill",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,"The Harvest,"
198,https://www.nytimes.com/2022/11/17/movies/the-...,Elegance Bratton’s autobiographical first feat...,The New York Times,subject: Movies;creative_works: The Inspection...,2022-11-17T12:00:07+0000,836,"‘The Inspection’ Review: Boot Camp, a Love Story",critic’s pick,,"A Few Good Men, Some With Secrets",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",,The Inspection


In [17]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
titles = reviews_df['title'].to_list()
titles

#This function remove commas in the title, only if they are in the last postion
def remove_trailing_comma(text):
    if text.endswith(','):
        return text.rstrip(',')
    return text
titles_cleaned = []
for text in titles:
    titles_cleaned.append(remove_trailing_comma(text))

titles_cleaned


['Kangaroo: A Love-Hate Story',
 'Love’s Eternal Masquerade',
 'The Tomorrow Man',
 'Marriage Story',
 'Bones and All',
 'Love, Antosha',
 'You Can Live Forever',
 'Prem Ratan Dhan Payo',
 'A Promoter Finds a New Cause: God',
 'Now They’ll Love Me, a Twin Sister Schemes',
 'They',
 'A Journal for Jordan',
 'A United Kingdom',
 'Sophie and the Rising Sun',
 'Passengers',
 'Undine',
 'Waiting for Bojangles',
 'Love and Unhappiness, in Soft Shades of Gray',
 'Lost in Paris',
 'Lost and Love',
 'From This Day Forward',
 'How He Fell in Love',
 'Young Love, Interrupted by a Nuclear Bomb',
 'Love the Coopers',
 'Ma Ma',
 'The Shape of Water',
 'Love After Love',
 'Love & Mercy',
 'Women Who Kill',
 'He’s in Love, but No Closer to Figuring It Out',
 'Hard Luck Love Song',
 'See You Then',
 'Kalank',
 'Z for Zachariah',
 'Your Name Engraved Herein',
 'Youth',
 'Tu Me Manques',
 'Together',
 'Alcarràs',
 'Queen & Slim',
 'Falling Inn Love',
 'Tamasha',
 'The Sun Is Also a Star',
 'Cathedrals of

In [83]:
#update title with titles that have trailing comma removed
reviews_df['title'] = titles_cleaned


### Access The Movie Database API

In [31]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

In [122]:
# Create an empty list to store the results
tmdb_movies_list = []

# Create a request counter to sleep the requests after a multiple
# of 50 requests
request_counter = 1

# Loop through the titles
for title_text in titles_cleaned:
    # Check if we need to sleep before making a request
    if (request_counter % 50) == 0 :
        time.sleep(1)

    # Add 1 to the request counter
    request_counter = +1
    
    # Perform a "GET" request for The Movie Database
    url_string = url+title_text+tmdb_key_string
    response = requests.get(url_string).json()
    #print(json.dumps(response,indent=4))
    

    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.
    try: 
        # Get movie id
        movie_id = response["results"][0]["id"]
        #print (movie_id)

        # Make a request for a the full movie details
        url_id = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb_api_key}"

        # Execute "GET" request with url
        movie_details = requests.get(url_id).json()
        #print (json.dumps(movie_details,indent=4))
        # Extract the genre names into a list
        genre_names = [movie_details["genres"][x]["name"] for x in range(len(movie_details["genres"]))]
        #print(genre_names)

        # Extract the spoken_languages' English name into a list
        spoken_languages = [movie_details["spoken_languages"][x]["name"] for x in range(len(movie_details["spoken_languages"]))]
        #print(spoken_languages)
        # Extract the production_countries' name into a list
        production_countries = [movie_details["production_countries"][x]["name"] for x in range(len(movie_details["production_countries"]))]
        
        # Add the relevant data to a dictionary and
        # append it to the tmdb_movies_list list
        movie_info = {
            'title' : title_text,
            'original_title' : movie_details['original_title'],
            'budget' : movie_details['budget'],
            'original_language' : movie_details['original_language'],
            'homepage' : movie_details['homepage'],
            'overview' : movie_details['overview'],
            'popularity' : movie_details['popularity'],
            'runtime' : movie_details['runtime'],
            'revenue' : movie_details['revenue'],
            'release_date' : movie_details['release_date'],
            'vote_average' : movie_details['vote_average'],
            'vote_count' : movie_details['vote_count'],
            'genres' : genre_names,
            'spoken_languages' : spoken_languages, 
            'production_countries' : production_countries
        }
        
        tmdb_movies_list.append(movie_info)
        print(f"Found {title_text}")

        # Print out the title that was found
    except:
        print(f"Not found {title_text}")

Found Kangaroo: A Love-Hate Story
Not found Love’s Eternal Masquerade
Found The Tomorrow Man
Found Marriage Story
Found Bones and All
Found Love, Antosha
Found You Can Live Forever
Found Prem Ratan Dhan Payo
Not found A Promoter Finds a New Cause: God
Not found Now They’ll Love Me, a Twin Sister Schemes
Found They
Found A Journal for Jordan
Found A United Kingdom
Found Sophie and the Rising Sun
Found Passengers
Found Undine
Found Waiting for Bojangles
Not found Love and Unhappiness, in Soft Shades of Gray
Found Lost in Paris
Found Lost and Love
Found From This Day Forward
Found How He Fell in Love
Not found Young Love, Interrupted by a Nuclear Bomb
Found Love the Coopers
Found Ma Ma
Found The Shape of Water
Found Love After Love
Found Love & Mercy
Found Women Who Kill
Not found He’s in Love, but No Closer to Figuring It Out
Found Hard Luck Love Song
Found See You Then
Found Kalank
Found Z for Zachariah
Found Your Name Engraved Herein
Found Youth
Found Tu Me Manques
Found Together
Found

In [124]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
print(json.dumps(tmdb_movies_list[:5],indent=4))


[
    {
        "title": "Kangaroo: A Love-Hate Story",
        "original_title": "Kangaroo: A Love-Hate Story",
        "budget": 0,
        "original_language": "en",
        "homepage": "http://KangarooTheMovie.com",
        "overview": "This groundbreaking film reveals the truth surrounding Australia\u2019s love-hate relationship with its beloved icon. The kangaroo image is proudly used by top companies, sports teams and as tourist souvenirs, yet when they hop across the vast continent some consider them to be pests to be shot and sold for profit. KANGAROO unpacks a national paradigm where the relationship with kangaroos is examined.",
        "popularity": 1.574,
        "runtime": 103,
        "revenue": 0,
        "release_date": "2018-01-19",
        "vote_average": 8.7,
        "vote_count": 3,
        "genres": [
            "Documentary"
        ],
        "spoken_languages": [
            "English"
        ],
        "production_countries": [
            "Australia"
       

In [125]:
# Convert the results to a DataFrame
tmdb_movies_list_df = pd.DataFrame(tmdb_movies_list)
tmdb_movies_list_df

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,Kangaroo: A Love-Hate Story,Kangaroo: A Love-Hate Story,0,en,http://KangarooTheMovie.com,This groundbreaking film reveals the truth sur...,1.574,103,0,2018-01-19,8.700,3,[Documentary],[English],[Australia]
1,The Tomorrow Man,The Tomorrow Man,0,en,https://bleeckerstreetmedia.com/thetomorrowman,Ed Hemsler spends his life preparing for a dis...,9.399,94,0,2019-05-22,5.746,59,"[Drama, Romance]",[English],[United States of America]
2,Marriage Story,Marriage Story,18000000,en,https://www.marriagestorymovie.com,A stage director and an actress struggle throu...,25.751,137,2300000,2019-09-28,7.744,6764,[Drama],[English],[United Kingdom]
3,Bones and All,Bones and All,16000000,it,https://www.bonesandallfilm.net/,"Abandoned by her father, a young woman embarks...",42.212,131,15234907,2022-11-18,7.100,1254,"[Drama, Horror, Romance]",[English],"[Italy, United States of America]"
4,"Love, Antosha","Love, Antosha",0,en,https://antonyelchindoc.com/,"From a prolific career in film and television,...",9.253,92,0,2019-08-02,7.200,50,[Documentary],"[English, Pусский]",[United States of America]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,Loving,Loving,9000000,en,http://focusfeatures.com/loving,"The story of Richard and Mildred Loving, an in...",36.191,123,8996802,2016-11-04,6.698,892,"[Drama, Romance]",[English],"[United Kingdom, United States of America]"
152,Ocean Waves,海がきこえる,0,ja,,"At Kichijōji Station, Tokyo, Taku Morisaki gli...",31.779,72,0,1994-10-07,6.400,828,"[Animation, Drama, Romance]",[日本語],[Japan]
153,Dinner in America,Dinner in America,0,en,https://www.dinnerinamerica.com,An on-the-lam punk rocker and a young woman ob...,11.671,106,0,2022-05-27,7.384,56,"[Romance, Comedy, Music]",[English],[United States of America]
154,The Harvest,The Harvest,0,en,,Maryann moves in with her grandparents after s...,10.786,104,0,2015-04-10,5.900,192,"[Horror, Thriller, Mystery]",[English],[United States of America]


### Merge and Clean the Data for Export

In [133]:
# Merge the New York Times reviews and TMDB DataFrames on title
merged_df = pd.merge(tmdb_movies_list_df,reviews_df, on='title',how='left')
merged_df

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,...,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,Kangaroo: A Love-Hate Story,Kangaroo: A Love-Hate Story,0,en,http://KangarooTheMovie.com,This groundbreaking film reveals the truth sur...,1.574,103,0,2018-01-19,...,Review: ‘Kangaroo: A Love-Hate Story’ Exposes ...,,,Kangaroo: A Love-Hate Story,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",
1,The Tomorrow Man,The Tomorrow Man,0,en,https://bleeckerstreetmedia.com/thetomorrowman,Ed Hemsler spends his life preparing for a dis...,9.399,94,0,2019-05-22,...,‘The Tomorrow Man’ Review: Love Among the Neur...,,,"They Whirl, They Twirl, They Tango",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
2,Marriage Story,Marriage Story,18000000,en,https://www.marriagestorymovie.com,A stage director and an actress struggle throu...,25.751,137,2300000,2019-09-28,...,‘Marriage Story’ Review: Dance Me to the End o...,critic’s pick,,"Friendly Split, Shattering Break",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",
3,Bones and All,Bones and All,16000000,it,https://www.bonesandallfilm.net/,"Abandoned by her father, a young woman embarks...",42.212,131,15234907,2022-11-18,...,‘Bones and All’ Review: You Eat What You Are,,,"It’s Eat, Prey, Love on a Journey of Self-Disc...",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",
4,"Love, Antosha","Love, Antosha",0,en,https://antonyelchindoc.com/,"From a prolific career in film and television,...",9.253,92,0,2019-08-02,...,"‘Love, Antosha’ Review: A Heartbreaking Look a...",Critic’s Pick,,"Love, Antosha",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Loving,Loving,9000000,en,http://focusfeatures.com/loving,"The story of Richard and Mildred Loving, an in...",36.191,123,8996802,2016-11-04,...,"Review: In ‘Loving,’ They Loved. A Segregated ...",,,They Fought the Law. And Their Love Won.,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
154,Ocean Waves,海がきこえる,0,ja,,"At Kichijōji Station, Tokyo, Taku Morisaki gli...",31.779,72,0,1994-10-07,...,"Review: ‘Ocean Waves,’ a Tale of Young Love, G...",,,"Teenage Romance, Flashbacks and Style",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
155,Dinner in America,Dinner in America,0,en,https://www.dinnerinamerica.com,An on-the-lam punk rocker and a young woman ob...,11.671,106,0,2022-05-27,...,‘Dinner in America’ Review: A Punk-Rock Love S...,,,Dinner in America,,,,By Concepción de León,"[{'firstname': 'Concepción', 'middlename': Non...",
156,The Harvest,The Harvest,0,en,,Maryann moves in with her grandparents after s...,10.786,104,0,2015-04-10,...,"Review: In ‘The Harvest,’ Love Is Wielded With...",,,"Love, Wielded With a Chill",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",


In [134]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing
columns_to_fix = ['genres','spoken_languages','production_countries']

# Create a list of characters to remove
character_to_remove = ['[',']']

# Loop through the list of columns to fix
for column in columns_to_fix:
    # Convert the column to type 'str'
    merged_df[column] = merged_df[column].astype(str)
    
    # Loop through characters to remove
    for line in merged_df[column]:
        merged_df[column] = merged_df[column].str.replace(r"\[|\]|\u2018|\u2019|'", '', regex=True)

# Display the fixed DataFrame
merged_df

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,...,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,Kangaroo: A Love-Hate Story,Kangaroo: A Love-Hate Story,0,en,http://KangarooTheMovie.com,This groundbreaking film reveals the truth sur...,1.574,103,0,2018-01-19,...,Review: ‘Kangaroo: A Love-Hate Story’ Exposes ...,,,Kangaroo: A Love-Hate Story,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",
1,The Tomorrow Man,The Tomorrow Man,0,en,https://bleeckerstreetmedia.com/thetomorrowman,Ed Hemsler spends his life preparing for a dis...,9.399,94,0,2019-05-22,...,‘The Tomorrow Man’ Review: Love Among the Neur...,,,"They Whirl, They Twirl, They Tango",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
2,Marriage Story,Marriage Story,18000000,en,https://www.marriagestorymovie.com,A stage director and an actress struggle throu...,25.751,137,2300000,2019-09-28,...,‘Marriage Story’ Review: Dance Me to the End o...,critic’s pick,,"Friendly Split, Shattering Break",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",
3,Bones and All,Bones and All,16000000,it,https://www.bonesandallfilm.net/,"Abandoned by her father, a young woman embarks...",42.212,131,15234907,2022-11-18,...,‘Bones and All’ Review: You Eat What You Are,,,"It’s Eat, Prey, Love on a Journey of Self-Disc...",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",
4,"Love, Antosha","Love, Antosha",0,en,https://antonyelchindoc.com/,"From a prolific career in film and television,...",9.253,92,0,2019-08-02,...,"‘Love, Antosha’ Review: A Heartbreaking Look a...",Critic’s Pick,,"Love, Antosha",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Loving,Loving,9000000,en,http://focusfeatures.com/loving,"The story of Richard and Mildred Loving, an in...",36.191,123,8996802,2016-11-04,...,"Review: In ‘Loving,’ They Loved. A Segregated ...",,,They Fought the Law. And Their Love Won.,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
154,Ocean Waves,海がきこえる,0,ja,,"At Kichijōji Station, Tokyo, Taku Morisaki gli...",31.779,72,0,1994-10-07,...,"Review: ‘Ocean Waves,’ a Tale of Young Love, G...",,,"Teenage Romance, Flashbacks and Style",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
155,Dinner in America,Dinner in America,0,en,https://www.dinnerinamerica.com,An on-the-lam punk rocker and a young woman ob...,11.671,106,0,2022-05-27,...,‘Dinner in America’ Review: A Punk-Rock Love S...,,,Dinner in America,,,,By Concepción de León,"[{'firstname': 'Concepción', 'middlename': Non...",
156,The Harvest,The Harvest,0,en,,Maryann moves in with her grandparents after s...,10.786,104,0,2015-04-10,...,"Review: In ‘The Harvest,’ Love Is Wielded With...",,,"Love, Wielded With a Chill",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",


In [135]:
# Drop "byline.person" column
merged_df.drop(columns='byline.person', inplace=True)

In [136]:
# Delete duplicate rows and reset index
cleaned_merged_df = merged_df.drop_duplicates().reset_index(drop=True)
cleaned_merged_df.head()

Unnamed: 0,title,original_title,budget,original_language,homepage,overview,popularity,runtime,revenue,release_date,...,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.organization
0,Kangaroo: A Love-Hate Story,Kangaroo: A Love-Hate Story,0,en,http://KangarooTheMovie.com,This groundbreaking film reveals the truth sur...,1.574,103,0,2018-01-19,...,263,Review: ‘Kangaroo: A Love-Hate Story’ Exposes ...,,,Kangaroo: A Love-Hate Story,,,,By Ken Jaworowski,
1,The Tomorrow Man,The Tomorrow Man,0,en,https://bleeckerstreetmedia.com/thetomorrowman,Ed Hemsler spends his life preparing for a dis...,9.399,94,0,2019-05-22,...,280,‘The Tomorrow Man’ Review: Love Among the Neur...,,,"They Whirl, They Twirl, They Tango",,,,By Jeannette Catsoulis,
2,Marriage Story,Marriage Story,18000000,en,https://www.marriagestorymovie.com,A stage director and an actress struggle throu...,25.751,137,2300000,2019-09-28,...,1244,‘Marriage Story’ Review: Dance Me to the End o...,critic’s pick,,"Friendly Split, Shattering Break",,,,By A.O. Scott,
3,Bones and All,Bones and All,16000000,it,https://www.bonesandallfilm.net/,"Abandoned by her father, a young woman embarks...",42.212,131,15234907,2022-11-18,...,710,‘Bones and All’ Review: You Eat What You Are,,,"It’s Eat, Prey, Love on a Journey of Self-Disc...",,,,By A.O. Scott,
4,"Love, Antosha","Love, Antosha",0,en,https://antonyelchindoc.com/,"From a prolific career in film and television,...",9.253,92,0,2019-08-02,...,319,"‘Love, Antosha’ Review: A Heartbreaking Look a...",Critic’s Pick,,"Love, Antosha",,,,By Glenn Kenny,


In [142]:
# Export data to CSV without the index
cleaned_merged_df.to_csv("output\cleaned_data.csv",index=False)