### Import Required Libraries and Set Up Environment Variables

In [1]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json

In [2]:
# Set environment variables from the .env in the local environment
load_dotenv('example.env')

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

# Check if the API keys are retrieved successfully
if not nyt_api_key:
    raise ValueError("No NYT API key found. Please set the NYT_API_KEY environment variable.")
if not tmdb_api_key:
    raise ValueError("No TMDb API key found. Please set the TMDB_API_KEY environment variable.")

# Now you can use these API keys in your requests
print(f"NYT API Key: {nyt_api_key}")
print(f"TMDB API Key: {tmdb_api_key}")

NYT API Key: ntfOhYD4PdouYsuZqYQAiqWQMD2P53KL
TMDB API Key: 9acd8d526729e5e87c83d2f42e34f28e


### Access the New York Times API

In [3]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL


# Build URL with API key and parameters
query_params = {
    'fq': filter_query,
    'sort': sort,
    'fl': field_list,
    'begin_date': begin_date,
    'end_date': end_date,
    'api-key': nyt_api_key
}



response = requests.get(url, params=query_params)

# Check for a successful response
if response.status_code == 200:
    data = response.json()
    # Process the data as needed
    print(data)
else:
    print(f"Error: {response.status_code}")


{'status': 'OK', 'copyright': 'Copyright (c) 2024 The New York Times Company. All Rights Reserved.', 'response': {'docs': [{'web_url': 'https://www.nytimes.com/2023/05/25/movies/the-attachment-diaries-review.html', 'snippet': 'A gynecologist and her patient form a horrifyingly twisted connection in this batty, bloody Argentine melodrama.', 'source': 'The New York Times', 'headline': {'main': '‘The Attachment Diaries’ Review: Love, Sick', 'kicker': None, 'content_kicker': None, 'print_headline': 'The Attachment Diaries', 'name': None, 'seo': None, 'sub': None}, 'keywords': [{'name': 'subject', 'value': 'Movies', 'rank': 1, 'major': 'N'}, {'name': 'creative_works', 'value': 'The Attachment Diaries (Movie)', 'rank': 2, 'major': 'N'}, {'name': 'persons', 'value': 'Diment, Valentin Javier', 'rank': 3, 'major': 'N'}], 'pub_date': '2023-05-25T11:00:03+0000', 'byline': {'original': 'By Jeannette Catsoulis', 'person': [{'firstname': 'Jeannette', 'middlename': None, 'lastname': 'Catsoulis', 'qua

In [4]:
# Create an empty list to store the reviews


# loop through pages 0-19

    # create query with a page number
    # API results show 10 articles at a time

    
    # Make a "GET" request and retrieve the JSON

    
    # Add a twelve second interval between queries to stay within API query limits

    
    # Try and save the reviews to the reviews_list

        # loop through the reviews["response"]["docs"] and append each review to the list

        # Print the page that was just retrieved


        # Print the page number that had no results then break from the loop
        
        
        


# Create an empty list to store the reviews
reviews_list = []

# Loop through pages 0-19
for page in range(20):
    # Create query with a page number
    query_params = {
        'fq': filter_query,
        'sort': sort,
        'fl': field_list,
        'begin_date': begin_date,
        'end_date': end_date,
        'api-key': nyt_api_key,
        'page': page
    }

    # Make a "GET" request and retrieve the JSON
    response = requests.get(url, params=query_params)

    # Add a twelve second interval between queries to stay within API query limits
    time.sleep(12)

    try:
        reviews = response.json()

        # Loop through the reviews["response"]["docs"] and append each review to the list
        if 'response' in reviews and 'docs' in reviews['response']:
            for review in reviews['response']['docs']:
                reviews_list.append(review)
            print(f"Page {page} retrieved successfully.")
        else:
            print(f"Page {page} had no results.")
            break

    except Exception as e:
        print(f"An error occurred on page {page}: {e}")
        break

# Print the total number of reviews retrieved
print(f"Total number of reviews retrieved: {len(reviews_list)}")



Page 0 retrieved successfully.
Page 1 retrieved successfully.
Page 2 retrieved successfully.
Page 3 retrieved successfully.
Page 4 retrieved successfully.
Page 5 retrieved successfully.
Page 6 retrieved successfully.
Page 7 retrieved successfully.
Page 8 retrieved successfully.
Page 9 retrieved successfully.
Page 10 retrieved successfully.
Page 11 retrieved successfully.
Page 12 retrieved successfully.
Page 13 retrieved successfully.
Page 14 retrieved successfully.
Page 15 retrieved successfully.
Page 16 retrieved successfully.
Page 17 retrieved successfully.
Page 18 retrieved successfully.
Page 19 retrieved successfully.
Total number of reviews retrieved: 200


In [5]:




# Preview the first 5 results in JSON format with indentation
formatted_reviews = json.dumps(reviews_list[:5], indent=4)
print(formatted_reviews)


[
    {
        "web_url": "https://www.nytimes.com/2023/05/25/movies/the-attachment-diaries-review.html",
        "snippet": "A gynecologist and her patient form a horrifyingly twisted connection in this batty, bloody Argentine melodrama.",
        "source": "The New York Times",
        "headline": {
            "main": "\u2018The Attachment Diaries\u2019 Review: Love, Sick",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "The Attachment Diaries",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "subject",
                "value": "Movies",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "creative_works",
                "value": "The Attachment Diaries (Movie)",
                "rank": 2,
                "major": "N"
            },
            {
                "name": "persons",
 

In [6]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()

from pandas import json_normalize

# Convert reviews_list to a Pandas DataFrame using json_normalize()
reviews_df = json_normalize(reviews_list)
reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2017/03/09/movies/the-...,This moody romance stars Tatiana Maslany (“Orp...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-03-09T21:54:58+0000,251,Review: A Combustible Pair Find Love in ‘The O...,,,Review: A Combustible Pair Find Love in ‘The O...,,,,By Andy Webster,"[{'firstname': 'Andy', 'middlename': None, 'la...",
196,https://www.nytimes.com/2017/03/09/movies/revi...,A nurse travels to the Ottoman Empire on the e...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-03-09T21:53:12+0000,267,"Review: Love as the World Wars, in ‘The Ottoma...",,,"Review: Love as the World Wars, in ‘The Ottoma...",,,,By Neil Genzlinger,"[{'firstname': 'Neil', 'middlename': None, 'la...",
197,https://www.nytimes.com/2017/03/02/movies/love...,Josh Kornbluth runs afoul of the Internal Reve...,The New York Times,"[{'name': 'creative_works', 'value': 'Love & T...",2017-03-02T21:44:18+0000,246,Review: It’s All Mirth and Taxes in ‘Love & Ta...,,,"It’s Inevitable, Mirth and Taxes",,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",
198,https://www.nytimes.com/2017/02/16/movies/ever...,A messed-up heroine is asked to choose between...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-02-16T21:45:50+0000,256,"Review: ‘Everybody Loves Somebody,’ a Rom-Com ...",,,Everybody Loves Somebody,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",


In [7]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early
reviews_df['title'] = reviews_df['headline.main'].str.extract(r'\u2018(.*?)\u2019') + " Review"
reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries Review
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,What Review
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever Review
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist Review
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People Review
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2017/03/09/movies/the-...,This moody romance stars Tatiana Maslany (“Orp...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-03-09T21:54:58+0000,251,Review: A Combustible Pair Find Love in ‘The O...,,,Review: A Combustible Pair Find Love in ‘The O...,,,,By Andy Webster,"[{'firstname': 'Andy', 'middlename': None, 'la...",,The Other Half Review
196,https://www.nytimes.com/2017/03/09/movies/revi...,A nurse travels to the Ottoman Empire on the e...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-03-09T21:53:12+0000,267,"Review: Love as the World Wars, in ‘The Ottoma...",,,"Review: Love as the World Wars, in ‘The Ottoma...",,,,By Neil Genzlinger,"[{'firstname': 'Neil', 'middlename': None, 'la...",,The Ottoman Lieutenant Review
197,https://www.nytimes.com/2017/03/02/movies/love...,Josh Kornbluth runs afoul of the Internal Reve...,The New York Times,"[{'name': 'creative_works', 'value': 'Love & T...",2017-03-02T21:44:18+0000,246,Review: It’s All Mirth and Taxes in ‘Love & Ta...,,,"It’s Inevitable, Mirth and Taxes",,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",,Love & Taxes Review
198,https://www.nytimes.com/2017/02/16/movies/ever...,A messed-up heroine is asked to choose between...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-02-16T21:45:50+0000,256,"Review: ‘Everybody Loves Somebody,’ a Rom-Com ...",,,Everybody Loves Somebody,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,"Everybody Loves Somebody, Review"


In [8]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
reviews_df['keywords'] = reviews_df['keywords'].apply(extract_keywords)
reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries Review
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"subject: Movies;persons: Kapur, Shekhar;person...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,What Review
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever Review
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist Review
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People Review
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2017/03/09/movies/the-...,This moody romance stars Tatiana Maslany (“Orp...,The New York Times,subject: Movies;creative_works: The Other Half...,2017-03-09T21:54:58+0000,251,Review: A Combustible Pair Find Love in ‘The O...,,,Review: A Combustible Pair Find Love in ‘The O...,,,,By Andy Webster,"[{'firstname': 'Andy', 'middlename': None, 'la...",,The Other Half Review
196,https://www.nytimes.com/2017/03/09/movies/revi...,A nurse travels to the Ottoman Empire on the e...,The New York Times,subject: Movies;creative_works: The Ottoman Li...,2017-03-09T21:53:12+0000,267,"Review: Love as the World Wars, in ‘The Ottoma...",,,"Review: Love as the World Wars, in ‘The Ottoma...",,,,By Neil Genzlinger,"[{'firstname': 'Neil', 'middlename': None, 'la...",,The Ottoman Lieutenant Review
197,https://www.nytimes.com/2017/03/02/movies/love...,Josh Kornbluth runs afoul of the Internal Reve...,The New York Times,creative_works: Love & Taxes (Movie);persons: ...,2017-03-02T21:44:18+0000,246,Review: It’s All Mirth and Taxes in ‘Love & Ta...,,,"It’s Inevitable, Mirth and Taxes",,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",,Love & Taxes Review
198,https://www.nytimes.com/2017/02/16/movies/ever...,A messed-up heroine is asked to choose between...,The New York Times,subject: Movies;creative_works: Everybody Love...,2017-02-16T21:45:50+0000,256,"Review: ‘Everybody Loves Somebody,’ a Rom-Com ...",,,Everybody Loves Somebody,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,"Everybody Loves Somebody, Review"


In [9]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
titles_list = reviews_df['title'].to_list()
titles_list

['The Attachment Diaries Review',
 'What Review',
 'You Can Live Forever Review',
 'A Tourist Review',
 'Other People Review',
 'One True Loves Review',
 'The Lost Weekend: A Love Story Review',
 'A Thousand and One Review',
 'Your Place or Mine Review',
 'Love in the Time of Fentanyl Review',
 'Pamela, a Love Story Review',
 'In From the Side Review',
 'After Love Review',
 'Alcarràs Review',
 'Nelly & Nadine Review',
 'Lady Chatterley Review',
 'The Sound of Christmas Review',
 'The Inspection Review',
 'Bones and All Review',
 'My Policeman Review',
 'About Fate Review',
 'Waiting for Bojangles Review',
 'I Love My Dad Review',
 'A Love Song Review',
 'Alone Together Review',
 'Art of Love Review',
 'The Wheel Review',
 'Thor: Love and Thunder Review',
 'Both Sides of the Blade Review',
 'Fire of Love Review',
 'Love & Gelato Review',
 'Stay Prayed Up Review',
 'Benediction Review',
 'Dinner in America Review',
 'In a New York Minute Review',
 'Anaïs in Love Review',
 'I Love Americ

### Access The Movie Database API

In [10]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

In [32]:
# Print each item and its type to find any non-string items
for title in titles_list:
    print(f"{title}: {type(title)}")

# Ensure all titles are strings and clean the review part from the title
titles_list = [str(title).replace(' Review', '') for title in titles_list if isinstance(title, str)]

# Confirm the list is correct
titles_list


The Attachment Diaries: <class 'str'>
What: <class 'str'>
You Can Live Forever: <class 'str'>
A Tourist: <class 'str'>
Other People: <class 'str'>
One True Loves: <class 'str'>
The Lost Weekend: A Love Story: <class 'str'>
A Thousand and One: <class 'str'>
Your Place or Mine: <class 'str'>
Love in the Time of Fentanyl: <class 'str'>
Pamela, a Love Story: <class 'str'>
In From the Side: <class 'str'>
After Love: <class 'str'>
Alcarràs: <class 'str'>
Nelly & Nadine: <class 'str'>
Lady Chatterley: <class 'str'>
The Sound of Christmas: <class 'str'>
The Inspection: <class 'str'>
Bones and All: <class 'str'>
My Policeman: <class 'str'>
About Fate: <class 'str'>
Waiting for Bojangles: <class 'str'>
I Love My Dad: <class 'str'>
A Love Song: <class 'str'>
Alone Together: <class 'str'>
Art of Love: <class 'str'>
The Wheel: <class 'str'>
Thor: Love and Thunder: <class 'str'>
Both Sides of the Blade: <class 'str'>
Fire of Love: <class 'str'>
Love & Gelato: <class 'str'>
Stay Prayed Up: <class 'st

['The Attachment Diaries',
 'What',
 'You Can Live Forever',
 'A Tourist',
 'Other People',
 'One True Loves',
 'The Lost Weekend: A Love Story',
 'A Thousand and One',
 'Your Place or Mine',
 'Love in the Time of Fentanyl',
 'Pamela, a Love Story',
 'In From the Side',
 'After Love',
 'Alcarràs',
 'Nelly & Nadine',
 'Lady Chatterley',
 'The Sound of Christmas',
 'The Inspection',
 'Bones and All',
 'My Policeman',
 'About Fate',
 'Waiting for Bojangles',
 'I Love My Dad',
 'A Love Song',
 'Alone Together',
 'Art of Love',
 'The Wheel',
 'Thor: Love and Thunder',
 'Both Sides of the Blade',
 'Fire of Love',
 'Love & Gelato',
 'Stay Prayed Up',
 'Benediction',
 'Dinner in America',
 'In a New York Minute',
 'Anaïs in Love',
 'I Love America',
 'See You Then',
 'La Mami',
 'Love After Love',
 'Deep Water',
 'Lucy and Desi',
 'Cyrano',
 'The In Between',
 'Book of Love',
 'Lingui, the Sacred Bonds',
 'The Pink Cloud',
 'A Journal for Jordan',
 'West Side Story',
 'Aulcie',
 'Love Is Love 

In [38]:
# Create an empty list to store the results
tmdb_movies_list = []

# Create a request counter to sleep the requests after a multiple of 50 requests
request_counter = 0

# Ensure all titles are strings
titles_list = [str(title) for title in titles_list if title]

# Loop through the titles
for title in titles_list:
    # Check if we need to sleep before making a request
    if request_counter > 0 and request_counter % 50 == 0:
        print("Sleeping for 10 seconds to avoid hitting API rate limits...")
        time.sleep(10)
    
    # Add 1 to the request counter
    request_counter += 1

    # Perform a "GET" request for The Movie Database
    search_url = f"{url}{title}{tmdb_key_string}"
    response = requests.get(search_url)
    search_results = response.json()

    # Handle the case when no results are found
    if not search_results['results']:
        print(f"Movie not found: {title}")
        continue

    try:
        # Only take the first search result to avoid duplicates
        movie = search_results['results'][0]
        movie_id = movie['id']

        # Make a request for the full movie details
        movie_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb_api_key}"
        movie_response = requests.get(movie_url)
        movie_details = movie_response.json()

        # Extract the genre names into a list
        genres = [genre['name'] for genre in movie_details['genres']]

        # Extract the spoken_languages' English name into a list
        spoken_languages = [lang['english_name'] for lang in movie_details['spoken_languages']]

        # Extract the production_countries' name into a list
        production_countries = [country['name'] for country in movie_details['production_countries']]

        # Add the relevant data to a dictionary and append it to the tmdb_movies_list list
        movie_data = {
            'title': title,  # Use title from the cleaned titles_list
            'tmdb_title': movie_details.get('title', 'N/A'),
            'tmdb_id': movie_details.get('id', 'N/A'),
            'genres': genres,
            'spoken_languages': spoken_languages,
            'production_countries': production_countries,
            'release_date': movie_details.get('release_date', 'N/A'),
            'overview': movie_details.get('overview', 'N/A'),
            'budget': movie_details.get('budget', 'N/A'),
            'revenue': movie_details.get('revenue', 'N/A'),
            'runtime': movie_details.get('runtime', 'N/A'),
            'vote_average': movie_details.get('vote_average', 'N/A'),
            'vote_count': movie_details.get('vote_count', 'N/A')
        }
        tmdb_movies_list.append(movie_data)

        # Print out the title that was found
        print(f"Found movie: {movie_details['title']}")

    except Exception as e:
        print(f"An error occurred while processing the title '{title}': {e}")

# Print the total number of movies found
print(f"Total number of movies found: {len(tmdb_movies_list)}")





Found movie: The Attachment Diaries
Found movie: What...
Found movie: You Can Live Forever
Found movie: A Tourist's Guide to Love
Found movie: Other People
Found movie: One True Loves
Found movie: The Lost Weekend: A Love Story
Found movie: A Thousand and One
Found movie: Your Place or Mine
Found movie: Love in the Time of Fentanyl
Found movie: Pamela, A Love Story
Found movie: In from the Side
Found movie: After Love
Found movie: Alcarràs
Found movie: Nelly and Monsieur Arnaud
Found movie: Lady Chatterley
Found movie: The Sound of Christmas
Found movie: The Inspection
Found movie: Bones and All
Found movie: My Policeman
Found movie: About Fate
Found movie: Waiting for Bojangles
Found movie: I Love My Dad
Found movie: A Love Song
Found movie: Alone Together
Found movie: Art of Love
Found movie: 2099: The Soldier Protocol
Found movie: Thor: Love and Thunder
Found movie: Both Sides of the Blade
Found movie: Fire of Love
Found movie: Art of Love
Found movie: Stay Prayed Up
Found movie: Be

In [39]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data




# Preview the first 5 results in JSON format
formatted_reviews = json.dumps(tmdb_movies_list[:5], indent=4)
print(formatted_reviews)




[
    {
        "title": "The Attachment Diaries",
        "tmdb_title": "The Attachment Diaries",
        "tmdb_id": 743040,
        "genres": [
            "Drama",
            "Mystery",
            "Thriller",
            "Horror"
        ],
        "spoken_languages": [
            "Spanish"
        ],
        "production_countries": [
            "Argentina"
        ],
        "release_date": "2021-10-07",
        "overview": "Argentina, 1970s. A desperate young woman goes to a clinic to have a clandestine abortion. As her pregnancy is already through the fourth month, the doctor refuses. Instead, she proposes to sell the baby to one of her clients and offers to provide shelter in her house until the child is born. Their disturbed personalities will become intertwined in a strange and dangerous relationship.",
        "budget": 0,
        "revenue": 0,
        "runtime": 102,
        "vote_average": 3.0,
        "vote_count": 4
    },
    {
        "title": "What",
        "tmdb_

In [68]:
# Convert the results to a DataFrame


# Convert the results to a DataFrame
tmdb_movies_df = pd.DataFrame(tmdb_movies_list)

tmdb_movies_df

Unnamed: 0,title,tmdb_title,tmdb_id,genres,spoken_languages,production_countries,release_date,overview,budget,revenue,runtime,vote_average,vote_count
0,The Attachment Diaries,The Attachment Diaries,743040,"[Drama, Mystery, Thriller, Horror]",[Spanish],[Argentina],2021-10-07,"Argentina, 1970s. A desperate young woman goes...",0,0,102,3.000,4
1,What,What...,1242065,"[Comedy, Drama]",[English],[Australia],2024-06-29,"It's another ordinary day for Ricky, until he ...",0,0,0,0.000,0
2,You Can Live Forever,You Can Live Forever,887580,"[Drama, Romance]","[English, French]","[Canada, United States of America]",2023-03-24,"When Jaime, a gay teenager, is sent to live in...",0,15055,96,6.657,35
3,A Tourist,A Tourist's Guide to Love,813726,"[Romance, Comedy]","[English, Vietnamese]",[United States of America],2023-04-21,"After an unexpected break up, a travel executi...",0,0,96,6.291,160
4,Other People,Other People,371449,"[Comedy, Drama]",[English],[United States of America],2016-09-09,"David, a struggling comedy writer fresh off fr...",0,93000,97,6.300,193
...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,The Other Half,The Other Half,381015,"[Drama, Romance]",[English],[Canada],2016-12-02,A grief-stricken man and a bipolar woman fall ...,0,0,103,6.300,24
194,The Ottoman Lieutenant,The Ottoman Lieutenant,433356,"[Romance, Drama, War]","[Turkish, English]","[Turkey, United States of America]",2017-03-10,"Lillie, a determined American woman, ventures ...",40000000,413844,111,6.173,245
195,Love & Taxes,Art of Love,1248795,"[Romance, Action, Drama]",[Turkish],[Turkey],2024-03-13,After learning that the art thief she has been...,0,0,99,6.505,105
196,"Everybody Loves Somebody,",Everybody Loves Somebody,431093,"[Romance, Comedy]","[English, Spanish]",[Mexico],2017-02-10,"On the surface, Clara Barron seems to have it ...",2000000,0,102,6.600,158


### Merge and Clean the Data for Export

In [69]:
# Merge the New York Times reviews and TMDB DataFrames on title
merged_df = pd.merge(reviews_df, tmdb_movies_df, on='title', how='inner')
merged_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,genres,spoken_languages,production_countries,release_date,overview,budget,revenue,runtime,vote_average,vote_count


In [75]:
import pandas as pd

# Assuming reviews_df and tmdb_movies_df are already created

# Remove the word 'Review' from the titles in reviews_df
reviews_df['title'] = reviews_df['title'].str.replace(' review', '', case=False, regex=False).str.strip()

# Display cleaned titles for debug purposes
print("Sample cleaned titles from reviews_df:")
print(reviews_df['title'].head(20))

# Merge the DataFrames on the cleaned titles
merged_df = pd.merge(reviews_df, tmdb_movies_df, how='inner', left_on='title', right_on='title')

# Display the number of common titles
print("Number of common titles after cleaning:", len(merged_df))

# Display some of the common titles for verification
print("\nSample common titles:")
print(merged_df['title'].head(20))

# Display the merged DataFrame
print(merged_df.head())
print("Number of rows:", len(merged_df))
print("Number of columns:", len(merged_df.columns))

# If required, save the DataFrame to a CSV to inspect
# merged_df.to_csv('merged_output.csv', index=False)


Sample cleaned titles from reviews_df:
0             the attachment diaries
1                               what
2               you can live forever
3                          a tourist
4                       other people
5                     one true loves
6     the lost weekend: a love story
7                 a thousand and one
8                 your place or mine
9       love in the time of fentanyl
10              pamela, a love story
11                  in from the side
12                        after love
13                          alcarràs
14                    nelly & nadine
15                   lady chatterley
16            the sound of christmas
17                    the inspection
18                     bones and all
19                      my policeman
Name: title, dtype: object
Number of common titles after cleaning: 150

Sample common titles:
0           the attachment diaries
1                             what
2             you can live forever
3                     

In [79]:
merged_df = pd.merge(reviews_df, tmdb_movies_df, on='title', how='inner')
merged_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,genres,spoken_languages,production_countries,release_date,overview,budget,revenue,runtime,vote_average,vote_count
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,...,"[Drama, Mystery, Thriller, Horror]",[Spanish],[Argentina],2021-10-07,"Argentina, 1970s. A desperate young woman goes...",0,0,102,3.000,4
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"subject: Movies;persons: Kapur, Shekhar;person...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,...,"[Comedy, Drama]",[English],[Australia],2024-06-29,"It's another ordinary day for Ricky, until he ...",0,0,0,0.000,0
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,...,"[Drama, Romance]","[English, French]","[Canada, United States of America]",2023-03-24,"When Jaime, a gay teenager, is sent to live in...",0,15055,96,6.657,35
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,...,"[Romance, Comedy]","[English, Vietnamese]",[United States of America],2023-04-21,"After an unexpected break up, a travel executi...",0,0,96,6.291,160
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,...,"[Comedy, Drama]",[English],[United States of America],2016-09-09,"David, a struggling comedy writer fresh off fr...",0,93000,97,6.300,193
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,https://www.nytimes.com/2017/05/11/movies/houn...,This feature debut of the Australian filmmaker...,The New York Times,subject: Movies;creative_works: Hounds of Love...,2017-05-11T20:16:51+0000,280,Review: ‘Hounds of Love’ Is Tense and Deadly D...,,,Hounds of Love,...,"[Crime, Drama, Horror]",[English],[Australia],2016-10-07,When Vicki Maloney is randomly abducted from a...,0,0,108,6.100,328
146,https://www.nytimes.com/2017/04/27/movies/haro...,Meet the husband-and-wife team that inspired s...,The New York Times,creative_works: Harold and Lillian: A Hollywoo...,2017-04-27T20:29:44+0000,263,Review: ‘Harold and Lillian’ Introduces a Holl...,,,Harold and Lillian: A Hollywood Love Story,...,[Documentary],[English],[United States of America],2017-02-01,Working largely uncredited in the Hollywood sy...,0,0,96,6.800,19
147,https://www.nytimes.com/2017/04/20/movies/the-...,"Oscar Isaac, Christian Bale and Charlotte Le B...",The New York Times,subject: Movies;creative_works: The Promise (M...,2017-04-20T16:49:46+0000,455,Review: ‘The Promise’ Finds a Love Triangle in...,,,"A Love Triangle, Near the Front Lines",...,"[History, Romance, Drama]","[Armenian, German, English, French]","[Spain, United States of America]",2016-12-02,Set during the last days of the Ottoman Empire...,90000000,12448676,133,6.900,452
148,https://www.nytimes.com/2017/03/09/movies/the-...,This moody romance stars Tatiana Maslany (“Orp...,The New York Times,subject: Movies;creative_works: The Other Half...,2017-03-09T21:54:58+0000,251,Review: A Combustible Pair Find Love in ‘The O...,,,Review: A Combustible Pair Find Love in ‘The O...,...,"[Drama, Romance]",[English],[Canada],2016-12-02,A grief-stricken man and a bipolar woman fall ...,0,0,103,6.300,24


In [80]:
# Display the column names of the DataFrame
print(merged_df.columns)


Index(['web_url', 'snippet', 'source', 'keywords', 'pub_date', 'word_count',
       'headline.main', 'headline.kicker', 'headline.content_kicker',
       'headline.print_headline', 'headline.name', 'headline.seo',
       'headline.sub', 'byline.original', 'byline.person',
       'byline.organization', 'title', 'clean_title', 'tmdb_title', 'tmdb_id',
       'genres', 'spoken_languages', 'production_countries', 'release_date',
       'overview', 'budget', 'revenue', 'runtime', 'vote_average',
       'vote_count'],
      dtype='object')


In [82]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing


# Create a list of characters to remove


# Loop through the list of columns to fix

    # Convert the column to type 'str'


    # Loop through characters to remove


# Display the fixed DataFrame

# Assuming merged_df is the DataFrame created after merging reviews_df and tmdb_movies_df

# Create a list of the columns that need fixing
columns_to_fix = ['genres', 'spoken_languages', 'production_countries', 'byline.person']

# Create a list of characters to remove
chars_to_remove = ["[", "]", "'", '"']

# Loop through the list of columns to fix
for column in columns_to_fix:
    # Convert the column to type 'str'
    merged_df[column] = merged_df[column].astype(str)
    
    # Loop through characters to remove
    for char in chars_to_remove:
        merged_df[column] = merged_df[column].str.replace(char, '', regex=False)

# Display the fixed DataFrame
merged_df.head()




Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,genres,spoken_languages,production_countries,release_date,overview,budget,revenue,runtime,vote_average,vote_count
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,...,"Drama, Mystery, Thriller, Horror",Spanish,Argentina,2021-10-07,"Argentina, 1970s. A desperate young woman goes...",0,0,102,3.0,4
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"subject: Movies;persons: Kapur, Shekhar;person...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,...,"Comedy, Drama",English,Australia,2024-06-29,"It's another ordinary day for Ricky, until he ...",0,0,0,0.0,0
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,...,"Drama, Romance","English, French","Canada, United States of America",2023-03-24,"When Jaime, a gay teenager, is sent to live in...",0,15055,96,6.657,35
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,...,"Romance, Comedy","English, Vietnamese",United States of America,2023-04-21,"After an unexpected break up, a travel executi...",0,0,96,6.291,160
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,...,"Comedy, Drama",English,United States of America,2016-09-09,"David, a struggling comedy writer fresh off fr...",0,93000,97,6.3,193


In [83]:
# Drop "byline.person" column
# Drop the "byline.person" column
merged_df = merged_df.drop(columns=['byline.person'])

# Display the columns to verify that "byline.person" has been dropped
merged_df.head()


Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,genres,spoken_languages,production_countries,release_date,overview,budget,revenue,runtime,vote_average,vote_count
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,...,"Drama, Mystery, Thriller, Horror",Spanish,Argentina,2021-10-07,"Argentina, 1970s. A desperate young woman goes...",0,0,102,3.0,4
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"subject: Movies;persons: Kapur, Shekhar;person...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,...,"Comedy, Drama",English,Australia,2024-06-29,"It's another ordinary day for Ricky, until he ...",0,0,0,0.0,0
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,...,"Drama, Romance","English, French","Canada, United States of America",2023-03-24,"When Jaime, a gay teenager, is sent to live in...",0,15055,96,6.657,35
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,...,"Romance, Comedy","English, Vietnamese",United States of America,2023-04-21,"After an unexpected break up, a travel executi...",0,0,96,6.291,160
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,...,"Comedy, Drama",English,United States of America,2016-09-09,"David, a struggling comedy writer fresh off fr...",0,93000,97,6.3,193


In [84]:
# Delete duplicate rows and reset index
# Delete duplicate rows
merged_df = merged_df.drop_duplicates()

# Reset the index
merged_df = merged_df.reset_index(drop=True)

# Display the DataFrame to verify the changes
merged_df.head()


Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,genres,spoken_languages,production_countries,release_date,overview,budget,revenue,runtime,vote_average,vote_count
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,...,"Drama, Mystery, Thriller, Horror",Spanish,Argentina,2021-10-07,"Argentina, 1970s. A desperate young woman goes...",0,0,102,3.0,4
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"subject: Movies;persons: Kapur, Shekhar;person...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,...,"Comedy, Drama",English,Australia,2024-06-29,"It's another ordinary day for Ricky, until he ...",0,0,0,0.0,0
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,...,"Drama, Romance","English, French","Canada, United States of America",2023-03-24,"When Jaime, a gay teenager, is sent to live in...",0,15055,96,6.657,35
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,...,"Romance, Comedy","English, Vietnamese",United States of America,2023-04-21,"After an unexpected break up, a travel executi...",0,0,96,6.291,160
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,...,"Comedy, Drama",English,United States of America,2016-09-09,"David, a struggling comedy writer fresh off fr...",0,93000,97,6.3,193


In [86]:
# Export data to CSV without the index
# Export data to CSV without the index
merged_df.to_csv('merged_reviews_tmdb.csv', index=False)
