### Import Required Libraries and Set Up Environment Variables

In [7]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json

In [8]:
# Set environment variables from the .env in the local environment
load_dotenv()

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [9]:
# Set the base URL
nyt_base_url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Check parameters
print(f"Filter Query: {filter_query}")
print(f"Sort: {sort}")
print(f"Field List: {field_list}")
print(f"Begin Date: {begin_date}")
print(f"End Date: {end_date}")

Filter Query: section_name:"Movies" AND type_of_material:"Review" AND headline:"love"
Sort: newest
Field List: headline,web_url,snippet,source,keywords,pub_date,byline,word_count
Begin Date: 20130101
End Date: 20230531


In [10]:
# Build URL  
nyt_query_url = (f"{nyt_base_url}fq={filter_query}&sort={sort}&fl={field_list}&"
                 f"begin_date={begin_date}&end_date={end_date}&page=0&api-key={nyt_api_key}")

# Print the URL to verify it
print("Query URL:", nyt_query_url)

Query URL: https://api.nytimes.com/svc/search/v2/articlesearch.json?fq=section_name:"Movies" AND type_of_material:"Review" AND headline:"love"&sort=newest&fl=headline,web_url,snippet,source,keywords,pub_date,byline,word_count&begin_date=20130101&end_date=20230531&page=0&api-key=E0XnnKtEwnWxVn9LAPTIGErcmqgwKj4J


In [11]:
# Send a GET request to the API
response = requests.get(nyt_query_url)

# Check if the request was successful
if response.status_code == 200:
    print("Request successful!")
else:
    print(f"Error: {response.status_code}")

Request successful!


In [12]:
# If the request is successful, parse the JSON response
if response.status_code == 200:
    # Convert the response to JSON
    reviews_data = response.json()
    
    # Preview the first 5 results (in the 'docs' key)
    import json
    print(json.dumps(reviews_data['response']['docs'][:5], indent=4))

[
    {
        "web_url": "https://www.nytimes.com/2023/05/25/movies/the-attachment-diaries-review.html",
        "snippet": "A gynecologist and her patient form a horrifyingly twisted connection in this batty, bloody Argentine melodrama.",
        "source": "The New York Times",
        "headline": {
            "main": "\u2018The Attachment Diaries\u2019 Review: Love, Sick",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "The Attachment Diaries",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "subject",
                "value": "Movies",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "creative_works",
                "value": "The Attachment Diaries (Movie)",
                "rank": 2,
                "major": "N"
            },
            {
                "name": "persons",
 

In [13]:
# Create an empty list to store the reviews
reviews_list = []

# Loop through pages 0-19 (each page contains up to 10 articles)
for page in range(20):
    # Create query with a page number
    nyt_query_url = (
        f"{nyt_base_url}fq={filter_query}&sort={sort}&fl={field_list}&"
        f"begin_date={begin_date}&end_date={end_date}&page={page}&api-key={nyt_api_key}"
    )

    # Make a "GET" request and retrieve the JSON
    try:
        response = requests.get(nyt_query_url)
        response_data = response.json()

        # Check if 'response' and 'docs' keys exist in the response data
        if 'response' in response_data and 'docs' in response_data['response']:
            docs = response_data['response']['docs']

            # Append each review to the reviews_list
            reviews_list.extend(docs)

            # Print the page that was just retrieved
            print(f"Checked page {page}")

        # If no docs are found, just continue to the next page
        else:
            print(f"Checked page {page} (no results)")

    except Exception as e:
        print(f"Error on page {page}: {e}")
        break

    # Add a twelve-second interval between queries to stay within API query limits
    time.sleep(12)

# Print total number of reviews retrieved
print(f"Total reviews retrieved: {len(reviews_list)}")


Checked page 0
Checked page 1
Checked page 2
Checked page 3
Checked page 4
Checked page 5
Checked page 6
Checked page 7
Checked page 8
Checked page 9
Checked page 10
Checked page 11
Checked page 12
Checked page 13
Checked page 14
Checked page 15
Checked page 16
Checked page 17
Checked page 18
Checked page 19
Total reviews retrieved: 200


In [42]:
if len(reviews_list) >= 5:
    # Preview the first 5 results in JSON format
    first_five_reviews = reviews_list[:5]
    
    # Use json.dumps with argument indent=4 to format data
    formatted_json = json.dumps(first_five_reviews, indent=4)
    print(formatted_json)
else:
    print(f"Only {len(reviews_list)} reviews available.")

[
    {
        "web_url": "https://www.nytimes.com/2023/05/25/movies/the-attachment-diaries-review.html",
        "snippet": "A gynecologist and her patient form a horrifyingly twisted connection in this batty, bloody Argentine melodrama.",
        "source": "The New York Times",
        "headline": {
            "main": "\u2018The Attachment Diaries\u2019 Review: Love, Sick",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "The Attachment Diaries",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "subject",
                "value": "Movies",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "creative_works",
                "value": "The Attachment Diaries (Movie)",
                "rank": 2,
                "major": "N"
            },
            {
                "name": "persons",
 

In [43]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
df_reviews = pd.json_normalize(reviews_list)
df_reviews.head()

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",


In [44]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early

# Define a function to extract the title between \u2018 and \u2019 and append " Review"
def extract_title(headline):
    # Find the positions of the first \u2018 and the last \u2019
    start = headline.find("\u2018") + 1
    end = headline.rfind("\u2019")  # Use rfind to find the last occurrence of \u2019
    
    # Extract the title between the quotes
    if start != -1 and end != -1:
        title = headline[start:end]
        return title + " Review"
    else:
        return headline  # In case the headline doesn't match the expected pattern

# Apply the function to the "headline.main" column and create a new column "title"
df_reviews['title'] = df_reviews['headline.main'].apply(extract_title)

# Preview the DataFrame to see the new column
df_reviews[['headline.main', 'title']].head()


Unnamed: 0,headline.main,title
0,"‘The Attachment Diaries’ Review: Love, Sick",The Attachment Diaries Review
1,Review: ‘What’s Love Got to Do With It?’ Proba...,What’s Love Got to Do With It? Review
2,‘You Can Live Forever’ Review: Do You Love Me ...,You Can Live Forever Review
3,‘A Tourist’s Guide to Love’ Review: A Wearying...,A Tourist’s Guide to Love Review
4,‘Other People’s Children’ Review: True Romance,Other People’s Children Review


In [45]:
# Define the function to extract 'name' and 'value' from the "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    
    # Check if the keyword_list is actually a list
    if isinstance(keyword_list, list):
        # Iterate over each item in the keyword_list (which should be a list of dictionaries)
        for item in keyword_list:
            # Check if the item is a dictionary and contains 'name' and 'value' keys
            if isinstance(item, dict) and 'name' in item and 'value' in item:
                # Extract 'name' and 'value' and format them
                keyword = f"{item['name']}: {item['value']}; "
                extracted_keywords += keyword
    return extracted_keywords.strip()

# Apply the function to the "keywords" column to convert lists to strings
df_reviews['keywords'] = df_reviews['keywords'].apply(extract_keywords)

# Preview the updated DataFrame to see the formatted "keywords" column
df_reviews[['keywords']].head()

Unnamed: 0,keywords
0,subject: Movies; creative_works: The Attachmen...
1,"subject: Movies; persons: Kapur, Shekhar; pers..."
2,subject: Movies; creative_works: You Can Live ...
3,subject: Movies; creative_works: A Tourist's G...
4,"subject: Movies; persons: Zlotowski, Rebecca; ..."


In [46]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
title_list = df_reviews['title'].to_list()

# Preview the list of titles
(title_list)

['The Attachment Diaries Review',
 'What’s Love Got to Do With It? Review',
 'You Can Live Forever Review',
 'A Tourist’s Guide to Love Review',
 'Other People’s Children Review',
 'One True Loves Review',
 'The Lost Weekend: A Love Story Review',
 'A Thousand and One Review',
 'Your Place or Mine Review',
 'Love in the Time of Fentanyl Review',
 'Pamela, a Love Story Review',
 'In From the Side Review',
 'After Love Review',
 'Alcarràs Review',
 'Nelly & Nadine Review',
 'Lady Chatterley’s Lover Review',
 'The Sound of Christmas Review',
 'The Inspection Review',
 'Bones and All Review',
 'My Policeman Review',
 'About Fate’ Review: Love the One You Review',
 'Waiting for Bojangles Review',
 'I Love My Dad Review',
 'A Love Song Review',
 'Alone Together Review',
 'Art of Love Review',
 'The Wheel Review',
 'Thor: Love and Thunder’ Review: A God Review',
 'Both Sides of the Blade Review',
 'Fire of Love Review',
 'Love & Gelato Review',
 'Stay Prayed Up Review',
 'Benediction’ Review:

### Access The Movie Database API

In [47]:
# Prepare The Movie Database query
tmdb_base_url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

In [48]:
# Create an empty list to store the results
tmdb_movies_list = []

# Create a request counter to sleep the requests after a multiple of 50 requests
request_counter = 0

# Loop through the titles
for title in title_list:
    # Strip the word "Review" from the end of the title (if present)
    if "Review" in title:
        title = title.replace(" Review", "").strip()
    
    # Check if we need to sleep before making a request (after every 50 requests)
    if request_counter > 0 and request_counter % 50 == 0:
        print(f"Reached {request_counter} requests. Sleeping for 10 seconds...")
        time.sleep(10)

    # Add 1 to the request counter
    request_counter += 1
    
    # Encode the title to be URL-safe
    query_title = requests.utils.quote(title)
    
    # Perform a "GET" request for The Movie Database
    tmdb_query_url = f"{tmdb_base_url}{query_title}{tmdb_key_string}"
    
    try:
        response = requests.get(tmdb_query_url)
        data = response.json()

        # Check if there are results in the TMDB response
        if 'results' in data and len(data['results']) > 0:
            # Get movie id from the first result
            movie_id = data['results'][0]['id']
            
            # Make a request for the full movie details
            details_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb_api_key}"
            details_response = requests.get(details_url)
            details_data = details_response.json()
            
            # Extract the genre names into a list
            genres = [genre['name'] for genre in details_data.get('genres', [])]
            
            # Extract the spoken_languages' English name into a list
            spoken_languages = [lang['english_name'] for lang in details_data.get('spoken_languages', [])]
            
            # Extract the production_countries' name into a list
            production_countries = [country['name'] for country in details_data.get('production_countries', [])]
            
            # Add the relevant data to a dictionary
            movie_details = {
                'title': title,
                'original_title': details_data.get('original_title', ''),
                'genres': genres,
                'spoken_languages': spoken_languages,
                'production_countries': production_countries,
                'release_date': details_data.get('release_date', ''),
                'runtime': details_data.get('runtime', ''),
                'overview': details_data.get('overview', ''),
                'popularity': details_data.get('popularity', ''),
                'vote_average': details_data.get('vote_average', ''),
                'vote_count': details_data.get('vote_count', '')
            }
            
            # Append the movie details to the tmdb_movies_list
            tmdb_movies_list.append(movie_details)
            
            # Print out that the movie was found
            print(f"Found {title}")
        
        else:
            # Print out that the movie was not found
            print(f"{title} not found.")
    
    except Exception as e:
        print(f"An error occurred for '{title}': {e}")

# Print total number of movies found
print(f"Total movies found: {len(tmdb_movies_list)}")


Found The Attachment Diaries
Found What’s Love Got to Do With It?
Found You Can Live Forever
Found A Tourist’s Guide to Love
Found Other People’s Children
Found One True Loves
Found The Lost Weekend: A Love Story
Found A Thousand and One
Found Your Place or Mine
Found Love in the Time of Fentanyl
Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lady Chatterley’s Lover
Found The Sound of Christmas
Found The Inspection
Found Bones and All
Found My Policeman
About Fate’: Love the One You not found.
Found Waiting for Bojangles
Found I Love My Dad
Found A Love Song
Found Alone Together
Found Art of Love
Found The Wheel
Thor: Love and Thunder’: A God not found.
Found Both Sides of the Blade
Found Fire of Love
Found Love & Gelato
Found Stay Prayed Up
Benediction’: A Poet not found.
Found Dinner in America
Found In a New York Minute
Found Anaïs in Love
Found I Love America
Found See You Then
Found La Mami
Found Love After Love
Found D

In [51]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
if len(tmdb_movies_list) >= 5:
    # Get the first 5 results from the list
    first_five_results = tmdb_movies_list[:5]

 # Convert to JSON format with indentation for readability
    formatted_json = json.dumps(first_five_results, indent=4)
    
    # Print the formatted JSON
    print(formatted_json)
else:
    print(f"Only {len(tmdb_movies_list)} results available.")

[
    {
        "title": "The Attachment Diaries",
        "original_title": "El apego",
        "genres": [
            "Drama",
            "Mystery",
            "Thriller",
            "Horror"
        ],
        "spoken_languages": [
            "Spanish"
        ],
        "production_countries": [
            "Argentina"
        ],
        "release_date": "2021-10-07",
        "runtime": 102,
        "overview": "Argentina, 1970s. A desperate young woman goes to a clinic to have a clandestine abortion. As her pregnancy is already through the fourth month, the doctor refuses. Instead, she proposes to sell the baby to one of her clients and offers to provide shelter in her house until the child is born. Their disturbed personalities will become intertwined in a strange and dangerous relationship.",
        "popularity": 1.028,
        "vote_average": 3.0,
        "vote_count": 4
    },
    {
        "title": "What\u2019s Love Got to Do With It?",
        "original_title": "What's 

In [52]:
# Convert the results to a DataFrame
df_tmdb_movies = pd.DataFrame(tmdb_movies_list)
df_tmdb_movies.head()

Unnamed: 0,title,original_title,genres,spoken_languages,production_countries,release_date,runtime,overview,popularity,vote_average,vote_count
0,The Attachment Diaries,El apego,"[Drama, Mystery, Thriller, Horror]",[Spanish],[Argentina],2021-10-07,102,"Argentina, 1970s. A desperate young woman goes...",1.028,3.0,4
1,What’s Love Got to Do With It?,What's Love Got to Do with It,"[Drama, Music, History]",[English],[United States of America],1993-06-09,118,Singer Tina Turner rises to stardom while must...,12.901,7.102,309
2,You Can Live Forever,You Can Live Forever,"[Drama, Romance]","[English, French]","[Canada, United States of America]",2023-03-24,96,"When Jaime, a gay teenager, is sent to live in...",18.031,6.463,41
3,A Tourist’s Guide to Love,A Tourist's Guide to Love,"[Romance, Comedy]","[English, Vietnamese]",[United States of America],2023-04-21,96,"After an unexpected break up, a travel executi...",11.519,6.299,172
4,Other People’s Children,Les Enfants des autres,"[Drama, Comedy]","[French, English]",[France],2022-09-21,104,"Rachel loves her life, her students, her frien...",7.031,6.813,195


### Merge and Clean the Data for Export

# Standardize the 'title' column in both DataFrames: lowercase and strip whitespace
df_reviews['title'] = df_reviews['title'].str.lower().str.strip()
df_tmdb_movies['title'] = df_tmdb_movies['title'].str.lower().str.strip()
(df_tmdb_movies)

In [56]:
df_reviews

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies; creative_works: The Attachmen...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries Review
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"subject: Movies; persons: Kapur, Shekhar; pers...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,What’s Love Got to Do With It? Review
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies; creative_works: You Can Live ...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever Review
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies; creative_works: A Tourist's G...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist’s Guide to Love Review
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies; persons: Zlotowski, Rebecca; ...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People’s Children Review
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2017/03/09/movies/the-...,This moody romance stars Tatiana Maslany (“Orp...,The New York Times,subject: Movies; creative_works: The Other Hal...,2017-03-09T21:54:58+0000,251,Review: A Combustible Pair Find Love in ‘The O...,,,Review: A Combustible Pair Find Love in ‘The O...,,,,By Andy Webster,"[{'firstname': 'Andy', 'middlename': None, 'la...",,The Other Half Review
196,https://www.nytimes.com/2017/03/09/movies/revi...,A nurse travels to the Ottoman Empire on the e...,The New York Times,subject: Movies; creative_works: The Ottoman L...,2017-03-09T21:53:12+0000,267,"Review: Love as the World Wars, in ‘The Ottoma...",,,"Review: Love as the World Wars, in ‘The Ottoma...",,,,By Neil Genzlinger,"[{'firstname': 'Neil', 'middlename': None, 'la...",,The Ottoman Lieutenant Review
197,https://www.nytimes.com/2017/03/02/movies/love...,Josh Kornbluth runs afoul of the Internal Reve...,The New York Times,creative_works: Love & Taxes (Movie); persons:...,2017-03-02T21:44:18+0000,246,Review: It’s All Mirth and Taxes in ‘Love & Ta...,,,"It’s Inevitable, Mirth and Taxes",,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",,Love & Taxes Review
198,https://www.nytimes.com/2017/02/16/movies/ever...,A messed-up heroine is asked to choose between...,The New York Times,subject: Movies; creative_works: Everybody Lov...,2017-02-16T21:45:50+0000,256,"Review: ‘Everybody Loves Somebody,’ a Rom-Com ...",,,Everybody Loves Somebody,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,"Everybody Loves Somebody, Review"


In [55]:
df_tmdb_movies

Unnamed: 0,title,original_title,genres,spoken_languages,production_countries,release_date,runtime,overview,popularity,vote_average,vote_count
0,The Attachment Diaries,El apego,"[Drama, Mystery, Thriller, Horror]",[Spanish],[Argentina],2021-10-07,102,"Argentina, 1970s. A desperate young woman goes...",1.028,3.000,4
1,What’s Love Got to Do With It?,What's Love Got to Do with It,"[Drama, Music, History]",[English],[United States of America],1993-06-09,118,Singer Tina Turner rises to stardom while must...,12.901,7.102,309
2,You Can Live Forever,You Can Live Forever,"[Drama, Romance]","[English, French]","[Canada, United States of America]",2023-03-24,96,"When Jaime, a gay teenager, is sent to live in...",18.031,6.463,41
3,A Tourist’s Guide to Love,A Tourist's Guide to Love,"[Romance, Comedy]","[English, Vietnamese]",[United States of America],2023-04-21,96,"After an unexpected break up, a travel executi...",11.519,6.299,172
4,Other People’s Children,Les Enfants des autres,"[Drama, Comedy]","[French, English]",[France],2022-09-21,104,"Rachel loves her life, her students, her frien...",7.031,6.813,195
...,...,...,...,...,...,...,...,...,...,...,...
181,"Frantz,",Frantz,"[History, Drama, Romance]","[German, French]","[France, Germany]",2016-09-07,113,"In the aftermath of WWI, a young German who gr...",12.885,7.365,676
182,The Other Half,The Other Half,"[Drama, Romance]",[English],[Canada],2016-12-02,103,A grief-stricken man and a bipolar woman fall ...,4.172,6.300,24
183,The Ottoman Lieutenant,The Ottoman Lieutenant,"[Romance, Drama, War]","[Turkish, English]","[Turkey, United States of America]",2017-03-28,111,"Lillie, a determined American woman, ventures ...",14.366,6.163,249
184,Love & Taxes,Love & Taxes,[Comedy],[English],[United States of America],2017-03-03,98,Love &amp; Taxes is a riveting comic tale of s...,2.592,4.000,1


In [60]:
# Merge the New York Times reviews and TMDB DataFrames on title
df_merged = pd.merge(df_tmdb_movies, df_reviews, on='title')

# Preview the first few rows of the merged DataFrame
print(df_merged.head())

Empty DataFrame
Columns: [title, original_title, genres, spoken_languages, production_countries, release_date, runtime, overview, popularity, vote_average, vote_count, web_url, snippet, source, keywords, pub_date, word_count, headline.main, headline.kicker, headline.content_kicker, headline.print_headline, headline.name, headline.seo, headline.sub, byline.original, byline.person, byline.organization]
Index: []

[0 rows x 27 columns]


In [40]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing
columns_to_fix = ['genres', 'spoken_languages', 'production_countries']


# Create a list of characters to remove
characters_to_remove = ["[", "]", "'", '"']

# Loop through the list of columns to fix
for col in columns_to_fix:
    # Convert the column to type 'str'
    df_merged[col] = df_merged[col].astype(str)


    # Loop through characters to remove
     for char in characters_to_remove:
        df_merged[col] = df_merged[col].str.replace(char, '', regex=False)

# Display the fixed DataFrame
print(df_merged.head())

IndentationError: unexpected indent (2144499378.py, line 16)

In [None]:
# Drop "byline.person" column
if 'byline.person' in df_merged.columns:
    df_merged = df_merged.drop(columns=['byline.person'])


In [None]:
# Delete duplicate rows and reset index
df_merged = df_merged.drop_duplicates().reset_index(drop=True)

In [None]:
# Export data to CSV without the index
df_merged.to_csv('merged_movies_reviews.csv', index=False)

In [None]:
print("Data exported to 'merged_movies_reviews.csv'.")