### Import Required Libraries and Set Up Environment Variables

In [1]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json
import urllib


In [2]:
# Set environment variables from the .env in the local environment
load_dotenv('example.env')

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [4]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
#filter_query = 'section_name:"Movies" AND type_of_material:/"Review/" AND headline:/"love/"'
filter_query = 'section_name:Movies AND type_of_material:Review AND headline:love'
fq = title_url = urllib.parse.quote(filter_query)
# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
movies_reviews_url = f"{url}api-key={nyt_api_key}&fq={fq}&sort={sort}&fl={field_list}&begin_date={begin_date}&end_date={end_date}"

In [5]:
# Create an empty list to store the reviews
nyt_reviews =[]

# loop through pages 0-19
for p in range(20):

    # create query with a page number
    movies_reviews_url_page=f"{movies_reviews_url}&page={p+1}"
    # API results show 10 articles at a time    
    # Make a "GET" request and retrieve the JSON
    response = requests.get(movies_reviews_url_page).json()
    
    # Try and save the reviews to the reviews_list
    if len(response["response"]["docs"])>0:
        # loop through the reviews["response"]["docs"] and append each review to the list
        nyt_reviews.extend(response["response"]["docs"])

        # Print the page that was just retrieved
        print (f"Checked page {p}")      
    else:
        # Print the page number that had no results then break from the loop
        print(f"Page {p} is empty")
        break

    # Add a twelve second interval between queries to stay within API query limits
    print("Sleeping for 12 Seconds")
    time.sleep(12)

Checked page 0
Sleeping for 12 Seconds
Checked page 1
Sleeping for 12 Seconds
Checked page 2
Sleeping for 12 Seconds
Checked page 3
Sleeping for 12 Seconds
Checked page 4
Sleeping for 12 Seconds
Checked page 5
Sleeping for 12 Seconds
Checked page 6
Sleeping for 12 Seconds
Checked page 7
Sleeping for 12 Seconds
Checked page 8
Sleeping for 12 Seconds
Checked page 9
Sleeping for 12 Seconds
Checked page 10
Sleeping for 12 Seconds
Checked page 11
Sleeping for 12 Seconds
Checked page 12
Sleeping for 12 Seconds
Checked page 13
Sleeping for 12 Seconds
Checked page 14
Sleeping for 12 Seconds
Checked page 15
Sleeping for 12 Seconds
Checked page 16
Sleeping for 12 Seconds
Checked page 17
Sleeping for 12 Seconds
Checked page 18
Sleeping for 12 Seconds
Checked page 19
Sleeping for 12 Seconds


In [6]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data

print(json.dumps(nyt_reviews[:5], indent=4))


[
    {
        "web_url": "https://www.nytimes.com/2023/01/31/movies/pamela-a-love-story-review.html",
        "snippet": "This documentary from Ryan White rewinds, to powerful effect, on Pamela Anderson\u2019s life and fame.",
        "source": "The New York Times",
        "headline": {
            "main": "\u2018Pamela, a Love Story\u2019 Review: A Frank Look Back",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "Pamela, a Love Story",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "subject",
                "value": "Documentary Films and Programs",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "persons",
                "value": "Anderson, Pamela (1967- )",
                "rank": 2,
                "major": "N"
            },
            {
                "name": "persons",

In [7]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
pd_nyt_reviews = pd.json_normalize(nyt_reviews)
pd_nyt_reviews

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2023/01/31/movies/pame...,"This documentary from Ryan White rewinds, to p...",The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2023-01-31T12:00:05+0000,295,"‘Pamela, a Love Story’ Review: A Frank Look Back",,,"Pamela, a Love Story",,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
1,https://www.nytimes.com/2023/01/19/movies/in-f...,"In Matt Carter’s gay rugby film, sports and ro...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-01-19T17:50:16+0000,281,‘In From the Side’ Review: Love and Rugby Play...,,,In From the Side,,,,By Kyle Turner,"[{'firstname': 'Kyle', 'middlename': None, 'la...",
2,https://www.nytimes.com/2023/01/19/movies/afte...,In this intelligent melodrama by the director ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-01-19T12:00:06+0000,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,,,,By Beatrice Loayza,"[{'firstname': 'Beatrice', 'middlename': None,...",
3,https://www.nytimes.com/2023/01/05/movies/alca...,"In this naturalistic drama from Spain, a famil...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-01-05T12:00:03+0000,306,‘Alcarràs’ Review: Labor of Love,,,Alcarràs,,,,By Devika Girish,"[{'firstname': 'Devika', 'middlename': None, '...",
4,https://www.nytimes.com/2022/12/15/movies/nell...,A family archive provides intimate records of ...,The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2022-12-15T12:00:04+0000,308,"‘Nelly & Nadine’ Review: An Unlikely Love, an ...",,,Nelly &amp; Nadine,,,,By Teo Bugbee,"[{'firstname': 'Teo', 'middlename': None, 'las...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://www.nytimes.com/2016/12/08/movies/fran...,"Frankie and Johnny’s story returns, written an...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2016-12-08T22:05:30+0000,554,"Review: ‘Frank & Lola,’ Retreading Old Ground",,,Tainted Love in Familiar Packaging,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
196,https://www.nytimes.com/2016/11/22/movies/alli...,“Allied” is not so much a work of art as a tri...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2016-11-22T21:59:34+0000,901,"Review: ‘Allied,’ With Brad Pitt and Marion Co...",,,"For This Couple, All’s Ambiguous in Love and War",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",
197,https://www.nytimes.com/2016/11/18/movies/revi...,Anna Biller’s new film stars Samantha Robinson...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2016-11-17T22:14:13+0000,604,"Review: ‘The Love Witch,’ Hell-Bent on Capturi...",,,"A Dark Seductress, Hell-Bent on Capturing Hearts",,,,By A.O. Scott,"[{'firstname': 'A.', 'middlename': 'O.', 'last...",
198,https://www.nytimes.com/2016/11/18/movies/ali-...,"During the era surrounding World War I, in the...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2016-11-17T22:11:30+0000,280,"Review: ‘Ali and Nino,’ a Love Story Set Again...",,,,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",


In [8]:
# Extract the title from the "headline.main" column and
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early


# save it to a new column "title"
pd_nyt_reviews['title'] = pd_nyt_reviews['headline.main'].apply(lambda st: st[st.find("\u2018")+1:st.find("\u2019 Review")])


In [9]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
pd_nyt_reviews['keywords'] = pd_nyt_reviews['keywords'].apply(extract_keywords)


In [10]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
pd_nyt_reviews.to_csv('nyt_reviews.csv')
titles = pd_nyt_reviews['title'].to_list()

### Access The Movie Database API

In [11]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key
details_url = "https://api.themoviedb.org/3/movie/"

In [15]:
# Create an empty list to store the results
tmdb_movies_list = []

# Create a request counter to sleep the requests after a multiple
# of 50 requests
rq = 1 

# Loop through the titles
for title in titles:
    # Check if we need to sleep before making a request
    if rq == 50:
        rq = 1
        time.sleep(1)
        print("Sleeping for 1 Second")


    # Add 1 to the request counter
    rq = rq+1
    title_url = urllib.parse.quote(title)
    search_url = f"{url}{title_url}{tmdb_key_string}"
    # Perform a "GET" request for The Movie Database
    search_response = requests.get(search_url).json()
    #print(f"{search_url}")
    #print(json.dumps(search_response, indent=4))

    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.

    try:
        # Get movie id
        movie_id = search_response['results'][0]['id']

        # Make a request for a the full movie details
        # Execute "GET" request with url
        details_response = requests.get(f"{details_url}{movie_id}?api_key={tmdb_api_key}").json()
        
        # Extract the genre names into a list
        genres = [o['name'] for o in details_response['genres']]

        # Extract the spoken_languages' English name into a list
        spoken = [o['english_name'] for o in details_response['spoken_languages']]

        # Extract the production_countries' name into a list
        production = [o['name'] for o in details_response['production_countries']]
        
        # Add the relevant data to a dictionary and
        # append it to the tmdb_movies_list list
        movie = {
            'title': title,
            'original_title': details_response['original_title'],
            'budget': details_response['budget'],
            'original_language': details_response['original_language'],
            'homepage': details_response['homepage'],
            'overview': details_response['overview'],
            'popularity': details_response['popularity'],
            'runtime': details_response['runtime'],
            'revenue': details_response['revenue'],
            'release_date': details_response['release_date'],
            'vote_average': details_response['vote_average'],
            'vote_count': details_response['vote_count'],
            'genres': genres,
            'spoken_languages': spoken,
            'production_countries': production
        }
        tmdb_movies_list.append(movie)
        # Print out the title that was found
        print(f"{title} found")
    except:
        print(f"{title} not found")


Pamela, a Love Story found
In From the Side found
After Love found
Alcarràs found
Nelly & Nadine found
Lady Chatterley’s Lover found
The Sound of Christmas found
The Inspection found
Bones and All found
My Policeman found
About Fate found
Waiting for Bojangles found
I Love My Dad found
A Love Song found
Alone Together found
Art of Love found
The Wheel found
Thor: Love and Thunder found
Both Sides of the Blade found
Fire of Love found
Love & Gelato found
Stay Prayed Up found
Benediction found
Dinner in America found
In a New York Minute found
Anaïs in Love found
I Love America found
See You Then found
La Mami found
Love After Love found
Deep Water found
Lucy and Desi found
Cyrano found
The In Between found
Book of Love found
Lingui, the Sacred Bonds found
The Pink Cloud found
A Journal for Jordan found
West Side Story found
Aulcie not found
Love Is Love Is Love found
Love Hard found
Bergman Island found
Hard Luck Love Song found
South of Heaven found
Wife of a Spy found
Happier Than Eve

In [16]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
print (json.dumps(tmdb_movies_list[:5],indent=4))

[
    {
        "title": "Pamela, a Love Story",
        "original_title": "Pamela, A Love Story",
        "budget": 0,
        "original_language": "en",
        "homepage": "https://www.netflix.com/title/81590934",
        "overview": "In her own words, through personal video and diaries, Pamela Anderson shares the story of her rise to fame, rocky romances and infamous sex tape scandal.",
        "popularity": 17.316,
        "runtime": 113,
        "revenue": 0,
        "release_date": "2023-01-30",
        "vote_average": 6.985,
        "vote_count": 197,
        "genres": [
            "Documentary"
        ],
        "spoken_languages": [
            "English"
        ],
        "production_countries": [
            "United States of America"
        ]
    },
    {
        "title": "In From the Side",
        "original_title": "In from the Side",
        "budget": 0,
        "original_language": "en",
        "homepage": "http://www.infromthesidemovie.com",
        "overview": "M

In [17]:
# Convert the results to a DataFrame
movies_df = pd.DataFrame.from_dict(tmdb_movies_list)


### Merge and Clean the Data for Export

In [18]:
# Merge the New York Times reviews and TMDB DataFrames on title
nyt_reviewd_movies = pd_nyt_reviews.merge(movies_df, on='title')

In [19]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing
columns_to_fix = ['genres', 'spoken_languages', 'production_countries']


# Create a list of characters to remove
characters_to_remove =['[',']','\'']

# Loop through the list of columns to fix
for col in columns_to_fix:
    # Convert the column to type 'str'
    nyt_reviewd_movies[col] = nyt_reviewd_movies[col].astype(str)
    # Loop through characters to remove
    for ch in characters_to_remove:
        nyt_reviewd_movies[col] = nyt_reviewd_movies[col].str.replace(ch, '')


# Display the fixed DataFrame
nyt_reviewd_movies.head(5)

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,https://www.nytimes.com/2023/01/31/movies/pame...,"This documentary from Ryan White rewinds, to p...",The New York Times,subject: Documentary Films and Programs;person...,2023-01-31T12:00:05+0000,295,"‘Pamela, a Love Story’ Review: A Frank Look Back",,,"Pamela, a Love Story",...,"In her own words, through personal video and d...",17.316,113,0,2023-01-30,6.985,197,Documentary,English,United States of America
1,https://www.nytimes.com/2023/01/19/movies/in-f...,"In Matt Carter’s gay rugby film, sports and ro...",The New York Times,subject: Movies;creative_works: In From the Si...,2023-01-19T17:50:16+0000,281,‘In From the Side’ Review: Love and Rugby Play...,,,In From the Side,...,"Mark, a new and inexperienced rugby club membe...",15.798,134,0,2022-09-16,7.152,46,"Drama, Romance",English,United Kingdom
2,https://www.nytimes.com/2023/01/19/movies/afte...,In this intelligent melodrama by the director ...,The New York Times,subject: Movies;creative_works: After Love (20...,2023-01-19T12:00:06+0000,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,After Love,...,Set in the port town of Dover in the South-Eas...,9.199,89,0,2021-06-04,7.31,92,Drama,"English, Arabic, French, Urdu",United Kingdom
3,https://www.nytimes.com/2023/01/05/movies/alca...,"In this naturalistic drama from Spain, a famil...",The New York Times,"subject: Movies;persons: Simon, Carla;creative...",2023-01-05T12:00:03+0000,306,‘Alcarràs’ Review: Labor of Love,,,Alcarràs,...,"In a small village in Catalonia, the peach far...",12.125,120,0,2022-04-29,6.935,162,Drama,Catalan,Spain
4,https://www.nytimes.com/2022/12/15/movies/nell...,A family archive provides intimate records of ...,The New York Times,subject: Documentary Films and Programs;person...,2022-12-15T12:00:04+0000,308,"‘Nelly & Nadine’ Review: An Unlikely Love, an ...",,,Nelly &amp; Nadine,...,Nelly and Nadine meet in Ravensbrück concentra...,2.163,92,0,2024-06-05,0.0,0,Documentary,"English, Spanish, French, Swedish","Belgium, Norway, Sweden"


In [20]:
# Drop "byline.person" column
nyt_reviewd_movies = nyt_reviewd_movies.drop(['byline.person'], axis=1)


In [25]:
# Delete duplicate rows and reset index

nyt_reviewd_movies = nyt_reviewd_movies.drop_duplicates()

nyt_reviewd_movies.reset_index()


Unnamed: 0,index,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,...,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,0,https://www.nytimes.com/2023/01/31/movies/pame...,"This documentary from Ryan White rewinds, to p...",The New York Times,subject: Documentary Films and Programs;person...,2023-01-31T12:00:05+0000,295,"‘Pamela, a Love Story’ Review: A Frank Look Back",,,...,"In her own words, through personal video and d...",17.316,113,0,2023-01-30,6.985,197,Documentary,English,United States of America
1,1,https://www.nytimes.com/2023/01/19/movies/in-f...,"In Matt Carter’s gay rugby film, sports and ro...",The New York Times,subject: Movies;creative_works: In From the Si...,2023-01-19T17:50:16+0000,281,‘In From the Side’ Review: Love and Rugby Play...,,,...,"Mark, a new and inexperienced rugby club membe...",15.798,134,0,2022-09-16,7.152,46,"Drama, Romance",English,United Kingdom
2,2,https://www.nytimes.com/2023/01/19/movies/afte...,In this intelligent melodrama by the director ...,The New York Times,subject: Movies;creative_works: After Love (20...,2023-01-19T12:00:06+0000,359,‘After Love’ Review: The Other Woman,Critic’s Pick,,...,Set in the port town of Dover in the South-Eas...,9.199,89,0,2021-06-04,7.310,92,Drama,"English, Arabic, French, Urdu",United Kingdom
3,3,https://www.nytimes.com/2023/01/05/movies/alca...,"In this naturalistic drama from Spain, a famil...",The New York Times,"subject: Movies;persons: Simon, Carla;creative...",2023-01-05T12:00:03+0000,306,‘Alcarràs’ Review: Labor of Love,,,...,"In a small village in Catalonia, the peach far...",12.125,120,0,2022-04-29,6.935,162,Drama,Catalan,Spain
4,4,https://www.nytimes.com/2022/12/15/movies/nell...,A family archive provides intimate records of ...,The New York Times,subject: Documentary Films and Programs;person...,2022-12-15T12:00:04+0000,308,"‘Nelly & Nadine’ Review: An Unlikely Love, an ...",,,...,Nelly and Nadine meet in Ravensbrück concentra...,2.163,92,0,2024-06-05,0.000,0,Documentary,"English, Spanish, French, Swedish","Belgium, Norway, Sweden"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,139,https://www.nytimes.com/2017/07/27/movies/the-...,"In war-torn Liberia, Charlize Theron and Javie...",The New York Times,subject: Movies;creative_works: The Last Face ...,2017-07-27T20:29:01+0000,280,Review: Aid Workers in Love and War in Sean Pe...,,,...,"Miguel, a heroic Spanish doctor, puts himself ...",11.708,130,0,2017-01-11,5.605,186,Drama,"English, Portuguese",United States of America
140,140,https://www.nytimes.com/2017/06/15/movies/lost...,The filmmakers Fiona Gordon and Dominique Abel...,The New York Times,creative_works: Lost in Paris (Movie);subject:...,2017-06-15T19:37:09+0000,254,Review: Finding Love (and Slapstick) While ‘Lo...,,,...,Fiona visits Paris for the first time to assis...,10.522,83,0,2017-01-14,6.059,111,Comedy,French,"Belgium, France"
141,141,https://www.nytimes.com/2017/03/09/movies/the-...,This moody romance stars Tatiana Maslany (“Orp...,The New York Times,subject: Movies;creative_works: The Other Half...,2017-03-09T21:54:58+0000,251,Review: A Combustible Pair Find Love in ‘The O...,,,...,A grief-stricken man and a bipolar woman fall ...,4.989,103,0,2016-12-02,6.159,22,"Drama, Romance",English,Canada
142,142,https://www.nytimes.com/2017/03/09/movies/revi...,A nurse travels to the Ottoman Empire on the e...,The New York Times,subject: Movies;creative_works: The Ottoman Li...,2017-03-09T21:53:12+0000,267,"Review: Love as the World Wars, in ‘The Ottoma...",,,...,"Lillie, a determined American woman, ventures ...",23.324,111,413844,2017-03-10,6.173,240,"Romance, Drama, War","Turkish, English","Turkey, United States of America"


In [26]:
# Export data to CSV without the index
nyt_reviewd_movies.to_csv('output/nyt_reviewd_movies.csv', index=False)