### Import Required Libraries and Set Up Environment Variables

In [5]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json


In [6]:
# Set environment variables from the .env in the local environment
load_dotenv()

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [7]:
# Set the base URL
def get_url(ak, bd, ed, fq, fl, so):
    url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?api-key="+ak+"&begin_date="+bd+"&end_date="+ed+"&fq="+fq+"so"+so+"&fl="+fl
    return url

In [8]:
# Set the base URL
#url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'
#filter_query = 'Movies'
# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

url = get_url(nyt_api_key,begin_date,end_date,filter_query,field_list,sort)
#print(url)

In [9]:
# Create an empty list to store the review
reviews_list = []
#url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json?api-key=MebDfltQAgwkMaU61WR9BEXzAUB6W3fq&begin_date=20130101&end_date=20230531&fq=section_name:"Movies" AND type_of_material:"Review" AND headline:"love"&sort=newest&fl=headline,web_url,snippet,source,keywords,pub_date,byline,word_count'
# loop through pages 0-19
for page in range(20):
    # create query with page number
    query_url = f"{url}&page={str(page)}"
    reviews = requests.get(query_url).json()
    time. sleep(12)
    for review in reviews["response"]["docs"]:
        # Try and save the reviews to the reviews_list
            try:
        # loop through the reviews["response"]["docs"] and append each review to the list
        # Print the page that was just retrieved
                print(f"Checked Page: {page}")
                #print(reviews)
                reviews_list.append(review)
            except:
        #Print the page number that had no results then break from the loop
                print(f"No Results found for page: {page}")
            break

Checked Page: 0
Checked Page: 1
Checked Page: 2
Checked Page: 3
Checked Page: 4
Checked Page: 5
Checked Page: 6
Checked Page: 7
Checked Page: 8
Checked Page: 9
Checked Page: 10
Checked Page: 11
Checked Page: 12
Checked Page: 13
Checked Page: 14
Checked Page: 15
Checked Page: 16
Checked Page: 17
Checked Page: 18
Checked Page: 19


In [10]:
for review in reviews_list:
    print(f'A snippet from the review: {review["snippet"]}')
    print('---------------------------')

A snippet from the review: The documentary looks at the mass killings of kangaroos for pet-food companies, leather processors and ranchers in Australia.
---------------------------
A snippet from the review: Morgan Neville’s movie tells the tumultuous behind-the-scenes story of the making and near-unmaking of “The Other Side of the Wind.”
---------------------------
A snippet from the review: Sharon Shattuck’s documentary tells her family story of growing up with a transgender parent.
---------------------------
A snippet from the review: Drawn from the plotline of a Todd Snider song, the film follows a pool shark and an escort, taking twists that are both violent and silly.
---------------------------
A snippet from the review: It’s hard to find a reliable, talented, reasonably priced, eco-friendly contractor these days.
---------------------------
A snippet from the review: This superficial take on the writing and initial staging of “Cyrano de Bergerac” is a whirlwind of soapy declar

In [69]:
# json.dumps with the argument indent=4 is used to preview the first five results

print(json.dumps(reviews_list, indent=4))


[
    {
        "web_url": "https://www.nytimes.com/2018/01/18/movies/kangaroo-a-love-hate-story-review.html",
        "snippet": "The documentary looks at the mass killings of kangaroos for pet-food companies, leather processors and ranchers in Australia.",
        "source": "The New York Times",
        "headline": {
            "main": "Review: \u2018Kangaroo: A Love-Hate Story\u2019 Exposes a Wildlife Massacre",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "Kangaroo: A Love-Hate Story",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "creative_works",
                "value": "Kangaroo: A Love-Hate Story (Movie)",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "subject",
                "value": "Kangaroos",
                "rank": 2,
                "major": "N"
            }

In [87]:
# Convert the results to a DataFrame, normalizing the JSON
reviews_list_df = pd.json_normalize(reviews_list)
reviews_list_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2018/01/18/movies/kang...,The documentary looks at the mass killings of ...,The New York Times,"[{'name': 'creative_works', 'value': 'Kangaroo...",2018-01-18T12:00:23+0000,263,Review: ‘Kangaroo: A Love-Hate Story’ Exposes ...,,,Kangaroo: A Love-Hate Story,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",
1,https://www.nytimes.com/2018/11/01/movies/they...,Morgan Neville’s movie tells the tumultuous be...,The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2018-11-01T13:56:39+0000,794,Review: ‘They’ll Love Me When I’m Dead’ Docume...,,,The Legend Behind The Scenes,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
2,https://www.nytimes.com/2016/06/24/movies/from...,Sharon Shattuck’s documentary tells her family...,The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2016-06-23T21:11:31+0000,252,Review: ‘From This Day Forward’ Attests to Lov...,,,Review: ‘From This Day Forward’ Attests to Lov...,,,,By Andy Webster,"[{'firstname': 'Andy', 'middlename': None, 'la...",
3,https://www.nytimes.com/2021/10/14/movies/hard...,"Drawn from the plotline of a Todd Snider song,...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2021-10-14T12:27:03+0000,320,‘Hard Luck Love Song’ Review: A Glossy Take on...,,,Hard Luck Love Song,,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
4,https://www.nytimes.com/2019/08/29/movies/fall...,"It’s hard to find a reliable, talented, reason...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2019-08-29T14:56:44+0000,374,‘Falling Inn Love’ Review: A Prize That Needs ...,Streaming Movie Review,,A Fine Romance to Watch While Sorting Socks,,,,By Helen T. Verongos,"[{'firstname': 'Helen', 'middlename': 'T.', 'l...",
5,https://www.nytimes.com/2019/10/17/movies/cyra...,This superficial take on the writing and initi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2019-10-17T11:00:03+0000,291,"‘Cyrano, My Love’ Review: A Nose for Romance",,,"Cyrano, My Love",,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
6,https://www.nytimes.com/2013/11/22/movies/detr...,A gas station looms large in “Detroit Unleaded...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2013-11-21T23:52:02+0000,237,Love Among the Gas Pumps,Movie Review,,Detroit Unleaded,,,,By Neil Genzlinger,"[{'firstname': 'Neil', 'middlename': None, 'la...",
7,https://www.nytimes.com/2019/03/14/movies/five...,Haley Lu Richardson and Cole Sprouse navigate ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2019-03-14T13:00:07+0000,258,‘Five Feet Apart’ Review: Ailing Teenagers Liv...,,,Five Feet Apart,,,,By Ben Kenigsberg,"[{'firstname': 'Ben', 'middlename': None, 'las...",
8,https://www.nytimes.com/2022/08/04/movies/i-lo...,This comedy is a daddy-issues movie with a que...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2022-08-04T11:00:05+0000,418,‘I Love My Dad’ Review: A Father Catfishes His...,,,A Father Catfishes His Son. We Just Wait for t...,,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
9,https://www.nytimes.com/2019/05/16/movies/asak...,The Japanese director Ryusuke Hamaguchi follow...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2019-05-16T11:00:01+0000,547,‘Asako I & II’ Review: Double the Love (and th...,Critic’s Pick,,Falling For a Drifter And His Lookalike,,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",


In [88]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019.
# End string should include " Review" to avoid cutting title early

reviews_list_df['title'] = reviews_list_df['headline.main'].apply(lambda st: st[st.find("\u2018")+1:st.find("\u2019 Review")])
reviews_list_df['title']




0     Kangaroo: A Love-Hate Story’ Exposes a Wildlif...
1     They’ll Love Me When I’m Dead’ Documents Orson...
2     From This Day Forward’ Attests to Love’s Adapt...
3                                   Hard Luck Love Song
4                                      Falling Inn Love
5                                       Cyrano, My Love
6                               Love Among the Gas Pump
7                                       Five Feet Apart
8                                         I Love My Dad
9                                          Asako I & II
10                                             Ammonite
11                                           About Fate
12                                  Love Wedding Repeat
13                                    God’s Own Country
14                 Dina,’ a Differently Abled Love Stor
15                                            Love Hard
16                      69: The Saga of Danny Hernandez
17                   Eva,’ Robot Designer Loves 

In [89]:
# Extract 'name' and 'value' from items in "keywords" column

def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
reviews_list_df['keywords'] =  reviews_list_df['keywords'].apply(extract_keywords) 
reviews_list_df['keywords']

0     creative_works: Kangaroo: A Love-Hate Story (M...
1     subject: Documentary Films and Programs;creati...
2     subject: Documentary Films and Programs;person...
3     subject: Movies;persons: Dorman, Michael (1981...
4     subject: Movies;organizations: Netflix Inc;per...
5     subject: Movies;creative_works: Cyrano, My Lov...
6     subject: Movies;creative_works: Detroit Unlead...
7     subject: Movies;creative_works: Five Feet Apar...
8     subject: Movies;persons: Oswalt, Patton;creati...
9     subject: Movies;creative_works: Asako I & II (...
10    subject: Movies;persons: Ronan, Saoirse;person...
11    subject: Movies;creative_works: About Fate (Mo...
12    subject: Movies;creative_works: Love Wedding R...
13    subject: Movies;creative_works: God's Own Coun...
14    creative_works: Dina (Movie);subject: Document...
15    subject: Movies;creative_works: Love Hard (Mov...
16    subject: Documentary Films and Programs;subjec...
17    subject: Movies;persons: Maillo, Kike;crea

In [49]:
# Create a list from the "title" column using to_list()
title_list = []
title_list = reviews_list_df['title'].tolist()
# These titles will be used in the query for The Movie Database

title_list



['Kangaroo: A Love-Hate Story’ Exposes a Wildlife Massacr',
 'They’ll Love Me When I’m Dead’ Documents Orson Welles’s Last Fil',
 'From This Day Forward’ Attests to Love’s Adaptabilit',
 'Hard Luck Love Song',
 'Falling Inn Love',
 'Cyrano, My Love',
 'Love Among the Gas Pump',
 'Five Feet Apart',
 'I Love My Dad',
 'Asako I & II',
 'Ammonite',
 'About Fate',
 'Love Wedding Repeat',
 'God’s Own Country',
 'Dina,’ a Differently Abled Love Stor',
 'Love Hard',
 '69: The Saga of Danny Hernandez',
 'Eva,’ Robot Designer Loves Cute Ki',
 'A Tuba to Cuba',
 'Solution to His Love Problems?: Baby Formul']

### Access The Movie Database API

In [16]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

In [17]:
# Create an empty list to store the results


# Create a request counter to sleep the requests after a multiple
# of 50 requests


# Loop through the titles

    # Check if we need to sleep before making a request


    # Add 1 to the request counter

    
    # Perform a "GET" request for The Movie Database


    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.

        # Get movie id


        # Make a request for a the full movie details


        # Execute "GET" request with url

        
        # Extract the genre names into a list


        # Extract the spoken_languages' English name into a list


        # Extract the production_countries' name into a list


        # Add the relevant data to a dictionary and
        # append it to the tmdb_movies_list list

        
        # Print out the title that was found



In [18]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data


In [19]:
# Convert the results to a DataFrame


### Merge and Clean the Data for Export

In [20]:
# Merge the New York Times reviews and TMDB DataFrames on title


In [21]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing


# Create a list of characters to remove


# Loop through the list of columns to fix

    # Convert the column to type 'str'


    # Loop through characters to remove


# Display the fixed DataFrame


In [22]:
# Drop "byline.person" column


In [23]:
# Delete duplicate rows and reset index


In [24]:
# Export data to CSV without the index
