### Import Required Libraries and Set Up Environment Variables

In [22]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json



In [4]:
# Set environment variables from the .env in the local environment
load_dotenv()

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [5]:
# Set the base URL
def get_url(ak, bd, ed, fq, fl, so):
    url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?api-key="+ak+"&begin_date="+bd+"&end_date="+ed+"&fq="+fq+"so"+so+"&fl="+fl
    return url

In [6]:
# Set the base URL
#url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'
#filter_query = 'Movies'
# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

url = get_url(nyt_api_key,begin_date,end_date,filter_query,field_list,sort)
#print(url)

In [7]:
# Create an empty list to store the review
reviews_list = []
#url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json?api-key=MebDfltQAgwkMaU61WR9BEXzAUB6W3fq&begin_date=20130101&end_date=20230531&fq=section_name:"Movies" AND type_of_material:"Review" AND headline:"love"&sort=newest&fl=headline,web_url,snippet,source,keywords,pub_date,byline,word_count'
# loop through pages 0-19
for page in range(20):
    # create query with page number
    query_url = f"{url}&page={str(page)}"
    reviews = requests.get(query_url).json()
    time. sleep(12)
    for review in reviews["response"]["docs"]:
        # Try and save the reviews to the reviews_list
            try:
        # loop through the reviews["response"]["docs"] and append each review to the list
        # Print the page that was just retrieved
                print(f"Checked Page: {page}")
                #print(reviews)
                reviews_list.append(review)
            except:
        #Print the page number that had no results then break from the loop
                print(f"No Results found for page: {page}")
            break

Checked Page: 0
Checked Page: 1
Checked Page: 2
Checked Page: 3
Checked Page: 4
Checked Page: 5
Checked Page: 6
Checked Page: 7
Checked Page: 8
Checked Page: 9
Checked Page: 10
Checked Page: 11
Checked Page: 12
Checked Page: 13
Checked Page: 14
Checked Page: 15
Checked Page: 16
Checked Page: 17
Checked Page: 18
Checked Page: 19


In [8]:
for review in reviews_list:
    print(f'A snippet from the review: {review["snippet"]}')
    print('---------------------------')

A snippet from the review: The documentary looks at the mass killings of kangaroos for pet-food companies, leather processors and ranchers in Australia.
---------------------------
A snippet from the review: Morgan Neville’s movie tells the tumultuous behind-the-scenes story of the making and near-unmaking of “The Other Side of the Wind.”
---------------------------
A snippet from the review: Sharon Shattuck’s documentary tells her family story of growing up with a transgender parent.
---------------------------
A snippet from the review: Drawn from the plotline of a Todd Snider song, the film follows a pool shark and an escort, taking twists that are both violent and silly.
---------------------------
A snippet from the review: It’s hard to find a reliable, talented, reasonably priced, eco-friendly contractor these days.
---------------------------
A snippet from the review: This superficial take on the writing and initial staging of “Cyrano de Bergerac” is a whirlwind of soapy declar

In [47]:
# json.dumps with the argument indent=4 is used to preview the first five results

print(json.dumps(reviews_list, indent=4))



[
    {
        "web_url": "https://www.nytimes.com/2018/01/18/movies/kangaroo-a-love-hate-story-review.html",
        "snippet": "The documentary looks at the mass killings of kangaroos for pet-food companies, leather processors and ranchers in Australia.",
        "source": "The New York Times",
        "headline": {
            "main": "Review: \u2018Kangaroo: A Love-Hate Story\u2019 Exposes a Wildlife Massacre",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "Kangaroo: A Love-Hate Story",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "creative_works",
                "value": "Kangaroo: A Love-Hate Story (Movie)",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "subject",
                "value": "Kangaroos",
                "rank": 2,
                "major": "N"
            }

In [17]:
# Convert the results to a DataFrame, normalizing the JSON
reviews_list_df = pd.json_normalize(reviews_list)
reviews_list_df.head(5)

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2018/01/18/movies/kang...,The documentary looks at the mass killings of ...,The New York Times,"[{'name': 'creative_works', 'value': 'Kangaroo...",2018-01-18T12:00:23+0000,263,Review: ‘Kangaroo: A Love-Hate Story’ Exposes ...,,,Kangaroo: A Love-Hate Story,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",
1,https://www.nytimes.com/2018/11/01/movies/they...,Morgan Neville’s movie tells the tumultuous be...,The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2018-11-01T13:56:39+0000,794,Review: ‘They’ll Love Me When I’m Dead’ Docume...,,,The Legend Behind The Scenes,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
2,https://www.nytimes.com/2016/06/24/movies/from...,Sharon Shattuck’s documentary tells her family...,The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2016-06-23T21:11:31+0000,252,Review: ‘From This Day Forward’ Attests to Lov...,,,Review: ‘From This Day Forward’ Attests to Lov...,,,,By Andy Webster,"[{'firstname': 'Andy', 'middlename': None, 'la...",
3,https://www.nytimes.com/2021/10/14/movies/hard...,"Drawn from the plotline of a Todd Snider song,...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2021-10-14T12:27:03+0000,320,‘Hard Luck Love Song’ Review: A Glossy Take on...,,,Hard Luck Love Song,,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
4,https://www.nytimes.com/2019/08/29/movies/fall...,"It’s hard to find a reliable, talented, reason...",The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2019-08-29T14:56:44+0000,374,‘Falling Inn Love’ Review: A Prize That Needs ...,Streaming Movie Review,,A Fine Romance to Watch While Sorting Socks,,,,By Helen T. Verongos,"[{'firstname': 'Helen', 'middlename': 'T.', 'l...",


In [11]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019.
# End string should include " Review" to avoid cutting title early

reviews_list_df['title'] = reviews_list_df['headline.main'].apply(lambda st: st[st.find("\u2018")+1:st.find("\u2019 Review")])
reviews_list_df['title']




0     Kangaroo: A Love-Hate Story’ Exposes a Wildlif...
1     They’ll Love Me When I’m Dead’ Documents Orson...
2     From This Day Forward’ Attests to Love’s Adapt...
3                                   Hard Luck Love Song
4                                      Falling Inn Love
5                                       Cyrano, My Love
6                               Love Among the Gas Pump
7                                       Five Feet Apart
8                                         I Love My Dad
9                                          Asako I & II
10                                             Ammonite
11                                           About Fate
12                                  Love Wedding Repeat
13                                    God’s Own Country
14                 Dina,’ a Differently Abled Love Stor
15                                            Love Hard
16                      69: The Saga of Danny Hernandez
17                   Eva,’ Robot Designer Loves 

In [12]:
# Extract 'name' and 'value' from items in "keywords" column

def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
reviews_list_df['keywords'] =  reviews_list_df['keywords'].apply(extract_keywords) 
reviews_list_df['keywords']

0     creative_works: Kangaroo: A Love-Hate Story (M...
1     subject: Documentary Films and Programs;creati...
2     subject: Documentary Films and Programs;person...
3     subject: Movies;persons: Dorman, Michael (1981...
4     subject: Movies;organizations: Netflix Inc;per...
5     subject: Movies;creative_works: Cyrano, My Lov...
6     subject: Movies;creative_works: Detroit Unlead...
7     subject: Movies;creative_works: Five Feet Apar...
8     subject: Movies;persons: Oswalt, Patton;creati...
9     subject: Movies;creative_works: Asako I & II (...
10    subject: Movies;persons: Ronan, Saoirse;person...
11    subject: Movies;creative_works: About Fate (Mo...
12    subject: Movies;creative_works: Love Wedding R...
13    subject: Movies;creative_works: God's Own Coun...
14    creative_works: Dina (Movie);subject: Document...
15    subject: Movies;creative_works: Love Hard (Mov...
16    subject: Documentary Films and Programs;subjec...
17    subject: Movies;persons: Maillo, Kike;crea

In [13]:
# Create a list from the "title" column using to_list()
title_list = []
title_list = reviews_list_df['title'].tolist()
# These titles will be used in the query for The Movie Database

title_list



['Kangaroo: A Love-Hate Story’ Exposes a Wildlife Massacr',
 'They’ll Love Me When I’m Dead’ Documents Orson Welles’s Last Fil',
 'From This Day Forward’ Attests to Love’s Adaptabilit',
 'Hard Luck Love Song',
 'Falling Inn Love',
 'Cyrano, My Love',
 'Love Among the Gas Pump',
 'Five Feet Apart',
 'I Love My Dad',
 'Asako I & II',
 'Ammonite',
 'About Fate',
 'Love Wedding Repeat',
 'God’s Own Country',
 'Dina,’ a Differently Abled Love Stor',
 'Love Hard',
 '69: The Saga of Danny Hernandez',
 'Eva,’ Robot Designer Loves Cute Ki',
 'A Tuba to Cuba',
 'Solution to His Love Problems?: Baby Formul']

### Access The Movie Database API

In [14]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie/11"
tmdb_key_string = "?api_key=" + tmdb_api_key

url = f"https://api.themoviedb.org/3/search/movie{tmdb_key_string}&include_adult=false&language=en-US&page=1"

print(url)

https://api.themoviedb.org/3/search/movie?api_key=edd86bc79f18410743b10d55284de412&include_adult=false&language=en-US&page=1


In [115]:
# Create an empty tmdb_movies_list list to store the results
# and Add the relevant data to it.
# Empty list for results
movie_id_list = []  
tmdb_movies_list = []
tmdb_movies_list_details = []
results_list = []
genre_list = []
unique_genre_names = []
spoken_languages_list = []
unique_spoken_languages_names = []
production_countries_list = []
unique_production_countries_names = []
details_dictionary = {}
request_counter = 0
# loop through the title_list
    # Loop through the titles
for title in title_list:
    # Set up the query
    query_url = f"{url}&query={title}"
    #print(query_url)
    # Perform a "GET" request for The Movie Database
    results = requests.get(query_url).json()
    if results['total_results'] > 0:
        #print(json.dumps(results, indent=4))
        # Save results to results_list 
        tmdb_movies_list.append(results)
        # Print out the title that was found
        print(f"Found {results['total_results']} result(s) for {title}")
        # add id and title to filtered_data
        for result in results["results"]:
            if result["id"] not in movie_id_list:
                movie_id_list.append(result["id"])
                print(f'Found id: {result["id"]} for {title}')
    # Check if we need to sleep before making a request 
    # Add 1 to the request counter
    request_counter += 1
    #print(f"Request Counter: {request_counter%50}")
    if request_counter%50 == 0:
        print("sleeping")
    # Add a 12 second interval between queries to stay within API query limits
        time.sleep(12)
# Include a try clause to search for the full movie details.
pop_movie_id_list = movie_id_list.copy()
total_pop= len(movie_id_list)
for x in range(0, len(movie_id_list)):
    try:
        movie_id = pop_movie_id_list.pop()
        movie_id_str = str(movie_id)
    # Make a request for a the full movie details
    # Execute "GET" request with url  
        details_url ='https://api.themoviedb.org/3/movie/'+movie_id_str+tmdb_key_string
        #print(details_url)
        details_results = requests.get(details_url).json()
        tmdb_movies_list_details.append(details_results)
        #print(details_results)
         # Check if we need to sleep 
        # Add 1 to the request counter
        request_counter += 1
        #print(f"Request Counter: {request_counter%50}")
        if request_counter%50 == 0:
           print('sleeping')
        # Add a 12 second interval between queries to stay within API query limits
           time.sleep(12)
        # Use the except clause to print out a statement if a movie
        # is not found.
    except:
        print(f"Full movie details not found for id: {movie_id_list[x]} for {title}")
        continue
print(f'finished {total_pop} details for movies')
for details_results in tmdb_movies_list_details:
    # Extract the genre names into a list
    genre_list.append(details_results['genres'])
    # Extract the spoken_languages' English name into a list
    spoken_languages_list.append(details_results['spoken_languages'])   
    # Extract the production_countries' name into a list
    production_countries_list.append(details_results['production_countries'])
#print(genre_list)
#print(spoken_languages_list)
#print(production_countries_list)    
for genre in genre_list:
    for item in genre:
        if(item['name'] not in unique_genre_names):
            unique_genre_names.append(item['name'])
print(f'unique_genre_names = {unique_genre_names}')
for spoken_language in spoken_languages_list:
    for item in spoken_language:
        if(item['english_name'] not in unique_spoken_languages_names):
            unique_spoken_languages_names.append(item['english_name'])
print(f'unique_spoken_languages_names = {unique_spoken_languages_names}')
for production_countries in production_countries_list:
    for item in production_countries:
        if(item['name'] not in unique_production_countries_names):
            unique_production_countries_names.append(item['name'])
print(f'unique_production_countries_names = {unique_production_countries_names}')
#print(json.dumps(details_results, indent=4))
# Add the relevant data to a dictionary and
details_dictionary["unique_genre_names"] = unique_genre_names
details_dictionary["unique_spoken_languages_names"] = unique_spoken_languages_names
details_dictionary["unique_production_countries_names"] = unique_production_countries_names
#print(f'details_dictonary = {details_dictionary}')
# append it to the tmdb_movies_list list
tmdb_movies_list.append(details_dictionary) 
#print(tmdb_movies_list)

        
       



Found 1 result(s) for Hard Luck Love Song
Found id: 536208 for Hard Luck Love Song
Found 1 result(s) for Falling Inn Love
Found id: 623195 for Falling Inn Love
Found 2 result(s) for Cyrano, My Love
Found id: 544510 for Cyrano, My Love
Found id: 445179 for Cyrano, My Love
Found 1 result(s) for Five Feet Apart
Found id: 527641 for Five Feet Apart
Found 3 result(s) for I Love My Dad
Found id: 876825 for I Love My Dad
Found id: 1152894 for I Love My Dad
Found id: 1239290 for I Love My Dad
Found 1 result(s) for Asako I & II
Found id: 487850 for Asako I & II
Found 3 result(s) for Ammonite
Found id: 568467 for Ammonite
Found id: 1116655 for Ammonite
Found id: 392170 for Ammonite
Found 3 result(s) for About Fate
Found id: 828613 for About Fate
Found id: 919068 for About Fate
Found id: 41187 for About Fate
Found 1 result(s) for Love Wedding Repeat
Found id: 624808 for Love Wedding Repeat
Found 2 result(s) for God’s Own Country
Found id: 428493 for God’s Own Country
Found id: 269820 for God’s Ow

In [117]:

# Use json.dumps with argument indent=4 to format data

print(json.dumps(tmdb_movies_list, indent=4))


[
    {
        "page": 1,
        "results": [
            {
                "adult": false,
                "backdrop_path": "/l17qkQS5Goz1asb2Zs5LsopGIK3.jpg",
                "genre_ids": [
                    10749,
                    10402,
                    18,
                    53
                ],
                "id": 536208,
                "original_language": "en",
                "original_title": "Hard Luck Love Song",
                "overview": "Jesse, a charismatic but down on his luck troubadour, finds himself at an existential crossroads as bad choices catch up with him during an unexpected reunion with Carla, an old flame.",
                "popularity": 3.533,
                "poster_path": "/fWUbYGT0zTJoEZVyc2SvsenoHPF.jpg",
                "release_date": "2021-10-15",
                "title": "Hard Luck Love Song",
                "video": false,
                "vote_average": 4.9,
                "vote_count": 10
            }
        ],
        "total_

In [None]:
# Convert the results to a DataFrame

results_list_df = pd.DataFrame(tmdb_movies_list)
results_list_df

Unnamed: 0,page,results,total_pages,total_results
0,1,"[{'adult': False, 'backdrop_path': '/l17qkQS5G...",1,1
1,1,"[{'adult': False, 'backdrop_path': '/lVJdFZc8h...",1,1
2,1,"[{'adult': False, 'backdrop_path': '/dn7wTIud3...",1,2
3,1,"[{'adult': False, 'backdrop_path': '/27ZkYMWyn...",1,1
4,1,"[{'adult': False, 'backdrop_path': '/3X4t5Zzbd...",1,3
5,1,"[{'adult': False, 'backdrop_path': '/qDKmkuJhx...",1,1
6,1,"[{'adult': False, 'backdrop_path': '/DpPoSYKlf...",1,3
7,1,"[{'adult': False, 'backdrop_path': '/30COvn7g7...",1,3
8,1,"[{'adult': False, 'backdrop_path': '/ruxOWt2iC...",1,1
9,1,"[{'adult': False, 'backdrop_path': '/R2cpJHKCY...",1,2


### Merge and Clean the Data for Export

In [None]:
# Merge the New York Times reviews and TMDB DataFrames on title


In [None]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing


# Create a list of characters to remove


# Loop through the list of columns to fix

    # Convert the column to type 'str'


    # Loop through characters to remove


# Display the fixed DataFrame


In [None]:
# Drop "byline.person" column


In [None]:
# Delete duplicate rows and reset index


In [None]:
# Export data to CSV without the index
