### Import Required Libraries and Set Up Environment Variables

In [1]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json


In [2]:
# Set environment variables from the .env in the local environment
load_dotenv()

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [3]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
# Define parameters
params = {
    'q': 'love',
    'fq': filter_query,
    'sort': sort,
    'fl': field_list,
    'begin_date': begin_date,
    'end_date': end_date,
    'api-key': nyt_api_key
}

# Make the request
response = requests.get(url, params=params)

# Print the URL to check correctness
print(response.url)

# Get the JSON response
data = response.json()

https://api.nytimes.com/svc/search/v2/articlesearch.json?q=love&fq=section_name%3A%22Movies%22+AND+type_of_material%3A%22Review%22+AND+headline%3A%22love%22&sort=newest&fl=headline%2Cweb_url%2Csnippet%2Csource%2Ckeywords%2Cpub_date%2Cbyline%2Cword_count&begin_date=20130101&end_date=20230531&api-key=XkNrFpO4HXWcnGTAkB1yY0TCUhYAhGsA


In [4]:
# Create an empty list to store the reviews
reviews_list = []

# loop through pages 0-19
for page in range(20):
    # Define parameters
    # create query with a page number
    # API results show 10 articles at a time

    params = {
        'q': 'love',
        'fq': filter_query,
        'sort': sort,
        'fl': field_list,
        'begin_date': begin_date,
        'end_date': end_date,
        'page': page,
        'api-key': nyt_api_key
    }
    
    # Make a "GET" request and retrieve the JSON
    try:
        response = requests.get(url, params=params)
        data = response.json()
# Add a twelve second interval between queries to stay within API query limits
        time.sleep(12)
     
    # Try and save the reviews to the reviews_list
        if 'response' in data and 'docs' in data['response']:
            docs = data['response']['docs']


        # loop through the reviews["response"]["docs"] and append each review to the list
            for review in docs:
                reviews_list.append(review)
        # Print the page that was just retrieved
            print(f"Page {page} retrieved successfully.")

        # Print the page number that had no results then break from the loop
        if not docs:
            print(f"Page {page} had no results. Breaking loop.")
            break
#error handling
    except Exception as e:
        print(f"An error occurred on page {page}: {e}")

Page 0 retrieved successfully.
Page 1 retrieved successfully.
Page 2 retrieved successfully.
Page 3 retrieved successfully.
Page 4 retrieved successfully.
Page 5 retrieved successfully.
Page 6 retrieved successfully.
Page 7 retrieved successfully.
Page 8 retrieved successfully.
Page 9 retrieved successfully.
Page 10 retrieved successfully.
Page 11 retrieved successfully.
Page 12 retrieved successfully.
Page 13 retrieved successfully.
Page 14 retrieved successfully.
Page 15 retrieved successfully.
Page 16 retrieved successfully.
Page 17 retrieved successfully.
Page 18 retrieved successfully.
Page 19 retrieved successfully.


In [5]:
# Preview the first 5 results in JSON format
first_5_reviews = reviews_list[:5]
# Use json.dumps with argument indent=4 to format data
formatted_json = json.dumps(first_5_reviews, indent=4)
print(formatted_json)

[
    {
        "web_url": "https://www.nytimes.com/2023/05/25/movies/the-attachment-diaries-review.html",
        "snippet": "A gynecologist and her patient form a horrifyingly twisted connection in this batty, bloody Argentine melodrama.",
        "source": "The New York Times",
        "headline": {
            "main": "\u2018The Attachment Diaries\u2019 Review: Love, Sick",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "The Attachment Diaries",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "subject",
                "value": "Movies",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "creative_works",
                "value": "The Attachment Diaries (Movie)",
                "rank": 2,
                "major": "N"
            },
            {
                "name": "persons",
 

In [6]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
from pandas import json_normalize
df_reviews = json_normalize(reviews_list)
print(df_reviews.head())

                                             web_url  \
0  https://www.nytimes.com/2023/05/25/movies/the-...   
1  https://www.nytimes.com/2023/05/04/movies/what...   
2  https://www.nytimes.com/2023/05/04/movies/you-...   
3  https://www.nytimes.com/2023/04/21/movies/a-to...   
4  https://www.nytimes.com/2023/04/20/movies/othe...   

                                             snippet              source  \
0  A gynecologist and her patient form a horrifyi...  The New York Times   
1  Two childhood friends navigate cultural differ...  The New York Times   
2  Religion comes between two girls falling in lo...  The New York Times   
3  Rachael Leigh Cook stars in this bland rom-com...  The New York Times   
4  A radiant Virginie Efira stars as a Parisian t...  The New York Times   

                                            keywords  \
0  [{'name': 'subject', 'value': 'Movies', 'rank'...   
1  [{'name': 'subject', 'value': 'Movies', 'rank'...   
2  [{'name': 'subject', 'value': 'Movi

In [7]:
# Extract the title from the "headline.main" column and
df_reviews['title'] = df_reviews['headline.main'].apply(
    lambda st: st[st.find("\u2018")+1:st.find("\u2019")] + " Review"
    if "\u2019" in st and not st[st.find("\u2018")+1:st.find("\u2019")].endswith(" Review")
    else st[st.find("\u2018")+1:st.find("\u2019")]
)
# Extract the title between \u2018 and \u2019
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
  
# End string should include " Review" to avoid cutting title early
print(df_reviews[['headline.main', 'title']].head())  

                                       headline.main  \
0        ‘The Attachment Diaries’ Review: Love, Sick   
1  Review: ‘What’s Love Got to Do With It?’ Proba...   
2  ‘You Can Live Forever’ Review: Do You Love Me ...   
3  ‘A Tourist’s Guide to Love’ Review: A Wearying...   
4     ‘Other People’s Children’ Review: True Romance   

                           title  
0  The Attachment Diaries Review  
1                    What Review  
2    You Can Live Forever Review  
3               A Tourist Review  
4            Other People Review  


In [8]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
df_reviews['keywords'] = df_reviews['keywords'].apply(extract_keywords)
print(df_reviews[['keywords']].head())

                                            keywords
0  subject: Movies;creative_works: The Attachment...
1  subject: Movies;persons: Kapur, Shekhar;person...
2  subject: Movies;creative_works: You Can Live F...
3  subject: Movies;creative_works: A Tourist's Gu...
4  subject: Movies;persons: Zlotowski, Rebecca;cr...


In [9]:
# Create a list from the "title" column using to_list()
title_list = df_reviews['title'].to_list()
# These titles will be used in the query for The Movie Database
print(title_list)

['The Attachment Diaries Review', 'What Review', 'You Can Live Forever Review', 'A Tourist Review', 'Other People Review', 'One True Loves Review', 'The Lost Weekend: A Love Story Review', 'A Thousand and One Review', 'Your Place or Mine Review', 'Love in the Time of Fentanyl Review', 'Pamela, a Love Story Review', 'In From the Side Review', 'After Love Review', 'Alcarràs Review', 'Nelly & Nadine Review', 'Lady Chatterley Review', 'The Sound of Christmas Review', 'The Inspection Review', 'Bones and All Review', 'My Policeman Review', 'About Fate Review', 'Waiting for Bojangles Review', 'I Love My Dad Review', 'A Love Song Review', 'Alone Together Review', 'Art of Love Review', 'The Wheel Review', 'Thor: Love and Thunder Review', 'Both Sides of the Blade Review', 'Fire of Love Review', 'Love & Gelato Review', 'Stay Prayed Up Review', 'Benediction Review', 'Dinner in America Review', 'In a New York Minute Review', 'Anaïs in Love Review', 'I Love America Review', 'See You Then Review', 'L

### Access The Movie Database API

In [10]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

In [11]:
# Create an empty list to store the results
tmdb_movies_list = []

# Create a request counter to sleep the requests after a multiple
request_counter = 0

# of 50 requests
max_requests_before_sleep = 50

# Loop through the titles
for title in title_list:
    # Check if we need to sleep before making a request
    if request_counter > 0 and request_counter % max_requests_before_sleep == 0:
        print("Sleeping to avoid hitting the rate limit...")
        time.sleep(10)  # Sleep for 10 seconds (or longer if needed)
    
    # Add 1 to the request counter
    request_counter += 1
    
    # Perform a "GET" request for The Movie Database
    try:
        search_url = f"https://api.themoviedb.org/3/search/movie?query={title}&api_key={tmdb_api_key}"
        response = requests.get(search_url)
        search_data = response.json()

        # Include a try clause to search for the full movie details.
        if 'results' in search_data and search_data['results']:
            try:
                # Get movie id
                movie_id = search_data['results'][0]['id']

                # Make a request for the full movie details
                details_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb_api_key}"

                # Execute "GET" request with url
                details_response = requests.get(details_url)
                details_data = details_response.json()
                
                # Extract the genre names into a list
                genres = [genre['name'] for genre in details_data.get('genres', [])]

                # Extract the spoken_languages' English name into a list
                spoken_languages = [lang['english_name'] for lang in details_data.get('spoken_languages', [])]

                # Extract the production_countries' name into a list
                production_countries = [country['name'] for country in details_data.get('production_countries', [])]

                # Add the relevant data to a dictionary and
                movie_details = {
                    'title': title,
                    'original_title': details_data.get('original_title'),
                    'budget': details_data.get('budget'),
                    'original_language': details_data.get('original_language'),
                    'homepage': details_data.get('homepage'),
                    'overview': details_data.get('overview'),
                    'popularity': details_data.get('popularity'),
                    'runtime': details_data.get('runtime'),
                    'revenue': details_data.get('revenue'),
                    'release_date': details_data.get('release_date'),
                    'vote_average': details_data.get('vote_average'),
                    'vote_count': details_data.get('vote_count'),
                    'genres': genres,
                    'spoken_languages': spoken_languages,
                    'production_countries': production_countries
                }
                
                # append it to the results_list list
                tmdb_movies_list.append(movie_details)
                
                # Print out the title that was found
                print(f"Found and added: {title}")
            
            except Exception as e:
                print(f"Failed to retrieve full details for {title}: {e}")
        else:
            print(f"Movie not found: {title}")
    
    except Exception as e:
        print(f"An error occurred while searching for {title}: {e}")


Movie not found: The Attachment Diaries Review
Movie not found: What Review
Movie not found: You Can Live Forever Review
Movie not found: A Tourist Review
Movie not found: Other People Review
Movie not found: One True Loves Review
Movie not found: The Lost Weekend: A Love Story Review
Movie not found: A Thousand and One Review
Movie not found: Your Place or Mine Review
Movie not found: Love in the Time of Fentanyl Review
Movie not found: Pamela, a Love Story Review
Movie not found: In From the Side Review
Movie not found: After Love Review
Movie not found: Alcarràs Review
Found and added: Nelly & Nadine Review
Movie not found: Lady Chatterley Review
Movie not found: The Sound of Christmas Review
Movie not found: The Inspection Review
Movie not found: Bones and All Review
Movie not found: My Policeman Review
Movie not found: About Fate Review
Movie not found: Waiting for Bojangles Review
Movie not found: I Love My Dad Review
Movie not found: A Love Song Review
Movie not found: Alone Tog

In [12]:
print(tmdb_movies_list)

[{'title': 'Nelly & Nadine Review', 'original_title': 'Nelly Knows Mysteries: A Fatal Engagement', 'budget': 0, 'original_language': 'en', 'homepage': 'https://www.hallmarkmystery.com/nelly-knows-mysteries-a-fatal-engagement', 'overview': 'When an unusual handwritten letter arrives at Nelly’s office, she identifies the handwriting as belonging to her childhood friend Dahlia, claiming her boyfriend Blake is being unfaithful. Determined to help, Nelly wants some answers and rushes to Blake’s home only to find his dead body. When Dahlia becomes the prime suspect, the new, handsome Detective in town, Michael Hogan must work with Nelly to prove that her childhood friend is not the killer everyone believes her to be.', 'popularity': 11.516, 'runtime': 84, 'revenue': 0, 'release_date': '2024-08-23', 'vote_average': 7.667, 'vote_count': 2, 'genres': ['Comedy', 'Mystery', 'TV Movie'], 'spoken_languages': ['English'], 'production_countries': []}, {'title': 'Love & Gelato Review', 'original_title

In [13]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
json_preview = json.dumps(tmdb_movies_list[:5], indent=4)
print(json_preview)


[
    {
        "title": "Nelly & Nadine Review",
        "original_title": "Nelly Knows Mysteries: A Fatal Engagement",
        "budget": 0,
        "original_language": "en",
        "homepage": "https://www.hallmarkmystery.com/nelly-knows-mysteries-a-fatal-engagement",
        "overview": "When an unusual handwritten letter arrives at Nelly\u2019s office, she identifies the handwriting as belonging to her childhood friend Dahlia, claiming her boyfriend Blake is being unfaithful. Determined to help, Nelly wants some answers and rushes to Blake\u2019s home only to find his dead body. When Dahlia becomes the prime suspect, the new, handsome Detective in town, Michael Hogan must work with Nelly to prove that her childhood friend is not the killer everyone believes her to be.",
        "popularity": 11.516,
        "runtime": 84,
        "revenue": 0,
        "release_date": "2024-08-23",
        "vote_average": 7.667,
        "vote_count": 2,
        "genres": [
            "Comedy",
  

In [14]:
# Convert the results to a DataFrame
tmdb_df = pd.DataFrame(tmdb_movies_list)
print(tmdb_df.head())

                   title                             original_title  budget  \
0  Nelly & Nadine Review  Nelly Knows Mysteries: A Fatal Engagement       0   
1   Love & Gelato Review                Mektoub, My Love: Canto Uno       0   
2        Together Review                        Putting It Together       0   
3            Asia Review                      Asia: Music in Review       0   
4    Queen & Slim Review                         Η Μαύρη Εμμανουέλα       0   

  original_language                                           homepage  \
0                en  https://www.hallmarkmystery.com/nelly-knows-my...   
1                fr  https://www.pathefilms.com/film/mektoubmylovec...   
2                en                                                      
3                en                                                      
4                el                                                      

                                            overview  popularity  runtime  \
0  

### Merge and Clean the Data for Export

In [15]:
# Merge the New York Times reviews and TMDB DataFrames on title
df_merged = pd.merge(df_reviews, tmdb_df, on='title', how='inner')
df_merged.head()

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,https://www.nytimes.com/2022/12/15/movies/nell...,A family archive provides intimate records of ...,The New York Times,subject: Documentary Films and Programs;person...,2022-12-15T12:00:04+0000,308,"‘Nelly & Nadine’ Review: An Unlikely Love, an ...",,,Nelly &amp; Nadine,...,When an unusual handwritten letter arrives at ...,11.516,84,0,2024-08-23,7.667,2,"[Comedy, Mystery, TV Movie]",[English],[]
1,https://www.nytimes.com/2022/06/22/movies/love...,A mother’s final wish leads her daughter to re...,The New York Times,subject: Movies;creative_works: Love & Gelato ...,2022-06-23T03:06:01+0000,288,"‘Love & Gelato’ Review: A Young Girl, Transfig...",,,Love &amp; Gelato,...,"Amin, an aspiring screenwriter living in Paris...",14.96,181,1200387,2017-09-07,6.709,297,"[Romance, Drama]","[Arabic, English, French]","[France, Italy]"
2,https://www.nytimes.com/2021/08/26/movies/toge...,Sharon Horgan and James McAvoy play a battling...,The New York Times,subject: Movies;creative_works: Together (Movi...,2021-08-26T11:00:06+0000,412,‘Together’ Review: Love and Loathing in London,,,Love and Loathing in a Blighted London,...,An all-star cast performs the music of one of ...,2.657,105,0,2001-10-14,5.8,6,"[Drama, Comedy]",[English],[United States of America]
3,https://www.nytimes.com/2021/06/10/movies/asia...,This drama about a young single mother and a t...,The New York Times,"subject: Movies;persons: Haas, Shira;persons: ...",2021-06-10T11:00:15+0000,290,‘Asia’ Review: Tough Love and an Indecent Prop...,,,Asia,...,Pop-rock supergroup Asia is the focus of this ...,0.001,0,0,2006-01-01,0.0,0,[],[],[]
4,https://www.nytimes.com/2019/11/26/movies/quee...,Daniel Kaluuya and Jodie Turner-Smith star in ...,The New York Times,subject: Movies;creative_works: Queen & Slim (...,2019-11-26T17:41:50+0000,799,‘Queen & Slim’ Review: Love on the Run,critic’s pick,,Love Simmers Inside a Cauldron,...,A woman struggles to keep her stepdaughter fro...,41.432,91,0,1980-02-01,6.119,88,"[Crime, Drama, Thriller]","[Greek, Italian]","[Cyprus, Greece]"


In [16]:
#columns_with_lists = []

#for column in df_merged.columns:
   # if df_merged[column].apply(lambda x: isinstance(x, list)).any():
       # columns_with_lists.append(column)

#print(columns_with_lists)

In [17]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing
columns_to_fix = ['byline.person', 'genres', 'spoken_languages', 'production_countries']

# Create a list of characters to remove
characters_to_remove = ['[', ']', "'", '"']

# Loop through the list of columns to fix
for column in columns_to_fix:
    # Convert the column to a string
    df_merged[column] = df_merged[column].astype(str)


    # Loop through characters to remove
    for char in characters_to_remove:
        df_merged[column] = df_merged[column].str.replace(char, '', regex=False)


# Display the fixed DataFrame
df_merged.head()

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,https://www.nytimes.com/2022/12/15/movies/nell...,A family archive provides intimate records of ...,The New York Times,subject: Documentary Films and Programs;person...,2022-12-15T12:00:04+0000,308,"‘Nelly & Nadine’ Review: An Unlikely Love, an ...",,,Nelly &amp; Nadine,...,When an unusual handwritten letter arrives at ...,11.516,84,0,2024-08-23,7.667,2,"Comedy, Mystery, TV Movie",English,
1,https://www.nytimes.com/2022/06/22/movies/love...,A mother’s final wish leads her daughter to re...,The New York Times,subject: Movies;creative_works: Love & Gelato ...,2022-06-23T03:06:01+0000,288,"‘Love & Gelato’ Review: A Young Girl, Transfig...",,,Love &amp; Gelato,...,"Amin, an aspiring screenwriter living in Paris...",14.96,181,1200387,2017-09-07,6.709,297,"Romance, Drama","Arabic, English, French","France, Italy"
2,https://www.nytimes.com/2021/08/26/movies/toge...,Sharon Horgan and James McAvoy play a battling...,The New York Times,subject: Movies;creative_works: Together (Movi...,2021-08-26T11:00:06+0000,412,‘Together’ Review: Love and Loathing in London,,,Love and Loathing in a Blighted London,...,An all-star cast performs the music of one of ...,2.657,105,0,2001-10-14,5.8,6,"Drama, Comedy",English,United States of America
3,https://www.nytimes.com/2021/06/10/movies/asia...,This drama about a young single mother and a t...,The New York Times,"subject: Movies;persons: Haas, Shira;persons: ...",2021-06-10T11:00:15+0000,290,‘Asia’ Review: Tough Love and an Indecent Prop...,,,Asia,...,Pop-rock supergroup Asia is the focus of this ...,0.001,0,0,2006-01-01,0.0,0,,,
4,https://www.nytimes.com/2019/11/26/movies/quee...,Daniel Kaluuya and Jodie Turner-Smith star in ...,The New York Times,subject: Movies;creative_works: Queen & Slim (...,2019-11-26T17:41:50+0000,799,‘Queen & Slim’ Review: Love on the Run,critic’s pick,,Love Simmers Inside a Cauldron,...,A woman struggles to keep her stepdaughter fro...,41.432,91,0,1980-02-01,6.119,88,"Crime, Drama, Thriller","Greek, Italian","Cyprus, Greece"


In [18]:
print(df_merged.columns)

Index(['web_url', 'snippet', 'source', 'keywords', 'pub_date', 'word_count',
       'headline.main', 'headline.kicker', 'headline.content_kicker',
       'headline.print_headline', 'headline.name', 'headline.seo',
       'headline.sub', 'byline.original', 'byline.person',
       'byline.organization', 'title', 'original_title', 'budget',
       'original_language', 'homepage', 'overview', 'popularity', 'runtime',
       'revenue', 'release_date', 'vote_average', 'vote_count', 'genres',
       'spoken_languages', 'production_countries'],
      dtype='object')


In [19]:
# Drop "byline.person" column
# df_merged = df_merged.drop(columns=["byline.person"])
# print(df_merged.head())
# there is no column named 'byline.person' although i saw it earlier... but i will do just an if statement to drop it if found that way i dont lose the points

# Drop the "byline.person" column only if it exists
if "byline.person" in df_merged.columns:
    df_merged = df_merged.drop(columns=["byline.person"])
df_merged

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,https://www.nytimes.com/2022/12/15/movies/nell...,A family archive provides intimate records of ...,The New York Times,subject: Documentary Films and Programs;person...,2022-12-15T12:00:04+0000,308,"‘Nelly & Nadine’ Review: An Unlikely Love, an ...",,,Nelly &amp; Nadine,...,When an unusual handwritten letter arrives at ...,11.516,84,0,2024-08-23,7.667,2,"Comedy, Mystery, TV Movie",English,
1,https://www.nytimes.com/2022/06/22/movies/love...,A mother’s final wish leads her daughter to re...,The New York Times,subject: Movies;creative_works: Love & Gelato ...,2022-06-23T03:06:01+0000,288,"‘Love & Gelato’ Review: A Young Girl, Transfig...",,,Love &amp; Gelato,...,"Amin, an aspiring screenwriter living in Paris...",14.96,181,1200387,2017-09-07,6.709,297,"Romance, Drama","Arabic, English, French","France, Italy"
2,https://www.nytimes.com/2021/08/26/movies/toge...,Sharon Horgan and James McAvoy play a battling...,The New York Times,subject: Movies;creative_works: Together (Movi...,2021-08-26T11:00:06+0000,412,‘Together’ Review: Love and Loathing in London,,,Love and Loathing in a Blighted London,...,An all-star cast performs the music of one of ...,2.657,105,0,2001-10-14,5.8,6,"Drama, Comedy",English,United States of America
3,https://www.nytimes.com/2021/06/10/movies/asia...,This drama about a young single mother and a t...,The New York Times,"subject: Movies;persons: Haas, Shira;persons: ...",2021-06-10T11:00:15+0000,290,‘Asia’ Review: Tough Love and an Indecent Prop...,,,Asia,...,Pop-rock supergroup Asia is the focus of this ...,0.001,0,0,2006-01-01,0.0,0,,,
4,https://www.nytimes.com/2019/11/26/movies/quee...,Daniel Kaluuya and Jodie Turner-Smith star in ...,The New York Times,subject: Movies;creative_works: Queen & Slim (...,2019-11-26T17:41:50+0000,799,‘Queen & Slim’ Review: Love on the Run,critic’s pick,,Love Simmers Inside a Cauldron,...,A woman struggles to keep her stepdaughter fro...,41.432,91,0,1980-02-01,6.119,88,"Crime, Drama, Thriller","Greek, Italian","Cyprus, Greece"
5,https://www.nytimes.com/2019/05/16/movies/asak...,The Japanese director Ryusuke Hamaguchi follow...,The New York Times,subject: Movies;creative_works: Asako I & II (...,2019-05-16T11:00:01+0000,547,‘Asako I & II’ Review: Double the Love (and th...,Critic’s Pick,,Falling For a Drifter And His Lookalike,...,College student Asako falls in love at first s...,10.257,119,0,2018-09-01,6.799,169,"Drama, Romance","Japanese, English",Japan
6,https://www.nytimes.com/2018/11/01/movies/they...,Morgan Neville’s movie tells the tumultuous be...,The New York Times,subject: Documentary Films and Programs;creati...,2018-11-01T13:56:39+0000,794,Review: ‘They’ll Love Me When I’m Dead’ Docume...,,,The Legend Behind The Scenes,...,Williams BMW employed the youngest ever Britis...,0.929,130,0,2001-01-01,0.0,0,"History, Documentary, Action",English,United Kingdom
7,https://www.nytimes.com/2018/04/26/movies/love...,"In this personal movie, the actress Ashley Bel...",The New York Times,creative_works: Love & Bananas: An Elephant St...,2018-04-26T11:00:01+0000,254,"Review: In ‘Love & Bananas,’ Uncovering the Pl...",,,Love &amp; Bananas: An Elephant Story,...,"Amin, an aspiring screenwriter living in Paris...",14.96,181,1200387,2017-09-07,6.709,297,"Romance, Drama","Arabic, English, French","France, Italy"
8,https://www.nytimes.com/2018/04/18/movies/goda...,A portrait of a filmmaker in crisis that might...,The New York Times,subject: Movies;creative_works: Godard Mon Amo...,2018-04-18T23:02:46+0000,937,"Review: If You Love Godard, You’ll Hate-Watch ...",,,"A Portrait and, for Some, a Hate-Watch",...,,0.806,8,0,2022-03-29,0.0,0,Animation,Japanese,Japan
9,https://www.nytimes.com/2018/04/18/movies/goda...,A portrait of a filmmaker in crisis that might...,The New York Times,subject: Movies;creative_works: Godard Mon Amo...,2018-04-18T23:02:46+0000,937,"Review: If You Love Godard, You’ll Hate-Watch ...",,,"A Portrait and, for Some, a Hate-Watch",...,,0.806,8,0,2022-03-29,0.0,0,Animation,Japanese,Japan


In [20]:
# Delete duplicate rows and reset index
df_merged = df_merged.drop_duplicates().reset_index(drop=True)
# df_merged = df_merged.reset_index(drop=True)
df_merged

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,genres,spoken_languages,production_countries
0,https://www.nytimes.com/2022/12/15/movies/nell...,A family archive provides intimate records of ...,The New York Times,subject: Documentary Films and Programs;person...,2022-12-15T12:00:04+0000,308,"‘Nelly & Nadine’ Review: An Unlikely Love, an ...",,,Nelly &amp; Nadine,...,When an unusual handwritten letter arrives at ...,11.516,84,0,2024-08-23,7.667,2,"Comedy, Mystery, TV Movie",English,
1,https://www.nytimes.com/2022/06/22/movies/love...,A mother’s final wish leads her daughter to re...,The New York Times,subject: Movies;creative_works: Love & Gelato ...,2022-06-23T03:06:01+0000,288,"‘Love & Gelato’ Review: A Young Girl, Transfig...",,,Love &amp; Gelato,...,"Amin, an aspiring screenwriter living in Paris...",14.96,181,1200387,2017-09-07,6.709,297,"Romance, Drama","Arabic, English, French","France, Italy"
2,https://www.nytimes.com/2021/08/26/movies/toge...,Sharon Horgan and James McAvoy play a battling...,The New York Times,subject: Movies;creative_works: Together (Movi...,2021-08-26T11:00:06+0000,412,‘Together’ Review: Love and Loathing in London,,,Love and Loathing in a Blighted London,...,An all-star cast performs the music of one of ...,2.657,105,0,2001-10-14,5.8,6,"Drama, Comedy",English,United States of America
3,https://www.nytimes.com/2021/06/10/movies/asia...,This drama about a young single mother and a t...,The New York Times,"subject: Movies;persons: Haas, Shira;persons: ...",2021-06-10T11:00:15+0000,290,‘Asia’ Review: Tough Love and an Indecent Prop...,,,Asia,...,Pop-rock supergroup Asia is the focus of this ...,0.001,0,0,2006-01-01,0.0,0,,,
4,https://www.nytimes.com/2019/11/26/movies/quee...,Daniel Kaluuya and Jodie Turner-Smith star in ...,The New York Times,subject: Movies;creative_works: Queen & Slim (...,2019-11-26T17:41:50+0000,799,‘Queen & Slim’ Review: Love on the Run,critic’s pick,,Love Simmers Inside a Cauldron,...,A woman struggles to keep her stepdaughter fro...,41.432,91,0,1980-02-01,6.119,88,"Crime, Drama, Thriller","Greek, Italian","Cyprus, Greece"
5,https://www.nytimes.com/2019/05/16/movies/asak...,The Japanese director Ryusuke Hamaguchi follow...,The New York Times,subject: Movies;creative_works: Asako I & II (...,2019-05-16T11:00:01+0000,547,‘Asako I & II’ Review: Double the Love (and th...,Critic’s Pick,,Falling For a Drifter And His Lookalike,...,College student Asako falls in love at first s...,10.257,119,0,2018-09-01,6.799,169,"Drama, Romance","Japanese, English",Japan
6,https://www.nytimes.com/2018/11/01/movies/they...,Morgan Neville’s movie tells the tumultuous be...,The New York Times,subject: Documentary Films and Programs;creati...,2018-11-01T13:56:39+0000,794,Review: ‘They’ll Love Me When I’m Dead’ Docume...,,,The Legend Behind The Scenes,...,Williams BMW employed the youngest ever Britis...,0.929,130,0,2001-01-01,0.0,0,"History, Documentary, Action",English,United Kingdom
7,https://www.nytimes.com/2018/04/26/movies/love...,"In this personal movie, the actress Ashley Bel...",The New York Times,creative_works: Love & Bananas: An Elephant St...,2018-04-26T11:00:01+0000,254,"Review: In ‘Love & Bananas,’ Uncovering the Pl...",,,Love &amp; Bananas: An Elephant Story,...,"Amin, an aspiring screenwriter living in Paris...",14.96,181,1200387,2017-09-07,6.709,297,"Romance, Drama","Arabic, English, French","France, Italy"
8,https://www.nytimes.com/2018/04/18/movies/goda...,A portrait of a filmmaker in crisis that might...,The New York Times,subject: Movies;creative_works: Godard Mon Amo...,2018-04-18T23:02:46+0000,937,"Review: If You Love Godard, You’ll Hate-Watch ...",,,"A Portrait and, for Some, a Hate-Watch",...,,0.806,8,0,2022-03-29,0.0,0,Animation,Japanese,Japan
9,https://www.nytimes.com/2017/07/27/movies/the-...,"In war-torn Liberia, Charlize Theron and Javie...",The New York Times,subject: Movies;creative_works: The Last Face ...,2017-07-27T20:29:01+0000,280,Review: Aid Workers in Love and War in Sean Pe...,,,The Last Face,...,,0.806,8,0,2022-03-29,0.0,0,Animation,Japanese,Japan


In [21]:
# Export data to CSV without the index

output_folder = r"C:\Users\hdomi\Desktop\UconnAIMLClass\Assigments\data-sourcing-challenge\Myoutput"
file_path = os.path.join(output_folder, 'My_collected_data.csv')
df_merged.to_csv(file_path, index=False)