In [7]:
import json
import os
import time

import pandas as pd
import requests

**Part I. Data Collection**



1.1 Collection of New York Times articles with the word "anxiety" or "depression" in the headline

In [2]:
def collect_articles_NYT(word, output_file_name, api_key=None,
                         start_year=2004, end_year=2024):
    '''
    Interact with The New York Times Article Search API to collect articles'
    data. Restrict the search to those articles containing the given word in
    the headline. Lastly, generate a json file to store the data so it can be
    easily retrieved later without needing to make requests every time.

    Inputs:
        word (str): word of interest for the query
        output_file_name (str): desired name for the json file to be created
        api_key (str, optional): NYT API key. When omitted, the key is read
            from the NYT_API_KEY environment variable so that it never has
            to be written in the notebook.
        start_year (int, optional): first year to query (default 2004)
        end_year (int, optional): last year to query, inclusive (default 2024)

    Outputs:
        None. Writes a json file that maps each year to the list of article
        records returned by the API for that year.

    Raises:
        ValueError: if no API key is given and NYT_API_KEY is not set
        requests.HTTPError: if the API answers with an error status (for
            example an invalid key or an exceeded rate limit)
    '''

    #Basic details needed to interact with The New York Times API. The key
    #is taken from the caller or the environment instead of being hard-coded.
    if api_key is None:
        api_key = os.environ.get("NYT_API_KEY")
    if not api_key:
        raise ValueError("No NYT API key available: pass api_key or set the "
                         "NYT_API_KEY environment variable")

    base_url = "https://api.nytimes.com/svc/search/v2/articlesearch.json"

    articles = {}

    for year in range(start_year, end_year + 1):

        year_data = []

        #The date window only depends on the year, so build it once per year
        begindate = "{}0101".format(year)
        enddate = "{}1231".format(year)

        #Since the API only retrieves 10 results per query, we have to loop 
        #through different pages. The range 0-100 is set up since it is the
        #maximum number of pages that can be requested.
        for page in range(0, 100):

            nyt_params = {"q": word, "fq": f'headline:{word}',
                          "begin_date": begindate, "end_date": enddate,
                          "page": page, "api-key": api_key}

            response = requests.get(base_url, params=nyt_params, timeout=30)

            #Fail with a clear HTTP error (bad key, rate limit, ...) instead
            #of an obscure KeyError when the body has no "response" entry
            response.raise_for_status()
            page_data = response.json()

            #Pause after every request (presumably to stay under the API's
            #rate limit - confirm the current quota before lowering it)
            time.sleep(14)

            #After collecting all available data, subsequent pages retrieve
            #empty results. For that reason, we can break the loop when pages 
            #are no longer useful. It also avoids making unnecessary and 
            #time-consuming requests
            docs = page_data["response"]["docs"]
            if not docs:
                break

            year_data += docs

        articles[year] = year_data

    #Export the dictionary to a JSON file to save it locally to avoid needing
    #to make requests every time we work on the project. Note that json 
    #stores the integer year keys as strings.
    with open(output_file_name, 'w') as json_file:
        json.dump(articles, json_file, indent=4)

In [3]:
#Call function to retrieve NYT articles related to anxiety and depression

#collect_articles_NYT("anxiety", "anxiety_articles_NYT.json")
#collect_articles_NYT("depression", "depression_articles_NYT.json")

1.2 Collection of The Guardian articles with the word "anxiety" or "depression" in the headline

**Part II. Data Cleaning**

In [50]:
def create_df_NYT_articles(jsonfilepath): 
    '''
    Given a JSON file containing NYT's articles data, create a data frame that 
    contains useful information 

    Inputs: 
        jsonfilepath (str): name of the file preceded by its path. The file
            is expected to map each year (as a string key) to a list of
            article records.

    Outputs:
        DataFrame containing information of the articles (year published,
        specific date, headlines and abstracts), ordered by year. A field 
        that is missing from an article record is stored as a missing value.
    '''

    #Open json file as a python dictionary
    with open(jsonfilepath, "r") as json_file:
        dictdata = json.load(json_file)

    #Loop through each item in the dictionary to get the most relevant information
    #of the articles and store it in a dataframe

    cols = {"year" : [], "specific_date": [], 'headlines':[], 'abstracts':[]}

    #Use the years actually stored in the file (in numeric order) rather than
    #a fixed range, so files covering a different period load without errors
    for year_key in sorted(dictdata, key=int):
        year_x = int(year_key)
        for article in dictdata[year_key]:
            #"headline" is a nested record; tolerate it being absent or null
            headline = article.get("headline") or {}
            cols["year"].append(year_x)
            cols['specific_date'].append(article.get('pub_date'))
            cols['headlines'].append(headline.get("main"))
            cols['abstracts'].append(article.get('abstract'))

    df = pd.DataFrame(cols)
    return df



In [51]:
#Load the locally saved NYT "anxiety" articles into a data frame
anx_nyt_df = create_df_NYT_articles(jsonfilepath="./anxiety_articles_NYT.json")

#Do the same for the locally saved NYT "depression" articles
dep_nyt_df = create_df_NYT_articles(jsonfilepath="./depression_articles_NYT.json")

In [52]:
#Display the anxiety data frame to inspect the result of the cleaning step
anx_nyt_df

Unnamed: 0,year,specific_date,headlines,abstracts
0,2004,2004-03-14T05:00:00+0000,High Anxiety,James Glanz article on safety precautions bein...
1,2004,2004-08-22T05:00:00+0000,'Status Anxiety',"""Money, fame and influence may be valued more ..."
2,2004,2004-12-06T05:00:00+0000,Anxiety for an American Family,"Interview with Carolyn Richard, Adairsville, G..."
3,2004,2004-09-05T05:00:00+0000,Short Stories: Anxiety Attacks,Jeff Turrentine reviews books The Secret Goldf...
4,2004,2004-02-29T05:00:00+0000,"High Velocity, Some Anxiety",New York Yankees pitcher Kevin Brown prepares ...
...,...,...,...,...
1110,2024,2024-01-06T11:00:02+0000,Canada’s Foreign Student Surge Prompts Changes...,"As international students flock to Canada, the..."
1111,2024,2024-01-13T10:01:58+0000,On the Ballot in Iowa: Fear. Anxiety. Hopeless...,"As Monday’s caucuses approach, voters casually..."
1112,2024,2024-02-03T10:00:24+0000,"Anxiety, Mood Swings and Sleepless Nights: Lif...","Pushed by an advocacy group, Arkansas became t..."
1113,2024,2024-01-29T16:25:56+0000,Teen Drug and Alcohol Use Linked to Mental Hea...,Substance use by adolescents may prove valuabl...


In [53]:
#Display the depression data frame to inspect the result of the cleaning step
dep_nyt_df

Unnamed: 0,year,specific_date,headlines,abstracts
0,2004,2004-12-10T05:00:00+0000,Study Pursues a Genetic Link to Depression,Scientists at Duke University find genetic var...
1,2004,2004-12-17T05:00:00+0000,Study Suggests Way to Predict Whom Antidepress...,Harvard and UCLA researchers find that common ...
2,2004,2004-12-07T05:00:00+0000,Tracking Stress and Depression Back to the Womb,Studies indicate that stress and depression in...
3,2004,2004-09-12T05:00:00+0000,"Depression, a Frequent Visitor to Wall St.",Rate of chronic depression seems to be high am...
4,2004,2004-08-25T05:00:00+0000,New Therapy On Depression Finds Phone Is Effec...,Study reported in Journal of the American Medi...
...,...,...,...,...
711,2023,2023-08-22T07:06:10+0000,Harold Is Downgraded to Tropical Depression Af...,The storm unleashed heavy rainfall that caused...
712,2023,2023-07-13T18:07:54+0000,Shining a Light on Postpartum Depression,A psychiatrist and a new mother discuss the co...
713,2024,2024-01-10T23:30:06+0000,Women With Depression During or After Pregnanc...,Two studies concluded that depression that beg...
714,2024,2024-02-06T10:03:28+0000,A Rising Democratic Star Shares Her Mental Hea...,"Lina Hidalgo, who became Houston’s top executi..."
