In [1]:
import requests
import pandas as pd

In [13]:
API_KEY = "qrZUHabbbGHJWU95326BITrpe1ZX6SbC79MvFmbIKuEICM9l"

def get_nyt_articles(query, begin_date, end_date, page=0):
    url = "https://api.nytimes.com/svc/search/v2/articlesearch.json"
    
    params = {
        "q": query,
        "begin_date": begin_date,  # format YYYYMMDD
        "end_date": end_date,
        "page": page,
        "api-key": API_KEY
    }
    
    response = requests.get(url, params=params)
    
    # ðŸ”Ž Debugging output
    print("Status Code:", response.status_code)
    print("Response JSON snippet:", response.json())
    
    data = response.json()
    
    if response.status_code != 200 or "response" not in data:
        print("Error or unexpected response")
        return pd.DataFrame()
    
    articles = []
    for doc in data["response"]["docs"]:
        articles.append({
            "headline": doc["headline"]["main"],
            "pub_date": doc["pub_date"],
            "snippet": doc.get("snippet", ""),
            "web_url": doc["web_url"]
        })
    
    return pd.DataFrame(articles)

In [42]:
all_articles = pd.DataFrame()

for page in range(10):
    df = get_nyt_articles(
        query="economy",
        begin_date="20060101",
        end_date="20060220",
        page=page
    )
    all_articles = pd.concat([all_articles, df], ignore_index=True)

all_articles.head()

Status Code: 200
Response JSON snippet: {'status': 'OK', 'copyright': 'Copyright (c) 2026 The New York Times Company. All Rights Reserved.', 'response': {'docs': [{'abstract': 'Alan Greenspan left his successor both a wide berth to set his own policy and some major uncertainties about the future.', 'byline': {'original': 'By EDMUND L. ANDREWS'}, 'document_type': 'article', 'headline': {'main': 'Exit Greenspan, Amid Questions on Economy', 'kicker': '', 'print_headline': ''}, '_id': 'nyt://article/4de832f3-1181-5aee-adfe-5e3fb5337e89', 'keywords': [], 'multimedia': {'caption': '', 'credit': '', 'default': {'url': '', 'height': 0, 'width': 0}, 'thumbnail': {'url': '', 'height': 0, 'width': 0}}, 'news_desk': 'Business', 'print_page': '', 'print_section': '', 'pub_date': '2006-02-01T05:00:00Z', 'section_name': 'Business', 'snippet': 'Alan Greenspan left his successor both a wide berth to set his own policy and some major uncertainties about the future.', 'source': 'The New York Times', 'sub

Unnamed: 0,headline,pub_date,snippet,web_url
0,"Exit Greenspan, Amid Questions on Economy",2006-02-01T05:00:00Z,Alan Greenspan left his successor both a wide ...,https://www.nytimes.com/2006/02/01/business/01...
1,Chinese Economy Becomes 4th Largest in the World,2006-01-25T05:00:00Z,"The Chinese statistics, showing a national eco...",https://www.nytimes.com/2006/01/25/business/wo...
2,Bush Cites 2 Million New Jobs in 2005 and Heal...,2006-01-07T05:00:00Z,"The United States generated about 200,000 new ...",https://www.nytimes.com/2006/01/07/politics/bu...
3,U.S. Economy Slowed Sharply at End of 2005,2006-01-28T05:00:00Z,Economic growth weakened unexpectedly in the f...,https://www.nytimes.com/2006/01/28/business/us...
4,Chinese Economy Grows to 4th Largest in the World,2006-01-25T05:00:00Z,"The Chinese statistics, showing a national eco...",https://www.nytimes.com/2006/01/25/business/wo...


In [43]:
all_articles.shape

(50, 4)

In [44]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

def get_sentiment(text):
    return analyzer.polarity_scores(text)["compound"]

all_articles["sentiment"] = all_articles["snippet"].apply(get_sentiment)

In [45]:
all_articles["pub_date"] = pd.to_datetime(all_articles["pub_date"])
all_articles["day"] = all_articles["pub_date"].dt.date

daily_sentiment = (
    all_articles
    .groupby("day")["sentiment"]
    .mean()
    .reset_index()
)

daily_sentiment.head()

Unnamed: 0,day,sentiment
0,2006-01-01,0.6808
1,2006-01-03,0.0258
2,2006-01-04,0.0258
3,2006-01-06,-0.113733
4,2006-01-07,0.2294


In [46]:
daily_sentiment.shape

(27, 2)

In [47]:
all_articles.shape

(50, 6)