In [2]:
from dotenv import load_dotenv
import os

# Read variables from a local .env file (if one is found) into the process environment
load_dotenv()

# API credentials for the two news sources; each is None when its variable is unset
nyt_api, guardian_api = (os.environ.get(name) for name in ("NYT_ID", "GUARDIAN_ID"))

# News Collection
Pull today's top headlines from the NYT and Guardian APIs

In [22]:
import requests

# NYT Top Stories endpoint for the home page. The API key is sent through
# `params` so that requests URL-encodes it instead of it being pasted into the URL.
URL = 'https://api.nytimes.com/svc/topstories/v2/home.json'

# One "title: abstract" string per story, in the order the API returned them
nyt_articles = []

try:
    # A timeout keeps the cell from hanging forever if the server never answers.
    response = requests.get(URL, params={'api-key': nyt_api}, timeout=10)

    if response.status_code == 200:
        data = response.json()
        # `.get(...) or []` tolerates a missing or null 'results' field
        results = data.get('results') or []
        if results:
            for article in results:
                # Fall back to placeholders rather than raising on absent/null fields
                title = article.get('title') or 'No Title'
                abstract = article.get('abstract') or 'No Abstract'
                nyt_articles.append(title + ": " + abstract)
        else:
            print("No articles found.")
    else:
        print(f"Error: {response.status_code}, {response.text}")

except requests.exceptions.RequestException as e:
    # Connection failures, timeouts, etc. are reported the same way as in the Guardian cell
    print(f"An error occurred: {e}")
    
# curr_article = nyt_articles[0]
# print("Top NYT article:", curr_article)


In [40]:
from datetime import datetime


# Parameters
SECTION = 'us-news'
DATE = datetime.now().strftime('%Y-%m-%d')  # today's date, local clock
URL = 'https://content.guardianapis.com/search'
NUM_ARTICLES = 8
ARTICLE_INDEX = 4  # position (0-based) of the fetched article to analyse downstream

# Query for today's articles in SECTION, newest first, including each trail text
params = {
    'section': SECTION,
    'from-date': DATE,
    'to-date': DATE,
    'order-by': 'newest',
    'page-size': NUM_ARTICLES,
    'show-fields': 'trailText',
    'api-key': guardian_api
}

# One "title: abstract" string per article, in the order the API returned them
guardian_articles = []

try:
    # A timeout keeps the cell from hanging forever if the server never answers.
    response = requests.get(URL, params=params, timeout=10)
    response.raise_for_status()
    data = response.json()

    payload = data.get('response', {})
    # `.get(...) or []` tolerates a missing or null 'results' field
    articles = payload.get('results') or []
    if payload.get('status') == 'ok' and articles:
        for article in articles:
            title = article.get('webTitle', 'No Title')
            abstract = article.get('fields', {}).get('trailText', 'No Abstract')
            guardian_articles.append(title + ": " + abstract)
    else:
        print("No articles found for today.")

except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")

# Fail with an explicit message instead of a bare IndexError when nothing was fetched
if not guardian_articles:
    raise RuntimeError(
        f"No Guardian articles were fetched for {DATE}; cannot select an article to analyse."
    )

# Fewer than ARTICLE_INDEX + 1 articles may have been published so far today,
# so clamp the index to the last article that is actually available.
curr_article = guardian_articles[min(ARTICLE_INDEX, len(guardian_articles) - 1)]
print("Top Guardian article:", curr_article)

Top Guardian article: Woman in Florida deported to Cuba says she was forced to leave baby daughter: Heidy Sánchez says she was told her 17-month-old, who has health problems and is breastfeeding, couldn’t go with her


# NLP Analysis of News
Calculate the sentiment and emotion scores of the selected news article

In [8]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Pretrained emotion classifier and its matching tokenizer
model_name = "SamLowe/roberta-base-go_emotions"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)


def get_emotions(text):
    """Score `text` against every label of the emotion classifier.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the model without gradient tracking, and the logits of the first (only)
    sequence are normalised with a softmax.

    Args:
        text: The string to classify.

    Returns:
        dict mapping each label name from ``model.config.id2label`` to its
        softmax score as a Python float.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    with torch.no_grad():
        output = model(**encoded)

    # NOTE(review): softmax makes the scores sum to 1 across labels; if this
    # checkpoint is a multi-label model, sigmoid may be the intended
    # activation — confirm before changing, since stored song scores may
    # have been produced the same way.
    scores = F.softmax(output.logits, dim=1)[0]

    id_to_label = model.config.id2label
    return {id_to_label[idx]: score for idx, score in enumerate(scores.tolist())}

  from .autonotebook import tqdm as notebook_tqdm


In [41]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Setup NLTK: fetch the lexicon, tokenizer and stopword data used below.
# 'punkt_tab' is the tokenizer resource that recent NLTK releases load for
# word_tokenize; 'punkt' is kept so older releases keep working.
for resource in ('vader_lexicon', 'punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource, quiet=True)

# Disable TensorFlow in HuggingFace Transformers (optional for PyTorch-only)
# NOTE(review): this probably has to be set before `transformers` is first
# imported to have any effect — confirm, and move it to the setup cell if so.
os.environ["TRANSFORMERS_NO_TF"] = "1"

# Stopwords and Sentiment
stop_words = set(stopwords.words("english"))
sentiment_analyzer = SentimentIntensityAnalyzer()

# --- Selected headline ---
# `curr_article` is whichever article the collection cells selected; the
# `_nyt` suffix on the names below is historical and is kept only because
# the song-recommendation cell reads `news_sentiment_nyt` and `emotions_nyt`.
tokenized_nyt = word_tokenize(curr_article)

# Keep lower-cased alphanumeric tokens that are not English stopwords.
# NOTE(review): the stopword list includes negations such as "not"/"no";
# dropping them may change the VADER polarity — confirm this is intended.
filtered_nyt = [
    word.lower()
    for word in tokenized_nyt
    if word.isalnum() and word.lower() not in stop_words
]

nyt_text = ' '.join(filtered_nyt)

# VADER Sentiment (dict with 'neg', 'neu', 'pos' and 'compound' scores)
news_sentiment_nyt = sentiment_analyzer.polarity_scores(nyt_text)
print("Headline Sentiment:", news_sentiment_nyt)

# Emotion Detection on the raw article text (get_emotions truncates long input itself)
emotions_nyt = get_emotions(curr_article)
print("Headline Emotions:", emotions_nyt)


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/maggiehollis/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/maggiehollis/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/maggiehollis/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


NYT Headline Sentiment: {'neg': 0.33, 'neu': 0.67, 'pos': 0.0, 'compound': -0.7096}
NYT Headline Emotions: {'admiration': 9.457051783101633e-05, 'amusement': 0.000182974457857199, 'anger': 0.0004586175491567701, 'annoyance': 0.0014701030449941754, 'approval': 0.0009689170401543379, 'caring': 0.0002534352825023234, 'confusion': 0.00021465042664203793, 'curiosity': 0.00014806790568400174, 'desire': 0.0002997353149112314, 'disappointment': 0.0014162200968712568, 'disapproval': 0.0007111019222065806, 'disgust': 0.0006654822500422597, 'embarrassment': 0.00019676871306728572, 'excitement': 5.028406667406671e-05, 'fear': 0.00033870089100673795, 'gratitude': 5.10587306052912e-05, 'grief': 0.0002485248551238328, 'joy': 8.179308497346938e-05, 'love': 0.00011305588122922927, 'nervousness': 0.00010256931273033842, 'optimism': 0.00019025354413315654, 'pride': 2.6401559807709418e-05, 'realization': 0.0014604241587221622, 'relief': 6.225807737791911e-05, 'remorse': 0.0001954384642885998, 'sadness': 0

# Song Recs

In [42]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load the pre-scored songs and normalise the header names (trimmed, lower-case)
df = pd.read_csv("song_sentiments_emotions.csv")
df = df.rename(columns=lambda name: name.strip().lower())
df_full = df.copy()

# Drop the first two columns and coerce the rest to numbers (unparseable -> 0.0)
# NOTE(review): presumably the first two columns are artist/title and the
# remaining ones follow the same order as the input vector built below —
# confirm against the CSV, since the match is purely positional.
df_vectors = df.iloc[:, 2:]
numeric_scores = df_vectors.apply(pd.to_numeric, errors='coerce')
df_matrix = numeric_scores.fillna(0.0).to_numpy()

# Keys, in vector order, for the sentiment part and the emotion part
sentiment_keys = ['negative', 'neutral', 'positive', 'compound']

emotion_keys = [
    'admiration', 'amusement', 'anger', 'annoyance',
    'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust',
    'embarrassment', 'excitement', 'fear', 'gratitude',
    'grief', 'joy', 'love', 'nervousness',
    'neutral', 'optimism', 'pride', 'realization',
    'relief', 'remorse', 'sadness', 'surprise',
]

# Re-key the VADER scores from their short names to the long sentiment names
vader_to_csv = {
    csv_name: news_sentiment_nyt[vader_name]
    for csv_name, vader_name in zip(sentiment_keys, ('neg', 'neu', 'pos', 'compound'))
}

# Input vector: sentiment scores first, then emotion scores (0.0 when a label is absent)
input_vector = []
input_vector.extend(vader_to_csv[name] for name in sentiment_keys)
input_vector.extend(emotions_nyt.get(name, 0.0) for name in emotion_keys)

# Cosine similarity of the article vector against every song row; take the highest
similarities = cosine_similarity([input_vector], df_matrix)
best_index = similarities.argmax()
best_match = df_full.iloc[best_index]

# Output song rec
print("Best Matching Song:")
for label, column in (("Artist:", 'artist'), ("Title:", 'title')):
    print(label, best_match[column])


Best Matching Song:
Artist: Peter Kruder
Title: High Noon
