In [1]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if one exists) into the process
# environment so the API keys never have to be hardcoded in the notebook.
load_dotenv()

nyt_api = os.getenv("NYT_ID")
guardian_api = os.getenv("GUARDIAN_ID")

# os.getenv returns None for an unset variable, which would otherwise only
# surface later as a failed API request; warn up front instead.
for _env_name, _env_value in (("NYT_ID", nyt_api), ("GUARDIAN_ID", guardian_api)):
    if not _env_value:
        print(f"Warning: environment variable {_env_name} is not set; "
              "requests that need it will fail.")

# News Collection
Pull today's headlines from the NYT Top Stories API and the Guardian Content API.

In [None]:
import requests

# Fetch the NYT "Top Stories" home feed and keep one "title: abstract"
# string per story in `nyt_articles`.
URL = 'https://api.nytimes.com/svc/topstories/v2/home.json'

nyt_articles = []

try:
    # The key is passed through `params` (URL-encoded by requests) rather than
    # interpolated into the URL; the timeout stops a stalled connection from
    # hanging the notebook.
    response = requests.get(URL, params={'api-key': nyt_api}, timeout=10)

    if response.status_code == 200:
        data = response.json()
        # A missing or empty 'results' list is treated as "no articles".
        results = data.get('results') or []
        if results:
            # Every returned story is collected, not just the first one.
            for article in results:
                title = article.get('title', 'No Title')
                abstract = article.get('abstract', 'No Abstract')
                nyt_articles.append(title + ": " + abstract)
        else:
            print("No articles found.")
    else:
        print(f"Error: {response.status_code}, {response.text}")

except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")

# Optional manual selection of an NYT story (disabled). Note that a later cell
# assigns `curr_article` from the Guardian results.
# curr_article = nyt_articles[6]
# print("Top NYT article:", curr_article)


Top NYT article: 100 Days of Solitude: Trump and the Retreat of America: President Trump’s approach to foreign policy in his second term has been transactional, unpredictable and exploitative. Allies and enemies alike are beginning to adapt.


In [56]:
from datetime import datetime


# Parameters
SECTION = 'us-news'
DATE = datetime.now().strftime('%Y-%m-%d')  # today's date on the local clock
URL = 'https://content.guardianapis.com/search'
NUM_ARTICLES = 8
ARTICLE_INDEX = 1     # 0-based position (newest first) of the article to analyse
REQUEST_TIMEOUT = 10  # seconds; keeps a stalled connection from hanging the notebook

params = {
    'section': SECTION,
    'from-date': DATE,
    'to-date': DATE,
    'order-by': 'newest',
    'page-size': NUM_ARTICLES,
    'show-fields': 'trailText',
    'api-key': guardian_api
}

# One "title: abstract" string per article returned for today.
guardian_articles = []

try:
    response = requests.get(URL, params=params, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    data = response.json()

    payload = data.get('response', {})
    # A missing or empty 'results' list is treated as "no articles".
    articles = payload.get('results') or []
    if payload.get('status') == 'ok' and articles:
        for article in articles:
            title = article.get('webTitle', 'No Title')
            abstract = article.get('fields', {}).get('trailText', 'No Abstract')
            guardian_articles.append(title + ": " + abstract)
    else:
        print("No articles found for today.")

except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")

# Select the article that the analysis cells below operate on. Fail with an
# explicit message (instead of a bare "list index out of range") when the
# request failed or too few articles have been published so far today.
if len(guardian_articles) <= ARTICLE_INDEX:
    raise IndexError(
        f"Expected at least {ARTICLE_INDEX + 1} Guardian articles for {DATE}, "
        f"but only {len(guardian_articles)} were retrieved."
    )

curr_article = guardian_articles[ARTICLE_INDEX]
print("Top Guardian article:", curr_article)

Top Guardian article: White House opens inquiry into Chicago school program aimed at helping Black students: Education department says school program to improve Black academic performance violates 1964 Civil Rights Act


# NLP Analysis of News
Calculate the sentiment and emotion scores of the selected news article.

In [28]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Hugging Face checkpoint identifier; the classifier and its tokenizer are
# both loaded from this same name so their vocabularies agree.
model_name = "SamLowe/roberta-base-go_emotions"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

def get_emotions(text, multi_label=False):
    """Score `text` against every label of the loaded classification model.

    Args:
        text: String handed to the module-level `tokenizer`; input longer than
            512 tokens is truncated.
        multi_label: When False (the default, and the original behaviour) the
            logits are normalised with softmax, so the scores sum to 1. When
            True each logit goes through an independent sigmoid, so every
            label gets its own probability in [0, 1].
            NOTE(review): the checkpoint loaded above appears to be a
            multi-label model, for which sigmoid is the usual choice. Confirm
            against its model card, and keep the same setting that was used
            to build any precomputed scores this output is compared with.

    Returns:
        dict mapping each label name from `model.config.id2label` to its
        score as a Python float, in label-id order.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():  # inference only; no gradients needed
        logits = model(**inputs).logits

    # Only one text is encoded, so take the first (and only) row of logits.
    row = logits[0]
    scores = torch.sigmoid(row) if multi_label else F.softmax(row, dim=-1)

    labels = model.config.id2label
    return {labels[i]: float(score) for i, score in enumerate(scores)}

In [57]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Setup NLTK resources. 'punkt_tab' is fetched alongside 'punkt' because newer
# NLTK releases load the tokenizer data from it; quiet=True keeps the
# downloader's progress log out of the cell output on every re-run.
for _resource in ('vader_lexicon', 'punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_resource, quiet=True)

# Disable TensorFlow in HuggingFace Transformers (optional for PyTorch-only)
# NOTE(review): transformers was already imported by an earlier cell, so this
# flag may be set too late to have any effect; confirm, and move it above
# that import if it is needed.
os.environ["TRANSFORMERS_NO_TF"] = "1"

# Stopwords and Sentiment
stop_words = set(stopwords.words("english"))
sentiment_analyzer = SentimentIntensityAnalyzer()

# --- Selected headline ---
# `curr_article` is whichever article the collection cells selected last. The
# `_nyt` suffix on the names below is historical and kept because later cells
# read `news_sentiment_nyt` and `emotions_nyt`.
tokenized_nyt = word_tokenize(curr_article)
# Keep alphanumeric tokens only, lowercased, with English stop words removed.
filtered_nyt = [
    word.lower()
    for word in tokenized_nyt
    if word.isalnum() and word.lower() not in stop_words
]

nyt_text = ' '.join(filtered_nyt)

# VADER Sentiment
# NOTE(review): the score is computed on the lowercased, stop-word-filtered
# text rather than the raw headline; confirm this matches how the song
# scores it is compared against were produced.
news_sentiment_nyt = sentiment_analyzer.polarity_scores(nyt_text)
print("Headline Sentiment:", news_sentiment_nyt)

# Emotion Detection (runs on the raw article text; get_emotions truncates
# over-long input itself).
emotions_nyt = get_emotions(curr_article)
print("Headline Emotions:", emotions_nyt)


NYT Headline Sentiment: {'neg': 0.109, 'neu': 0.724, 'pos': 0.168, 'compound': 0.2023}
NYT Headline Emotions: {'admiration': 0.00013471378770191222, 'amusement': 0.00015377420641016215, 'anger': 0.0005468361196108162, 'annoyance': 0.004671675618737936, 'approval': 0.003305030521005392, 'caring': 0.00022645157878287137, 'confusion': 0.0004121381207369268, 'curiosity': 0.00045816448982805014, 'desire': 0.00018865620950236917, 'disappointment': 0.0018068073550239205, 'disapproval': 0.001724222325719893, 'disgust': 0.0006506206700578332, 'embarrassment': 0.0004710674111265689, 'excitement': 5.2985102229285985e-05, 'fear': 0.0001318247668677941, 'gratitude': 4.4513715693028644e-05, 'grief': 8.407147834077477e-05, 'joy': 5.984858944430016e-05, 'love': 7.615025242557749e-05, 'nervousness': 7.241293496917933e-05, 'optimism': 0.00022064350196160376, 'pride': 3.0131264793453738e-05, 'realization': 0.0022846886422485113, 'relief': 4.5873417548136786e-05, 'remorse': 0.00018521983292885125, 'sadnes

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/maggiehollis/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/maggiehollis/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/maggiehollis/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Song Recommendations

In [58]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# === Load and prepare CSV ===
df = pd.read_csv("song_sentiments_emotions.csv")

# Normalize column names
df.columns = [col.strip().lower() for col in df.columns]

# Create full DataFrame (with artist and title)
df_full = df.copy()

# Create vector DataFrame by dropping the first two columns (artist, title)
df_vectors = df.iloc[:, 2:]  # assumes first two are metadata

# Convert df_vectors to numeric matrix; non-numeric cells become 0.0
df_matrix = df_vectors.apply(pd.to_numeric, errors='coerce').fillna(0.0).to_numpy()

# === Define emotion & sentiment keys (in order) ===
# NOTE(review): the comparison is positional, so this order must match the
# order of the CSV's feature columns. Confirm against the file.
emotion_keys = [
    'admiration','amusement','anger','annoyance','approval','caring','confusion','curiosity',
    'desire','disappointment','disapproval','disgust','embarrassment','excitement','fear',
    'gratitude','grief','joy','love','nervousness','neutral','optimism','pride','realization',
    'relief','remorse','sadness','surprise'
]

sentiment_keys = ['negative', 'neutral', 'positive', 'compound']

# === Build input_vector from your VADER and emotion outputs ===
vader_to_csv = {
    'negative': news_sentiment_nyt['neg'],
    'neutral': news_sentiment_nyt['neu'],
    'positive': news_sentiment_nyt['pos'],
    'compound': news_sentiment_nyt['compound']
}

# Sentiment scores first, then emotion scores; an emotion the model did not
# report is filled with 0.0.
input_vector = [vader_to_csv[key] for key in sentiment_keys] + [
    emotions_nyt.get(key, 0.0) for key in emotion_keys
]

# === Validate shapes before comparing ===
# Fail with a message that names the actual problem instead of a generic
# scikit-learn shape error.
if df_matrix.shape[0] == 0:
    raise ValueError("song_sentiments_emotions.csv contains no songs to compare against.")
if df_matrix.shape[1] != len(input_vector):
    raise ValueError(
        f"Song feature matrix has {df_matrix.shape[1]} columns but the headline "
        f"vector has {len(input_vector)} entries; check the CSV column layout."
    )

# === Compute cosine similarity ===
similarities = cosine_similarity([input_vector], df_matrix)  # shape (1, n_songs)
# Index the single query row explicitly rather than relying on argmax
# flattening the 2-D array.
best_index = int(np.argmax(similarities[0]))
best_match = df_full.iloc[best_index]

# === Output ===
print("Best Matching Song:")
print("Artist:", best_match['artist'])
print("Title:", best_match['title'])


Best Matching Song:
Artist: Black Label Society
Title: Fire It Up
