In [3]:
import os
from dotenv import load_dotenv
import re
import requests
import pandas as pd
import time
import lyricsgenius
import requests

In [4]:
load_dotenv()

GENIUS_API_KEY = os.getenv("GENIUS_API")
LASTFM_API_KEY = os.getenv("LASTFM_API")
LASTFM_URL = "http://ws.audioscrobbler.com/2.0/"
print("Using Genius API:", GENIUS_API_KEY[:5] + "*****")  # sanity check
print("Using LastFM API:", LASTFM_API_KEY[:5] + "*****")  # sanity check

Using Genius API: co8_v*****
Using LastFM API: fc10b*****


In [5]:
genius = lyricsgenius.Genius(GENIUS_API_KEY)
genius.timeout = 15
genius.remove_section_headers = True

# === Last.fm Setup ===
LASTFM_URL = "http://ws.audioscrobbler.com/2.0/"

def get_lyrics(title, artist):
    """Fetch lyrics from Genius API"""
    try:
        song = genius.search_song(title, artist)
        if song:
            return song.lyrics
    except Exception as e:
        print(f"Error fetching lyrics for {title} by {artist}: {e}")
    return None

In [6]:
def clean_title(title):
    """Remove common extras like (Remastered), feat., etc."""
    title = re.sub(r"\(.*?\)", "", title)  # remove parentheses
    title = re.sub(r"feat\.|ft\.", "", title, flags=re.IGNORECASE)  # remove features
    return title.strip()

def get_genres(title, artist):
    try:
        # First try track-level tags
        params = {
            "method": "track.gettoptags",
            "artist": artist,
            "track": title,
            "api_key": LASTFM_API_KEY,
            "format": "json"
        }
        r = requests.get(LASTFM_URL, params=params)
        data = r.json()

        tags = data.get("toptags", {}).get("tag", [])
        if isinstance(tags, dict):
            tags = [tags]

        if tags:  # track tags found
            return ", ".join([t["name"] for t in tags[:5]])

        # Fallback: try artist-level tags
        params = {
            "method": "artist.gettoptags",
            "artist": artist,
            "api_key": LASTFM_API_KEY,
            "format": "json"
        }
        r = requests.get(LASTFM_URL, params=params)
        data = r.json()

        tags = data.get("toptags", {}).get("tag", [])
        if isinstance(tags, dict):
            tags = [tags]

        if tags:
            return ", ".join([t["name"] for t in tags[:5]])

        return None
    except Exception as e:
        print(f"Error fetching genres for {title} by {artist}: {e}")
        return None


In [7]:
year = 2012

In [8]:
while year < 2014:
    lyrics_list = []
    genres_list = []
    df = pd.read_csv(f"billboard-year-end-top100-singles/billboard_{year}.csv")  # columns: title, artist
    for i, row in df.iterrows():
        title, artist = row["Title"], row["Artist(s)"]
    
        # Fetch lyrics
        lyrics = get_lyrics(title, artist)
        lyrics_list.append(lyrics)
    
        # Throttle between Genius and Last.fm
        # time.sleep(1) who needs caution?
    
        # Fetch genres
        genres = get_genres(title, artist)
        genres_list.append(genres)
    
        print(f"Processed: {title} - {artist}")
        time.sleep(1)  # small delay before next song
    
    # Add to DataFrame
    df["lyrics"] = lyrics_list
    df["genres"] = genres_list
    
    # Save enriched CSV
    df.to_csv(f"{year}song_lyrics_and_genres.csv", index=False)
    year += 1

Searching for ""Somebody That I Used to Know"" by Gotye featuring Kimbra...
Done.
Processed: "Somebody That I Used to Know" - Gotye featuring Kimbra
Searching for ""Call Me Maybe"" by Carly Rae Jepsen...
Done.
Processed: "Call Me Maybe" - Carly Rae Jepsen
Searching for ""We Are Young"" by Fun featuring Janelle Monáe...
Done.
Processed: "We Are Young" - Fun featuring Janelle Monáe
Searching for ""Payphone"" by Maroon 5 featuring Wiz Khalifa...
Done.
Processed: "Payphone" - Maroon 5 featuring Wiz Khalifa
Searching for ""Lights"" by Ellie Goulding...
Done.
Processed: "Lights" - Ellie Goulding
Searching for ""Glad You Came"" by The Wanted...
Done.
Processed: "Glad You Came" - The Wanted
Searching for ""Stronger (What Doesn't Kill You)"" by Kelly Clarkson...
Done.
Processed: "Stronger (What Doesn't Kill You)" - Kelly Clarkson
Searching for ""We Found Love"" by Rihanna featuring Calvin Harris...
Done.
Processed: "We Found Love" - Rihanna featuring Calvin Harris
Searching for ""Starships"" by

Done.
Processed: "Heart Attack" - Trey Songz
Searching for ""Drank in My Cup"" by Kirko Bangz...
Done.
Processed: "Drank in My Cup" - Kirko Bangz
Searching for ""Birthday Cake"" by Rihanna featuring Chris Brown...
Done.
Processed: "Birthday Cake" - Rihanna featuring Chris Brown
Searching for ""So Good"" by B.o.B...
Done.
Processed: "So Good" - B.o.B
Searching for ""50 Ways to Say Goodbye"" by Train...
Done.
Processed: "50 Ways to Say Goodbye" - Train
Searching for ""Red Solo Cup"" by Toby Keith...
Done.
Processed: "Red Solo Cup" - Toby Keith
Searching for ""Love You Like a Love Song"" by Selena Gomez & the Scene...
Done.
Processed: "Love You Like a Love Song" - Selena Gomez & the Scene
Searching for ""Turn Up the Music"" by Chris Brown...
Done.
Processed: "Turn Up the Music" - Chris Brown
Searching for ""Die Young"" by Kesha...
Done.
Processed: "Die Young" - Kesha
Searching for ""5 O'Clock"" by T-Pain featuring Wiz Khalifa and Lily Allen...
Done.
Processed: "5 O'Clock" - T-Pain featuri

Error fetching lyrics for "It's Time" by Imagine Dragons: Request timed out:
HTTPSConnectionPool(host='api.genius.com', port=443): Read timed out. (read timeout=15)
Processed: "It's Time" - Imagine Dragons
Searching for ""Power Trip"" by J. Cole featuring Miguel...
Done.
Processed: "Power Trip" - J. Cole featuring Miguel
Searching for ""Girl on Fire"" by Alicia Keys featuring Nicki Minaj...
Done.
Processed: "Girl on Fire" - Alicia Keys featuring Nicki Minaj
Searching for ""Heart Attack"" by Demi Lovato...
Done.
Processed: "Heart Attack" - Demi Lovato
Searching for ""Love Somebody"" by Maroon 5...
Done.
Processed: "Love Somebody" - Maroon 5
Searching for ""I Will Wait"" by Mumford & Sons...
Done.
Processed: "I Will Wait" - Mumford & Sons
Searching for ""Try"" by Pink...
Done.
Processed: "Try" - Pink
Searching for ""Wagon Wheel"" by Darius Rucker...
Done.
Processed: "Wagon Wheel" - Darius Rucker
Searching for ""Gangnam Style"" by Psy...
Done.
Processed: "Gangnam Style" - Psy
Searching fo