In [1]:
import os
from dotenv import load_dotenv
import re
import requests
import pandas as pd
import time
import lyricsgenius
import requests

In [2]:
load_dotenv()

GENIUS_API_KEY = os.getenv("GENIUS_API")
LASTFM_API_KEY = os.getenv("LASTFM_API")
LASTFM_URL = "http://ws.audioscrobbler.com/2.0/"
print("Using Genius API:", GENIUS_API_KEY[:5] + "*****")  # sanity check
print("Using LastFM API:", LASTFM_API_KEY[:5] + "*****")  # sanity check

Using Genius API: iQe2k*****
Using LastFM API: fc10b*****


In [3]:
genius = lyricsgenius.Genius(GENIUS_API_KEY)
genius.timeout = 15
genius.remove_section_headers = True

# === Last.fm Setup ===
LASTFM_URL = "http://ws.audioscrobbler.com/2.0/"

def get_lyrics(title, artist):
    """Fetch lyrics from Genius API"""
    try:
        song = genius.search_song(title, artist)
        if song:
            return song.lyrics
    except Exception as e:
        print(f"Error fetching lyrics for {title} by {artist}: {e}")
    return None

In [4]:
def clean_title(title):
    """Remove common extras like (Remastered), feat., etc."""
    title = re.sub(r"\(.*?\)", "", title)  # remove parentheses
    title = re.sub(r"feat\.|ft\.", "", title, flags=re.IGNORECASE)  # remove features
    return title.strip()

def get_genres(title, artist):
    try:
        # First try track-level tags
        params = {
            "method": "track.gettoptags",
            "artist": artist,
            "track": title,
            "api_key": LASTFM_API_KEY,
            "format": "json"
        }
        r = requests.get(LASTFM_URL, params=params)
        data = r.json()

        tags = data.get("toptags", {}).get("tag", [])
        if isinstance(tags, dict):
            tags = [tags]

        if tags:  # track tags found
            return ", ".join([t["name"] for t in tags[:5]])

        # Fallback: try artist-level tags
        params = {
            "method": "artist.gettoptags",
            "artist": artist,
            "api_key": LASTFM_API_KEY,
            "format": "json"
        }
        r = requests.get(LASTFM_URL, params=params)
        data = r.json()

        tags = data.get("toptags", {}).get("tag", [])
        if isinstance(tags, dict):
            tags = [tags]

        if tags:
            return ", ".join([t["name"] for t in tags[:5]])

        return None
    except Exception as e:
        print(f"Error fetching genres for {title} by {artist}: {e}")
        return None


In [5]:
year = 1980

In [None]:
while year < 2025:
    lyrics_list = []
    genres_list = []
    df = pd.read_csv(f"billboard-year-end-top100-singles/billboard_{year}.csv")  # columns: title, artist
    for i, row in df.iterrows():
        title, artist = row["Title"], row["Artist(s)"]
    
        # Fetch lyrics
        lyrics = get_lyrics(title, artist)
        lyrics_list.append(lyrics)
    
        # Throttle between Genius and Last.fm
        # time.sleep(1) who needs caution?
    
        # Fetch genres
        genres = get_genres(title, artist)
        genres_list.append(genres)
    
        print(f"Processed: {title} - {artist}")
        time.sleep(1)  # small delay before next song
    
    # Add to DataFrame
    df["lyrics"] = lyrics_list
    df["genres"] = genres_list
    
    # Save enriched CSV
    df.to_csv(f"{year}song_lyrics_and_genres.csv", index=False)
    year += 1

Searching for ""Call Me"" by Blondie...
Done.
Processed: "Call Me" - Blondie
Searching for ""Another Brick in the Wall, Part II"" by Pink Floyd...
Done.
Processed: "Another Brick in the Wall, Part II" - Pink Floyd
Searching for ""Magic"" by Olivia Newton-John...
Done.
Processed: "Magic" - Olivia Newton-John
Searching for ""Rock with You"" by Michael Jackson...
Done.
Processed: "Rock with You" - Michael Jackson
Searching for ""Do That to Me One More Time"" by Captain & Tennille...
Done.
Processed: "Do That to Me One More Time" - Captain & Tennille
Searching for ""Crazy Little Thing Called Love"" by Queen...
Done.
Processed: "Crazy Little Thing Called Love" - Queen
Searching for ""Coming Up"" by Paul McCartney...
Done.
Processed: "Coming Up" - Paul McCartney
Searching for ""Funkytown"" by Lipps Inc....
Done.
Processed: "Funkytown" - Lipps Inc.
Searching for ""It's Still Rock and Roll to Me"" by Billy Joel...
Done.
Processed: "It's Still Rock and Roll to Me" - Billy Joel
Searching for ""T