In [72]:
import pandas as pd
import numpy as np

In [73]:
# Read the raw Spotify track table from disk; the cell's value is its row count.
df = pd.read_csv("spotify_songs.csv")
df.shape[0]

32833

In [74]:
# Columns removed from the table before any further processing.
drop_columns_arr = [
    'danceability',
    'energy',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'playlist_id',
    'track_album_id',
    'key',
    'mode',
    'playlist_name',
    'playlist_subgenre',
    'track_id',
]

# Open question: could track_popularity be used to rank the results?
df = df.drop(columns=drop_columns_arr)

In [75]:
# Order rows from most to least popular, keep only the first (highest-ranked)
# row for each value of 'track_artist', then renumber the index from 0.
# NOTE(review): the author was unhappy with this de-duplication because
# different artists can release tracks with the same name. As written, the
# subset is 'track_artist', so every artist ends up with exactly one track --
# confirm that this is the intended key.
df = (
    df.sort_values("track_popularity", ascending=False)
    .drop_duplicates(subset=['track_artist'], keep='first')
    .reset_index(drop=True)
)

In [80]:
# Cap the table at its first 10,000 rows.
df = df.head(10000)

In [82]:
# Show the prepared table; pandas abbreviates long frames in the rendered output.
df 

Unnamed: 0,track_name,track_artist,track_popularity,track_album_name,track_album_release_date,playlist_genre,duration_ms
0,Dance Monkey,Tones and I,100,Dance Monkey (Stripped Back) / Dance Monkey,2019-10-17,latin,209438
1,ROXANNE,Arizona Zervas,99,ROXANNE,2019-10-10,r&b,163636
2,The Box,Roddy Ricch,98,Please Excuse Me For Being Antisocial,2019-12-06,rap,196653
3,Circles,Post Malone,98,Hollywood's Bleeding,2019-09-06,pop,215280
4,Memories,Maroon 5,98,Memories,2019-09-20,latin,189486
...,...,...,...,...,...,...,...
9995,Drive-by Stalking,Nightstop,1,Drive-by Stalking,2013-11-09,pop,320000
9996,Badman - Will Clarke Remix - Edit,Idris Elba,1,Badman (Will Clarke Remix),2018-08-24,edm,232508
9997,riding on,Akane Hatanaka,1,riding on,2019-08-24,edm,208846
9998,Stamm Fort (feat. Sfera Ebbasta),Luche,1,Stamm Fort,2019-01-18,rap,171375


In [2]:
import requests
from bs4 import BeautifulSoup
import os
from dotenv import load_dotenv

# Let python-dotenv populate the process environment, then read the Genius
# API token from the 'api_key' variable (None when the variable is not set).
load_dotenv()
API_TOKEN = os.environ.get('api_key')
BASE_URL = 'https://api.genius.com'

def search_song(artist, title, timeout=10):
    """Search the Genius API for a song and return the decoded JSON response.

    Args:
        artist: Artist name; placed after the title in the search query.
        title: Track title; placed first in the search query.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The parsed JSON body of the ``/search`` response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status (for
            example a missing or invalid token).
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    search_url = f"{BASE_URL}/search"
    headers = {'Authorization': f'Bearer {API_TOKEN}'}
    params = {'q': f"{title} {artist}"}
    response = requests.get(search_url, headers=headers, params=params, timeout=timeout)
    # Fail here with a clear HTTP error instead of letting callers trip over a
    # missing 'response' key in an error payload.
    response.raise_for_status()
    return response.json()

def get_lyrics(song_api_path, timeout=10):
    """Fetch the lyrics text for a Genius song.

    The Genius API does not return lyrics directly, so this looks up the
    song's web page path through the API and then scrapes the lyrics from the
    HTML page.

    Args:
        song_api_path: API path of the song (the ``api_path`` field of a
            search hit), appended to ``BASE_URL``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The lyrics as newline-separated text, or the string
        ``"Lyrics not found."`` when the page contains no lyrics container.

    Raises:
        requests.HTTPError: If the API request answers with a 4xx/5xx status.
        requests.Timeout: If a request does not complete within ``timeout``.
    """
    song_url = f"{BASE_URL}{song_api_path}"
    headers = {'Authorization': f'Bearer {API_TOKEN}'}
    response = requests.get(song_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    path = response.json()['response']['song']['path']

    # Fetch the song page from the Genius website (lyrics are not in the API).
    page_url = f"https://genius.com{path}"
    page = requests.get(page_url, timeout=timeout)
    soup = BeautifulSoup(page.text, 'html.parser')

    # A page can hold several data-lyrics-container="true" blocks; taking only
    # the first one truncates the lyrics, so collect all of them in page order.
    lyrics_divs = soup.find_all('div', {'data-lyrics-container': 'true'})
    if not lyrics_divs:
        return "Lyrics not found."
    return '\n'.join(div.get_text(separator='\n') for div in lyrics_divs)

def get_lyrics_for_song(artist, title):
    """Look a song up on Genius and return the lyrics of the first search hit.

    Prints a FOUND/FAIL line for the song. Returns whatever ``get_lyrics``
    returns for the top hit, or ``None`` when the search yields no hits.
    """
    hits = search_song(artist, title)['response']['hits']

    # Nothing matched: report the miss and signal it with None.
    if not hits:
        print(f"FAIL - Title: {title} - Artist: {artist}")
        return None

    top_hit_path = hits[0]['result']['api_path']
    print(f"FOUND - Title: {title} - Artist: {artist}")
    return get_lyrics(top_hit_path)





def get_lyrics_for_df(df: pd.DataFrame, output_path: str = 'song_data.csv', checkpoint_every: int = 25):
    """Fetch lyrics for every row of ``df`` and save the table as CSV.

    Adds a 'lyrics' column to ``df`` in place (initially ``None``) and fills
    it row by row from ``get_lyrics_for_song``.

    Args:
        df: Table with 'track_name' and 'track_artist' columns; modified in
            place.
        output_path: CSV file the table (including its index) is written to.
        checkpoint_every: Number of processed rows between intermediate
            saves. Values below 1 save after every row.

    Returns:
        None.
    """
    df['lyrics'] = None

    unsaved_rows = 0
    try:
        for index, row in df.iterrows():
            # get_lyrics_for_song takes (artist, title) -- pass the artist first.
            lyrics = get_lyrics_for_song(row['track_artist'], row['track_name'])
            df.at[index, 'lyrics'] = lyrics

            # Checkpoint periodically instead of rewriting the whole file
            # after every single row.
            unsaved_rows += 1
            if unsaved_rows >= checkpoint_every:
                df.to_csv(output_path, index=True)
                unsaved_rows = 0
    finally:
        # Flush whatever has been collected since the last checkpoint, even
        # when a lookup raised or the run was interrupted.
        if unsaved_rows:
            df.to_csv(output_path, index=True)

# Run the lyrics collection over the prepared track table; this adds a
# 'lyrics' column to df and writes the results to song_data.csv.
get_lyrics_for_df(df)
    





Lyrics for 'Bad Blood' by Taylor Swift:
[Chorus]
'Cause, baby, now we got bad blood
You know it used to be mad love
So take a look what you've done
'Cause, baby, now we got bad blood, hey
Now we got problems
And I don't think we can solve them
You made a really deep cut
And, baby, now we got bad blood, hey
[Verse 1]
Did you have to do this?
I was thinking that you could be trusted
Did you have to ruin
What was shiny? Now it's all rusted
Did you have to hit me
Where I'm weak? Baby, I couldn't breathe
And rub it in so deep
Salt in the wound like you're laughin' right at me
[Pre-Chorus]
Oh, it's so sad to
Think about the good times
You and I
[Chorus]
'Cause, baby, now we got bad blood
You know it used to be mad love
So take a look what you've done
'Cause, baby, now we got bad blood, hey
Now we got problems
And I don't think we can solve them
You made a really deep cut
And, baby, now we got bad blood, hey

