In [19]:
import os
from dotenv import load_dotenv
import musicbrainzngs
import pprint
import requests
import lyricsgenius
import pandas as pd
import pprint

# Load API Keys
GENIUS_API_KEY = os.getenv("GENIUS_API_KEY")
LASTFM_API_KEY = os.getenv("LAST_FM_API_KEY")

# GENIUS Lyrics Init
genius = lyricsgenius.Genius(GENIUS_API_KEY, skip_non_songs=True, remove_section_headers=True)

# MusicBrainz MetaData Init
musicbrainzngs.set_useragent("music-study", "0.1", "email@email.com")

Building the records for each data point. Each row will contain info about the song, like the name, artist, playcount, listeners, and other features. We are looking to have some columns that are the end point (y) and a lot of columns that lead to the descisions (features). 


In [20]:
def build_song_row(title, artist):
    row: {
        "title": title, 
        "artist": artist, 
        "mbid": None, 
        "playcount": None, 
        "listeners": None, 
        "tags": [], 
        "lyrics_snippet": None, 
        "genre": None, 
        "mood": None, 
        "danceability": None, 
        "voice_instrumental": None
    }
    
    try: 
        # Getting info about songs in MusicBrainz
        result = musicbrainzngs.search_recordings(query=f"{title} {artist}", limit=1)
        recording = result['recording-list'][0]
        mbid = recording['id']
        row['mbid'] = mbid
    except Exception as e: 
        print(f"MusicBrainz failed for {title} by {artist} : {e}")
    
    # now grab actual song info from AcousticBrainz based on mbid
    try: 
        acou_url = f"https://acousticbrainz.org/api/v1/{mbid}/high-level"
        response = requests.get(acou_url)
        if response.status_code == 200: 
            # aight gut we got a legit response
            data = response.json().get('highlevel', {})
            row['genre'] = data.get('genre_dortmund', {}).get('value')
            row['mood'] = data.get('mood_acoustic', {}).get('value')
            row['danceability'] = data.get('danceability', {}).get('value')
            row['voice_instrumentals'] = data.get('voice_instrumentals', {}).get('value')
        else:
            print(f"Get Request failed with status {response.status_code}")
    except Exception as e: 
        print(f"AcousticBrainz error: {e}")

    # now we are getting y type info from Last FM 
    try: 
        last_url = f"http://ws.audioscrobbler.com/2.0/?method=track.getInfo&artist={artist}&track={title}&api_key={LASTFM_API_KEY}&format=json"
        lfm_resp = requests.get(last_url)
        if lfm_resp.status_code == 200: 
            # all gut with request
            lfm = lfm_resp.json().get('track', {})
            row['playcount'] = lfm.get('playcount')
            row['listeners'] = lfm.get('listeners')
            tags = [tag['name'] for tag in lfm.get('toptags', {}).get('tag', [])]
            row['tags'] = tags
        else: 
            print(f"Get Request failed with status: {lfm_resp.status_code}")
    except Exception as e: 
        print(f"Last FM error: {e}")
    # grabbing lyrical info from Genius
    try:
        song = genius.search_song(title, artist)
        if song: 
            row['lyrics_snippet'] = song.lyrics[:600]
        else: 
            print("No lyrics added. ")
    except Exception as e: 
        print(f"Genius failed to retrieve lyrics for {title}: {e}")
    
    return row
        

In [21]:
row = build_song_row("bad guy", "billie eilish")
pd.DataFrame([row])

MusicBrainz failed for bad guy by billie eilish : cannot access local variable 'row' where it is not associated with a value
Get Request failed with status 404
Last FM error: cannot access local variable 'row' where it is not associated with a value
Searching for "bad guy" by billie eilish...
Genius failed to retrieve lyrics for bad guy: [Errno 401] 401 Client Error: Unauthorized for url: https://api.genius.com/songs/4271772?text_format=plain
The access token provided is expired, revoked, malformed or invalid for other reasons.


UnboundLocalError: cannot access local variable 'row' where it is not associated with a value

In [12]:

# Set a user agent
musicbrainzngs.set_useragent("music-trend-project", "0.1", "your_email@example.com")

# Search for a track
result = musicbrainzngs.search_recordings(query="Euphoria", limit=1)
pprint.pprint(result)
track = result['recording-list'][0]

print("🎵 Title:", track['title'])
print("🧑‍🎤 Artist:", track['artist-credit'][0]['name'])
print("🔑 MBID:", track['id'])


{'recording-count': 10408,
 'recording-list': [{'artist-credit': [{'artist': {'alias-list': [{'alias': 'Scott '
                                                                            'Anthony '
                                                                            'Arceneaux '
                                                                            'Jr.',
                                                                   'sort-name': 'Arceneaux, '
                                                                                'Scott '
                                                                                'Anthony '
                                                                                'Jr.',
                                                                   'type': 'Legal '
                                                                           'name'},
                                                                  {'alias': 'Scrim',
               

In [3]:
import requests

mbid = track['id']  # from previous step
url = f"https://acousticbrainz.org/api/v1/{mbid}/high-level"

response = requests.get(url)
if response.status_code == 200:
    data = response.json()
    print("🎛️ Acoustic Features (high-level):")
    print(data["highlevel"])
else:
    print(f"❌ Could not fetch features for {mbid}")


🎛️ Acoustic Features (high-level):
{'danceability': {'all': {'danceable': 0.00907512474805, 'not_danceable': 0.99092489481}, 'probability': 0.99092489481, 'value': 'not_danceable', 'version': {'essentia': '2.1-beta4', 'essentia_build_sha': 'b0b9016bb43cc2dafcda53132c1610db4853c6a1', 'essentia_git_sha': 'v2.1_beta4', 'extractor': 'music 1.0', 'gaia': '2.4.5', 'gaia_git_sha': 'v2.4.4-44-g95f4851', 'models_essentia_git_sha': 'v2.1_beta1'}}, 'gender': {'all': {'female': 0.20377586782, 'male': 0.796224117279}, 'probability': 0.796224117279, 'value': 'male', 'version': {'essentia': '2.1-beta4', 'essentia_build_sha': 'b0b9016bb43cc2dafcda53132c1610db4853c6a1', 'essentia_git_sha': 'v2.1_beta4', 'extractor': 'music 1.0', 'gaia': '2.4.5', 'gaia_git_sha': 'v2.4.4-44-g95f4851', 'models_essentia_git_sha': 'v2.1_beta1'}}, 'genre_dortmund': {'all': {'alternative': 3.45486239617e-09, 'blues': 4.17774437267e-09, 'electronic': 0.999997079372, 'folkcountry': 1.06058018901e-06, 'funksoulrnb': 6.4876736871

In [14]:


load_dotenv()

lastfm_key = os.getenv("LAST_FM_API_KEY")
artist = "Kendrick Lamar"
track_name = "Euphoria"

url = f"http://ws.audioscrobbler.com/2.0/?method=track.getInfo&artist={artist}&track={track_name}&api_key={lastfm_key}&format=json"
response = requests.get(url)
if response.status_code == 200:
    info = response.json()
    print(info)
    print("🎧 Playcount:", info['track']['playcount'])
    print("🏷️ Tags:", [tag['name'] for tag in info['track']['toptags']['tag']])
else:
    print("❌ Last.fm request failed")


{'track': {'name': 'euphoria', 'url': 'https://www.last.fm/music/Kendrick+Lamar/_/euphoria', 'duration': '385000', 'streamable': {'#text': '0', 'fulltrack': '0'}, 'listeners': '911170', 'playcount': '11976505', 'artist': {'name': 'Kendrick Lamar', 'mbid': '381086ea-f511-4aba-bdf9-71c753dc5077', 'url': 'https://www.last.fm/music/Kendrick+Lamar'}, 'album': {'artist': 'Kendrick Lamar', 'title': 'euphoria', 'url': 'https://www.last.fm/music/Kendrick+Lamar/euphoria', 'image': [{'#text': 'https://lastfm.freetls.fastly.net/i/u/34s/b2946e7a8ca6279fd346cc4f6ae95087.png', 'size': 'small'}, {'#text': 'https://lastfm.freetls.fastly.net/i/u/64s/b2946e7a8ca6279fd346cc4f6ae95087.png', 'size': 'medium'}, {'#text': 'https://lastfm.freetls.fastly.net/i/u/174s/b2946e7a8ca6279fd346cc4f6ae95087.png', 'size': 'large'}, {'#text': 'https://lastfm.freetls.fastly.net/i/u/300x300/b2946e7a8ca6279fd346cc4f6ae95087.png', 'size': 'extralarge'}]}, 'toptags': {'tag': [{'name': 'Diss', 'url': 'https://www.last.fm/tag/D

In [5]:
import lyricsgenius
load_dotenv()
genius_token = os.getenv("GENIUS_API_TOKEN")
genius = lyricsgenius.Genius(genius_token)

song = genius.search_song("Womanizer", "britney spears")
print(song.lyrics[:500])  # show first 500 chars


Searching for "Womanizer" by britney spears...
Done.
67 ContributorsTranslations한국어日本語Womanizer Lyrics[Verse 1]
Superstar, where you from? How's it goin'?
I know you got a clue what you're doin'
You can play brand new to all the other chicks out here
But I know what you are, what you are, baby
Look at you, gettin' more than just a re-up
Baby, you got all the puppets with their strings up
Fakin' like a good one, but I call 'em like I see 'em
I know what you are, what you are, baby

[Pre-Chorus]
Womanizer, woman-womanizer, you're a womanizer
Oh, wom


In [18]:
# we are playing around 
import musicbrainzngs

musicbrainzngs.set_useragent("music-study", "0.1", "ruchipatil@outlook.com")




result = musicbrainzngs.search_recordings(query="womanizer britney", limit=1)
track = result['recording-list'][0]

print("title: ", track['title'])
print("artist: ", track['artist-credit'][0]['name'])
print("MBID: ", track['id'])
mbid = track['id']
# for k, v in track.items():
#     print("--------------------------------------------------------")
#     print(f'{k}: {v}')

pprint.pprint(track)
    

title:  Womanizer (Britney Spears)
artist:  Periods
MBID:  1c31a827-38a4-48c6-9d2a-e5cb9f5a7353
{'artist-credit': [{'artist': {'disambiguation': 'France',
                               'id': 'b6e9dc90-3b10-48c2-800c-9cc84dedf937',
                               'name': 'Periods',
                               'sort-name': 'Periods'},
                    'name': 'Periods'}],
 'artist-credit-phrase': 'Periods',
 'ext:score': '100',
 'id': '1c31a827-38a4-48c6-9d2a-e5cb9f5a7353',
 'length': '202000',
 'release-list': [{'artist-credit': [{'artist': {'id': 'ee91ddf6-36bb-45b1-8613-bf527c36f7cb',
                                                 'name': 'Sick Sad World Music',
                                                 'sort-name': 'Sick Sad World '
                                                              'Music'},
                                      'name': 'Sick Sad World Music'}],
                   'artist-credit-phrase': 'Sick Sad World Music',
                   'country':

MusicBrianz seems to be a metadat databased; they have informations about the songs, artists, release not about the song itself?
MBID -> MusicBrainz ID
UUID (long string of characters) that is uniquely IDing a track, artist, an album,k release grou, etc.

this MBID will be important, as it has been implemented as the foreign key for AcousticBrainz, and they are another database (with other info?)



In [19]:
# mbid = 'eb1ce8de-c51a-4e6a-a03f-15db3163f67e'

url = f"https://acousticbrainz.org/api/v1/{mbid}/high-level"

response = requests.get(url)

if response.status_code == 200:
    data = response.json()
    print("features according to acoustic brainz")
    pprint.pprint(data)
else:
    print("error", response.status_code)

features according to acoustic brainz
{'highlevel': {'danceability': {'all': {'danceable': 0.916314721107,
                                        'not_danceable': 0.0836852863431},
                                'probability': 0.916314721107,
                                'value': 'danceable',
                                'version': {'essentia': '2.1-beta4',
                                            'essentia_build_sha': 'b0b9016bb43cc2dafcda53132c1610db4853c6a1',
                                            'essentia_git_sha': 'v2.1_beta4',
                                            'extractor': 'music 1.0',
                                            'gaia': '2.4.5',
                                            'gaia_git_sha': 'v2.4.4-44-g95f4851',
                                            'models_essentia_git_sha': 'v2.1_beta1'}},
               'gender': {'all': {'female': 0.998635828495,
                                  'male': 0.00136419385672},
                       

ok so acoustic brainz i guess goes into more cateogirizing the acoustics of the song... based on probabilities
what is gaia?

are they using human catogozing too? ok anyways this pretty cool 

hopefully they have variety in nationalities and genres

In [22]:

load_dotenv()

genius_token = os.getenv("GENIUS_API_TOKEN")
genius = lyricsgenius.Genius(genius_token)

song = genius.search_song("womanizer", "britney")

if song:
    print("yay we foun song: ", song.title, "by", song.artist)
    print(song.lyrics[:500])
else: 
    print("WOMPWOMP song not found")

Searching for "womanizer" by britney...
Done.
yay we foun song:  Womanizer by Britney Spears
67 ContributorsTranslations한국어日本語Womanizer Lyrics“Womanizer” was released as the lead single and first track from Britney Spears‘ sixth studio album, Circus (2008).

It’s basically saying, ‘We know what you’re up to.’ It’s about guys cheating on girls. It’s… Read More [Verse 1]
Superstar, where you from? How's it goin'?
I know you got a clue what you're doin'
You can play brand new to all the other chicks out here
But I know what you are, what you are, baby
Look at you, gettin' more than just a 


In [24]:
load_dotenv()
lastfm_key = os.getenv("LAST_FM_API_KEY")
artist = "britney"
track = "womanizer"
url = f"http://ws.audioscrobbler.com/2.0/?method=track.getInfo&artist={artist}&track={track}&api_key={lastfm_key}&format=json"

response = requests.get(url)
if response.status_code == 200:
    data = response.json()
    pprint.pprint(data)
else:
    print("erm idk error: ", response.status_code)


{'track': {'album': {'@attr': {'position': '1'},
                     'artist': 'Britney Spears',
                     'image': [{'#text': 'https://lastfm.freetls.fastly.net/i/u/34s/9d297c35bd0a5feb9f9b697d9b96bf07.png',
                                'size': 'small'},
                               {'#text': 'https://lastfm.freetls.fastly.net/i/u/64s/9d297c35bd0a5feb9f9b697d9b96bf07.png',
                                'size': 'medium'},
                               {'#text': 'https://lastfm.freetls.fastly.net/i/u/174s/9d297c35bd0a5feb9f9b697d9b96bf07.png',
                                'size': 'large'},
                               {'#text': 'https://lastfm.freetls.fastly.net/i/u/300x300/9d297c35bd0a5feb9f9b697d9b96bf07.png',
                                'size': 'extralarge'}],
                     'mbid': '7a782eb6-3c6a-3168-93ea-a554acc85430',
                     'title': 'Circus',
                     'url': 'https://www.last.fm/music/Britney+Spears/Circus'},
         

# data set for exploratory analysis