In [196]:
import requests
from bs4 import BeautifulSoup
import os
import pandas as pd
from tqdm import tqdm

<h3>Create Dataset from Rap Genius Data</h3>

In [4]:
# Load the Genius API token from a file kept outside the notebook.
with open("../api-key.txt", "r") as file:
    # Strip surrounding whitespace: a trailing newline saved by an editor would
    # otherwise become part of the Authorization header value built below.
    access_token = file.read().strip()
# HTTPS, so the bearer token is not transmitted in clear text.
base_url = "https://api.genius.com"
headers = {'Authorization': f'Bearer {access_token}'}
search_url = base_url + "/search"
song_title = "Lithium"
params = {'q': song_title}
response = requests.get(search_url, params=params, headers=headers)
# Displayed as the cell output to confirm that the token was accepted.
response.status_code

200

In [6]:
# List every search hit as "title ---- artist", followed by the API path
# whose last segment is the song id, then a blank line.
data = response.json()['response']['hits']
for hit in data:
    result = hit['result']
    print(result['title'], "----", result['primary_artist']['name'])
    print(result["api_path"])
    print()

Lithium ---- Nirvana
/songs/56433

Lithium ---- Evanescence
/songs/183356

Lithium ---- Trippie Redd
/songs/3454487

Lithium ---- BONES
/songs/4524735

Black Lithium ---- Canibus
/songs/3795666

Lithium (Synthesis) ---- Evanescence
/songs/3235559

Lithium ---- Method Man
/songs/3945651

Lithium Sunset ---- Sting
/songs/389349

Lithium Lips ---- Mac Lethal
/songs/151212

Lithium ---- Muse
/songs/766569



In [11]:
# Request the referents of a single song from the /referents endpoint.
# "56433" is the id listed for "Lithium ---- Nirvana" in the search output (/songs/56433).
lyric_query = f"{base_url}/referents"
headers = dict(Authorization=f'Bearer {access_token}')

params = dict(song_id="56433")
response = requests.get(url=lyric_query, headers=headers, params=params)
# Show the lyric fragment of the first referent as a quick sanity check.
response.json()['response']['referents'][0]['fragment']

'I’m so lonely, that’s okay, I shaved my head'

In [12]:
# For each referent print its id, its lyric fragment, and the child nodes of
# the first DOM element of its first annotation, followed by a blank line.
referents_payload = response.json()['response']['referents']
for referent in referents_payload:
    print(referent["id"], referent['fragment'], sep="\n")
    first_annotation = referent['annotations'][0]
    first_dom_node = first_annotation['body']['dom']['children'][0]
    print(first_dom_node['children'], end="\n\n")

17701902
I’m so lonely, that’s okay, I shaved my head
['As far as anyone knows, Kurt Cobain has never actually shaved his head and made this line up for the purpose of the song.']

11055484
And I’m not sad
['He uses a negative phrase like “I’m not sad” rather than “I am happy” or something positive. This puts the word “sad” in the listener’s head, and conveys a more depressed meaning than an uplifting one.']

7619144
Yeah, yeah
 Yeah, yeah
 Yeah, yeah
 Yeah, yeah
 Yeah, yeah
 Yeah, yeah
 Yeah
['The chorus also contrasts the verses as to illustrate the contrast between mania and depression in the Bipolar mind.']

1970348
I’m so happy ’cause today I found my friends, they’re in my head
['This song is about a man becoming religious to get over the death of his spouse, so I would suggest that these lines set the tone of that story. He’s happy because he has finally found friends in his head and this makes him happy because it is allowing him to finally get over his depression of his wife d

In [None]:
def get_annotation_string_recurse(annotation_str, annotation):
    """Append all text found in a list of annotation DOM nodes to a string.

    Parameters
    ----------
    annotation_str : str
        Text accumulated so far; the extracted text is appended to it.
    annotation : list
        DOM nodes. A ``str`` node is text; a ``dict`` node is an element whose
        nested nodes (if any) are stored under its ``"children"`` key.

    Returns
    -------
    str
        ``annotation_str`` followed by the text of every node, depth first and
        in document order. Nodes of any other type are ignored.
    """
    for data in annotation:
        if isinstance(data, str):
            annotation_str += data
        elif isinstance(data, dict):
            # Descend into the element's own children (not the current list
            # again, which would never terminate). The accumulator is passed
            # down and the result assigned back, so nothing is appended twice.
            annotation_str = get_annotation_string_recurse(
                annotation_str, data.get("children", [])
            )
    return annotation_str

In [157]:
def _dom_text(node):
    """Return the concatenated text of one annotation DOM node and its descendants."""
    if isinstance(node, str):
        return node
    if isinstance(node, dict):
        # Elements (links, emphasis, nested paragraphs, ...) keep their
        # content under "children"; elements without children contribute "".
        return "".join(_dom_text(child) for child in node.get("children", []))
    return ""


def get_lyrics_annotations(referents):
    """Extract lyric fragments and their annotation text from API referents.

    For every referent, the annotation text is taken from the first node of
    the DOM of its first annotation
    (``referent['annotations'][0]['body']['dom']['children'][0]``). Text inside
    nested elements is included rather than dropped, so sentences containing
    links or emphasis stay complete.

    Parameters
    ----------
    referents : list of dict
        Referent objects as found under ``['response']['referents']``.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(lyric_list, annotation_list)``: parallel lists with one entry per
        referent, in input order. A referent whose annotation DOM has no
        nodes yields an empty annotation string.

    Raises
    ------
    KeyError, IndexError
        If a referent lacks ``'fragment'`` or the
        ``annotations[0].body.dom.children`` path.
    """
    lyric_list = []
    annotation_list = []
    for referent in referents:
        dom_children = referent['annotations'][0]['body']['dom']['children']
        lyric = referent['fragment']
        # Only the first top-level node is used, as before.
        first_node = dom_children[0] if dom_children else ""
        annotation_list.append(_dom_text(first_node))
        lyric_list.append(lyric)
    return lyric_list, annotation_list
# Parse the referents held by the current `response` into two parallel lists:
# lyric fragments and their annotation texts.
referents = response.json()['response']['referents']
lyric_list, annotation_list = get_lyrics_annotations(referents)

In [158]:
def get_song_ids_list_from_query(headers, song_title):
    """Search Genius for ``song_title`` and list the songs that were found.

    Parameters
    ----------
    headers : dict
        HTTP headers sent with the request.
    song_title : str
        Free-text query passed as the ``q`` parameter of ``/search``.

    Returns
    -------
    list of tuple
        One ``(song_id, title, artist_name)`` tuple per hit, in response
        order. ``song_id`` is a string: the last path segment of the hit's
        ``api_path``.

    Notes
    -----
    Reads the notebook-level ``base_url``.
    """
    search_url = base_url + "/search"
    params = {'q': song_title}
    response = requests.get(search_url, params=params, headers=headers)
    hits = response.json()['response']['hits']
    song_ids_list = []
    for hit in hits:
        result = hit['result']
        # Named for what they hold: the tuple order is (id, title, artist),
        # which is how get_lyrics_annotations_from_song_ids unpacks it.
        title = result['title']
        artist_name = result['primary_artist']['name']
        song_id = result["api_path"].split("/")[-1]
        song_ids_list.append((song_id, title, artist_name))
    return song_ids_list

In [174]:
def get_lyrics_annotations_from_song_ids(headers, song_ids_list):
    """Fetch the referents of each listed song and collect lyrics with annotations.

    Parameters
    ----------
    headers : dict
        HTTP headers sent with every request.
    song_ids_list : iterable of tuple
        ``(song_id, song, artist)`` triples.

    Returns
    -------
    dict
        Maps each ``song_id`` to a dict with the keys ``"artist"``, ``"song"``,
        ``"lyric_list"`` and ``"annotation_list"``.

    Notes
    -----
    Reads the notebook-level ``lyric_query`` URL and calls
    ``get_lyrics_annotations``.
    NOTE(review): exactly one request is made per song; if the endpoint
    paginates its results, later pages are not fetched -- confirm.
    """
    collected = {}
    for song_id, song, artist in song_ids_list:
        reply = requests.get(lyric_query, params={'song_id': song_id}, headers=headers)
        lyrics, annotations = get_lyrics_annotations(reply.json()['response']['referents'])
        collected[song_id] = {
            "artist": artist,
            "song": song,
            "lyric_list": lyrics,
            "annotation_list": annotations,
        }
    return collected

In [197]:
# Free-text search terms; each one is sent to the search endpoint in turn.
songs_to_search = ["lithium", "You", "Hey", "Congratulations", "love", "amazing", "New York", "Paris"]
# Keyed by song id, so a song returned by more than one query is stored once
# (dict.update overwrites the earlier entry).
song_data_dict = {}
for song in tqdm(songs_to_search):
    song_ids_list = get_song_ids_list_from_query(headers, song)
    song_data_dict.update(get_lyrics_annotations_from_song_ids(headers, song_ids_list))

100%|██████████| 8/8 [00:24<00:00,  3.11s/it]


In [198]:
# Flatten the per-song dictionary into one row per (lyric fragment, annotation)
# pair, repeating the song-level fields on every row.
records = []
for song_id, song_data in song_data_dict.items():
    # lyric_list and annotation_list are parallel lists built in lockstep.
    for lyric, annotation in zip(song_data["lyric_list"], song_data["annotation_list"]):
        records.append(
            {
                "song_id": song_id,
                "artist": song_data["artist"],
                "song": song_data["song"],
                "lyric": lyric,
                "annotation": annotation,
            }
        )

# `columns` pins the column order and keeps the columns even with zero rows.
df = pd.DataFrame(records, columns=["song_id", "artist", "song", "lyric", "annotation"])

In [199]:
# Preview the first rows of the flattened lyric/annotation table.
df.head()

Unnamed: 0,song_id,artist,song,lyric,annotation
0,56433,Nirvana,Lithium,"I’m so lonely, that’s okay, I shaved my head","As far as anyone knows, Kurt Cobain has never ..."
1,56433,Nirvana,Lithium,And I’m not sad,He uses a negative phrase like “I’m not sad” r...
2,56433,Nirvana,Lithium,"Yeah, yeah\n Yeah, yeah\n Yeah, yeah\n Yeah, y...",The chorus also contrasts the verses as to ill...
3,56433,Nirvana,Lithium,"I’m so happy ’cause today I found my friends, ...",This song is about a man becoming religious to...
4,56433,Nirvana,Lithium,"I’m so excited, I can’t wait to meet you there...",Cobain is speaking on terms of depression and ...


In [200]:
# Number of rows, i.e. lyric fragment/annotation pairs collected across all songs.
len(df)

544

In [201]:
# Write the dataset to disk.
# NOTE(review): with the pandas default (index=True) the row index is written as an
# extra unnamed first column; pass index=False if no consumer relies on it -- confirm.
df.to_csv("lyrics_annotations.csv")