# Gathering Data

In [62]:
from lyricsgenius import Genius
import pandas as pd
import os, requests, json, time

In [63]:
# Establishing API keys
GENIUS_API_KEY = os.getenv("GENIUS_API_KEY")
if GENIUS_API_KEY is None:
    raise ValueError("GENIUS_API_KEY cannot be found")
genius = Genius(GENIUS_API_KEY, skip_non_songs=False, 
                retries=5, remove_section_headers=True, 
                sleep_time=0.5)
    
LASTFM_API_KEY = os.getenv("LASTFM_API_KEY")
if LASTFM_API_KEY is None:
    raise ValueError("LASTFM_API_KEY cannot be found")

In [64]:
# Gathering top tracks of desired genres
BASE_URL = "http://ws.audioscrobbler.com/2.0/"
GENRES = ["pop", "rock", "electronic", "country",
          "metal", "hip hop", "jazz", "R&B"] # Derived from musicmap.info
raw_data = {}

def getTopTracks(genre, limit=10, key=0):    
    params = {
        "method": "tag.gettoptracks",
        "tag": genre,
        "api_key": LASTFM_API_KEY,
        "format": "json",
        "limit": limit
    }
    
    response = requests.get(BASE_URL, params=params)
    data = response.json()

    # Parsing results
    for track in data["tracks"]["track"]:
        raw_data[key] = {
            "name": track["name"],
            "artist": track["artist"]["name"],
            "genre": genre,
            # "duration": track["duration"]
        }
        key += 1
    return key

In [65]:
# Populating dataset
SONGS_PER_GENRE = 250 # 8 Genres
SLEEP_TIME = 2 # seconds between requests


key = 0
for genre in GENRES:
    key = getTopTracks(genre, SONGS_PER_GENRE, key)
    time.sleep(SLEEP_TIME)

In [66]:
# Gathering lyrics for songs
def getLyrics(data, elm=0):
    try:
        for track in data:
            print(track, end=") ")
            song = genius.search_song(data[track]["name"], artist=data[track]["artist"])
            if hasattr(song, "lyrics"):
                raw_data[track]["lyrics"] = song.lyrics
            else:
                raw_data[track]["lyrics"] = ""
            elm += 1
    except:
        raw_data_update = dict(list(data.items())[elm:])
        getLyrics(raw_data_update)
        
getLyrics(raw_data)

0) Searching for "Pink Pony Club" by Chappell Roan...
Done.
1) Searching for "no tears left to cry" by Ariana Grande...
Done.
2) Searching for "Look What You Made Me Do" by Taylor Swift...
Done.
3) Searching for "Style" by Taylor Swift...
Done.
4) Searching for "Locked Out of Heaven" by Bruno Mars...
Done.
5) Searching for "Espresso" by Sabrina Carpenter...
Done.
6) Searching for "Taste" by Sabrina Carpenter...
Done.
7) Searching for "Toxic" by Britney Spears...
Done.
8) Searching for "Manchild" by Sabrina Carpenter...
Done.
9) Searching for "Last Friday Night (T.G.I.F.)" by Katy Perry...
Done.
10) Searching for "thank u, next" by Ariana Grande...
Done.
11) Searching for "Judas" by Lady Gaga...
Done.
12) Searching for "Guilty as Sin?" by Taylor Swift...
Done.
13) Searching for "Please Please Please" by Sabrina Carpenter...
Done.
14) Searching for "Rock Your Body" by Justin Timberlake...
Done.
15) Searching for "The One That Got Away" by Katy Perry...
Done.
16) Searching for "Call It Wh

In [61]:
raw_data_update

{187: {'name': 'Girlfriend',
  'artist': 'Avril Lavigne',
  'genre': 'pop',
  'lyrics': "Hey, hey, you, you, I don't like your girlfriend\nNo way, no way, I think you need a new one\nHey, hey, you, you, I could be your girlfriend\nHey, hey, you, you, I know that you like me\nNo way, no way, no it's not a secret\nHey, hey, you, you, I want to be your girlfriend\n\nYou're so fine, I want you mine, you're so delicious\nI think about you all the time, you're so addictive\nDon't you know what I can do to make you feel alright?\nDon't pretend, I think you know I'm damn precious\nAnd hell yeah, I'm the motherfuckin' princess\nI can tell you like me too, and you know I'm right\n\nShe's like, so whatever\nYou could do so much better\nI think we should get together now\nAnd that's what everyone's talkin' about\n\nHey, hey, you, you, I don't like your girlfriend\nNo way, no way, I think you need a new one\nHey, hey, you, you, I could be your girlfriend\nHey, hey, you, you, I know that you like me

In [67]:
# Writing data to file
json_data = json.dumps(raw_data, indent=4)
with open("data.json", "w") as f:
    f.write(json_data)