# Gathering Data

In [8]:
from lyricsgenius import Genius
import pandas as pd
import os, requests, json

In [2]:
# Establishing API keys
GENIUS_API_KEY = os.getenv("GENIUS_API_KEY")
if GENIUS_API_KEY is None:
    raise ValueError("GENIUS_API_KEY cannot be found")
genius = Genius(GENIUS_API_KEY)
    
LASTFM_API_KEY = os.getenv("LASTFM_API_KEY")
if LASTFM_API_KEY is None:
    raise ValueError("LASTFM_API_KEY cannot be found")

In [3]:
# Gathering top tracks of desired genres
BASE_URL = "http://ws.audioscrobbler.com/2.0/"
GENRES = ["pop", "rock", "electronic", "country",
          "religious", "metal", "hip hop", "jazz",
          "R&B", "classical"] # Derived from musicmap.info
raw_data = {}


def getTopTracks(genre, limit=10, key=0):    
    params = {
        "method": "tag.gettoptracks",
        "tag": genre,
        "api_key": LASTFM_API_KEY,
        "format": "json",
        "limit": limit
    }
    
    response = requests.get(BASE_URL, params=params)
    data = response.json()

    # Parsing results
    for track in data["tracks"]["track"]:
        raw_data[key] = {
            "name": track["name"],
            "artist": track["artist"]["name"],
            "genre": genre,
            "duration": track["duration"]
        }
        key += 1
    return key

In [5]:
SONGS_PER_GENRE = 1000 # 10 Genres

key = 0
for genre in GENRES:
    key = getTopTracks(genre, SONGS_PER_GENRE, key)

In [6]:
# Gathering lyrics for songs
genius.remove_section_headers = True
genius.skip_non_songs = False

for track in raw_data:
    song = genius.search_song(raw_data[track]["name"])
    raw_data[track]["lyrics"] = song.lyrics

Searching for "Pink Pony Club"...
Done.
Searching for "no tears left to cry"...
Done.
Searching for "Look What You Made Me Do"...
Done.
Searching for "Style"...
Done.
Searching for "Locked Out of Heaven"...
Done.
Searching for "Espresso"...
Done.
Searching for "Taste"...
Done.
Searching for "Toxic"...
Done.
Searching for "Manchild"...
Done.
Searching for "Last Friday Night (T.G.I.F.)"...
Done.
Searching for "thank u, next"...
Done.
Searching for "Judas"...
Done.
Searching for "Guilty as Sin?"...
Done.
Searching for "Please Please Please"...
Done.
Searching for "Rock Your Body"...
Done.
Searching for "The One That Got Away"...
Done.
Searching for "Call It What You Want"...
Done.
Searching for "Blank Space"...
Done.
Searching for "Love You Like a Love Song"...
Done.
Searching for "Maneater"...
Done.
Searching for "Fortnight (feat. Post Malone)"...
Done.
Searching for "Delicate"...
Done.
Searching for "Oops!...I Did It Again"...
Done.
Searching for "Teenage Dream"...
Done.
Searching for "

Timeout: Request timed out:
HTTPSConnectionPool(host='api.genius.com', port=443): Read timed out. (read timeout=5)

In [7]:
raw_data

{0: {'name': 'Pink Pony Club',
  'artist': 'Chappell Roan',
  'genre': 'pop',
  'duration': '0',
  'lyrics': 'I know you wanted me to stay\nBut I can\'t ignore the crazy visions of me\u2005in\u2005L.A.\xa0\nAnd I heard\u2005that there\'s a special place\nWhere boys\u2005and girls can all be queens every single day\n\nI\'m having wicked dreams of leaving Tennessee\nHear Santa Monica, I swear it\'s calling me\nWon\'t make my mama proud, it\'s gonna cause a scene\nShe sees her baby girl, I know she\'s gonna scream\n\n"God, what have you done?\nYou\'re a pink pony girl, and you dance at the club," oh mama\nI\'m just having fun\nOn the stage in my heels, it\'s where I belong, down at the\n\nPink Pony Club, I\'m gonna keep on dancing at the\nPink Pony Club, I\'m gonna keep on dancing down in\nWest Hollywood, I\'m gonna keep on dancing at the\nPink Pony Club, Pink Pony Club\n\nI\'m up and jaws are on the floor\nLovers in the bathroom and a line outside the door\nBlack lights and a mirrored di

In [9]:
json_data = json.dumps(raw_data, indent=4)
with open("sample_data.json", "w") as f:
    f.write(json_data)