# Gathering Data

In [2]:
from lyricsgenius import Genius
import pandas as pd
import os, requests, json

In [17]:
# Establishing API keys
# Both keys are read from the environment so no secret is stored in the notebook.
GENIUS_API_KEY = os.getenv("GENIUS_API_KEY")
if not GENIUS_API_KEY:
    # An unset variable (None) and an empty one ("") are equally unusable,
    # so both are rejected here instead of failing later during a request.
    raise ValueError("GENIUS_API_KEY cannot be found")
# Genius client used by the lyrics lookup further down in the notebook.
genius = Genius(GENIUS_API_KEY, skip_non_songs=False,
                retries=5, remove_section_headers=True,
                sleep_time=0.5)

LASTFM_API_KEY = os.getenv("LASTFM_API_KEY")
if not LASTFM_API_KEY:
    raise ValueError("LASTFM_API_KEY cannot be found")

In [10]:
# Gathering top tracks of desired genres
# Last.fm API root. HTTPS is used because every request carries the API key
# as a query parameter, which plain HTTP would expose in transit.
BASE_URL = "https://ws.audioscrobbler.com/2.0/"
# Tags queried on Last.fm; top tracks are fetched once per entry.
GENRES = ["pop", "rock", "electronic", "country",
          "religious", "metal", "hip hop", "jazz",
          "R&B"] # Derived from musicmap.info
# Accumulates one record per track, keyed by a running integer index.
raw_data = {}


def getTopTracks(genre, limit=10, key=0, timeout=10):
    """Fetch the top Last.fm tracks for a tag and record them in ``raw_data``.

    Calls the ``tag.gettoptracks`` method of the API at ``BASE_URL`` and stores
    one entry per returned track in the module-level ``raw_data`` dict, using
    consecutive integer keys starting at ``key``.

    Args:
        genre: Tag to query; also saved as each entry's "genre".
        limit: Number of tracks requested (sent as the "limit" parameter).
        key: First ``raw_data`` key to write.
        timeout: Seconds to wait for the HTTP response, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The next unused key, i.e. ``key`` plus the number of tracks stored.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
        RuntimeError: If the JSON body carries an "error" field instead of
            track data.
    """
    params = {
        "method": "tag.gettoptracks",
        "tag": genre,
        "api_key": LASTFM_API_KEY,
        "format": "json",
        "limit": limit
    }

    response = requests.get(BASE_URL, params=params, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()

    # Surface an API-level failure with its message rather than letting it
    # show up as an opaque KeyError on "tracks" below.
    if isinstance(data, dict) and "error" in data:
        detail = data.get("message", data["error"])
        raise RuntimeError(
            f"Last.fm request for tag {genre!r} failed: {detail}"
        )

    # Parsing results
    for track in data["tracks"]["track"]:
        raw_data[key] = {
            "name": track["name"],
            "artist": track["artist"]["name"],
            "genre": genre,
            "duration": track["duration"]
        }
        key += 1
    return key

In [None]:
# Populating dataset
# Number of top tracks requested for each entry in GENRES.
# NOTE(review): the value was missing here (a syntax error); 10 mirrors
# getTopTracks' default limit — confirm the intended dataset size.
SONGS_PER_GENRE = 10

# Start from an empty dataset so re-running this cell cannot leave stale
# records behind from an earlier, larger run.
raw_data.clear()

# getTopTracks returns the next free key, so threading it through the loop
# keeps keys unique and consecutive across genres.
key = 0
for genre in GENRES:
    key = getTopTracks(genre, SONGS_PER_GENRE, key)

In [25]:
# Gathering lyrics for songs
# Every record gains a "lyrics" field; it is left as an empty string when
# the search result has no ``lyrics`` attribute.
for track, info in raw_data.items():
    song = genius.search_song(info["name"], artist=info["artist"])
    info["lyrics"] = getattr(song, "lyrics", "")

Searching for "Gymnopédie No. 1" by Erik Satie...
Done.
Searching for "Clair de lune" by Claude Debussy...
Done.
Searching for "Clair de lune, L. 32" by Claude Debussy...
No results found for: 'Clair de lune, L. 32 Claude Debussy'
Searching for "Carnival of the Animals: The Swan" by Camille Saint-Saëns...
No results found for: 'Carnival of the Animals: The Swan Camille Saint-Saëns'
Searching for "Dies irae" by Giuseppe Verdi...
Done.
Searching for "Symphony No. 5 in C Minor, Op. 67: I. Allegro con brio" by Ludwig van Beethoven...
Done.
Searching for "Adagio for Strings, Op. 11" by Samuel Barber...
Done.
Searching for "The Planets, Op. 32: IV. Jupiter, the Bringer of Jollity" by Gustav Holst...
No results found for: 'The Planets, Op. 32: IV. Jupiter, the Bringer of Jollity Gustav Holst'
Searching for "Für Elise" by Ludwig van Beethoven...
Done.
Searching for "Air on a G String" by Johann Sebastian Bach...
Done.


In [20]:
# Serialise the collected records first, then write them out, so the output
# file is only opened once the JSON text exists.
json_data = json.dumps(raw_data, indent=4)
with open("sample_data2.json", mode="w") as out_file:
    out_file.write(json_data)