In [1]:
import requests
import pandas as pd

from pymongo import MongoClient

from config import genius_access_token

## Connecting to MongoDB

In [4]:
# Open a client with the default connection settings and select the
# "genius" database; the last line displays its collections as a sanity check.
client = MongoClient()
db = client["genius"]
db.list_collection_names()

['song_urls', 'song_lyrics', 'artist_ids']

## Get Artist IDs

In [5]:
# Project only the artist_id field; Mongo's own _id is excluded.
cursor = db.artist_ids.find({}, {'_id': 0, 'artist_id': 1})

# Build the list straight from the cursor instead of going through a
# DataFrame: an empty collection then yields [] rather than an
# AttributeError, and documents lacking the field are skipped instead of
# turning every id into a float (NaN coercion), which would corrupt the
# request URLs built from these ids.
artist_ids = [doc['artist_id'] for doc in cursor if 'artist_id' in doc]
artist_ids[:5]

[358098, 63636, 1602422, 353058, 21765]

## Write to MongoDB

In [6]:
def get_json(headers, artist_id, page_num, timeout=30):
    """
    Fetch one page of an artist's song list from the Genius API.

    Parameters
    ----------
    headers : dict
        HTTP headers sent with the request (including any
        authorization header the caller has set).
    artist_id : int or str
        Artist identifier interpolated into the URL path.
    page_num : int
        Page number, sent as the ``page`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests`` may block indefinitely.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status
        (via ``raise_for_status``).
    """

    # HTTPS, so credentials carried in `headers` are never sent in clear text.
    url = ("https://api.genius.com/artists/{}/songs?page={}"
           .format(artist_id, page_num))

    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()
    # Returned directly rather than bound to a local called `json`,
    # which would shadow the standard-library module name.
    return resp.json()

In [9]:
def save_song_info_to_mongoDB(json, artist_id):

    """
    Store the songs of one API response page in the ``song_urls``
    collection of the module-level ``db``.

    Parameters
    ----------
    json : dict
        Decoded API response; the songs are read from
        ``json['response']['songs']``.
    artist_id : int
        Artist the page was requested for; stored on every document.

    For each song a document with the keys ``artist_id``, ``title``,
    ``id`` and ``url`` is written. Documents are upserted on the pair
    (``artist_id``, ``id``), so saving the same page twice leaves a
    single copy of each song instead of inserting duplicates.

    Raises
    ------
    KeyError
        If the response or a song lacks one of the expected keys.
    """

    # Fields copied verbatim from each song record.
    wanted_fields = ('title', 'id', 'url')

    for song in json['response']['songs']:
        song_info = {'artist_id': artist_id}
        song_info.update((field, song[field]) for field in wanted_fields)

        # Upsert rather than insert: re-running the scrape (for example
        # after an HTTP error aborted it midway) must not duplicate songs
        # that were already stored.
        db.song_urls.replace_one(
            {'artist_id': artist_id, 'id': song['id']},
            song_info,
            upsert=True,
        )

In [8]:
def get_artist_ids(artist_ids, access_token, headers):

    """
    Download and store every song page for each artist.

    For each entry of ``artist_ids`` the pages are requested in
    ascending order starting at 1 via ``get_json``; each response is
    handed to ``save_song_info_to_mongoDB``. Paging for an artist stops
    once a response reports a falsy ``next_page``.

    ``access_token`` is accepted but not used inside this function;
    the request headers are passed through ``headers``.
    """

    for current_id in artist_ids:
        page = 0

        while True:
            page += 1
            payload = get_json(headers, current_id, page)
            save_song_info_to_mongoDB(payload, current_id)

            # The response itself tells us whether another page follows.
            if not payload['response']['next_page']:
                break

In [10]:
# API token imported from the local config module.
access_token = genius_access_token

# Headers shared by every API request; the token is sent as a bearer credential.
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Bearer " + access_token,
}

In [11]:
# Run the scrape: one API request per page for every artist,
# with each page written to MongoDB.
get_artist_ids(artist_ids=artist_ids,
               access_token=access_token,
               headers=headers)