In [1]:
import pickle
import time
from urllib.parse import quote, unquote

import requests
from pymongo import MongoClient

from config import api_access_token

## Import and format artist names for API call

In [2]:
# NOTE(review): pickle.load can execute arbitrary code -- only load files you trust.
# The context manager closes the file even if unpickling raises.
with open('../Data/indie_pop_artists', 'rb') as pickle_in:
    artists = pickle.load(pickle_in)

In [3]:
# Sanity check: how many artist entries were loaded.
len(artists)

716

Replace spaces with `%20` so the artist names can be embedded in the search API URL.

In [4]:
# Percent-encode every reserved character, not only spaces, so that names
# containing '&', '+', '#', '?' or non-ASCII letters survive as the `q` value
# of the search URL (spaces still become '%20').
# unquote() runs first to keep this cell idempotent: re-running it does not
# double-encode the '%' signs produced by a previous run.
artists = [quote(unquote(name), safe='') for name in artists]

### Connecting to MongoDB

In [5]:
# Connect with MongoClient's defaults and select the `spotify` database.
client = MongoClient()
db = client['spotify']
# Shown as the cell output: the collections currently in `spotify`.
db.list_collection_names()

['track_features', 'tracks', 'artists', 'albums', 'track_analysis']

## Calling Search APIs and adding output to MongoDB 

In [29]:
def get_artist_info(json):
    """Extract the first artist from a decoded Spotify search response.

    Parameters
    ----------
    json : dict
        Decoded search response. The artist is read from
        ``json['artists']['items'][0]``.

    Returns
    -------
    dict or bool
        ``False`` when the response has no first artist (a key is missing or
        ``items`` is empty). Otherwise a dict containing whichever of the
        following keys could be read from the artist object:

        artist_id, artist_name, artist_popularity, artists_thumbnail (the
        ``url`` of the last entry in ``images``), num_followers, genres,
        artist_info_api_url, uri, artist_spotify_page.

        A field whose lookup fails is simply left out of the result.

    Notes
    -----
    Only lookup failures (``KeyError``, ``IndexError``, ``TypeError``) are
    treated as "missing data". Any other exception propagates instead of
    being silently swallowed.
    """
    lookup_errors = (KeyError, IndexError, TypeError)

    try:
        artist = json['artists']['items'][0]
    except lookup_errors:
        # Not every artist exists on Spotify; signal "no match" to the caller.
        return False

    # Output key -> sequence of keys/indices to follow inside the artist object.
    field_paths = (
        ('artist_id', ('id',)),
        ('artist_name', ('name',)),
        ('artist_popularity', ('popularity',)),
        ('artists_thumbnail', ('images', -1, 'url')),
        ('num_followers', ('followers', 'total')),
        ('genres', ('genres',)),
        ('artist_info_api_url', ('href',)),
        ('uri', ('uri',)),
        ('artist_spotify_page', ('external_urls', 'spotify')),
    )

    artist_info = {}
    for field, path in field_paths:
        value = artist
        try:
            for step in path:
                value = value[step]
        except lookup_errors:
            # Field absent for this artist: skip it rather than store a placeholder.
            continue
        artist_info[field] = value

    return artist_info

In [30]:
def query_artists(artists, access_token, headers):
    """Takes in a list of artists, an access token, and headers.
    Calls the 'get_artist_info' function and saves the output to a mongoDB.
    """

    for artist in artists:
        url = ("https://api.spotify.com/v1/search?q={}&type=artist"
               .format(artist))

        resp = requests.get(url, headers=headers)
        print(resp.raise_for_status())
        json = resp.json()

        entry = get_artist_info(json)
        if entry:
            db.artists.insert_one(entry)
        else:
            pass

        time.sleep(.1)

    return

In [31]:
# Headers passed along with the Spotify requests; the bearer token is the
# `api_access_token` imported from the local `config` module.
access_token = api_access_token
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Bearer " + access_token,
}

In [None]:
# NOTE(review): the [4:] slice skips the first four artists -- presumably
# they were already stored by an earlier run; confirm before re-running.
query_artists(artists[4:], access_token, headers)