## Grab album info for all artists

In [55]:
import time
import os

import pandas as pd
import pickle

import requests
from pymongo import MongoClient

import spotipy

### Connecting to MongoDB

In [2]:
# Open a MongoDB client with no explicit host/port, i.e. MongoClient's default connection settings.
client = MongoClient()

In [3]:
# Handle to the `spotify` database; every collection below is read from or written to through it.
db = client["spotify"]

In [4]:
# Display the collections currently present in the database as a sanity check before querying.
db.list_collection_names()

['artists', 'albums']

In [51]:
# Pull only the fields needed downstream (Mongo's `_id` is excluded) for every stored artist.
artist_fields = {'_id': 0, 'artist_name': 1, 'artist_id': 1, 'uri': 1}
cursor = db.artists.find({}, artist_fields)
df = pd.DataFrame(list(cursor))
df.head()

Unnamed: 0,artist_id,artist_name,uri
0,7kAKqNxPBkfjgdHzUIdBtI,Suburban Kids With Biblical Names,spotify:artist:7kAKqNxPBkfjgdHzUIdBtI
1,163tK9Wjr9P9DmM0AVK7lm,Lorde,spotify:artist:163tK9Wjr9P9DmM0AVK7lm
2,6GktICKX3ztnngGl4WPCNV,Rhythm Spectacle,spotify:artist:6GktICKX3ztnngGl4WPCNV
3,14H7ag1wpQOsPPQJOD6Dqr,Zero 7,spotify:artist:14H7ag1wpQOsPPQJOD6Dqr
4,4wavWMHhIiUXxwCuWsHzXo,The Delgados,spotify:artist:4wavWMHhIiUXxwCuWsHzXo


In [6]:
# Number of artist rows loaded from MongoDB.
len(df)

698

## Removing non-queried artists

In [53]:
# Names of the artists that are present in the MongoDB `artists` collection.
returned_artists = df["artist_name"].tolist()

In [56]:
# Load the list of artist names that was originally queried.
# NOTE(review): pickle.load can execute arbitrary code; only load files you created yourself.
# The `with` block closes the file even if unpickling raises.
with open('Data/indie_pop_artists', 'rb') as pickle_in:
    queried_artists = pickle.load(pickle_in)

In [60]:
# Sizes of the queried list, the returned list, and their overlap (in that order).
for names in (queried_artists, returned_artists):
    print(len(names))
print(len(set(queried_artists) & set(returned_artists)))

716
698
573


In [63]:
# One-column frame of artist names that were both queried and present in the MongoDB results.
# The column name is passed to the constructor so an empty overlap still yields a valid
# (empty) `artist_name` column; assigning `.columns` afterwards raises a length-mismatch
# error when the frame was built from an empty list.
artists_df = pd.DataFrame(
    list(set(queried_artists).intersection(returned_artists)),
    columns=['artist_name'],
)
artists_df.head()

Unnamed: 0,artist_name
0,Basia Bulat
1,Amy Millan
2,Emma Pollock
3,Elliott Smith
4,Ladytron


In [64]:
# Keep only the rows whose artist_name appears in artists_df (inner join on the name).
df = pd.merge(df, artists_df, how='inner', on='artist_name')
df.head()

Unnamed: 0,artist_id,artist_name,uri
0,7kAKqNxPBkfjgdHzUIdBtI,Suburban Kids With Biblical Names,spotify:artist:7kAKqNxPBkfjgdHzUIdBtI
1,163tK9Wjr9P9DmM0AVK7lm,Lorde,spotify:artist:163tK9Wjr9P9DmM0AVK7lm
2,14H7ag1wpQOsPPQJOD6Dqr,Zero 7,spotify:artist:14H7ag1wpQOsPPQJOD6Dqr
3,4wavWMHhIiUXxwCuWsHzXo,The Delgados,spotify:artist:4wavWMHhIiUXxwCuWsHzXo
4,6aA1dl6HYftGka1fWjC5HB,Doleful Lions,spotify:artist:6aA1dl6HYftGka1fWjC5HB


In [72]:
# Count of distinct artist URIs remaining after the filter.
df["uri"].nunique()

573

## Calling the Spotify artist-albums API and adding output to MongoDB

In [67]:
def get_album_info(album):
    """Extract the relevant fields from a Spotify album object.

    Parameters
    ----------
    album : dict
        Album object as returned in the ``items`` list of an artist-albums
        response.

    Returns
    -------
    dict
        Contains whichever of the following keys could be read from ``album``
        (in this order): ``album_id``, ``album_name``, ``artist``,
        ``album_uri``, ``album_spotify_page``, ``album_thumbnail``,
        ``artist_id``. A field whose path is missing from ``album`` is simply
        left out, so the result may be empty.
    """
    # Output key -> sequence of keys/indexes to follow inside `album`.
    field_paths = (
        ('album_id', ('id',)),
        ('album_name', ('name',)),
        ('artist', ('artists', 0, 'name')),
        ('album_uri', ('uri',)),
        ('album_spotify_page', ('external_urls', 'spotify')),
        ('album_thumbnail', ('images', 1, 'url')),
        ('artist_id', ('artists', 0, 'id')),
    )

    album_info = {}
    for field, path in field_paths:
        value = album
        try:
            for key in path:
                value = value[key]
        except (LookupError, TypeError):
            # Missing key, short list, or a non-subscriptable value (e.g. None):
            # skip just this field. Anything else (including Ctrl-C) propagates.
            continue
        album_info[field] = value

    return album_info

In [68]:
def get_artist_albums(artist_uri_s, access_token):
    """Fetch every album for the given artists and store them in MongoDB.

    Parameters
    ----------
    artist_uri_s : iterable of str
        Spotify artist URIs to look up.
    access_token : str
        Spotify access token, passed to ``spotipy.Spotify(auth=...)``.

    Notes
    -----
    Writes to the notebook-level ``db.albums`` collection. Each album is
    upserted by ``album_id`` so that re-running the cell (or seeing the same
    album under several artists) updates the stored document instead of
    inserting a duplicate. Albums for which no ``album_id`` could be extracted
    are skipped rather than stored as key-less documents.
    """

    sp = spotipy.Spotify(auth=access_token)

    for artist_uri in artist_uri_s:

        results = sp.artist_albums(artist_uri, album_type='album')
        # Copy the first page so later pages are not appended into the response object.
        albums = list(results['items'])

        # Follow the pagination links until the API reports no further page.
        while results['next']:
            results = sp.next(results)
            albums.extend(results['items'])

        for album in albums:
            album_info = get_album_info(album)
            album_id = album_info.get('album_id')
            if album_id is None:
                continue
            db.albums.update_one(
                {'album_id': album_id},
                {'$set': album_info},
                upsert=True,
            )

In [69]:
# Read the Spotify access token from the environment so it is never committed with the notebook.
# Set it before starting the kernel, e.g. `export SPOTIFY_ACCESS_TOKEN=...`.
access_token = os.environ.get("SPOTIFY_ACCESS_TOKEN", "")
if not access_token:
    print("SPOTIFY_ACCESS_TOKEN is not set; Spotify API calls will not be authenticated.")

In [73]:
# De-duplicated list of artist URIs to request albums for (set order is arbitrary).
uri_s = list(set(df["uri"].values.tolist()))

In [75]:
# Fetch the albums for every unique artist URI and write them to the `albums` collection.
# This performs one or more Spotify API requests per artist plus MongoDB writes.
get_artist_albums(uri_s, access_token)