# Scraping Spotify Data for All Songs from All Albums by a Producer

## End Goal:

Take All Pages in Wikipedia's [Category:Albums by producer](https://en.wikipedia.org/wiki/Category:Albums_by_producer) and scrape all album info and then call featurized audio from Spotify's API.


### Goal for Today (March 28, 2019):

Take a page like Wikipedia's [Category:Albums produced by Rick Rubin](https://en.wikipedia.org/wiki/Category:Albums_produced_by_Rick_Rubin) and return featurized audio from every song on every album in the list.

In [83]:
# Standard Imports

import numpy as np
import pandas as pd
import os
import sys
from collections import defaultdict
from importlib import reload
from bs4 import BeautifulSoup
import requests
from time import sleep
import time

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'


# Load MongoDB

from pymongo import MongoClient
client = MongoClient()
# Access/Initiate Database
db = client['producer_db']
# Access/Initiate Table
tab = db['songs']
collection = db.tab

# Authorize Spotify API

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [84]:
def get_category_links(wiki_url):
    """
    Takes a link to a category Wikipedia page and returns a list of urls to the hyperlinks
    """
    
    wiki_urls = []
    domain = 'https://en.wikipedia.org'
    
    html = requests.get(wiki_url).content
    soup = BeautifulSoup(html, 'html.parser')
    
    wiki_links = soup.find_all('div', class_="mw-category")[0].find_all('a')
    for link in wiki_links:
        path = link['href']
        url = domain + path
        wiki_urls.append(url)
        
    # check for a "next page" button
    next_page_url = ''
    if soup.find_all('div', {'id':"mw-subcategories"}):
        next_page_links = soup.find_all('div', {'id':"mw-subcategories"})[0].find_all('a') #next page like will be within the first 5 links
        for link in next_page_links:
            if link.text == 'next page':
                next_page_path = link['href']
                next_page_url = domain + next_page_path
        
    # Append links from next pages recursively
    if next_page_url: 
        print('getting links from {}'.format(next_page_url))
        next_page_wiki_urls = get_category_links(next_page_url)
        wiki_urls = wiki_urls + next_page_wiki_urls
    
    
    return wiki_urls

In [15]:
wiki_url2 = 'https://en.wikipedia.org/wiki/Category:Albums_by_producer'
links = get_category_links(wiki_url2)
len(links)

getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Bellotte%2C+Pete%0AAlbums+produced+by+Pete+Bellotte#mw-subcategories
getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Chess%2C+Leonard%0AAlbums+produced+by+Leonard+Chess#mw-subcategories
getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Dj+Premier%0AAlbums+produced+by+DJ+Premier#mw-subcategories
getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Frayne%2C+George%0AAlbums+produced+by+George+Frayne#mw-subcategories
getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Hayton%2C+Lennie%0AAlbums+produced+by+Lennie+Hayton#mw-subcategories
getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Juicy+J%0AAlbums+produced+by+Juicy+J#mw-subcategories
gett

2581

In [16]:
links[:10]

['https://en.wikipedia.org/wiki/Category:Albums_produced_by_4th_Disciple',
 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_8Ball_%26_MJG',
 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_9th_Wonder',
 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_The_45_King',
 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_88-Keys',
 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_Kenny_Aaronson',
 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_Jim_Abbiss',
 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_Dave_Abbruzzese',
 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_Daniel_Abraham_(record_producer)',
 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_Josh_Abraham']

In [17]:
wiki_url = 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_Rick_Rubin'
rr_albums = get_category_links(wiki_url)

In [18]:
rr_albums

['https://en.wikipedia.org/wiki/12_Songs_(Neil_Diamond_album)',
 'https://en.wikipedia.org/wiki/13_(Black_Sabbath_album)',
 'https://en.wikipedia.org/wiki/21_(Adele_album)',
 'https://en.wikipedia.org/wiki/All_World_2',
 'https://en.wikipedia.org/wiki/All_World:_Greatest_Hits',
 'https://en.wikipedia.org/wiki/American_IV:_The_Man_Comes_Around',
 'https://en.wikipedia.org/wiki/American_Grafishy',
 'https://en.wikipedia.org/wiki/American_III:_Solitary_Man',
 'https://en.wikipedia.org/wiki/American_Recordings_(album)',
 'https://en.wikipedia.org/wiki/American_V:_A_Hundred_Highways',
 'https://en.wikipedia.org/wiki/American_VI:_Ain%27t_No_Grave',
 'https://en.wikipedia.org/wiki/Amethyst_Rock_Star',
 'https://en.wikipedia.org/wiki/Angus_%26_Julia_Stone_(album)',
 'https://en.wikipedia.org/wiki/Antennas_to_Hell',
 'https://en.wikipedia.org/wiki/Anthology:_Through_the_Years',
 'https://en.wikipedia.org/wiki/Armed_Love',
 'https://en.wikipedia.org/wiki/Artpop',
 'https://en.wikipedia.org/wiki/

In [19]:
wiki_url2 = 'https://en.wikipedia.org/wiki/Category:Albums_by_producer'
links = get_category_links(wiki_url2)

getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Bellotte%2C+Pete%0AAlbums+produced+by+Pete+Bellotte#mw-subcategories
getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Chess%2C+Leonard%0AAlbums+produced+by+Leonard+Chess#mw-subcategories
getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Dj+Premier%0AAlbums+produced+by+DJ+Premier#mw-subcategories
getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Frayne%2C+George%0AAlbums+produced+by+George+Frayne#mw-subcategories
getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Hayton%2C+Lennie%0AAlbums+produced+by+Lennie+Hayton#mw-subcategories
getting links from https://en.wikipedia.org/w/index.php?title=Category:Albums_by_producer&subcatfrom=Juicy+J%0AAlbums+produced+by+Juicy+J#mw-subcategories
gett

In [20]:
len(links)

2581

Cool! We now have a way to get lists of albums. Can Spotify give us a list of songs in an album?

In [21]:
album_id = sp.search(q='album:just in capes',type='album')['albums']['items'][0]['id']
for song in sp.album_tracks(album_id)['items']:
    print(song['name'], '\t|\t', song['id'])

Drumming Song 	|	 3DQGus4N9O9lk343trfNno
Emotions 	|	 2yrHacdhmKCntWO8pdfj6t
Cry Me a River 	|	 1rj6DjmbzgIj5muwrRvoX5
Renegade 	|	 2UlAJ40lg23zpfAF2fMhLL
Robots 	|	 4y6RREsxp7FFI0TWvDqkGm
Valerie 	|	 7r9CtQbliORexZwbOMeDgO
Gone 	|	 6SYptf8cpTTsVmmI0GWj2F
Forget You 	|	 0IqTlV1tMqAG7fhnv4xRcZ
Mashup (Love the Way You Lie/Dynamite/Teenage Dream) 	|	 1lZW1mmm6cQlHOG89Uoj4s
Sparkling Diamonds 	|	 12SEa6vgBhlpgwM8F91GQs


Affirmative.

Two more things to do

- Write code that takes an Album page and returns the Album Name and the Artist Name
- Write code that takes an Album and Artist name and puts the audio analysis into the mongo DB (without duplicates)

In [33]:
# Find Album Name

alb_url = 'https://en.wikipedia.org/wiki/Peasants,_Pigs_%26_Astronauts'
alb_html = requests.get(alb_url).content
alb_soup = BeautifulSoup(alb_html, 'html.parser')
alb_name = alb_soup.find_all('th', class_='summary album')[0].text
print(alb_name)

Peasants, Pigs & Astronauts


In [34]:
# Can spotify's query handle the comma and ampersand?

album_id = sp.search(q='album:{}'.format(alb_name),type='album')['albums']['items'][0]['id']
for song in sp.album_tracks(album_id)['items']:
    print(song['name'], '\t|\t', song['id'])
    
# That's a 10-4, roger.

Great Hosannah 	|	 4vxsWMxEMbmAPoCVdtUysm
Mystical Machine Gun 	|	 1to0i1rS29pUCdKQjDdMIF
S.O.S. 	|	 39cnQvXueMrwRkBsovBuxW
Radhe Radhe 	|	 1E7wdPm8dsZ6Tz3QjzEnUb
I'm Still Here 	|	 420bdNzSzj0EQI14yiLo0r
Shower Your Love - With Shenai 	|	 3SbocF4Hfqk10f3ZFTkfV8
108 Battles (Of The Mind) 	|	 2m6OoG5j12fu8vnLmNASyZ
Sound Of Drums 	|	 2GBVHsfTYgIT4x5yhToC3S
Timeworm 	|	 0fdNoDw9W5bu1rTiwgBWv8
Last Farewell 	|	 1kJHW1e87MkPhCadtPcfHX
Golden Avatar 	|	 3W0ttwJzW5XSVqO6pmCuWr
Namami Nanda-Nandana 	|	 2LF6nAg7CifHtL3OlYOunG
Stotra 	|	 5WGP42OpPRLk5Jru6DfBnU


Here's some issues with the data and doing data collection this way.  From wikipedia (**emphasis** mine):

> Initial recording sessions for the album were produced by John Leckie but the band soon decided to bring in producers George Drakoulias and Rick Rubin instead.[1] Eventually **Drakoulias and Rubin were rejected by the band and Bob Ezrin was brought in to complete the album.[1] As a result of this, the production credits for the album's lead single, "Sound of Drums", name Drakoulias and Rubin as producers, while the rest of Peasants, Pigs & Astronauts is produced by Ezrin.** Like its predecessor, K, the album continues the band's hybrid of 1960s-style psychedelic rock, groovy indie pop, and Indian instrumentation, albeit with a more progressive rock slant than on previous releases.[1] Musically, many of the songs make use of Beatles-influenced psychedelic effects, swirling guitars, and Indian chants.[1] This musical eclecticism prompted the band themselves to refer to Peasants, Pigs & Astronauts as their "kitchen sink album".[2]

In [38]:
# Find Artist Name:

alb_url = 'https://en.wikipedia.org/wiki/Peasants,_Pigs_%26_Astronauts'
alb_html = requests.get(alb_url).content
alb_soup = BeautifulSoup(alb_html, 'html.parser')
art_name = alb_soup.find_all('div', class_='contributor')[0].text
art_name

'Kula Shaker'

As a reminder, the MongoDB data form is:

            
            new_entry = {'track':track,
                         'artist':artist,
                         'album':album,
                         'producer':producer,
                         'spotify_id':song_id,
                         'track_info':song_info,
                         'audio_analysis':song_analysis,
                         'audio_features':song_features}

In [85]:
"""Make a function that takes album_url and a spotipy object and returns the tuple 
(track, artist, album, producer, spotify_id, track_info, audio_analysis, Audio_features)
"""

def entry_from_wiki_album(album_url, sp, producer):
    album_html = requests.get(album_url).content
    alb_soup = BeautifulSoup(album_html, 'html.parser')
    
    album = alb_soup.find_all('th', class_='summary album')[0].text
    artist = alb_soup.find_all('div', class_='contributor')[0].text
    
    print()
    print('- '*10)
    print('Pinging Spotify for {} by {}...'.format(album,artist))
    print('- '*10)
    print()
    try:
        album_results = sp.search(q='album:{} artist:{}'.format(album, artist),type='album')
        album_id = album_results['albums']['items'][0]['id']

        album_output = []

        for song in sp.album_tracks(album_id)['items']:
            track = song['name']
            spotify_id = song['id']
            track_info = sp.track(spotify_id)
            audio_analysis = sp.audio_analysis(spotify_id)
            audio_features = sp.audio_features(spotify_id)

            album_output.append( (track, artist, album, producer, spotify_id, track_info, audio_analysis, audio_features) )

        return album_output
    

    except Exception as ex:
        print( ex )
        print(album_results)
        print()
        if album_results == None:
            sleep(60)
            sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
            try:
                results = entry_from_wiki_album(album_url, sp, producer)
                return results
            except:
                raise Exception('Album {} not scraped'.format(album_url))

In [43]:
album_url = 'https://en.wikipedia.org/wiki/Peasants,_Pigs_%26_Astronauts'

import time

start = time.time()

album_output = entry_from_wiki_album(album_url, sp, 'Rick Rubin')

end = time.time()

print("Time elapsed: {} seconds".format(end - start))

Time elapsed: 19.37245202064514 seconds


In [47]:
# album_output
sys.getsizeof(album_output)

192

In [5]:
def get_size(obj, seen=None):
    """Recursively finds size of objects"""
    size = sys.getsizeof(obj)
    if seen is None:
        seen = set()
    obj_id = id(obj)
    if obj_id in seen:
        return 0
    # Important mark as seen *before* entering recursion to gracefully handle
    # self-referential objects
    seen.add(obj_id)
    if isinstance(obj, dict):
        size += sum([get_size(v, seen) for v in obj.values()])
        size += sum([get_size(k, seen) for k in obj.keys()])
    elif hasattr(obj, '__dict__'):
        size += get_size(obj.__dict__, seen)
    elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
        size += sum([get_size(i, seen) for i in obj])
    return size

get_size(album_output)/(2**20)

NameError: name 'album_output' is not defined

One album is approximately 20Mb worth of data!

In [52]:
# Sanity check:

2583 * 20 * 20 / 1000 #number of gigabytes of data if I scrape everything. (Approximately)

1033.2

In [62]:
segments = get_size(album_output[0][6]['segments'])
song = get_size(album_output[0])
segments/song
#70% of a song's information is in the segments. Probably doesn't make sense to make the files smaller.

0.70400435665649

Now make a script that starts at the Lists of Albums and returns every album URL with producer name:


In [86]:
def scrape_album_list(producer_url):
    
    """
    INPUT: producer_url (wikipedia)
    OUTPUT: list of (album_url, producer) tuples
    """
    
    html = requests.get(producer_url).content
    soup = BeautifulSoup(html, 'html.parser')
    producer = soup.find_all('h1', {'id':"firstHeading"})[0].text.split('by ')[-1]
    print('Producer: {}'.format(producer))
    
    album_url_list = get_category_links(producer_url)
    
    output = list(zip( album_url_list, [producer]*len(album_url_list) ))
    
    return output

In [66]:
scrape_album_list('https://en.wikipedia.org/wiki/Category:Albums_produced_by_Rick_Rubin')

Producer: Rick Rubin


[('https://en.wikipedia.org/wiki/12_Songs_(Neil_Diamond_album)', 'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/13_(Black_Sabbath_album)', 'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/21_(Adele_album)', 'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/All_World_2', 'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/All_World:_Greatest_Hits', 'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/American_IV:_The_Man_Comes_Around',
  'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/American_Grafishy', 'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/American_III:_Solitary_Man', 'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/American_Recordings_(album)', 'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/American_V:_A_Hundred_Highways',
  'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/American_VI:_Ain%27t_No_Grave', 'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/Amethyst_Rock_Star', 'Rick Rubin'),
 ('https://en.wikipedia.org/wiki/Angus_%26_Julia_Stone_(album)', 'Rick Rubin'),
 ('https://en.wikipe

# Gameplan:

    Get producer_url
    scrape_album_list(producer_url)
    for every album:
        album_entry_list = entry_from_wiki_album(album url, sp, producer)
        for track in album_entry_list:
            update/upsert the database where spotify_id and producer match (some songs may have multiple producers)

In [87]:
# Load MongoDB

from pymongo import MongoClient
client = MongoClient()
# Access/Initiate Database
db = client['producer_db']
# Access/Initiate Table
tab = db['songs']
collection = db.tab

In [94]:
producer_urls = ['https://en.wikipedia.org/wiki/Category:Albums_produced_by_Rick_Rubin',
                 'https://en.wikipedia.org/wiki/Category:Albums_produced_by_George_Martin']

failed_albums = defaultdict(list)
interval = 5

for producer_url in producer_urls:
    album_list = scrape_album_list(producer_url)

    for i, (album_url, producer) in enumerate(album_list):
        sleep(interval)
        sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
        try:
            album_entry_list = entry_from_wiki_album(album_url, sp, producer)
            print('\nalbum_entry_list length: {}'.format(len(album_entry_list)))
            album_name = album_entry_list[i][2]
            print('-'*20)
            print('ALBUM: {} -- COMPLETE'.format(album_name))
            print('-'*20)
            print()
            for track, artist, album, producer, spotify_id, track_info, audio_analysis, audio_features in album_entry_list:
                myquery = { "producer": producer,
                            "spotify_id" : spotify_id }

                newvalues = { "$set": {'track':track,
                                       'artist':artist,
                                       'album':album,
                                       'producer':producer,
                                       'spotify_id':spotify_id,
                                       'track_info':track_info,
                                       'audio_analysis':audio_analysis,
                                       'audio_features':audio_features
                                      }
                            }

                collection.update_one(myquery, newvalues, upsert = True)

                print('\t\t{} by {}'.format(track,artist))
            print()
            
        except Exception as ex:
            failed_albums[producer].append((i,album_url,producer))
            print( ex )
            print()
            print('x'*20)
            print('ALBUM FAILURE: {}'.format(album_url))
            print('album_entry_list:')
            print(album_entry_list)
            print()
            print('x'*20)
            print()

Producer: Rick Rubin

- - - - - - - - - - 
Pinging Spotify for 12 Songs by Neil Diamond...
- - - - - - - - - - 


album_entry_list length: 12
--------------------
ALBUM: 12 Songs -- COMPLETE
--------------------

		Oh Mary by Neil Diamond
		Hell Yeah by Neil Diamond
		Captain Of A Shipwreck by Neil Diamond
		Evermore by Neil Diamond
		Save Me A Saturday Night by Neil Diamond
		Delirious Love by Neil Diamond
		I'm On To You by Neil Diamond
		What's It Gonna Be by Neil Diamond
		Man Of God by Neil Diamond
		Create Me by Neil Diamond
		Face Me by Neil Diamond
		We by Neil Diamond


- - - - - - - - - - 
Pinging Spotify for 13 by Black Sabbath...
- - - - - - - - - - 


album_entry_list length: 12
--------------------
ALBUM: 13 -- COMPLETE
--------------------

		End Of The Beginning by Black Sabbath
		God Is Dead? by Black Sabbath
		Loner by Black Sabbath
		Zeitgeist by Black Sabbath
		Age Of Reason by Black Sabbath
		Live Forever by Black Sabbath
		Damaged Soul by Black Sabbath
		Dear Fath

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for Amethyst Rock Star by Saul Williams...
- - - - - - - - - - 



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for Angus & Julia Stone by Angus & Julia Stone...
- - - - - - - - - - 


album_entry_list length: 16
--------------------
ALBUM: Angus & Julia Stone -- COMPLETE
--------------------

		A Heartbreak by Angus & Julia Stone
		My Word For It by Angus & Julia Stone
		Grizzly Bear by Angus & Julia Stone
		Heart Beats Slow by Angus & Julia Stone
		Wherever You Are by Angus & Julia Stone
		Get Home by Angus & Julia Stone
		Death Defying Acts by Angus & Julia Stone
		Little Whiskey by Angus & Julia Stone
		From The Stalls by Angus & Julia Stone
		Other Things by Angus & Julia Stone
		Please You by Angus & Julia Stone
		Main Street by Angus & Julia Stone
		Crash And Burn by Angus & Julia Stone
		Do Without by Angus & Julia Stone
		All This Love by Angus & Julia Stone
		Roses by Angus & Julia Stone


- - - - - - - - - - 
Pinging Spotify for Antennas to Hell by Slipknot...
- - - - - - - - - - 


album_entry_list length: 36
--------------------
ALBUM: Antennas 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for Audioslave by Audioslave...
- - - - - - - - - - 



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for Back & Fourth by Pete Yorn...
- - - - - - - - - - 



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for Ballbreaker by AC/DC...
- - - - - - - - - - 



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for A Better Tomorrow by Wu-Tang Clan...
- - - - - - - - - - 



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for Beyond Magnetic by Metallica...
- - - - - - - - - - 



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for The Black Album by Jay-Z...
- - - - - - - - - - 

list index out of range
{'albums': {'href': 'https://api.spotify.com/v1/search?query=album%3AThe+Black+Album+artist%3AJay-Z&type=album&offset=0&limit=10', 'items': [], 'limit': 10, 'next': None, 'offset': 0, 'previous': None, 'total': 0}}

object of type 'NoneType' has no len()

xxxxxxxxxxxxxxxxxxxx
ALBUM FAILURE: https://en.wikipedia.org/wiki/The_Black_Album_(Jay-Z_album)
album_entry_list:
None

xxxxxxxxxxxxxxxxxxxx


- - - - - - - - - - 
Pinging Spotify for Blood Sugar Sex Magik by Red Hot Chili Peppers...
- - - - - - - - - - 



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for Born Free by Kid Rock...
- - - - - - - - - - 



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for Born to Die: The Paradise Edition by Lana Del Rey...
- - - - - - - - - - 



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for By the Way by Red Hot Chili Peppers...
- - - - - - - - - - 



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




- - - - - - - - - - 
Pinging Spotify for Californication by Red Hot Chili Peppers...
- - - - - - - - - - 



KeyboardInterrupt: 

I'm getting a new error:

```
    IOPub data rate exceeded.
    The notebook server will temporarily stop sending output
    to the client in order to avoid crashing it.
    To change this limit, set the config variable
    `--NotebookApp.iopub_data_rate_limit`.

    Current values:
    NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
    NotebookApp.rate_limit_window=3.0 (secs)
```

In [23]:
requests.get("https://api.spotify.com/v1/search?q=tania%20bowra&type=artist&client_id={}&client_secret={}&token_type=Bearer".format(client_id, client_secret)).content

b'{\n  "error": {\n    "status": 401,\n    "message": "No token provided"\n  }\n}'

In [28]:
payload = {'client_id':client_id,
           'client_secret':client_secret,
           'response_type':'code',
           'request_uri':'https://example.com/callback'}
requests.get("https://api.spotify.com/v1/search?q=one+love", params = payload)

<Response [401]>

In [35]:
print(requests.Session().get("https://accounts.spotify.com/authorize",  params = payload).content.decode())


<!DOCTYPE html>
<html lang="en" dir="ltr" ng-app="accounts" ng-csp ng-strict-di>
<head>
  <meta charset="utf-8">
  <title ng-bind="(&#39;loginTitle&#39; | localize) + &#39; - Spotify&#39;">Spotify</title>
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
  <base href="/">
  <link rel="icon" href="https://accounts.scdn.co/images/favicon.ace4d8543bbb017893402a1e9d1ac1fa.ico">
  <link href="https://accounts.scdn.co/css/index.b7c0c683a2e34523a979.css" media="screen" rel="stylesheet">

  <script defer src="https://accounts.scdn.co/js/index.b7c0c683a2e34523a979.js" sp-bootstrap></script>
  <meta ng-non-bindable sp-bootstrap-data='{"phoneFeatureEnabled":false,"user":false,"BON":["0","0",-849195668]}'>
</head>
<body ng-controller="LoginController">
  <div ng-include="template"></div>
</body>
</html>



In [89]:
# client_id = # Enter your client id here
# client_secret = # Enter your client secret here

grant_type = 'client_credentials'

#Request based on Client Credentials Flow from https://developer.spotify.com/web-api/authorization-guide/

#Request body parameter: grant_type Value: Required. Set it to client_credentials
body_params = {'grant_type' : grant_type}

url='https://accounts.spotify.com/api/token'

response=requests.post(url, data=body_params, auth = (client_id, client_secret)) 
print (response.content.decode().split('"')[3])

BQCuL1z9A92TuESfEf9fycHdXc4snvio1nFxyGMCDF8sPWMuSujpFEBPoPHS1EOfMRL0R3tghu-KtXSuDs0


In [41]:
response = requests.get("https://api.spotify.com/v1/search?q=one+love", data=body_params, auth = (client_id, client_secret)).content.decode()

In [91]:
# THIS WORKS FINALLY

# client_id = # Enter your client id here
# client_secret = # Enter your client secret here

grant_type = 'client_credentials'
#Request based on Client Credentials Flow from https://developer.spotify.com/web-api/authorization-guide/

#Request body parameter: grant_type Value: Required. Set it to client_credentials
body_params = {'grant_type' : grant_type}

url = 'https://accounts.spotify.com/api/token'

response = requests.post(url, data=body_params, auth = (client_id, client_secret)) 
access_token = response.content.decode().split('"')[3]

payload = {'client_id':client_id,
           'client_secret':client_secret,
           'response_type':'code',
           'request_uri':'https://example.com/callback',
           'access_token':access_token,
           'token_type':'Bearer'}
a = requests.get("https://api.spotify.com/v1/search?q=one+love&type=track", params = payload)
print((a.status_code))

200


In [42]:
from IPython.core.display import display, HTML
display(HTML(response))

In [80]:
def spam_spotify():
    # THIS WORKS FINALLY

    start = time.time()
    
    client_id = os.environ['SPOTIFY_CLIENT_ID']
    client_secret = os.environ['SPOTIFY_CLIENT_SECRET']
    grant_type = 'client_credentials'
    body_params = {'grant_type' : grant_type}
    
    url = 'https://accounts.spotify.com/api/token'

    response = requests.post(url, data=body_params, auth = (client_id, client_secret)) 
    access_token = response.content.decode().split('"')[3]

    payload = {'client_id':client_id,
               'client_secret':client_secret,
               'response_type':'code',
               'request_uri':'https://example.com/callback',
               'access_token':access_token,
               'token_type':'Bearer'}
    a = requests.get("https://api.spotify.com/v1/search?q=one+love&type=track", params = payload)
    i = 0
    
    while a.status_code == 200:
        a = requests.get("https://api.spotify.com/v1/audio-analysis/3JIxjvbbDrA9ztYlNcp3yL", params = payload)
        i += 1
        if i%100 == 0:
            end = time.time()
            print (i, '\t Time Elapsed: {} seconds'.format(end - start))
            
    return a

In [81]:
response = spam_spotify()

100 	 Time Elapsed: 117.40444421768188 seconds
200 	 Time Elapsed: 239.66219520568848 seconds
300 	 Time Elapsed: 344.2402923107147 seconds
400 	 Time Elapsed: 448.9668791294098 seconds
500 	 Time Elapsed: 544.3206331729889 seconds
600 	 Time Elapsed: 643.7038731575012 seconds
700 	 Time Elapsed: 759.9725451469421 seconds
800 	 Time Elapsed: 860.5243241786957 seconds
900 	 Time Elapsed: 956.285539150238 seconds
1000 	 Time Elapsed: 1050.4048414230347 seconds
1100 	 Time Elapsed: 1146.1569411754608 seconds
1200 	 Time Elapsed: 1233.4902153015137 seconds
1300 	 Time Elapsed: 1336.8394391536713 seconds
1400 	 Time Elapsed: 1432.9915771484375 seconds
1500 	 Time Elapsed: 1525.8727941513062 seconds
1600 	 Time Elapsed: 1614.4290702342987 seconds
1700 	 Time Elapsed: 1733.32932138443 seconds
1800 	 Time Elapsed: 1839.305159330368 seconds
1900 	 Time Elapsed: 1943.6623454093933 seconds
2000 	 Time Elapsed: 2045.2463171482086 seconds
2100 	 Time Elapsed: 2145.29558300972 seconds
2200 	 Time El

In [82]:
response.content.decode()

'{\n  "error": {\n    "status": 401,\n    "message": "The access token expired"\n  }\n}'