This notebook serves as a playground for experimenting with data science using Python and the Spotify APIs. This one focuses on a user logging in and on extracting that user's data.

First we install a few necessary libraries

In [1]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment of the running kernel, and keep the output quiet for all of them.
%pip install -q numpy pandas requests



Next, we use a Spotify API client based on a tutorial found here: [video](https://www.youtube.com/watch?v=xdq6Gz33khQ) [GitHub](https://github.com/codingforentrepreneurs/30-Days-of-Python/blob/master/tutorial-reference/Day%2019/notebooks/spotify_client.py)

The class has been altered to add more access functions, and this notebook adds the ability for a user to log in.

In [2]:
import base64
import datetime
from urllib.parse import urlencode

import requests

# Authenticates itself with Spotify's "Client Credentials" flow. A user token
# obtained elsewhere (e.g. through the "Implicit Grant" flow) can be used
# instead by assigning `access_token` / `access_token_expires` on an instance.
class SpotifyAPI(object):
    """Small client for the Spotify Web API.

    Every data-access method returns the decoded JSON response as a dict, or
    an empty dict ``{}`` when Spotify answers with a non-2xx status code.

    A token is requested lazily (client-credentials grant) the first time a
    request needs one, and again whenever the stored token is missing or has
    expired. Note that this also applies to a user token that was assigned
    manually: once it expires it is replaced by an app-only token.
    """
    access_token = None
    # Evaluated once at class-definition time, so a new instance always starts
    # out with an "already expired" token and authenticates on first use.
    access_token_expires = datetime.datetime.now()
    access_token_did_expire = True
    client_id = None
    client_secret = None
    token_url = "https://accounts.spotify.com/api/token"
    authorization_url = "https://accounts.spotify.com/authorize"
    # Largest `limit` value sent per request by the paging helper below.
    max_page_size = 50

    #external functions ("public")

    def __init__(self, client_id, client_secret, *args, **kwargs):
        """Store the application's client id and client secret."""
        super().__init__(*args, **kwargs)
        self.client_id = client_id
        self.client_secret = client_secret

    def get_album(self, _id):
        """Return the album with id ``_id`` (``{}`` on failure)."""
        return self.get_resource(_id, resource_type='albums')

    def get_artist(self, _id):
        """Return the artist with id ``_id`` (``{}`` on failure)."""
        return self.get_resource(_id, resource_type='artists')

    def get_playlist(self, _id):
        """Return the playlist with id ``_id`` (``{}`` on failure)."""
        return self.get_resource(_id, resource_type='playlists')

    def get_playlist_tracks(self, _id):
        """Return the tracks of the playlist with id ``_id`` (``{}`` on failure)."""
        return self.get_resource(_id, resource_type='playlists', addendum='tracks')

    def get_track_features(self, _id):
        """Return the audio features of the track with id ``_id`` (``{}`` on failure)."""
        return self.get_resource(_id, resource_type='audio-features')

    def get_track_analysis(self, _id):
        """Return the audio analysis of the track with id ``_id`` (``{}`` on failure)."""
        return self.get_resource(_id, resource_type='audio-analysis')

    def search(self, query=None, operator=None, operator_query=None, search_type='artist', limit=20 ):
        """Search the Spotify catalogue.

        Args:
            query: Search text, or a dict that is turned into space-separated
                ``key:value`` filters.
            operator: Optional ``"or"`` / ``"not"`` (case-insensitive). Any
                other value is ignored.
            operator_query: Text placed after the operator. Only used when it
                is a string and ``operator`` is accepted.
            search_type: Item type to search for; lower-cased before sending.
            limit: Maximum number of results to ask for.

        Returns:
            The decoded JSON response, or ``{}`` on a non-2xx status.

        Raises:
            Exception: If ``query`` is None.
        """
        if query is None:
            raise Exception("A query is required")
        if isinstance(query, dict):
            query = " ".join([f"{k}:{v}" for k,v in query.items()])
        if operator is not None and operator_query is not None:
            if operator.lower() in ("or", "not"):
                operator = operator.upper()
                if isinstance(operator_query, str):
                    query = f"{query} {operator} {operator_query}"
        query_params = urlencode({"q": query, "type": search_type.lower(), "limit": str(limit)})
        return self.base_search(query_params)


    #internal functions ("private")

    @staticmethod
    def _is_success(status_code):
        """Return True for any 2xx HTTP status code (200-299 inclusive)."""
        return 200 <= status_code < 300

    def _get_json(self, endpoint, report_errors=False):
        """GET ``endpoint`` with the bearer token and decode the JSON body.

        Args:
            endpoint: Full URL to request.
            report_errors: When True, print the status code of a failed request.

        Returns:
            The decoded JSON response, or ``{}`` on a non-2xx status.
        """
        r = requests.get(endpoint, headers=self.get_resource_header())
        if not self._is_success(r.status_code):
            if report_errors:
                print(r.status_code)
            return {}
        return r.json()

    def get_client_credentials(self):
        """
        Returns a base64 encoded string of "client_id:client_secret".

        Raises an Exception when either value is missing.
        """
        client_id = self.client_id
        client_secret = self.client_secret
        if client_secret is None or client_id is None:
            raise Exception("You must set client_id and client_secret")
        client_creds = f"{client_id}:{client_secret}"
        client_creds_b64 = base64.b64encode(client_creds.encode())
        return client_creds_b64.decode()

    def get_token_headers(self):
        """Return the HTTP Basic authorization header used for the token request."""
        client_creds_b64 = self.get_client_credentials()
        return {
            "Authorization": f"Basic {client_creds_b64}"
        }

    def get_token_data(self):
        """Return the form body of the token request (client-credentials grant)."""
        return {
            "grant_type": "client_credentials"
        }

    def perform_auth(self):
        """Request a new app token and store it together with its expiry time.

        Returns:
            True once the token has been stored.

        Raises:
            Exception: If the token endpoint answers with a non-2xx status.
        """
        r = requests.post(self.token_url, data=self.get_token_data(), headers=self.get_token_headers())
        if not self._is_success(r.status_code):
            raise Exception("Could not authenticate client.")
        data = r.json()
        now = datetime.datetime.now()
        expires = now + datetime.timedelta(seconds=data['expires_in'])  # expires_in is in seconds
        self.access_token = data['access_token']
        self.access_token_expires = expires
        self.access_token_did_expire = expires < now
        return True

    def get_access_token(self):
        """Return the stored access token, authenticating first if it is missing or expired."""
        # Authenticate at most once per call; the previous recursive version
        # would recurse without bound if a fresh token was already expired.
        if self.access_token is None or self.access_token_expires < datetime.datetime.now():
            self.perform_auth()
        return self.access_token

    def get_resource_header(self):
        """Return the bearer-token authorization header for API requests."""
        access_token = self.get_access_token()
        headers = {
            "Authorization": f"Bearer {access_token}"
        }
        return headers

    def get_resource(self, lookup_id, resource_type='albums', version='v1', addendum=''):
        """Fetch ``/{version}/{resource_type}/{lookup_id}[/{addendum}]``.

        Returns:
            The decoded JSON response, or ``{}`` on a non-2xx status.
        """
        endpoint = f"https://api.spotify.com/{version}/{resource_type}/{lookup_id}"
        if addendum:
            # Only append the sub-resource when there is one, so that plain
            # lookups do not end in a stray trailing slash.
            endpoint = f"{endpoint}/{addendum}"
        return self._get_json(endpoint)

    def base_search(self, query_params): # type
        """Run a search with an already URL-encoded query string."""
        endpoint = "https://api.spotify.com/v1/search"
        return self._get_json(f"{endpoint}?{query_params}")

    #new functions for user functions

    def get_current_user(self):
        """Return the profile of the user the token belongs to (``{}`` on failure)."""
        return self._get_json("https://api.spotify.com/v1/me")

    def get_current_user_recently_played(self):
        """Return the current user's recently played tracks (``{}`` on failure)."""
        return self._get_json("https://api.spotify.com/v1/me/player/recently-played")

    def get_current_user_top(self, top_type='tracks', time_range='medium_term'):
        """Return up to 50 of the current user's top items.

        Args:
            top_type: Path segment after ``/me/top/`` (default ``'tracks'``).
            time_range: Value of the ``time_range`` query parameter.

        Returns:
            The decoded JSON response; on a non-2xx status the status code is
            printed and ``{}`` is returned.
        """
        query = urlencode({"time_range": time_range, "limit": 50})
        endpoint = f"https://api.spotify.com/v1/me/top/{top_type}?{query}"
        return self._get_json(endpoint, report_errors=True)

    def get_current_user_top_amount(self, top_type='tracks', time_range='medium_term', amount=50):
        """Collect up to ``amount`` of the current user's top items, paging as needed.

        Requests are issued in pages of at most ``max_page_size`` items, each
        starting at the offset of the items gathered so far. Paging stops as
        soon as ``amount`` items were collected or a page comes back shorter
        than requested (nothing more to fetch).

        Returns:
            A flat list of the ``items`` entries of all pages (``[]`` when
            ``amount`` is not positive), or ``{}`` if any request fails.
        """
        collected = []
        while len(collected) < amount:
            page_size = min(self.max_page_size, amount - len(collected))
            query = urlencode({"time_range": time_range, "limit": page_size, "offset": len(collected)})
            data = self._get_json(f"https://api.spotify.com/v1/me/top/{top_type}?{query}")
            if not data:
                return {}
            page = data.get('items', [])
            collected.extend(page)
            if len(page) < page_size:
                break
        return collected

    def get_user(self, user):
        """Return the public profile of the user with id ``user`` (``{}`` on failure)."""
        return self._get_json(f"https://api.spotify.com/v1/users/{user}")

import webbrowser
import json

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import math

# Paste your own client credentials below.
client_id = ""
client_secret = ""

# Permissions the user is asked to grant; Spotify expects them as one
# space-separated string.
scopes = [
    'user-read-email',
    'user-read-private',
    'playlist-read-collaborative',
    'playlist-modify-public',
    'playlist-read-private',
    'playlist-modify-private',
    'user-library-read',
    'user-top-read',
    'user-read-recently-played',
    'user-read-playback-state',
    'user-read-currently-playing',
]
scopes_string = ' '.join(scopes)
scopes_string

authorization_url = "https://accounts.spotify.com/authorize"

# response_type "token" asks Spotify to hand the access token back directly
# in the redirect URL.
params = dict(
    client_id=client_id,
    response_type="token",
    redirect_uri="http://localhost:7777/callback",
    scope=scopes_string,
)
r = requests.get(authorization_url, params)
print(f"Successful if this is 200: {r.status_code}")
print(f"URL to paste in browser: {r.url}")


Successful if this is 200: 200
URL to paste in browser: https://accounts.spotify.com/login?continue=https%3A%2F%2Faccounts.spotify.com%2Fauthorize%3Fscope%3Duser-read-email%2Buser-read-private%2Bplaylist-read-collaborative%2Bplaylist-modify-public%2Bplaylist-read-private%2Bplaylist-modify-private%2Buser-library-read%2Buser-top-read%2Buser-read-recently-played%2Buser-read-playback-state%2Buser-read-currently-playing%26response_type%3Dtoken%26redirect_uri%3Dhttp%253A%252F%252Flocalhost%253A7777%252Fcallback%26client_id%3D5de0c3326eed4e17a4e2b226fc57a68f


Click the link above to authenticate with Spotify. Then paste the URL you are redirected to between the quotes after "response =" in the next cell.


In [3]:
from urllib.parse import parse_qs, urlsplit

# Paste the entire URL you are redirected to here, as a string (in quotes).
response = ''

# The redirect carries its values in the URL fragment (the part after '#'),
# e.g. ...callback#access_token=...&token_type=Bearer&expires_in=3600.
# Parsing by name instead of by position keeps this working if the order of
# the parameters changes or extra ones are present.
fragment_params = parse_qs(urlsplit(response).fragment)
if 'access_token' not in fragment_params or 'expires_in' not in fragment_params:
    raise ValueError(
        "No access token found. Paste the full redirect URL (including everything "
        "after '#') into `response` above and run this cell again."
    )

access_token = fragment_params['access_token'][0]
expires_in = fragment_params['expires_in'][0]  # lifetime in seconds, kept as a string
# NOTE(review): this prints a live access token into the saved notebook output;
# clear the output before sharing the notebook.
print(access_token)
print(expires_in)

# Two clients for comparison: one that authenticates as the application only,
# and one that uses the token the user just granted.
spotify_nouser = SpotifyAPI(client_id, client_secret)
spotify_user = SpotifyAPI(client_id, client_secret)

# Inject the user token. Use the lifetime Spotify reported in the redirect
# instead of assuming one hour.
spotify_user.access_token = access_token
spotify_user.access_token_expires = datetime.datetime.now() + datetime.timedelta(seconds=int(expires_in))
spotify_user.access_token_did_expire = False

# Run the same search with both clients.
nu = spotify_nouser.search("Khalid", search_type='artist')

u = spotify_user.search("Khalid", search_type='artist')

# NOTE(review): these prints put both access tokens into the saved notebook
# output; clear the output before sharing the notebook.
print(spotify_nouser.access_token)
print(nu)
print(spotify_user.access_token)
print(u)

# Every combination of resource type and time frame is fetched and stored
# under the key "<resource_type>_<time_frame>".
indices = {'resource_types':['tracks','artists'],'time_frames':['short_term','medium_term', 'long_term']}

raw_file_data = {}
for resource_type in indices['resource_types']:
    for time_frame in indices['time_frames']:
        index_string = resource_type + "_" + time_frame
        top_response = spotify_user.get_current_user_top(top_type=resource_type, time_range=time_frame)
        # The client returns {} when the request fails (e.g. an expired or
        # invalid token); report that instead of an opaque KeyError.
        if 'items' not in top_response:
            raise RuntimeError(f"Could not fetch {index_string}; check that the access token is still valid.")
        raw_file_data[index_string] = top_response['items']

profile = spotify_user.get_current_user()
if 'id' not in profile:
    raise RuntimeError("Could not fetch the user profile; check that the access token is still valid.")
raw_file_data['profile'] = profile

# The export is named after the user's Spotify id.
file_name = raw_file_data['profile']['id'] + '.json'
with open(file_name, 'w') as outfile:
    json.dump(raw_file_data, outfile)

# Download the file when running in Google Colab; elsewhere the file simply
# stays in the working directory.
try:
    from google.colab import files
except ImportError:
    print(f"Saved {file_name} in the current working directory.")
else:
    files.download(file_name)

BQCe98n339gCIq4kwTFDlCcYgGJcwLkQAMiLEagderLv4xqg9WTrL_aewjInTxnIWn2hmTtpZQ05LA8T-X509rQObRkbz71m2tmHxsqQdwl2fYb0jlyC5LWTKnntrq6dIkZHF68XXQOtFwS0MYruIXO3-W_fwmZawWsCCPeNk8gAV-pSVt9F_nGrwe802WkuDnZrkEpl0zjwNWaPBCwyqt74Z6fpKEUDRJzBlu42owJc-WaevLtxOcXbkY8Z2PM2dfU
3600
BQCD7X4-PaX_IvwgnWTWQVqyyV3yhIa5xqnm5bUx2RaEXW9SwjBDMhbI2ugDb6r3bCccU4FOY9RXQueShBM
{'artists': {'href': 'https://api.spotify.com/v1/search?query=Khalid&type=artist&offset=0&limit=20', 'items': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6LuN9FCkKOj5PcnpouEgny'}, 'followers': {'href': None, 'total': 14116537}, 'genres': ['pop'], 'href': 'https://api.spotify.com/v1/artists/6LuN9FCkKOj5PcnpouEgny', 'id': '6LuN9FCkKOj5PcnpouEgny', 'images': [{'height': 640, 'url': 'https://i.scdn.co/image/ab6761610000e5eb012b37d6dec8872b18524f78', 'width': 640}, {'height': 320, 'url': 'https://i.scdn.co/image/ab67616100005174012b37d6dec8872b18524f78', 'width': 320}, {'height': 160, 'url': 'https://i.scdn.co/image/ab6761610000

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Your data should be downloaded to a .json file. Check if the data looks correct by running the snippet below.

In [4]:

# Sanity check: list every stored item by name, numbered from 1 within each
# "<resource_type>_<time_frame>" group.
count = 0
for resource_type in indices['resource_types']:
    for time_frame in indices['time_frames']:
        index_string = f"{resource_type}_{time_frame}"
        print(f"{index_string}-> ")
        count = 0  # stays 0 when the group is empty
        for count, resource in enumerate(raw_file_data[index_string], start=1):
            print(resource['name'] + ' : ' + str(count))
        print()


tracks_short_term-> 
Rot : 1
Sorbet : 2
Helix : 3
Nervous : 4
Oil & Water : 5
Feel : 6
Stag : 7
Transient : 8
Peach : 9
Vegan Cannibal : 10
Penance Permission : 11
At the Wheel : 12
Holding On : 13
Hanging By A Thread : 14
Oasis : 15
Seashell : 16
Pirates Don't Say "Yarr!" Anymore : 17
Greyblood : 18
Twisted Tongues : 19
The Dreamer : 20
The Veldt - 8 Minute Edit : 21
The Appetizer - Final Mix : 22
ABSOLUTELYCRANKINMYMF'INHOG : 23
You Didn't Start a Fire in My Heart, You Started It in My House! : 24
A Walk : 25
Violet : 26
I Go, Pt. Three : 27
lmho : 28
Circadian Deity : 29
Strobe : 30
God Damn : 31
Poplar Avenue : 32
Heal : 33
L.S.F.H.Q.P. : 34
Brand New Day : 35
Gambit : 36
Sri Vishnu Yantra : 37
Still Posi : 38
Roses : 39
Another Life : 40
Cheating the NASA Space Physical : 41
Querents : 42
Friends Suck : 43
Abstract Dynamics : 44
All Alone : 45
Cat Fantastic : 46
Homeward : 47
Duet : 48
nero : 49
5-HTP : 50

tracks_medium_term-> 
Gambit : 1
The Appetizer - Final Mix : 2
5-HTP : 3
S