In [None]:
# Log in with your Spotify account and create a new app.
# Set a redirect url for the app, e.g. https://google.com. IMPORTANT: this url will be used to get our access code.
# Get the app's client_id and client_secret credentials;
# we need these to authenticate ourselves when querying the Spotify API.

In [None]:
# Spotify app credentials.
# Each value is taken from an environment variable when it is set and prompted for
# otherwise, so the secrets never have to be typed into (and saved with) the notebook.
import os
from getpass import getpass

CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID') or input('Spotify client_id: ')
# getpass does not echo the secret, which keeps it out of the cell output
CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET') or getpass('Spotify client_secret: ')
# this has to be the redirect url that was set for the app
REDIRECT_URI = os.environ.get('SPOTIFY_REDIRECT_URI') or input('Redirect url set for the app: ')

In [None]:
# The endpoints we will be using require the user-library-read scope to deal with our
# personal Spotify data, so the app has to be authorized by the user first.
# Visit the printed url (through your browser or Postman), approve the request, and copy
# the code from the params of the url you've been redirected to.
# e.g. of the redirected url: https://www.google.com/?code={code}
from urllib.parse import urlencode

# urlencode percent-escapes the redirect url so it survives being nested in a query string
authorize_query = urlencode({
    'redirect_uri': REDIRECT_URI,
    'response_type': 'code',
    'client_id': CLIENT_ID,
    'scope': 'user-library-read',
})
print(f'https://accounts.spotify.com/authorize?{authorize_query}')
CODE = input('Authorization code: ').strip()

In [None]:
import base64
import requests

# Exchange the authorization code for an access token, with which we can query and
# retrieve our liked songs and our personal data.
data = {
    'grant_type': 'authorization_code',
    'code': CODE,
    'redirect_uri': REDIRECT_URI
}
# the app identifies itself with HTTP Basic auth: base64 of "client_id:client_secret"
credentials = f'{CLIENT_ID}:{CLIENT_SECRET}'
encoded_credentials = base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
headers = {
    'content-type': 'application/x-www-form-urlencoded',
    'Authorization': 'Basic ' + encoded_credentials
}
response = requests.post('https://accounts.spotify.com/api/token', headers=headers, data=data)
if not response.ok:
    # report the error body here instead of failing later with a KeyError on 'access_token'
    raise RuntimeError(f'Token request failed ({response.status_code}): {response.text}')
# show the response, but keep the tokens themselves out of the saved notebook output
print({
    key: ('<hidden>' if key in ('access_token', 'refresh_token') else value)
    for key, value in response.json().items()
})

In [None]:
# pull the access token out of the token response
token_payload = response.json()
ACCESS_TOKEN = token_payload['access_token']

In [None]:
# Fetch the songs we liked.
# NOTE: this is a single request, so it returns only what Spotify sends back for one call;
# set a `limit` query parameter if you want to control the size of the response received.
headers = {
    'Authorization': 'Bearer ' + ACCESS_TOKEN
}
response = requests.get('https://api.spotify.com/v1/me/tracks', headers=headers)
# stop here with the HTTP error rather than with a KeyError on 'items' in a later cell
response.raise_for_status()
res_dict = response.json()

In [None]:
# dump the parsed liked-songs response to inspect its structure
print(res_dict)

In [None]:
# id of the first liked song, as a quick look at where the ids live in the response
first_item = res_dict['items'][0]
print(first_item['track']['id'])

In [None]:
# gather the id and the name of every liked song; both lists share the same order
liked_tracks = [item['track'] for item in res_dict['items']]
list_of_ids = [track['id'] for track in liked_tracks]
list_of_names = [track['name'] for track in liked_tracks]

In [None]:
# GET request: https://api.spotify.com/v1/audio-features
# Build the list of ids as a single comma-separated string for the param to be passed to
# the audio-features endpoint.
# str.join places a comma only between ids, so every id appears exactly once and an
# empty list yields an empty string instead of an IndexError.
request_str = ",".join(list_of_ids)
print(request_str)

In [None]:
# Request the audio features of each liked song: tempo, danceability, energy, liveness, etc.
# These values are what we cluster on to form playlists and, later on, what we use to
# classify new songs.
headers = {'Authorization': 'Bearer ' + ACCESS_TOKEN}
params = {'ids': request_str}

response = requests.get(
    'https://api.spotify.com/v1/audio-features',
    params=params,
    headers=headers,
)
features_payload = response.json()
print(features_payload)
audio_features = features_payload['audio_features']

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the audio features into a Pandas DataFrame and discard the irrelevant and categorical
# columns, so that only numerical data is left for the k-means clustering algorithm.
unused_columns = ['type', 'uri', 'track_href', 'analysis_url', 'time_signature']
df = pd.DataFrame(audio_features).drop(columns=unused_columns)
# song names are attached by row position
df['name'] = pd.DataFrame(list_of_names)
print(df.head())

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler

In [None]:
# segregating similar songs into three playlists
# Only the audio-feature columns are clustered: 'id' and 'name' are identifiers, and 'kmeans'
# is dropped too when it already exists, so that re-running this cell does not feed the
# previous cluster labels back in as a feature.
new_df = df.drop(columns=['id', 'name']).drop(columns=['kmeans'], errors='ignore')
# normalize the features, this is important for clustering to ensure that a feature doesn't
# unintentionally dominate over others owing to its higher range of values.
X = MinMaxScaler().fit_transform(new_df)
# random_state is fixed so the cluster assignment is reproducible between runs
kmeans = KMeans(init="k-means++", n_clusters=3, random_state=42).fit(X)
df['kmeans'] = kmeans.labels_
print(df)

In [None]:
import requests
# Fetch a list of songs from an artist, e.g. Billie Eilish, so that each song can be
# categorized under one of our playlists.
headers = {'Authorization': 'Bearer ' + ACCESS_TOKEN}
# swap in the id of any artist whose tracks you wish to explore
artist_id = "6qqNVTkY8uBg9cP3Jd7DAH"
top_tracks_url = 'https://api.spotify.com/v1/artists/' + artist_id + '/top-tracks'
response = requests.get(top_tracks_url, headers=headers)
new_songs = response.json()
print(new_songs)

In [None]:
# Split the clustered songs into 3 playlists, one per k-means label.
# Looking through each playlist gives an idea of the mood it represents: for e.g. you may
# notice that energetic songs fall under a certain playlist, soft and melancholic ones
# fall under another, etc.
cluster_1, cluster_2, cluster_3 = (df[df['kmeans'] == label] for label in range(3))
for number, playlist in enumerate((cluster_1, cluster_2, cluster_3), start=1):
    print(f"Playlist #{number}:")
    print(playlist)

In [None]:
# gather the ids and names of the artist's songs for reference (replaces the earlier lists)
artist_tracks = new_songs['tracks']
list_of_ids = [track['id'] for track in artist_tracks]
list_of_names = [track['name'] for track in artist_tracks]

In [None]:
# show the song names currently held in list_of_names
print(list_of_names)

In [None]:
# preparing query parameter: the ids as one comma-separated string
# str.join places a comma only between ids, so every id appears exactly once and an
# empty list yields an empty string instead of an IndexError.
request_str = ",".join(list_of_ids)
print(request_str)

In [None]:
# getting audio analysis features for the ids in request_str
headers = {'Authorization': 'Bearer ' + ACCESS_TOKEN}
params = {'ids': request_str}

response = requests.get(
    'https://api.spotify.com/v1/audio-features',
    params=params,
    headers=headers,
)
features_payload = response.json()
print(features_payload)
audio_features = features_payload['audio_features']

In [None]:
# Build the DataFrame for the new songs and attach their names and ids by row position.
test_df = pd.DataFrame(audio_features)
test_df['name'] = pd.DataFrame(list_of_names)
test_df['id'] = pd.DataFrame(list_of_ids)
# Keep exactly the features the model was fitted on, in the same column order; this removes
# the categorical and irrelevant ones that weren't used while fitting our dataset.
cpy_df = test_df[new_df.columns]
# Scale the new songs with the min/max learned from the TRAINING features (new_df).
# Fitting a fresh scaler on the new songs would stretch them onto their own 0-1 range,
# which is not the feature space the cluster centres were learned in.
training_scaler = MinMaxScaler().fit(new_df)
cpy_df = training_scaler.transform(cpy_df)
# use the previously trained model to predict the clusters for the new dataset
test_df['kmeans'] = kmeans.predict(cpy_df)

In [None]:
# show, playlist by playlist, which category each of the new songs falls under
for label in range(3):
    print(f"New additions to Playlist #{label + 1}: ")
    print(test_df[test_df['kmeans'] == label])

In [None]:
# pairwise feature plots of the liked songs, coloured by their k-means cluster
sns.pairplot(df, hue='kmeans')