# Spotify Music Recommendation Model

> Modeling recommendation relationships between Spotify tracks using the Million Playlist Dataset and graph theory.

## Links

<ul>
    <li><a href="https://www.kaggle.com/datasets/himanshuwagh/spotify-million">Million Playlist Dataset</a></li>
</ul>

In [1]:
import numpy as np
import os
import json
import rustworkx as rx
import pickle

In [2]:
%%time
# loading data
#
# Builds two structures from every playlist file in DATA_DIR:
#   uriToData    - track ID -> (integer index, track name); indices are handed
#                  out in order of first appearance
#   allPlaylists - int32 array with one row per playlist holding the indices of
#                  its first MAX_TRACKS tracks, padded on the right with -1

DATA_DIR = './data'
MAX_TRACKS = 70      # tracks kept per playlist; longer playlists are truncated
MIN_ROWS = 11_000    # the original fixed capacity, kept as a lower bound so code
                     # that indexes rows up to that bound keeps working

# Skip hidden entries such as .DS_Store. os.listdir returns names in arbitrary
# order, so sort them to make the index assignment reproducible between runs.
files = sorted(name for name in os.listdir(DATA_DIR) if not name.startswith('.'))

uriToData = {}
playlistRows = []

# Single pass: each file is parsed once, and no assumption is made about how
# many playlists a file contains.
for file in files:
    # JSON is UTF-8; do not depend on the platform default encoding.
    with open(f"{DATA_DIR}/{file}", encoding="utf-8") as f:
        d = json.load(f)
    for playlist in d["playlists"]:
        row = []
        for track in playlist["tracks"][:MAX_TRACKS]:
            # "spotify:track:<id>" -> "<id>"
            trackId = track["track_uri"].split(':')[2]
            if trackId not in uriToData:
                uriToData[trackId] = (len(uriToData), track["track_name"])
            row.append(uriToData[trackId][0])
        playlistRows.append(row)

# Size the array from the data; rows that are not filled stay entirely -1.
allPlaylists = np.full((max(len(playlistRows), MIN_ROWS), MAX_TRACKS), -1, dtype='int32')
for rowIdx, row in enumerate(playlistRows):
    if row:
        allPlaylists[rowIdx, :len(row)] = row

# Number of playlists loaded.
counter = len(playlistRows)

CPU times: user 3.29 s, sys: 211 ms, total: 3.5 s
Wall time: 3.5 s


In [3]:
%%time
# Build an undirected graph in which two tracks are connected when they appear
# in the same playlist; the edge weight counts how often the pair was seen.
graph = rx.PyGraph(multigraph=False)

# Nodes are added in uriToData's insertion order, so the node index equals the
# integer index stored in uriToData and used in allPlaylists. The node payload
# is the track ID.
for trackId in uriToData:
    graph.add_node(trackId)

for row in allPlaylists:
    # Work with plain Python ints and drop the -1 padding (everything from the
    # first -1 onwards).
    tracks = row.tolist()
    if -1 in tracks:
        tracks = tracks[:tracks.index(-1)]

    for pos, first in enumerate(tracks):
        for second in tracks[pos + 1:]:
            # A track repeated inside one playlist must not be linked to
            # itself; a self-loop would later show up as the track
            # recommending itself.
            if first == second:
                continue
            if graph.has_edge(first, second):
                graph.update_edge(first, second, graph.get_edge_data(first, second) + 1)
            else:
                graph.add_edge(first, second, 1)
        

CPU times: user 2min 9s, sys: 1 s, total: 2min 10s
Wall time: 2min 10s


# Creating an API for our Model

Creating a few functions to easily get recommendations from our co-occurrence graph and print them to the console.

In [4]:
# functions for querying the recommendation graph and the track lookup map

def get_uri(url):
    """Extract the track ID from a Spotify track link.

    Accepts a plain track URL (``https://open.spotify.com/track/<id>``), the
    same URL with a query string or fragment (share links carry ``?si=...``),
    a trailing slash, or the ``spotify:track:<id>`` form.

    Args:
        url: The link or URI as a string.

    Returns:
        The last path segment of the link with any query string, fragment and
        ``spotify:track:`` style prefix removed.
    """
    # Cut off everything after '?' or '#', then any trailing slash, so the
    # last path segment is the bare ID.
    path = url.strip().split('?', 1)[0].split('#', 1)[0].rstrip('/')
    lastSegment = path.split('/')[-1]
    # For "spotify:track:<id>" there is no '/', so take the part after the
    # final ':'; a plain ID contains no ':' and is returned unchanged.
    return lastSegment.split(':')[-1]

def get_recommendations(url, numRecs=5):
    """Return the tracks that most often share a playlist with the given track.

    Args:
        url: Spotify link of the query track; the track ID is extracted with
            ``get_uri``.
        numRecs: Maximum number of recommendations to return.

    Returns:
        A list of ``(track_name, weight)`` tuples ordered by descending edge
        weight, at most ``numRecs`` long. Returns ``None`` (after printing a
        message) when the track is not in ``uriToData``.
    """
    try:
        trackIdx = uriToData[get_uri(url)][0]
    except KeyError:
        print("song not found in database")
        return None

    # Each incident edge is a (source, target, weight) tuple.
    neighbours = []
    for source, target, weight in dict(graph.incident_edge_index_map(trackIdx)).values():
        # Take whichever endpoint is not the query track, so the result does
        # not depend on the endpoint order inside the tuple.
        other = target if source == trackIdx else source
        if other == trackIdx:
            # Self-loop: never recommend the query track to itself.
            continue
        neighbours.append((other, weight))

    # Stable sort: edges with equal weight keep the order the graph gave them.
    neighbours.sort(key=lambda pair: pair[1], reverse=True)

    result = []
    for other, weight in neighbours:
        if len(result) >= numRecs:
            break
        # graph[other] is the node payload (track ID); uriToData maps it to
        # (index, track name).
        result.append((uriToData[graph[other]][1], weight))
    return result

def print_recommendations(recs):
    """Print one "<track name> - <count>" line per recommendation.

    Args:
        recs: Iterable of ``(track_name, count)`` pairs, or ``None``. For
            ``None`` a single "no recommendations" line is printed instead.
    """
    if recs is not None:
        for title, count in recs:
            print(f"{title} - {count}")
    else:
        print("no recommendations")

In [7]:
%%time

# Example query: "Fluorescent Adolescent" by Arctic Monkeys, top 10 neighbours
recommendations = get_recommendations(
    "https://open.spotify.com/track/7e8utCy2JlSB8dRHKi49xM",
    numRecs=10,
)

CPU times: user 4.01 ms, sys: 1.54 ms, total: 5.55 ms
Wall time: 4.29 ms


In [8]:
# Print each recommended track name followed by its edge weight.
print_recommendations(recommendations)

Do I Wanna Know? - 16
Come a Little Closer - 14
1901 - 10
Why'd You Only Call Me When You're High? - 10
Arabella - 9
A-Punk - 9
Take Me Out - 9
R U Mine? - 8
505 - 8
Someday - 8


In [14]:
# Persist the graph and the track lookup table under ./web.
# NOTE: pickle files can execute arbitrary code when loaded; only load these
# artifacts from a trusted source.

# Create the output directory on a fresh checkout instead of failing with
# FileNotFoundError.
os.makedirs('./web', exist_ok=True)

for path, obj in (
    ('./web/recommendation_graph.pkl', graph),
    ('./web/uriToData.pkl', uriToData),
):
    # Write-only binary mode is sufficient; the file is never read back here.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
