In [2]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler

# Spotify application credentials. Left blank on purpose: fill them in locally and
# never commit real values.
# NOTE(review): spotipy is documented to fall back to the SPOTIPY_CLIENT_ID /
# SPOTIPY_CLIENT_SECRET environment variables when these are empty - confirm for the
# installed version.
CLIENT_ID = ''
CLIENT_SECRET = ''
REDIRECT_URI = 'http://localhost:8889/callback'

# Spotify OAuth setup
sp_oauth = SpotifyOAuth(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    redirect_uri=REDIRECT_URI,
    scope="user-library-read user-top-read",
)

# Run the authorization flow up front so any login prompt happens in this cell.
token_info = sp_oauth.get_access_token(as_dict=False)
# Give the client the auth manager instead of a fixed token string: a bare token
# cannot be renewed by the client once it expires, whereas the manager can refresh it.
sp = spotipy.Spotify(auth_manager=sp_oauth)


In [21]:
# Playlist used as the data source for this notebook.
playlist_id = '37i9dQZF1DWUKw1j740sGk'
# Request at most 50 entries of that playlist.
results = sp.playlist_tracks(playlist_id=playlist_id, limit=50)

In [4]:
def get_user_tracks(sp, playlist_id, limit=50):
    """Return the track ids found in one page of a playlist.

    Args:
        sp: Client object exposing ``playlist_tracks(playlist_id, limit=...)``.
        playlist_id: Identifier of the playlist to read.
        limit: Maximum number of playlist entries requested in the single call.

    Returns:
        A list of track id strings, in playlist order. Entries without a track
        object or without an id are skipped, so the result is safe to pass on
        to id-based lookups. An empty list is returned if the request fails
        with a ``SpotifyException``.
    """
    try:
        results = sp.playlist_tracks(playlist_id, limit=limit)
    except spotipy.exceptions.SpotifyException as e:
        # Best effort: report the failure and let the caller continue with no tracks.
        print("Error fetching playlist tracks:", e)
        return []

    return [
        item['track']['id']
        for item in results['items']
        # Skip removed/unavailable entries (track is None) and tracks with no id.
        if item.get('track') and item['track'].get('id')
    ]

# Same playlist as above; collect the ids of its tracks for the later lookups.
playlist_id = '37i9dQZF1DWUKw1j740sGk'
user_tracks = get_user_tracks(sp, playlist_id=playlist_id)

['0uHrMbMv3c78398pIANDqR', '48lxT5qJF0yYyf2z4wB4xW', '7b8Z1GU2plJy3aASZTiolF', '4N1MFKjziFHH4IS3RYYUrU', '1WM80A5a4xDtlndjqjZQIv', '0JC2yPGYZsO9zR9Bss3Hi8', '7leW1Dmvs9A4oDh9i5Qwpz', '1EjIXKhNHI00ZLMRpS8iz8', '2GxrNKugF82CnoRFbQfzPf', '3vSn1frPgFcRXrjWOfhMLl', '4lZHP98G8lR47Xgo8gLU4X', '0peRfvtx1Je91FxQtii5DJ', '4QNpBfC0zvjKqPJcyqBy9W', '3FAclTFfvUuQYnEsptbK8w', '4hFLAgCvQdngAtIhb3KaVI', '627SOpaFBewVhekDAu5b8v', '63ZedTAVRCICh2CNLWnsPZ', '5wL82ZQsJAlAMHe9tB35FT', '6iNKBRV0D1ExbbHanODj1D', '0gBfLS4aqbrr1ZvVv8dqWB', '5yzeBE4O1NV5sMm0GzbKYn', '5UqIP6lsf7rbKaUv6Mrf0A', '3PWrBc88jjw3fbSvhZrNEj', '7ki0Q2nYTBhVxZvqLLDp5W', '0jcw8cJf3TNMZN0BXlueML', '5fqjgdj9SMB4Jjmyc9cY1J', '3bdlwCZm9O5LP33Smf3i5b', '5MzICMmxRwPqaRiQDBk0kZ', '6AI3ezQ4o3HUoP6Dhudph3', '7CijDWxEEbKq76O5rpyCEN', '3bFEd8QB2lxl4w6ZkoEevC', '7BRD7x5pt8Lqa1eGYC4dzj', '7pQkDJXFDCJUY0sSbbc8i9', '7qiEoVlFjb3KaytT2zgK1g', '5TswDbxMnxptgFTf6ZxvXQ', '7fzHQizxTqy8wTXwlrgPQQ', '7LJkGyL4vVrtn1mho7BmtA', '0XguQR7pMvTG1C6UlMPYN4', '3lMzT16MjA

In [5]:
# Load the raw response into a DataFrame for a quick look: each entry of 'items'
# becomes a row, and the scalar paging fields are repeated on every row.
results = sp.playlist_tracks(playlist_id=playlist_id, limit=50)
df = pd.DataFrame(data=results)
df.head()

Unnamed: 0,href,items,limit,next,offset,previous,total
0,https://api.spotify.com/v1/playlists/37i9dQZF1...,"{'added_at': '2024-05-23T22:00:00Z', 'added_by...",50,https://api.spotify.com/v1/playlists/37i9dQZF1...,0,,185
1,https://api.spotify.com/v1/playlists/37i9dQZF1...,"{'added_at': '2024-05-23T22:00:00Z', 'added_by...",50,https://api.spotify.com/v1/playlists/37i9dQZF1...,0,,185
2,https://api.spotify.com/v1/playlists/37i9dQZF1...,"{'added_at': '2024-05-23T22:00:00Z', 'added_by...",50,https://api.spotify.com/v1/playlists/37i9dQZF1...,0,,185
3,https://api.spotify.com/v1/playlists/37i9dQZF1...,"{'added_at': '2024-05-23T22:00:00Z', 'added_by...",50,https://api.spotify.com/v1/playlists/37i9dQZF1...,0,,185
4,https://api.spotify.com/v1/playlists/37i9dQZF1...,"{'added_at': '2024-05-23T22:00:00Z', 'added_by...",50,https://api.spotify.com/v1/playlists/37i9dQZF1...,0,,185


In [6]:
def get_track_details(sp, track_ids):
    """Collect name, artists, album and id for the given track ids.

    The ids are sent to ``sp.tracks`` in slices of 50. Falsy entries in a
    response are skipped. If a ``SpotifyException`` is raised, it is printed
    and whatever was gathered up to that point is returned.

    Returns:
        A list of dicts with the keys 'Track', 'Artist' (artist names joined
        with ', '), 'Album' and 'id'.
    """
    details = []
    batch_size = 50
    try:
        for start in range(0, len(track_ids), batch_size):
            response = sp.tracks(track_ids[start:start + batch_size])
            for entry in response['tracks']:
                if not entry:
                    continue
                title = entry['name']
                ident = entry['id']
                artist_names = ', '.join(performer['name'] for performer in entry['artists'])
                album_title = entry['album']['name']
                details.append({
                    'Track': title,
                    'Artist': artist_names,
                    'Album': album_title,
                    'id': ident,
                })
    except spotipy.exceptions.SpotifyException as e:
        print("Error fetching track details:", e)
    return details

In [22]:
# One row per track: Track, Artist, Album and id.
track_details = get_track_details(sp, track_ids=user_tracks)
tracks_df = pd.DataFrame(data=track_details)
tracks_df.head()

Unnamed: 0,Track,Artist,Album,id
0,Europapa,Joost,Europapa,0uHrMbMv3c78398pIANDqR
1,Pedro,"Jaxomy, Agatino Romero, Raffaella Carrà",Pedro,48lxT5qJF0yYyf2z4wB4xW
2,Friesenjung,"Ski Aggu, Joost, Otto Waalkes",denk mal drüber nach...,7b8Z1GU2plJy3aASZTiolF
3,My Love,"Route 94, Jess Glynne",My Love,4N1MFKjziFHH4IS3RYYUrU
4,Girls Just Wanna Have Some,Chromatics,Girls Just Wanna Have Fun,1WM80A5a4xDtlndjqjZQIv


In [23]:
def get_audio_features(sp, track_ids, batch_size=100):
    """Fetch audio features for the given track ids, in batches.

    Args:
        sp: Client object exposing ``audio_features(tracks=...)``.
        track_ids: A sequence of track ids, or a single id string.
        batch_size: Number of ids sent per request. The default of 100 matches
            the per-request cap documented for Spotify's audio-features
            endpoint (NOTE(review): confirm against the current API reference).

    Returns:
        A list with one entry per requested id, in request order. Entries are
        passed through exactly as the client returned them (including any
        ``None`` placeholders), so positions stay aligned with ``track_ids``.
        An empty input returns ``[]`` without issuing a request.
    """
    # A lone id string must not be split into characters by the slicing below.
    if isinstance(track_ids, str):
        track_ids = [track_ids]
    track_ids = list(track_ids)

    features = []
    for start in range(0, len(track_ids), batch_size):
        batch = track_ids[start:start + batch_size]
        # `or []` guards against a client that returns None for a whole batch.
        features.extend(sp.audio_features(tracks=batch) or [])
    return features

# Fetch the audio features of the playlist tracks and show the first entry
# to see which fields are available.
audio_features = get_audio_features(sp, track_ids=user_tracks)
print(audio_features[0])

{'danceability': 0.706, 'energy': 0.94, 'key': 11, 'loudness': -4.969, 'mode': 0, 'speechiness': 0.168, 'acousticness': 0.173, 'instrumentalness': 0.000349, 'liveness': 0.228, 'valence': 0.479, 'tempo': 160.068, 'type': 'audio_features', 'id': '0uHrMbMv3c78398pIANDqR', 'uri': 'spotify:track:0uHrMbMv3c78398pIANDqR', 'track_href': 'https://api.spotify.com/v1/tracks/0uHrMbMv3c78398pIANDqR', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0uHrMbMv3c78398pIANDqR', 'duration_ms': 160219, 'time_signature': 4}


In [24]:
def get_user_data(sp):
    """Return ``(track_ids, audio_features)`` for the notebook's playlist.

    Reads the module-level ``playlist_id``; the ids come from
    ``get_user_tracks`` and are then handed to ``get_audio_features``.
    """
    track_ids = get_user_tracks(sp, playlist_id)
    return track_ids, get_audio_features(sp, track_ids)

# Refresh the track ids and fetch their audio features in one step.
user_tracks, user_features = get_user_data(sp=sp)

Creating the dataset:

In [25]:
# Show the feature dict of the first track again, as a reminder of the available
# fields before the list is turned into a DataFrame.
print(audio_features[0])

{'danceability': 0.706, 'energy': 0.94, 'key': 11, 'loudness': -4.969, 'mode': 0, 'speechiness': 0.168, 'acousticness': 0.173, 'instrumentalness': 0.000349, 'liveness': 0.228, 'valence': 0.479, 'tempo': 160.068, 'type': 'audio_features', 'id': '0uHrMbMv3c78398pIANDqR', 'uri': 'spotify:track:0uHrMbMv3c78398pIANDqR', 'track_href': 'https://api.spotify.com/v1/tracks/0uHrMbMv3c78398pIANDqR', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0uHrMbMv3c78398pIANDqR', 'duration_ms': 160219, 'time_signature': 4}


In [26]:
# Tabulate the audio features and discard the 'type' column.
audio_df = pd.DataFrame(audio_features).drop(columns='type')
audio_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.706,0.94,11,-4.969,0,0.168,0.173,0.000349,0.228,0.479,160.068,0uHrMbMv3c78398pIANDqR,spotify:track:0uHrMbMv3c78398pIANDqR,https://api.spotify.com/v1/tracks/0uHrMbMv3c78...,https://api.spotify.com/v1/audio-analysis/0uHr...,160219,4
1,0.788,0.936,9,-6.294,1,0.301,0.0229,1e-06,0.311,0.844,151.019,48lxT5qJF0yYyf2z4wB4xW,spotify:track:48lxT5qJF0yYyf2z4wB4xW,https://api.spotify.com/v1/tracks/48lxT5qJF0yY...,https://api.spotify.com/v1/audio-analysis/48lx...,144846,4
2,0.786,0.98,11,-4.677,0,0.116,0.0161,0.000228,0.204,0.899,161.017,7b8Z1GU2plJy3aASZTiolF,spotify:track:7b8Z1GU2plJy3aASZTiolF,https://api.spotify.com/v1/tracks/7b8Z1GU2plJy...,https://api.spotify.com/v1/audio-analysis/7b8Z...,146087,4
3,0.813,0.616,8,-7.571,1,0.0495,0.000132,0.705,0.0658,0.744,119.977,4N1MFKjziFHH4IS3RYYUrU,spotify:track:4N1MFKjziFHH4IS3RYYUrU,https://api.spotify.com/v1/tracks/4N1MFKjziFHH...,https://api.spotify.com/v1/audio-analysis/4N1M...,259934,4
4,0.667,0.629,0,-8.493,1,0.0324,0.175,0.116,0.0915,0.455,115.002,1WM80A5a4xDtlndjqjZQIv,spotify:track:1WM80A5a4xDtlndjqjZQIv,https://api.spotify.com/v1/tracks/1WM80A5a4xDt...,https://api.spotify.com/v1/audio-analysis/1WM8...,223237,4


In [27]:
# Attach the audio features to the track metadata; rows are matched on the track id.
music_df = tracks_df.merge(audio_df, on='id')
music_df.head()

Unnamed: 0,Track,Artist,Album,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,uri,track_href,analysis_url,duration_ms,time_signature
0,Europapa,Joost,Europapa,0uHrMbMv3c78398pIANDqR,0.706,0.94,11,-4.969,0,0.168,0.173,0.000349,0.228,0.479,160.068,spotify:track:0uHrMbMv3c78398pIANDqR,https://api.spotify.com/v1/tracks/0uHrMbMv3c78...,https://api.spotify.com/v1/audio-analysis/0uHr...,160219,4
1,Pedro,"Jaxomy, Agatino Romero, Raffaella Carrà",Pedro,48lxT5qJF0yYyf2z4wB4xW,0.788,0.936,9,-6.294,1,0.301,0.0229,1e-06,0.311,0.844,151.019,spotify:track:48lxT5qJF0yYyf2z4wB4xW,https://api.spotify.com/v1/tracks/48lxT5qJF0yY...,https://api.spotify.com/v1/audio-analysis/48lx...,144846,4
2,Friesenjung,"Ski Aggu, Joost, Otto Waalkes",denk mal drüber nach...,7b8Z1GU2plJy3aASZTiolF,0.786,0.98,11,-4.677,0,0.116,0.0161,0.000228,0.204,0.899,161.017,spotify:track:7b8Z1GU2plJy3aASZTiolF,https://api.spotify.com/v1/tracks/7b8Z1GU2plJy...,https://api.spotify.com/v1/audio-analysis/7b8Z...,146087,4
3,My Love,"Route 94, Jess Glynne",My Love,4N1MFKjziFHH4IS3RYYUrU,0.813,0.616,8,-7.571,1,0.0495,0.000132,0.705,0.0658,0.744,119.977,spotify:track:4N1MFKjziFHH4IS3RYYUrU,https://api.spotify.com/v1/tracks/4N1MFKjziFHH...,https://api.spotify.com/v1/audio-analysis/4N1M...,259934,4
4,Girls Just Wanna Have Some,Chromatics,Girls Just Wanna Have Fun,1WM80A5a4xDtlndjqjZQIv,0.667,0.629,0,-8.493,1,0.0324,0.175,0.116,0.0915,0.455,115.002,spotify:track:1WM80A5a4xDtlndjqjZQIv,https://api.spotify.com/v1/tracks/1WM80A5a4xDt...,https://api.spotify.com/v1/audio-analysis/1WM8...,223237,4


Data Preprocessing:

In [28]:
# Count missing values per column.
music_df.isna().sum()

Track               0
Artist              0
Album               0
id                  0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
uri                 0
track_href          0
analysis_url        0
duration_ms         0
time_signature      0
dtype: int64

In [29]:
# Normalization
# Single list of the numeric audio-feature columns that get rescaled, so the
# selection and the write-back below can never drift apart.
FEATURE_COLUMNS = [
    'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
    'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms',
]

features_to_scale = music_df[FEATURE_COLUMNS].values
scaler = MinMaxScaler()
# Rows of scaled_features are in the same order as the rows of music_df.
scaled_features = scaler.fit_transform(features_to_scale)
# Overwrite the original columns with their scaled values.
music_df[FEATURE_COLUMNS] = scaled_features

In [30]:
# Only the final expression of the cell is rendered, so the output of this cell is
# the shape; the head() call below produces no visible output here.
music_df.head()
music_df.shape

(50, 20)

In [31]:
# The bare .shape expression is not the last statement, so it is not rendered;
# the visible output is the printed first row of the scaled feature matrix.
scaled_features.shape
print(scaled_features[0])

[5.51724138e-01 9.41034898e-01 8.32362698e-01 2.55559744e-01
 2.60787970e-01 3.97042093e-04 2.15239336e-01 4.78447788e-01
 6.30420606e-01 3.06959514e-01]


CONTENT BASED RECOMMENDATION SYSTEM

Retrieves the tracks of a specific playlist and, given a chosen track from that playlist, returns the 5 most similar tracks based on their audio features.

In [32]:
def cb_recommend_tracks(track_id, music_df, scaled_features, n=5):
    """Recommend the ``n`` tracks most similar to ``track_id`` by cosine similarity.

    Args:
        track_id: Value to look up in ``music_df['id']``.
        music_df: DataFrame with at least the columns 'id', 'Track', 'Artist'
            and 'Album'.
        scaled_features: 2-D array of feature vectors; row ``i`` must describe
            the ``i``-th row (by position) of ``music_df``.
        n: Maximum number of recommendations to return.

    Returns:
        A DataFrame with the columns 'Track', 'Artist' and 'Album', ordered
        from most to least similar. The query track itself is never included.

    Raises:
        IndexError: If ``track_id`` does not occur in ``music_df['id']``.
    """
    # Find the row by position rather than by index label, so the lookup stays
    # correct even when music_df does not carry a default 0..N-1 index.
    matches = np.flatnonzero((music_df['id'] == track_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"track_id {track_id!r} not found in music_df['id']")
    input_song_pos = int(matches[0])

    # Cosine similarity between the input song's feature vector and every song's vector.
    cosine_scores = cosine_similarity([scaled_features[input_song_pos]], scaled_features)[0]

    # Rank from most to least similar, then drop the query row explicitly: with
    # duplicated or tied feature vectors it is not guaranteed to be ranked first.
    ranked = cosine_scores.argsort()[::-1]
    similar_positions = ranked[ranked != input_song_pos][:n]

    # Retrieve the details of the recommended songs.
    return music_df.iloc[similar_positions][['Track', 'Artist', 'Album']]

input_song_id = '3vSn1frPgFcRXrjWOfhMLl'
rec = cb_recommend_tracks(
    track_id=input_song_id,
    music_df=music_df,
    scaled_features=scaled_features,
)
# Renumber the rows from 1, so that 1 marks the most similar track and 5 the
# least similar of the top 5.
rec = rec.reset_index(drop=True)
rec.index = rec.index + 1
rec.head()

Unnamed: 0,Track,Artist,Album
1,Rim Tim Tagi Dim,Baby Lasagna,Rim Tim Tagi Dim
2,Can't Fight This Feeling,"Sophie Ellis-Bextor, Junior Caldera",Make a Scene
3,Guantanamera (She's Hot),Pitbull,I Am Armando - Armando Reloaded
4,Give Me Everything (feat. Nayer),"Pitbull, AFROJACK, Ne-Yo, Nayer",Planet Pit (Deluxe Version)
5,Les Duele,Morad,Les Duele
