In [811]:
# Based on tutorial from https://machinelearninggeek.com/spotify-song-recommender-system-in-python/
import pandas as pd
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import preprocessing
import sys
import os
import random
import logging

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util

# Select random sample: https://stackoverflow.com/questions/22258491/read-a-small-random-sample-from-a-big-csv-file-into-a-python-data-frame
# The path must be defined here; otherwise the cell only works when `filename`
# happens to be left over in the kernel from an earlier run.
filename = "./static/tracks.csv"
s = 1000 #desired sample size

# Count the data rows with a context manager so the file handle is closed.
# utf-8 matches the default encoding pd.read_csv uses for the same file.
with open(filename, encoding="utf-8") as csv_file:
    n = sum(1 for line in csv_file) - 1 #number of records in file (excludes header)

# When the file has no more than `s` rows there is nothing to skip;
# random.sample would raise on a negative sample size.
skip = sorted(random.sample(range(1,n+1),max(n-s, 0))) #the 0-indexed header will not be included in the skip list
df = pd.read_csv(filename, skiprows=skip)

# df=pd.read_csv("./static/tracks.csv", nrows=40000)

df.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,6N1wS1e5bMOubmVtkDLNrt,Gimme a Pigfoot and a Bottle of Beer,16,208827,0,['Bessie Smith'],['5ESobCkc6JI4tIMxQttqeg'],1923,0.691,0.198,7,-11.164,0,0.0784,0.971,0.00888,0.154,0.558,103.901,4
1,5ibit3V15UeNAxP37fZ5Mz,Chapter 6.18 - Król Maciuś na wyspie bezludnej,0,133100,0,['Janusz Korczak'],['4hLtcEEpjCmBtTnDdzRV4J'],1923-12-27,0.718,0.273,1,-20.341,0,0.95,0.688,0.0,0.175,0.616,95.306,3
2,74Ksfz9rsbP16lykqJ95sv,Chapter 135 - The Unlit Lamp,0,181940,0,"['Radclyffe Hall', 'Alice White', 'Synthesized...","['2OJEUmIIPsTAUjil4dOz70', '35AgMyuKAQIoMJZEp3...",1924,0.615,0.344,7,-16.403,0,0.953,0.744,0.0,0.13,0.379,74.774,4
3,5gMFh4lraF1XCYruow1cAi,Cornet Chop Suey,1,175027,0,['Louis Armstrong & His Hot Five'],['0t4VVR2T9Sb0z3NdDdEU6S'],1925,0.628,0.278,5,-9.357,1,0.144,0.994,0.135,0.0617,0.77,97.268,4
4,1IHM1j84hSNc00dKp4rOZy,Smoke House Blues,2,191560,0,['Jelly Roll Morton'],['4XepUkisa56DUeA3gbjDQD'],1926,0.642,0.718,8,-5.48,1,0.0448,0.943,0.731,0.0869,0.868,123.522,4


In [802]:
# Print a summary of the sampled frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   id                1000 non-null   object 
 1   name              1000 non-null   object 
 2   popularity        1000 non-null   int64  
 3   duration_ms       1000 non-null   int64  
 4   explicit          1000 non-null   int64  
 5   artists           1000 non-null   object 
 6   id_artists        1000 non-null   object 
 7   release_date      1000 non-null   object 
 8   danceability      1000 non-null   float64
 9   energy            1000 non-null   float64
 10  key               1000 non-null   int64  
 11  loudness          1000 non-null   float64
 12  mode              1000 non-null   int64  
 13  speechiness       1000 non-null   float64
 14  acousticness      1000 non-null   float64
 15  instrumentalness  1000 non-null   float64
 16  liveness          1000 non-null   float64
 

In [803]:
from sklearn.preprocessing import MinMaxScaler

# Numeric columns of df that are handed to the scaler.
feature_cols = [
    'popularity',
    'duration_ms',
    'explicit',
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'time_signature',
]

# Fit the scaler on the selected columns and rescale them in one step.
# Note: despite its name, normalized_df holds the scaler's array output
# (one row per song, one column per entry in feature_cols), not a DataFrame.
scaler = MinMaxScaler()
normalized_df = scaler.fit_transform(df[feature_cols])

# Show the first two scaled rows as a sanity check.
print(normalized_df[:2])

[[0.00000000e+00 1.08713271e-01 0.00000000e+00 4.90073145e-01
  3.04985891e-01 8.18181818e-01 6.09036900e-01 0.00000000e+00
  8.14002090e-02 9.95983927e-01 5.25000000e-05 3.50931677e-01
  6.36548223e-01 3.21700247e-01 8.00000000e-01]
 [0.00000000e+00 1.08685420e-01 0.00000000e+00 5.20376176e-01
  2.22984227e-01 9.09090909e-01 7.20144983e-01 1.00000000e+00
  7.97283177e-02 9.97991963e-01 3.61000000e-02 2.83643892e-01
  8.51776650e-01 9.51134495e-01 8.00000000e-01]]


# Spotify Credentials Setup #

In [804]:
# Spotify app credentials are read from the environment; each name is None
# when its variable is not set.
client_id, client_secret, redirect_uri = (
    os.environ.get(variable)
    for variable in (
        "SPOTIPY_CLIENT_ID",
        "SPOTIPY_CLIENT_SECRET",
        "SPOTIPY_REDIRECT_URI",
    )
)

In [805]:
scope = 'user-library-read'

# Inside a Jupyter kernel the first command-line argument is typically the
# launcher's "-f" flag rather than a user name, so only accept a first
# argument that is not an option flag. Otherwise fall back to the
# SPOTIPY_CLIENT_USERNAME environment variable.
cli_args = [arg for arg in sys.argv[1:2] if not arg.startswith('-')]
username = cli_args[0] if cli_args else os.getenv("SPOTIPY_CLIENT_USERNAME")

if not username:
    print("Usage: %s username (or set SPOTIPY_CLIENT_USERNAME)" % (sys.argv[0],))
    sys.exit()

In [806]:
# Pass the credential *values* read from the environment. The literal strings
# 'SPOTIPY_CLIENT_ID' / 'SPOTIPY_CLIENT_SECRET' are only the variable names
# and are not valid credentials.
auth_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [807]:
# prompt_for_user_token takes the username as its first positional argument,
# so the scope must be passed by keyword; passing it positionally put the scope
# string into the username slot and requested no scope at all.
# The redirect URI read from SPOTIPY_REDIRECT_URI is used when set, with the
# previous hard-coded value as the fallback.
token = util.prompt_for_user_token(
    username,
    scope=scope,
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri or 'https://localhost:8888/callback/',
)

In [819]:
def show_recommendations_for_track(track, output_path='./solutions/recommendations.csv', client=None):
    """
    Fetch Spotify recommendations seeded by one track and save them as CSV.

    Inputs:
        track: mapping with at least an 'id' key (a Spotify track object).
        output_path: CSV file to write. The file is overwritten on every
            call, so pass distinct paths to keep results for several seeds.
        client: object exposing `recommendations(seed_tracks=...)`; when None
            the notebook-level `sp` client is used.
    Output: DataFrame with columns 'Track' and 'Artist', one row per
        recommended track (first listed artist only), in API order.
    """
    spotify = sp if client is None else client
    results = spotify.recommendations(seed_tracks=[track['id']])

    # Build all rows first and create the frame once; a distinct loop name
    # keeps the `track` argument from being shadowed.
    rows = [
        {'Track': recommended['name'], 'Artist': recommended['artists'][0]['name']}
        for recommended in results['tracks']
    ]
    recommendations = pd.DataFrame(rows, columns=['Track', 'Artist'])

    # to_csv does not create missing directories.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    recommendations.to_csv(output_path)
    return recommendations


In [821]:
# Switch to a user-authorised client with the scope requested for top tracks.
scope = 'user-top-read'
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

# Candidate time_range values; only the first one is queried below.
ranges = ['short_term', 'medium_term', 'long_term']
# attributes=['id','name','popularity','duration_ms','explicit','artists','id_artists','release_date','danceability','energy','key','loudness','mode','speechiness','acousticness','instrumentalness','liveness','valence','tempo','time_signature']

items = sp.current_user_top_tracks(
    time_range=ranges[0],
    limit=5,
)['items']

# Print each top track with its first listed artist, then save recommendations for it.
for i, item in enumerate(items):
    print(f"{i} {item['name']} // {item['artists'][0]['name']}")
    show_recommendations_for_track(item)
        

0 internet crush // Jeremy Zucker 4GISsjjHTA2UyM9DY4DNgY
Recommendation:  talk is overrated (stripped.) Jeremy Zucker
Recommendation:  12 Hours Chris James
Recommendation:  Miss Her (feat. Nicklas Sahl) Maximillian
Recommendation:  wonder if she loves me JVKE
Recommendation:  back to you Alexander Stewart
Recommendation:  Last Minute Sam MacPherson
Recommendation:  One Day Less Anson Seabra
Recommendation:  What If We Never Met Chris James
Recommendation:  fall in love JAWNY
Recommendation:  full stop Jeremy Zucker
Recommendation:  Love Me Like That NERIAH
Recommendation:  Walk Out That Door Ali Gatie
Recommendation:  Why My Love (Ain't Enough) Fly By Midnight
Recommendation:  When You Think Of Me JP Saxe
Recommendation:  Set For Life Noah Cyrus
Recommendation:  Soulmate Johnny Stimson
Recommendation:  Howling at the Moon Mike Posner
Recommendation:  Good For Me ROSIE
Recommendation:  Worst Day ILLENIUM
Recommendation:  Illusion Carlie Hanson
1 my fault // Zeph 36WabQN781ejYiSWKQAzDD
R

# Recommendation Time #

In [None]:
# Create a pandas series with song titles as indices and indices as series values
indices = pd.Series(df.index, index=df['name'])
# Series.drop_duplicates() compares the values (the row numbers, which are
# already unique), so repeated titles survived it and a title lookup could
# return several rows. De-duplicate on the title index instead, keeping the
# first occurrence of each title.
indices = indices[~indices.index.duplicated(keep='first')]

# Create cosine similarity matrix based on given matrix
cosine = cosine_similarity(normalized_df)

def generate_recommendation(song_title, model_type=None, top_n=10):
    """
    Purpose: Function for song recommendations
    Inputs:
        song_title: title to look up in the notebook-level `indices` series
        model_type: square similarity matrix addressed by row position
                    (e.g. `cosine` or a sigmoid kernel matrix); when None the
                    notebook-level `cosine` matrix is used, looked up at call
                    time so a recomputed matrix is picked up
        top_n: maximum number of recommendations to return (default 10)
    Output: Pandas series of recommended song names, most similar first
    Raises: KeyError if song_title is not among the loaded songs
    """
    similarity = cosine if model_type is None else model_type

    # Get the row position of the requested song (not a hard-coded title).
    try:
        match = indices[song_title]
    except KeyError:
        raise KeyError(f"{song_title!r} is not in the loaded song sample") from None
    # A title that occurs more than once yields a Series of positions; use the first.
    index = int(match.iloc[0]) if isinstance(match, pd.Series) else int(match)

    # Score every *other* song against the requested one. The song itself is
    # excluded explicitly instead of assuming it sorts first, which does not
    # hold for every similarity measure.
    scores = [
        (position, score)
        for position, score in enumerate(similarity[index])
        if position != index
    ]
    # Sort the most similar songs first
    scores.sort(key=lambda pair: pair[1], reverse=True)
    # Select the top-N recommended songs
    top_songs_index = [position for position, _ in scores[:top_n]]
    top_songs = df['name'].iloc[top_songs_index]
    return top_songs

In [None]:
# Create sigmoid kernel matrix based on given matrix
sig_kernel = sigmoid_kernel(normalized_df)

print("Recommended Songs:")
print(generate_recommendation("Carmen",sig_kernel).values)

Recommended Songs:


KeyError: 'Carmen'