In [30]:
import sys
sys.path.append('../')
import os
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import cosine_similarity
from matplotlib import pyplot
import re
import json
from textblob import TextBlob
import random

load_dotenv()

True

In [2]:
# Build the Spotify Web API client using the client-credentials flow.
# The id/secret are read from the environment (populated by load_dotenv() above).
spotify_credentials = SpotifyClientCredentials(
    client_id=os.getenv("SPOTIFY_CLIENT_ID"),
    client_secret=os.getenv("SPOTIFY_API_KEY"),
)
sp = spotipy.Spotify(auth_manager=spotify_credentials)

Functions:

In [3]:
def df_cleaner(df):
    """Reduce a raw track table to its remaining (feature) columns.

    Steps, in order:
      1. Drop rows whose 'name' repeats an earlier row. NOTE: this step is done
         in place, so the caller's frame loses those duplicate rows as well.
      2. Drop the metadata columns listed below (columns that are absent are
         ignored).
      3. Drop rows in which every remaining column is missing.
      4. Order the remaining columns alphabetically.

    Parameters
    ----------
    df : pandas.DataFrame
        Track table; must contain a 'name' column.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the columns that were not dropped.
    """
    non_feature_columns = [
        'album',
        'artists',
        'available_markets',
        'disc_number',
        'explicit',
        'external_ids',
        'external_urls',
        'href',
        'id',
        'is_local',
        'episode',
        'track',
        '...',
        'track_href',
        'analysis_url',
        'uri',
        'preview_url',
        'type',
        'name',
    ]

    # Side effect on the caller's frame (see docstring).
    df.drop_duplicates(subset='name', inplace=True)

    features = df.drop(columns=non_feature_columns, errors='ignore')
    features = features.dropna(how='all', axis='index')

    ordered_columns = sorted(features.columns)
    return features.reindex(columns=ordered_columns)


def get_top_tracks(max_page, per_page, keyword):
    """Search Spotify for tracks matching ``keyword`` and attach audio features.

    Uses the module-level ``sp`` client.

    Parameters
    ----------
    max_page : int
        Number of follow-up pages to request after the first one, i.e. at most
        ``max_page + 1`` search requests are made. Paging stops early when the
        API reports no ``next`` page.
    per_page : int
        Number of tracks requested per page (passed as ``limit``).
    keyword : str
        Search query, used for every page.

    Returns
    -------
    list[dict]
        One dict per track: the search item merged with its audio-features
        record (feature keys win on collision). Tracks for which the API
        returns no audio features are skipped.
    """
    tracks = []
    page = 0
    while page <= max_page:
        # ``offset`` is an item offset, not a page number, so scale it by the
        # page size to avoid fetching overlapping windows.
        results = sp.search(
            q=keyword,
            limit=per_page,
            offset=page * per_page,
            type="track",
        )
        page_tracks = results['tracks']
        tracks.extend(page_tracks['items'])
        if not page_tracks.get('next'):
            break
        page += 1

    # The audio-features endpoint accepts a list of tracks per call; batching
    # keeps the request count low. 100 is the documented per-call maximum.
    batch_size = 100
    final_data = []
    for start in range(0, len(tracks), batch_size):
        batch = tracks[start:start + batch_size]
        features = sp.audio_features([track["uri"] for track in batch]) or []
        for track, track_features in zip(batch, features):
            if track_features is None:
                # No analysis available for this track; nothing to merge.
                continue
            final_data.append({**track, **track_features})

    return final_data


def index_to_instance(df, index=None):
    """Look up an entry of the table produced by ``XYZ(df)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame handed to ``XYZ``.
    index : int, optional
        Position in the ``XYZ(df)`` list. When omitted (``None``) the whole
        list is returned.

    Returns
    -------
    The second element of the pair at ``index``, or the full list of pairs
    when ``index`` is ``None``.
    """
    instances = XYZ(df)
    # Compare against None explicitly: position 0 is a valid index but falsy.
    if index is None:
        return instances
    return instances[index][1]


def XYZ(df):
    """Return ``(row_position, label)`` pairs for the first index level of ``df``.

    The pairs are ordered by row position, so ``XYZ(df)[p][1]`` is the
    first-level index label of row ``p``.

    The previous implementation zipped the per-row integer codes with the
    per-level unique values positionally. That only yields the right label
    when the index is already sorted and unique (where this version returns
    the same result), and it truncates the table when labels repeat.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a plain ``Index`` or a ``MultiIndex``.

    Returns
    -------
    list[tuple[int, object]]
    """
    first_level_labels = df.index.get_level_values(0)
    return list(enumerate(first_level_labels))


def value_to_index_map(array):
    """Pair every element of ``array`` with its position.

    Returns a lazy ``zip`` of ``(value, position)`` tuples; ``array`` must
    support ``len()``.
    """
    positions = range(len(array))
    return zip(array, positions)


class RecSysContentBased():
    """Content-based recommender that ranks instances by feature distance.

    ``fit`` stores the training frame together with its pairwise cosine
    similarity and Euclidean distance matrices; ``evaluate`` ranks every
    training instance by Euclidean distance to a chosen row.
    """

    def __init__(self):
        # Populated by fit().
        self.train_set = None
        self.similarity = None
        self.distances = None

    def fit(self, train):
        """Store ``train`` and precompute its pairwise similarity/distances.

        ``train`` is passed unchanged to ``cosine_similarity`` and
        ``pairwise_distances``.
        """
        self.train_set = train
        # Kept for callers; evaluate() itself only uses the distances.
        self.similarity = cosine_similarity(train)
        self.distances = pairwise_distances(train, metric='euclidean')

    def evaluate(self, user):
        """Return all training instances ordered by distance to row ``user``.

        Parameters
        ----------
        user : int
            Row position in the fitted distance matrix.

        Returns
        -------
        list
            One entry per training row, nearest first (ties are broken by row
            position). The row ``user`` itself is part of the ranking.
        """
        ranked = sorted(value_to_index_map(self.distances[user]))
        # Build the position -> instance table once rather than once per
        # neighbour; indexing it directly also handles position 0 correctly.
        instances = index_to_instance(self.train_set)
        return [instances[position][1] for _, position in ranked]

    def predict(self):
        """Placeholder; not implemented."""
        pass

    def test(self, testset):
        """Placeholder; not implemented."""
        pass




import and clean the data

In [4]:
# Load the previously exported track table.
baseDf = pd.read_csv('spotify_data.csv')

# Index the rows by track name. Assigning to .index leaves the 'name' column
# in place, so it can still be used as a regular column further down.
baseDf.index = [baseDf["name"]]
# Feature subset handed to the recommender. It is selected BEFORE df_cleaner
# runs, so it reflects the table as loaded (df_cleaner's de-duplication below
# is applied to baseDf, not to this selection).
data = baseDf[[
    'energy', 'danceability', 'loudness', 'liveness', 'valence',
    'duration_ms', 'acousticness', 'speechiness', 'popularity']]
# df_cleaner drops duplicate names from baseDf in place and returns a separate
# frame without the metadata columns.
df = df_cleaner(baseDf)
baseDf

Unnamed: 0_level_0,album,artists,available_markets,disc_number,duration_ms,episode,explicit,external_ids,external_urls,href,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,track_href,analysis_url,time_signature
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
As It Was,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,167303,False,False,{'isrc': 'USSM12200612'},{'spotify': 'https://open.spotify.com/track/4L...,https://api.spotify.com/v1/tracks/4LRPiXqCikLl...,...,0,0.0557,0.342000,0.001010,0.3110,0.662,173.930,https://api.spotify.com/v1/tracks/4LRPiXqCikLl...,https://api.spotify.com/v1/audio-analysis/4LRP...,4
Running Up That Hill (A Deal With God) - 2018 Remaster,"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,300840,False,False,{'isrc': 'GBAYE1800884'},{'spotify': 'https://open.spotify.com/track/29...,https://api.spotify.com/v1/tracks/29d0nY7TzCoi...,...,0,0.0596,0.659000,0.002660,0.0546,0.139,108.296,https://api.spotify.com/v1/tracks/29d0nY7TzCoi...,https://api.spotify.com/v1/audio-analysis/29d0...,4
Left and Right (Feat. Jung Kook of BTS),"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,154487,False,False,{'isrc': 'USAT22205563'},{'spotify': 'https://open.spotify.com/track/0m...,https://api.spotify.com/v1/tracks/0mBP9X2gPCua...,...,1,0.0324,0.619000,0.000013,0.0901,0.719,101.058,https://api.spotify.com/v1/tracks/0mBP9X2gPCua...,https://api.spotify.com/v1/audio-analysis/0mBP...,4
About Damn Time,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,191822,False,True,{'isrc': 'USAT22202139'},{'spotify': 'https://open.spotify.com/track/1P...,https://api.spotify.com/v1/tracks/1PckUlxKqWQs...,...,0,0.0656,0.099500,0.000000,0.3350,0.722,108.966,https://api.spotify.com/v1/tracks/1PckUlxKqWQs...,https://api.spotify.com/v1/audio-analysis/1Pck...,4
I Like You (A Happier Song) (with Doja Cat),"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,192841,False,True,{'isrc': 'USUM72208023'},{'spotify': 'https://open.spotify.com/track/0O...,https://api.spotify.com/v1/tracks/0O6u0VJ46W86...,...,1,0.0751,0.121000,0.000000,0.1210,0.472,100.964,https://api.spotify.com/v1/tracks/0O6u0VJ46W86...,https://api.spotify.com/v1/audio-analysis/0O6u...,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Bad Boy for Life,"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,253067,False,True,{'isrc': 'USBB40580756'},{'spotify': 'https://open.spotify.com/track/2e...,https://api.spotify.com/v1/tracks/2eOuL8KesslT...,...,1,0.4900,0.179000,0.000000,0.2410,0.610,118.934,https://api.spotify.com/v1/tracks/2eOuL8KesslT...,https://api.spotify.com/v1/audio-analysis/2eOu...,4
No Other Love,"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,266240,False,False,{'isrc': 'USBB40580604'},{'spotify': 'https://open.spotify.com/track/6p...,https://api.spotify.com/v1/tracks/6p8RmxHGU1WL...,...,0,0.0472,0.318000,0.000000,0.0619,0.711,70.352,https://api.spotify.com/v1/tracks/6p8RmxHGU1WL...,https://api.spotify.com/v1/audio-analysis/6p8R...,4
Five-O (feat. Wyclef & P. Diddy),"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AT', 'AU...",1,279773,False,False,{'isrc': 'USBB40707174'},{'spotify': 'https://open.spotify.com/track/3c...,https://api.spotify.com/v1/tracks/3cttR5mtQZ5x...,...,1,0.2210,0.000719,0.000000,0.3700,0.636,150.098,https://api.spotify.com/v1/tracks/3cttR5mtQZ5x...,https://api.spotify.com/v1/audio-analysis/3ctt...,4
What About Us (Remix) [feat. Black Rob],"{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,243783,False,False,{'isrc': 'USBB40580245'},{'spotify': 'https://open.spotify.com/track/6P...,https://api.spotify.com/v1/tracks/6Ph7bndA7A5E...,...,1,0.0396,0.014300,0.000000,0.0451,0.720,96.365,https://api.spotify.com/v1/tracks/6Ph7bndA7A5E...,https://api.spotify.com/v1/audio-analysis/6Ph7...,4


Standardize the features (zero mean, unit variance)

In [5]:
# Feature matrix built from the cleaned frame.
X = pd.DataFrame(df, columns=list(df.columns))

# Fit the scaler on the full catalogue; the same fitted `scaler` is reused
# later to transform a single user-selected track.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_scaled_df = pd.DataFrame(data=X_scaled, columns=X.columns)

Cluster the scaled data into 6 clusters (K-Means)

In [6]:
# Main clustering model: 6 clusters, fixed seed for repeatable assignments.
kmeans = KMeans(
    random_state=1234,
    n_clusters=6,
)
kmeans.fit(X_scaled_df)

In [7]:
# Cluster id for every row of the scaled catalogue.
labels = kmeans.predict(X_scaled_df)
# Distinct cluster ids that actually occur among the predictions.
clusters = np.unique(labels)

In [8]:
# Rebinds `clusters` to the per-row cluster ids (one entry per catalogue row).
clusters = kmeans.predict(X_scaled_df)
# Store the assignment next to the unscaled features.
X["cluster"] = clusters

In [9]:
# Second, finer-grained model (50 clusters) with explicit solver settings.
kmeans2 = KMeans(
    n_clusters=50,
    init="k-means++",
    algorithm="elkan",
    n_init=50,  # try with 1, 4, 8, 20, 30, 100...
    max_iter=10,
    tol=0,
    random_state=1234,
)
kmeans2.fit(X_scaled_df)

In [10]:
# Elbow scan: fit one K-Means model per candidate cluster count and record
# its inertia (within-cluster sum of squares).
K = range(2, 21)
inertia = []

for k in K:
    # Use the loop variable as the cluster count (it was fixed at 3 before,
    # which made every entry of `inertia` identical), and keep the scan model
    # in its own name so the fitted `kmeans` that labelled X["cluster"] is not
    # overwritten.
    elbow_model = KMeans(n_clusters=k,
                         random_state=1234)
    elbow_model.fit(X_scaled_df)
    inertia.append(elbow_model.inertia_)

In [11]:
# Fit the content-based recommender on the feature subset `data`
# (precomputes pairwise similarity and distances between its rows).
model = RecSysContentBased()
model.fit(data)

In [46]:
user_input = input('what do you like?')
textBlb = TextBlob(user_input)  # wrap the query so it can be spell-corrected
textCorrected = str(textBlb.correct())

# Match the query as literal text: track titles and user input routinely
# contain regex metacharacters such as "(" or "[". na=False makes rows with a
# missing name count as "no match" instead of producing a NaN mask.
name_matches = baseDf['name'].str.contains(user_input, case=False, regex=False, na=False)
search_result = baseDf[name_matches][:4]
guess_message = ''
search_result

Unnamed: 0_level_0,album,artists,available_markets,disc_number,duration_ms,episode,explicit,external_ids,external_urls,href,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,track_href,analysis_url,time_signature
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Vegas (From the Original Motion Picture Soundtrack ELVIS),"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,182907,False,False,{'isrc': 'USRC12201441'},{'spotify': 'https://open.spotify.com/track/0h...,https://api.spotify.com/v1/tracks/0hquQWY3xvYq...,...,0,0.255,0.0777,3.2e-05,0.145,0.74,159.969,https://api.spotify.com/v1/tracks/0hquQWY3xvYq...,https://api.spotify.com/v1/audio-analysis/0hqu...,4
Synchronise,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,287270,False,False,{'isrc': 'FRU702200107'},{'spotify': 'https://open.spotify.com/track/6f...,https://api.spotify.com/v1/tracks/6f9XYKnQIkBf...,...,1,0.0536,0.00729,0.538,0.107,0.815,127.996,https://api.spotify.com/v1/tracks/6f9XYKnQIkBf...,https://api.spotify.com/v1/audio-analysis/6f9X...,4
Lisztomania - Classixx Version,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,304413,False,False,{'isrc': 'FR31Q0900013'},{'spotify': 'https://open.spotify.com/track/37...,https://api.spotify.com/v1/tracks/378HDaqOZxzB...,...,0,0.0641,0.064,0.00881,0.0507,0.382,108.0,https://api.spotify.com/v1/tracks/378HDaqOZxzB...,https://api.spotify.com/v1/audio-analysis/378H...,4
Deezy Daisy - Oxford Remix,"{'album_type': 'single', 'artists': [{'externa...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,267074,False,False,{'isrc': 'FRU701300115'},{'spotify': 'https://open.spotify.com/track/5E...,https://api.spotify.com/v1/tracks/5EHEEtGjuDst...,...,1,0.0471,0.0345,0.325,0.081,0.616,118.004,https://api.spotify.com/v1/tracks/5EHEEtGjuDst...,https://api.spotify.com/v1/audio-analysis/5EHE...,4


In [47]:
# Number the matching titles so the user can pick one by position.
options = [(position, title) for position, title in enumerate(search_result['name'])]
print('\n'.join(str(option) for option in options))

(0, 'Vegas (From the Original Motion Picture Soundtrack ELVIS)')
(1, 'Synchronise')
(2, 'Lisztomania - Classixx Version')
(3, 'Deezy Daisy - Oxford Remix')


In [50]:
user_choice_input_data = input(guess_message)
# input() returns text, while .iloc only accepts integer positions.
user_choice_position = int(user_choice_input_data)
user_choice_id = search_result.iloc[[user_choice_position]]['id'].values[0]
user_choice_id

'0hquQWY3xvYqN4qtiquniF'

API call for song data from Spotify

In [19]:
# Fetch the chosen track and its audio features, then merge both records
# into one dict (audio-feature keys win when a key exists in both).
track_data = sp.track(track_id=user_choice_id)
track_audio_features = sp.audio_features(track_data['uri'])[0]
track_data_featured = {**track_data, **track_audio_features}
track_data_featured

{'album': {'album_type': 'single',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/52tJU1YfTeD1uCQjq8yooZ'},
    'href': 'https://api.spotify.com/v1/artists/52tJU1YfTeD1uCQjq8yooZ',
    'id': '52tJU1YfTeD1uCQjq8yooZ',
    'name': 'Bastion',
    'type': 'artist',
    'uri': 'spotify:artist:52tJU1YfTeD1uCQjq8yooZ'},
   {'external_urls': {'spotify': 'https://open.spotify.com/artist/1MW1wqNtF2hNgsPfGDhrHB'},
    'href': 'https://api.spotify.com/v1/artists/1MW1wqNtF2hNgsPfGDhrHB',
    'id': '1MW1wqNtF2hNgsPfGDhrHB',
    'name': 'Catching Cairo',
    'type': 'artist',
    'uri': 'spotify:artist:1MW1wqNtF2hNgsPfGDhrHB'}],
  'available_markets': [],
  'external_urls': {'spotify': 'https://open.spotify.com/album/6Q0y7nqujvEvqwi7B2zkUQ'},
  'href': 'https://api.spotify.com/v1/albums/6Q0y7nqujvEvqwi7B2zkUQ',
  'id': '6Q0y7nqujvEvqwi7B2zkUQ',
  'images': [{'height': 640,
    'url': 'https://i.scdn.co/image/ab67616d0000b2739870f93c3e07aa775ce5eaa5',
    'width': 640},
 

In [52]:
# One-row frame for the selected track, restricted to (and ordered like) the
# feature columns of the cleaned catalogue frame `df`.
track_data_X = pd.DataFrame(track_data_featured, columns=list(df.columns), index=[0])

# Reuse the scaler fitted on the catalogue so the track is standardised with
# the same statistics. (The extra, never-fitted StandardScaler and the
# discarded duplicate transform call were removed.)
track_data_X_scaled = scaler.transform(track_data_X)
track_data_X_scaled_df = pd.DataFrame(track_data_X_scaled, columns=track_data_X.columns)
track_data_X_scaled

array([[-0.6801227 , -1.26553308, -0.03605552,  1.13426354, -0.64647472,
        -1.22681207, -0.56830121,  0.99772459,  1.01817229, -0.67646524,
         0.00501506,  1.60254684,  0.12971834, -0.43008103,  0.29210663]])

predict

In [53]:
# Predict the cluster of the selected track once. `kmeans` has to be the same
# fitted model that produced X["cluster"], otherwise the ids are not comparable.
track_data_labels = kmeans.predict(track_data_X_scaled_df)
# One-element array holding the track's cluster id.
track_data_cluster = track_data_labels

track_data_X["cluster"] = track_data_cluster
track_data_X_scaled_df

Unnamed: 0,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,time_signature,track_number,valence
0,-0.680123,-1.265533,-0.036056,1.134264,-0.646475,-1.226812,-0.568301,0.997725,1.018172,-0.676465,0.005015,1.602547,0.129718,-0.430081,0.292107


In [54]:
# Recommend one random catalogue track from the cluster predicted for the
# selected track. `track_data_cluster` is a one-element array, so take the
# element explicitly: calling int() on the array itself is deprecated in NumPy.
recommend_track = baseDf[X['cluster'] == int(track_data_cluster[0])].sample()
recommend_track

Unnamed: 0_level_0,album,artists,available_markets,disc_number,duration_ms,episode,explicit,external_ids,external_urls,href,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,track_href,analysis_url,time_signature
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Why Ask You II, MB 63","{'album_type': 'album', 'artists': [{'external...",[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",2,121893,False,False,{'isrc': 'USG3R0910042'},{'spotify': 'https://open.spotify.com/track/6s...,https://api.spotify.com/v1/tracks/6sJWjeNP7stp...,...,1,0.0398,0.775,0.897,0.0529,0.427,115.717,https://api.spotify.com/v1/tracks/6sJWjeNP7stp...,https://api.spotify.com/v1/audio-analysis/6sJW...,4


In [66]:

import ast

from IPython.display import IFrame

# The 'external_urls' cell holds the text form of a Python dict (single-quoted
# keys and values). Parse it as a Python literal; swapping quote characters and
# feeding the result to json.loads breaks as soon as a value contains an
# apostrophe or a double quote.
external_url_data = recommend_track['external_urls'].values[0]
external_url = ast.literal_eval(external_url_data)

IFrame(src=external_url['spotify'],
       width="300",
       height="100",
       frameborder="0",
       allowtransparency="true",
       allow="encrypted-media",
      )