In [80]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.cluster import KMeans
import import_ipynb
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import cdist

import Utility
from Utility import *

In [81]:
tracks = pd.read_csv(r"datasets/data.csv")

In [82]:
tfidf = TfidfVectorizer( stop_words='english')

#Replace NaN with an empty string
tracks['name'] = tracks['name'].fillna('')

tracks = tracks.head(25000)

#Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(tracks['name'])

#Output the shape of tfidf_matrix
tfidf_matrix.shape


(25000, 15701)

In [83]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [84]:

indices = pd.Series(tracks.index, index=tracks['name']).drop_duplicates()


In [85]:
def get_recommendations(title, cosine_sim=cosine_sim):
    idx = indices[title]

    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]

    track_indices = [i[0] for i in sim_scores]
    return tracks['name'].iloc[track_indices].tolist()

In [86]:
get_recommendations("I Might Fall Back On You")

['Fall Back Down',
 'Fall For You',
 'I Might Fall Back On You',
 'When I Fall In Love',
 'When I Fall In Love',
 'When I Fall In Love',
 'When I Fall In Love',
 'If I Ever Fall In Love',
 'If I Ever Fall In Love',
 'I Could Fall In Love']

In [87]:
tracks.to_csv('tracks_with_cluster.csv')

In [88]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id="677a46ed628944af94b5cdb96e3e25ee",
                                                           client_secret="dfc3a58da7e34b009001ff1794592060"))

def find_song(name):
    song_data = defaultdict()
    results = sp.search(q= 'track: {}'.format(name), limit=1)
    if results['tracks']['items'] == []:
        return None

    results = results['tracks']['items'][0]
    track_id = results['id']
    audio_features = sp.audio_features(track_id)[0]

    song_data['name'] = [name]
    song_data['explicit'] = [int(results['explicit'])]
    song_data['duration_ms'] = [results['duration_ms']]
    song_data['popularity'] = [results['popularity']]

    for key, value in audio_features.items():
        song_data[key] = value

    return pd.DataFrame(song_data)


In [89]:
def recommend_songs(song_id, n_songs=10):
  
    """
    Recommends songs based on a list of previous songs that a user has listened to.
    """
    song_embedding = PCA_algorithm(tracks)
    
    kmeans_pca , centroids = KMeans_with_PCA_algorithm(song_embedding,3)

    #get Segment K-means PCA from song_id argument
    df_segm_pca_kmeans = pd.concat([tracks.reset_index(drop=True), pd.DataFrame(song_embedding)],axis=1)
    df_segm_pca_kmeans.columns.values[-2:] = ['com1','com2']
    df_segm_pca_kmeans['Segment K-means PCA'] = kmeans_pca.labels_

    #get segment value of song_id
    segment_val = df_segm_pca_kmeans[df_segm_pca_kmeans['id'] == song_id]['Segment K-means PCA'].values[0]
    
    filtered_data_per_segment = filter_based_on_segment(df_segm_pca_kmeans,segment_val,'id', 'name')   

    rec_songs = filter_based_on_cluster_centroid(kmeans_pca,filtered_data_per_segment,segment_val,tracks)

    #recommend top n songs
    return rec_songs.head(n_songs)['name'].tolist()

In [90]:
# recommend_songs("3w3cxwYuR7ThpE8KVSys5x")
recommend_songs("05xDjWH9ub67nJJk82yfGf")




['On Moonlight Bay (with Paul Weston & His Orchestra & The Norman Luboff Choir)',
 '(Where Are You?) Now That I Need You',
 'Believe in Yourself',
 'Baby Doll (with Paul Weston & His Orchestra) - From "The Belle Of New York"',
 'Begin the Beguine',
 'At Last',
 'Doney Gal',
 'Tzatzas',
 'At The Cafe Rendezvous',
 'Kiba Marakata Manju Manu Mukhamandala']