In [None]:
''' Install Spotipy package '''

!pip install spotipy
!pip install -U kaleido

In [None]:
''' Mount Google Drive '''

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
''' IMPORTS '''

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.cluster import DBSCAN, KMeans, SpectralClustering, AgglomerativeClustering
from sklearn.neighbors import NearestNeighbors
from collections import Counter
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture

from sklearn.manifold import TSNE
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import plot
from plotly.subplots import make_subplots
import kaleido
import plotly

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict

from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist
import difflib

import warnings
warnings.filterwarnings('ignore')

In [None]:
''' SPOTIPY TOKENS '''

client_id = '705dbe6ad8834b4fbee893b05bf70e11'
client_secret = '64d5ed61ae124a9cafc9fa7aa07aca89'

In [None]:
''' READ DATA '''

data = pd.read_csv('drive/MyDrive/USML/v2/data.csv')
genre_data = pd.read_csv('drive/MyDrive/USML/v2/data_by_genres.csv')
year_data = pd.read_csv('drive/MyDrive/USML/v2/data_by_year.csv')
artist_data = pd.read_csv('drive/MyDrive/USML/v2/data_by_artist.csv')

In [None]:
''' READ DIMENSION REDUCED DATA '''

''' PCA '''
pca_music = pd.read_csv('drive/MyDrive/USML/v2/pca_music.csv')
pca_genre = pd.read_csv('drive/MyDrive/USML/v2/pca_genre.csv')
pca_artist = pd.read_csv('drive/MyDrive/USML/v2/pca_artist.csv')

''' TSNE '''
tsne_music = pd.read_csv('drive/MyDrive/USML/v2/tsne_music.csv')
tsne_genre = pd.read_csv('drive/MyDrive/USML/v2/tsne_genre.csv')
tsne_artist = pd.read_csv('drive/MyDrive/USML/v2/tsne_artist.csv')

In [None]:
''' Columns of data to be considered '''

columns_of_interest = ['danceability', 'energy', 'key',
       'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness',
       'liveness', 'valence', 'tempo', 'duration_ms', 'popularity', 'year']

In [None]:
''' Clustering pipeline '''

track_clustering_pipeline = Pipeline([('scaler', StandardScaler()),
                                       ('kmeans', KMeans(n_clusters = 20))],
                                     verbose = False)
X = data[columns_of_interest]
track_clustering_pipeline.fit(X)

Pipeline(steps=[('scaler', StandardScaler()),
                ('kmeans', KMeans(n_clusters=20))])

<h2> Content Based Song Recommendation System </h2>

In [None]:
''' SPOTIPY API '''

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id = client_id,
                                                           client_secret = client_secret))

In [None]:
'''
Funtion to fetch song details from Spotipy API

Input Params: song_name str, year str
Returns: metadata of song
'''

def get_track_details_from_spotify(song_name, year):

  track_data = {}

  query = 'track: ' + song_name + ' year: ' + str(year)
  results = sp.search(query, limit=1)
  # If search result empty, return None
  if results['tracks']['items'] == []:
    return None

  # Get results
  results = results['tracks']['items'][0]
  # Get track id of the song
  track_id = results['id']
  # Get audio features of the song
  audio_features = sp.audio_features(track_id)[0]

  # Fill track_data dict
  track_data['name'] = [song_name]
  track_data['year'] = [year]
  track_data['explicit'] = [int(results['explicit'])]
  track_data['duration_ms'] = [results['duration_ms']]
  track_data['popularity'] = [results['popularity']]

  # Fill audio features
  for key, value in audio_features.items():
    track_data[key] = value

  # Resurt track_data dict
  return pd.DataFrame(track_data)

In [None]:
'''
Get all details of track
First search local database for the track. This reduces API calls.
Input params: 
track dict, spotify_data pd DF
Returns:
track details
'''
def get_track_data(track, spotify_data):
    
    # Search local database
    try:
        track_data = spotify_data[(spotify_data['name'] == track['name']) 
                                & (spotify_data['year'] == track['year'])].iloc[0]
        return track_data
    
    # If track not present in local database, call spotipy api
    except:
        return get_track_details_from_spotify(track['name'], track['year'])

In [None]:
def get_mean_vec(track_list, spotify_data):
    
    print(track_list)

    track_vectors = []
    
    for track in track_list:
        track_data = get_track_data(track, spotify_data)
        print(track_data)
        if track_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(track['name']))
            continue
        track_vector = track_data[columns_of_interest].values
        print('---------------------------')
        print(track_vector)
        print('---------------------------')
        track_vectors.append(track_vector) 
    print(track_vectors) 
    print('---------------------------')
    
    track_matrix = np.array(list(track_vectors))
    print(track_matrix)
    print('---------------------------')
    print(np.mean(track_matrix, axis=0))
    return np.mean(track_matrix, axis=0)

In [None]:
'''
Recommend songs using already clustered data
Input params: 
track_list list, spotify_data pd DF, number of tracks to recommend int
Returns:
recommended tracks
'''
def recommend_tracks(track_list, spotify_data, n_tracks=10):
    
    columns = ['name', 'year', 'artists']
    track_dict = {
        'name': [],
        'year': []
    }
    for dictionary in track_list:
      track_dict['name'].append(dictionary['name'])
      track_dict['year'].append(dictionary['year'])
    # print(track_dict)

    # Get track center, scale data and get recommendations    
    track_center = get_mean_vec(track_list, spotify_data)
    scaler = track_clustering_pipeline.steps[0][1]
    scaled_data = scaler.transform(spotify_data[columns_of_interest])
    scaled_track_center = scaler.transform(track_center.reshape(1, -1))
    distances = cdist(scaled_track_center, scaled_data, 'cosine')
    index = list(np.argsort(distances)[:, :n_tracks][0])
    
    # Get recommended tracks
    # print(type(index[0]))
    # print(index)
    recommended_tracks = spotify_data.iloc[index]
    # print(recommended_tracks)
    # Remove all those tracks already in the playlist
    recommended_tracks = recommended_tracks[~recommended_tracks['name'].isin(track_dict['name'])]
    # Return recommended tracks
    return recommended_tracks[columns].to_dict(orient='records')

In [None]:
recommend_tracks([{'name': 'believer', 'year':2017}], data)

[{'name': 'believer', 'year': 2017}]
       name  year  explicit  duration_ms  popularity  danceability  energy  \
0  believer  2017         0       204347          88         0.776    0.78   

   key  loudness  mode  ...  instrumentalness  liveness  valence    tempo  \
0   10    -4.374     0  ...                 0     0.081    0.666  124.949   

             type                      id  \
0  audio_features  0pqnGHJpmpxLKifKRmU6WP   

                                    uri  \
0  spotify:track:0pqnGHJpmpxLKifKRmU6WP   

                                          track_href  \
0  https://api.spotify.com/v1/tracks/0pqnGHJpmpxL...   

                                        analysis_url time_signature  
0  https://api.spotify.com/v1/audio-analysis/0pqn...              4  

[1 rows x 22 columns]
---------------------------
[[ 7.76000e-01  7.80000e-01  1.00000e+01 -4.37400e+00  0.00000e+00
   1.28000e-01  6.22000e-02  0.00000e+00  8.10000e-02  6.66000e-01
   1.24949e+02  2.04347e+05  8.8000

[{'name': 'Believer', 'year': 2017, 'artists': "['Imagine Dragons']"},
 {'name': "Don't Start Now", 'year': 2019, 'artists': "['Dua Lipa']"},
 {'name': "Don't Start Now", 'year': 2020, 'artists': "['Dua Lipa']"},
 {'name': 'One Kiss (with Dua Lipa)',
  'year': 2018,
  'artists': "['Calvin Harris', 'Dua Lipa']"},
 {'name': 'motive (with Doja Cat)',
  'year': 2020,
  'artists': "['Ariana Grande', 'Doja Cat']"},
 {'name': 'New Rules', 'year': 2017, 'artists': "['Dua Lipa']"},
 {'name': 'Waka Waka (This Time for Africa) [The Official 2010 FIFA World Cup (TM) Song] (feat. Freshlyground)',
  'year': 2010,
  'artists': "['Shakira', 'Freshlyground']"},
 {'name': 'Look What You Made Me Do',
  'year': 2017,
  'artists': "['Taylor Swift']"},
 {'name': '20 Min', 'year': 2017, 'artists': "['Lil Uzi Vert']"},
 {'name': 'Hallucinate', 'year': 2020, 'artists': "['Dua Lipa']"}]

<h2> Collaberative Filtering based Recommender System <h2>