In [2]:
# Import libraries
import pandas as pd
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import creds
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [3]:
# Load the song dataset from a CSV file (path is relative to the working directory).
# The resulting global `df` is read by later cells in this notebook.
df = pd.read_csv('drake_songs_dataset.csv')

In [4]:
# Count missing values in each column of the dataset
df.isnull().sum()

track_uri           0
track_name          0
album_name          0
duration_ms         0
danceability        0
energy              0
key                 0
loudness            0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
mode                0
type                0
id                  0
uri                 0
track_href          0
analysis_url        0
time_signature      0
dtype: int64

In [5]:
# Columns that feed the cosine similarity calculation
features = [
    'danceability',
    'energy',
    'key',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
]

In [6]:
# Display only the columns selected for the similarity calculation
df.loc[:, features]

Unnamed: 0,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0.557,0.774,7,-5.275,0.3510,0.012000,0.000000,0.3960,0.397,111.975
1,0.754,0.449,7,-9.211,0.1090,0.033200,0.000083,0.5520,0.357,77.169
2,0.529,0.673,0,-4.711,0.1750,0.000307,0.000002,0.0930,0.366,165.921
3,0.835,0.626,1,-5.833,0.1250,0.058900,0.000060,0.3960,0.350,91.030
4,0.817,0.440,10,-8.482,0.0734,0.060300,0.000001,0.3300,0.544,142.024
...,...,...,...,...,...,...,...,...,...,...
424,0.505,0.536,1,-5.293,0.1550,0.075900,0.055600,0.0656,0.322,84.756
425,0.828,0.591,1,-6.217,0.1090,0.019900,0.000323,0.3610,0.202,119.251
426,0.903,0.669,2,-6.851,0.0519,0.068500,0.000028,0.1140,0.536,105.081
427,0.380,0.840,1,-4.528,0.3110,0.029100,0.000000,0.8700,0.387,159.939


In [7]:
# Function to get information about the user's track, returns track info (dictionary) or None if not a valid song
def get_user_track():
    """Prompt for a track name and search Spotify for it, filtered to artist Drake.

    Returns:
        The raw search response when its ``['tracks']['items']`` list is
        non-empty, otherwise None.
    """
    # Ask the user which track to look up
    requested_name = input('Enter track name: ')

    # Build an API client using the client-credentials flow; the id/secret
    # are read from the local `creds` module
    auth_manager = SpotifyClientCredentials(
        client_id=creds.CLIENT_ID,
        client_secret=creds.CLIENT_SECRET,
    )
    client = spotipy.Spotify(client_credentials_manager=auth_manager)

    # Request at most one result for the given track name
    response = client.search(q=f'track:{requested_name} artist:Drake', type='track', limit=1)

    # No items in the response means the search found nothing
    return response if response['tracks']['items'] else None


In [8]:
# Function that calculates cosine similarity and makes recommendations
def cosine_recs(drake_dataset, features, n_recs):
    """Recommend the tracks most similar to a track the user is prompted for.

    The track name is obtained through ``get_user_track()``. The selected
    feature columns are standardized and every dataset row is scored by its
    cosine similarity to the requested track.

    Args:
        drake_dataset: DataFrame with a 'track_name' column and the feature columns.
        features: List of column names used for the similarity calculation.
        n_recs: Maximum number of recommendations to return.

    Returns:
        A DataFrame with the rows of ``drake_dataset`` that are most similar to
        the requested track (most similar first, requested row excluded), or
        None if the lookup found nothing or the track is not in the dataset.
    """
    # Get the user's track
    search_results = get_user_track()
    if search_results is None:
        return None

    track_name = search_results['tracks']['items'][0]['name']

    # Positional row numbers of the matching track(s). Positions (not index
    # labels) are required because the similarity scores are a plain array.
    name_matches = np.flatnonzero((drake_dataset['track_name'] == track_name).to_numpy())
    if name_matches.size == 0:
        return None
    # If the name appears more than once, use its first occurrence
    user_pos = name_matches[0]

    # Scale data so that no single feature dominates the similarity
    scaled_features = StandardScaler().fit_transform(drake_dataset[features])

    # Only the requested track's row of the similarity matrix is needed
    similarities = cosine_similarity(scaled_features[[user_pos]], scaled_features)[0]

    # Remove the requested track explicitly instead of assuming it sorts first
    # (ties with identical rows could otherwise return the track itself)
    scores = pd.Series(similarities).drop(user_pos).sort_values(ascending=False)

    # Positions of the top N recommendations
    rec_positions = list(scores.index[:max(n_recs, 0)])

    # Look the rows up in the dataset that was passed in, not a global frame
    return drake_dataset.iloc[rec_positions]
    

In [9]:
# Function to get information about the user's track, returns track info (dictionary) or None if not a valid song
def get_user_track(user_track):
    """Search Spotify for a track name and return the raw search response.

    NOTE(review): the query built here carries no artist filter, so the
    returned hit is presumably not restricted to Drake - confirm intended.

    Args:
        user_track: Track name to search for; it is concatenated into the query.

    Returns:
        The search response when its ``['tracks']['items']`` list is
        non-empty, otherwise None.
    """
    # Build an API client using the client-credentials flow; the id/secret
    # are read from the local `creds` module
    credentials = SpotifyClientCredentials(
        client_id=creds.CLIENT_ID,
        client_secret=creds.CLIENT_SECRET,
    )
    client = spotipy.Spotify(client_credentials_manager=credentials)

    # Request at most one result for the given track name
    query = 'track:' + user_track
    response = client.search(q=query, type='track', limit=1)

    # No items in the response means the search found nothing
    if not response['tracks']['items']:
        return None
    return response

# Function that calculates cosine similarity and makes recommendations
def cosine_recs(drake_dataset, features, n_recs, user_track):
    """Recommend the tracks most similar to the given track name.

    The name is first looked up through ``get_user_track(user_track)``; the
    name reported by that lookup is then matched against the dataset. The
    selected feature columns are standardized and every dataset row is scored
    by its cosine similarity to the matched track.

    Args:
        drake_dataset: DataFrame with a 'track_name' column and the feature columns.
        features: List of column names used for the similarity calculation.
        n_recs: Maximum number of recommendations to return.
        user_track: Track name to look up.

    Returns:
        A DataFrame with the rows of ``drake_dataset`` that are most similar to
        the matched track (most similar first, matched row excluded), or None
        if the lookup found nothing or the track is not in the dataset.
    """
    # Get the user's track
    search_results = get_user_track(user_track)
    if search_results is None:
        return None

    track_name = search_results['tracks']['items'][0]['name']

    # Positional row numbers of the matching track(s). Positions (not index
    # labels) are required because the similarity scores are a plain array.
    name_matches = np.flatnonzero((drake_dataset['track_name'] == track_name).to_numpy())
    if name_matches.size == 0:
        return None
    # If the name appears more than once, use its first occurrence
    user_pos = name_matches[0]

    # Scale data so that no single feature dominates the similarity
    scaled_features = StandardScaler().fit_transform(drake_dataset[features])

    # Only the matched track's row of the similarity matrix is needed
    similarities = cosine_similarity(scaled_features[[user_pos]], scaled_features)[0]

    # Remove the matched track explicitly instead of assuming it sorts first
    # (ties with identical rows could otherwise return the track itself)
    scores = pd.Series(similarities).drop(user_pos).sort_values(ascending=False)

    # Positions of the top N recommendations
    rec_positions = list(scores.index[:max(n_recs, 0)])

    # Look the rows up in the dataset that was passed in, not a global frame
    return drake_dataset.iloc[rec_positions]
    

In [None]:
# Function that calculates cosine similarity and makes recommendations
def cosine_recs(drake_dataset, features, n_recs, user_track):
    """Recommend the tracks most similar to the given track name.

    The name is matched directly against the dataset's 'track_name' column.
    The selected feature columns are standardized and every dataset row is
    scored by its cosine similarity to the matched track.

    Args:
        drake_dataset: DataFrame with a 'track_name' column and the feature columns.
        features: List of column names used for the similarity calculation.
        n_recs: Maximum number of recommendations to return.
        user_track: Exact track name to find in the dataset, or None.

    Returns:
        A DataFrame with the rows of ``drake_dataset`` that are most similar to
        the matched track (most similar first, matched row excluded), or None
        if ``user_track`` is None or is not in the dataset.
    """
    # Check if valid track
    if user_track is None:
        return None

    # Positional row numbers of the matching track(s). Positions (not index
    # labels) are required because the similarity scores are a plain array.
    name_matches = np.flatnonzero((drake_dataset['track_name'] == user_track).to_numpy())
    if name_matches.size == 0:
        return None
    # If the name appears more than once, use its first occurrence
    user_pos = name_matches[0]

    # Scale data so that no single feature dominates the similarity
    scaled_features = StandardScaler().fit_transform(drake_dataset[features])

    # Only the matched track's row of the similarity matrix is needed
    similarities = cosine_similarity(scaled_features[[user_pos]], scaled_features)[0]

    # Remove the matched track explicitly instead of assuming it sorts first
    # (ties with identical rows could otherwise return the track itself)
    scores = pd.Series(similarities).drop(user_pos).sort_values(ascending=False)

    # Positions of the top N recommendations
    rec_positions = list(scores.index[:max(n_recs, 0)])

    # Look the rows up in the dataset that was passed in, not a global frame
    return drake_dataset.iloc[rec_positions]