In [113]:
import random
import pandas as pd
from sklearn.model_selection import train_test_split
from recommender import recommender
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import creds  # Import

In [114]:
drake_sad = pd.read_csv('playlist_1.csv')
drake_hype = pd.read_csv('playlist_2.csv')
drake_chill = pd.read_csv('playlist_3.csv')
drake_romantic = pd.read_csv('playlist_4.csv')
drake_party = pd.read_csv('playlist_5.csv')

1. Split data into train and test
2. Run the recommendations on each song in the playlist, add recommendations to a recommendation list
3. Check if the recommendations are accurate with the test data

In [115]:
drake_sad.head()

Unnamed: 0,track_uri,track_name,duration_ms,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,spotify:track:047fCsbO4NdmwCBn8pcUXl,Marvins Room,347227,0.492,0.26,9,-17.341,0.0921,0.646,0.00178,0.0705,0.312,111.519
1,spotify:track:2Gnsof1hvZzjE1xdLRpjtf,Over My Dead Body,272573,0.489,0.57,1,-10.291,0.306,0.759,2e-06,0.179,0.584,185.516
2,spotify:track:6Z01gUquJsjJC67uNWm6P0,Shot For Me,224720,0.566,0.465,2,-11.714,0.235,0.0555,0.0,0.567,0.177,143.015
3,spotify:track:2KvHC9z14GSl4YpkNMX384,Do Not Disturb,283551,0.618,0.693,7,-5.943,0.45,0.246,0.0,0.112,0.454,170.982
4,spotify:track:4wVOKKEHUJxHCFFNUWDn0B,Chicago Freestyle (feat. Giveon),220488,0.735,0.449,10,-7.507,0.347,0.629,0.0,0.113,0.0397,122.947


In [116]:
# Define a function to split the dataset into training and testing
def split_dataset(df, train_ratio=0.7):
    num_rows = len(df)
    num_train = int(num_rows * train_ratio)
    
    # Shuffle the DataFrame rows
    shuffled_df = df.sample(frac=1, random_state=42)
    
    # Split into training and testing DataFrames
    train_df = shuffled_df.iloc[:num_train]
    test_df = shuffled_df.iloc[num_train:]
    
    return train_df, test_df


In [117]:
# Splitting data
sad_train, sad_test = split_dataset(drake_sad)

In [118]:
sad_train.head()

Unnamed: 0,track_uri,track_name,duration_ms,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
35,spotify:track:7aRCf5cLOFN1U7kvtChY1G,Search & Rescue,272113,0.817,0.44,10,-8.482,0.0734,0.0603,1e-06,0.33,0.544,142.024
13,spotify:track:6MR5IBSNfDmiwnrlQpVw4w,Club Paradise,283816,0.338,0.729,6,-6.419,0.102,0.758,3.2e-05,0.18,0.543,184.174
26,spotify:track:2Na0z2gfN67Rzf0vp74Wi3,Lose You,305374,0.526,0.583,4,-9.172,0.376,0.699,0.0,0.372,0.378,89.837
30,spotify:track:4BhGTc3Cgay2U1QcTS7vQe,Fire & Desire,238120,0.722,0.252,1,-14.411,0.0761,0.0671,0.0,0.0852,0.275,79.923
16,spotify:track:0YkUwXxnTkeJBvt5upeEtP,From Florida With Love,234783,0.787,0.477,6,-7.837,0.199,0.768,0.0,0.104,0.476,80.506


In [119]:
DATASET_NAME = 'drake_songs_dataset.csv'

#Read in data as a dataframe
drake_df = pd.read_csv(DATASET_NAME)

#Get desired audio features
selected_features = [
    'danceability', 'energy', 'key', 'loudness',
    'speechiness', 'acousticness', 'instrumentalness', 
    'liveness', 'valence', 'tempo'
]



In [120]:
#Scales all data before computing cosine similarity matrix
def scale_data(input_song, drake_df):
    # Making a copy to not alter drake_df
    recommender_dataset = drake_df.copy()
    
    # Removing input song from recommender_dataset so it isn't recommended
    recommender_dataset = recommender_dataset[recommender_dataset['track_uri'] != input_song['track_uri']]

    #Getting only necessary columns before concat
    recommender_dataset = recommender_dataset[selected_features].copy()
    input_song = input_song[selected_features].copy().to_frame().T
    
    #Combining rows for features scaling
    all_features = pd.concat([input_song, recommender_dataset])
    scaler = StandardScaler()
    all_features_scaled = scaler.fit_transform(all_features)

    user_features = all_features_scaled[:1, :].copy()
    dataset_features = all_features_scaled[1:, :].copy()
    
    return user_features, dataset_features
    
        

In [123]:
# Function that runs the recommendation system
def make_recs(input_song_index, playlist_df, drake_df):
    #Keeps all columns so that we can extract the recommended song names and artists later
    df_all_cols = drake_df.copy()

    # Get the input song that we will make recommendations from
    input_song = playlist_df.iloc[input_song_index]

    #Remove the user's inputted track from original dataset so it isn't recommended later on
    drake_df =  drake_df[drake_df['track_uri'] != input_song['track_uri']]

    #Scale data
    user_features, dataset_features = scale_data(input_song, drake_df)

    # Recommending system
    return recommender(user_features, dataset_features, df_all_cols, 5)
    

In [124]:
make_recs(0, drake_sad, drake_df)

Unnamed: 0,track_uri,track_name,album_name,duration_ms,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
216,spotify:track:2FbGlEPAjNhWvrVvlentVq,Cameras / Good Ones Go Interlude - Medley,Take Care (Deluxe),434960,0.441,0.403,2,-11.18,0.147,0.255,1.3e-05,0.13,0.158,139.92
94,spotify:track:11L064movtyopGdLiX4sVg,Peak,Scorpion,206026,0.687,0.218,9,-13.539,0.0388,0.85,0.000125,0.106,0.269,91.991
114,spotify:track:2fkeWbM6iqTw7oGHTYm2lw,4422,More Life,186293,0.609,0.229,11,-12.36,0.0333,0.558,0.0413,0.113,0.19,111.887
220,spotify:track:0m1KYWlT6LhFRBDVq9UNx4,HYFR (Hell Ya Fucking Right),Take Care (Deluxe),206626,0.444,0.627,7,-8.744,0.252,0.123,0.0,0.0795,0.359,98.21
40,spotify:track:1PDP7mLiAMwhfmgIwzhOm2,Yebba’s Heartbreak,Certified Lover Boy,133762,0.476,0.161,8,-11.665,0.0407,0.967,0.0381,0.109,0.0908,119.614
