# Daltonify: An Audio Feature Recommender System

## *Recommender System*
Testing now on my own data set of country songs. Also adjusted functions here so that they can construct the playlist based on scores but then rank based on popularity of the artist.

Making further changes to functions here. Trying to divide up function calls to simplify code.

#### Table of Contents

* [Topic 1](#topic-1)
* [Topic 2](#topic-2)

### Import Libraries & Read in Data

In [1]:
## standard imports 
import pandas as pd 
import numpy as np
import re
## visualizations
import matplotlib.pyplot as plt
import seaborn as sns

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity, pairwise_kernels

### Spotify Credentials - must be set in local environment to run
auth_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(auth_manager=auth_manager)

## options
# pd.options.display.max_rows = 4000
# pd.options.display.max_columns = 100
# pd.set_option('max_colwidth', 100)

In [2]:
### read in data
df = pd.read_csv('../data/country.csv')
# track = pd.read_csv('../data/WAP.csv')
track = pd.read_csv('../data/boston.csv')

In [None]:
# df.head(2)

In [None]:
# track

In [3]:
drop_cols = ['key', 'mode', 'time_signature', 'duration_ms']
df.drop(columns=drop_cols+['genre'], inplace=True)
track.drop(columns=drop_cols, inplace=True)  ### not present in test set using here

In [4]:
track

Unnamed: 0,track_name,artist,track_id,popularity,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Boston,Dalton & the Sheriffs,4HJ7mSMtHAdU55lLjGE4zW,0.15,0.541,0.921,-5.25,0.0443,0.00052,0.0784,0.159,0.613,99.98


In [5]:
df.head(2)

Unnamed: 0,track_name,artist,popularity,track_id,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Forever After All,Luke Combs,86,6IBcOGPsniK3Pso1wHIhew,0.487,0.65,-5.195,0.0253,0.191,0.0,0.0933,0.456,151.964
1,Be Like That - feat. Swae Lee & Khalid,Kane Brown,87,5f1joOtoMeyppIcJGZQvqJ,0.727,0.626,-8.415,0.0726,0.0469,2.6e-05,0.126,0.322,86.97


In [None]:
# pd.concat([df, track], ignore_index=True)

## Recommender Functions <a class="anchor" id="topic-1"></a>
<hr/>

In [14]:
def add_track_data(df, track):
    ID = track['track_id'].values[0]
    ### Create X data
    data = pd.concat([df, track], ignore_index=True)
    ### desired features for model (may change later)
    features = ['acousticness', 'danceability', 'energy', 'speechiness', 'valence']
    X = data[features]
    return X, data


# def calculate_similarity(X, kind='Cosine'):
#     metric_dict = {
#         'Chi Squared': 'chi2', 
#         'Linear Kernel': 'linear', 
#         'Radial Basis' : 'rbf',
#         'Laplacian' : 'laplacian',
#         'Sigmoid' : 'sigmoid',
#         'Cosine': 'cosine'
#     }
    
#     sim_mat = pairwise_kernels(X, metric=metric_dict[kind])
    

#     return sim_mat

#     if kind == 'Cosine Similarity':
# #         sim_mat = cosine_similarity(X, X)
#         sim_mat = pairwise_kernels(X, metric='cosine')
#         metric_choice = 'cosine'
        
#     elif kind == 'Linear Kernel':
# #         sim_mat = linear_kernel(X,X)
#     else:
#         metric = 'cosine'

#     sim_mat = pairwise_kernels(X, metric=metric_choice)
    

def pop_track_recommender(df, track):
    '''uses cosine similarity to recommend tracks'''
    
    ID = track['track_id'].values[0]
    ### calculate data 
    X, data = add_track_data(df, track)
    
    ### calculate similarity matrix
    similarity_matrix = cosine_similarity(X, X)
    
    ### create mapping bwtn track ids and index
    track_id_map = pd.Series(data.index, index=data['track_id'])
    ## find index of track in dataframe
    track_index = track_id_map[ID]
    
    ### find the correct column for the track in the similarity matrix
    similarity_scores = pd.Series(similarity_matrix[track_index])
    similarity_scores.sort_values(ascending=False, inplace=True)

    ### CREATE DF OF ALL SCORES
    scores_ids = data['track_id'].loc[similarity_scores.index]
    
    ### CREATE DF OF ALL SCORES
    rec_tracks_df = data[data['track_id'].isin(scores_ids.values)].copy()
    rec_tracks_df['score'] = similarity_scores
    rec_tracks_df.sort_values(by=['score', 'popularity'], ascending=False, inplace=True)

    return rec_tracks_df

def top_recommended_tracks(results, num_tracks):
    
    top_half = results[results['score'] >= results['score'].median()].copy()

    top_half.sort_values(by='popularity', ascending=False, inplace=True)
    
    top_tracks = top_half[:num_tracks]
    
    return top_tracks

def recommender(df, track, num_tracks):
    results = pop_track_recommender(df, track)
    top_tracks = top_recommended_tracks(results, num_tracks)
    
    return top_tracks


In [21]:
# pop_track_recommender(df, track)

In [8]:
x, df2 = add_track_data(df, track)

In [73]:
# metric_choice = 'polynomial'
# sim_mat = pairwise_kernels(X, metric=metric_choice)
# sim_mat

In [19]:
# calculate_similarity(x, kind='Linear')

In [42]:
# X, data = add_track_data(df, track)

In [43]:
# X

In [44]:
# data

In [45]:
# data.iloc[-1]['track_id']



In [46]:
# similarity_matrix = pairwise_kernels(X, metric = 'cosine')
# similarity_matrix

Exception: Data must be 1-dimensional

In [36]:
pop_track_reccommender(df, track)


Exception: Data must be 1-dimensional

In [27]:
similarity_matrix[results]

array([[0.97500543, 0.93516305, 0.95746121, ..., 0.59221839, 0.94203006,
        1.        ]])

In [20]:
def make_track_URIs(track_ids):
    ### reformats track ids as track URIs
    ### need text spotify:track: in front of each ID to use in Spotify
    track_URIs = []
    for track_id in track_ids:
        uri = 'spotify:track:'+ track_id
        track_URIs.append(uri)
    return track_URIs

# def create_playlist_file(track_ids, og_track_id, name):
    
#     ### creates text file of Spotify URIs
#     track_list = og_track_id.values.tolist() + track_ids.values.tolist()
#     track_URIs = make_track_URIs(track_list)
#     ### write URIs to text file
#     playlist = open(fr'../playlists/playlist_{name}.txt','w')
#     playlist.writelines('%s\n' % track for track in track_URIs) 
#     playlist.close()
#     pass

def create_playlist_file(track_ids, og_track_id):
    
    ### creates text file of Spotify URIs
    track_list = og_track_id.values.tolist() + track_ids.values.tolist()
    track_URIs = make_track_URIs(track_list)
    ### write URIs to text file
    playlist = open(r'../playlists/playlist.txt','w')
    playlist.writelines('%s\n' % track for track in track_URIs) 
    playlist.close()
    pass

def display_playlist(playlist_tracks):
    ### displays playlist track name, artist, album
    tracks_dict = sp.tracks(playlist_tracks)['tracks']
    playlist_info = []
    for i in range(len(playlist_tracks)):
        track = [
            tracks_dict[i]['name'], 
            tracks_dict[i]['artists'][0]['name'],
            tracks_dict[i]['album']['name']
            ]
        playlist_info.append(track)
    
    playlist_df = pd.DataFrame(playlist_info, columns=['Title', 'Artist', 'Album'] )
    ### start index at 1
    playlist_df.index = np.arange(1,len(playlist_df)+1)
    return playlist_df




In [None]:
# results = track_reccommender(df, track, include_pop=False)
# results


In [None]:
scorer_choice = 'Cosine Similarity'
results = pop_track_reccommender2(df, track, scorer= scorer_choice)
results

In [None]:
top = top_recommended_tracks(results, 15)
top

In [None]:
display_playlist(top['track_id'])

In [None]:
top_half = results[results['score'] >= results['score'].median()].copy()

top_half.sort_values(by='popularity', ascending=False, inplace=True)

In [None]:
top_half[:15]

In [None]:
# results['track_id']
# top_half[:15]['track_id'].values

In [None]:
# track_ids = track['track_id'].values.tolist() + top_half[:15]['track_id'].values.tolist()
# track_ids
create_playlist_file(track_ids=top_half[:15]['track_id'], og_track_id=track['track_id'], name='boston-country')

In [None]:
display_playlist(top_half[:15]['track_id'])

In [None]:
results['score'].describe()

In [None]:
cols = ['artist_name','score', 'popularity', 'danceability',
       'energy', 'valence', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'loudness','tempo',
       'track_id']
results = results[cols]
results

In [None]:
make_track_URIs(results['track_id'])

In [None]:
create_playlist_file(results['track_id'], 'WAP-country')

In [None]:
# display_playlist(make_track_URIs(results['track_id']))

## Try it without Popularity <a class="anchor" id="topic-2"></a>
<hr/>

In [None]:
# results_nopop = track_reccommender(df, track, include_pop=False)

In [None]:
# results_nopop

In [None]:
# display_playlist(make_track_URIs(results_nopop['track_id']))

In [None]:

Y = np.array([
    [1,2,3],
    [6,4,5],
    [9,7,8]
]
)

s = cosine_similarity(Y, Y)
s

In [None]:
s[1]

In [None]:
playlist_file = open('../playlists/playlist_boston-country.txt', 'r+')

text = playlist_file.read().splitlines()

In [None]:
display_playlist(text)

In [None]:
playlist_file = open('../playlists/playlist_boston-country.txt', 'r+')

text = playlist_file.read().splitlines()
text