## Import all the necessary python packages

In [376]:
import os
import glob
import requests


from rake_nltk import Rake
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import json

Save the Spotify API Token. Get your own personal token by following this link: https://developer.spotify.com/console/get-audio-features-several-tracks/?ids= . I would just generate a new token and save it (granted it does expire). You can also do so programatically: https://stackoverflow.com/questions/39887342/how-can-i-get-an-access-token-spotify-api

In [2]:
spotify_auth = os.getenv("SPOTFIY_API_TOKEN")


Point to the path where you saved your spotify streaming history. 

In [321]:
spotify_history_path = [os.path.expanduser("~") + '/MySpotifyData/StreamingHistory0.json',
                        os.path.expanduser("~") + '/MySpotifyData/StreamingHistory1.json']

In [322]:
df = pd.DataFrame()

for file in spotify_history_path:
    data = pd.read_json(file)
    df = temp.append(data, ignore_index = True)

In [323]:
my_music = df.sample(300)
my_music

Unnamed: 0,endTime,artistName,trackName,msPlayed
1200,2019-11-12 22:34,Converge,Hell to Pay,271773
20105,2020-09-29 17:34,IDLES,Kill Them With Kindness,229579
9065,2020-04-30 18:35,RXYZYXR,Polar Knights (Instrumental),294028
11555,2020-07-01 16:31,The Human Abstract,Digital Veil,210360
10719,2020-06-11 15:19,Kid Cudi,Flight At First Sight/Advanced,116948
...,...,...,...,...
3180,2019-12-27 13:30,Freddie Gibbs,Shitsville,7617
16546,2020-06-22 14:36,Protest The Hero,From the Sky,374960
8099,2020-03-26 13:02,Glaswegians,orwellian,910042
6633,2020-03-06 15:47,BROCKHAMPTON,CANNON,59701


In [2]:
# query_type: can be 'artist', 'album', 'track'
def make_spotify_search_request(query_type, query):
    params = {'q': query, 'type': query_type, 'limit':1}
    r = requests.get('https://api.spotify.com/v1/search', 
                     headers={"Authorization": "Bearer {}".format(spotify_auth)},
                     params=params)
    if r.status_code == 401:
        raise Exception(r.json())
    return r.json()


In [325]:
def add_genres_to_artists(df):
    df["genres"] = ""
    for index, row in df.iterrows():
        json = make_spotify_search_request('artist', row['artistName'])
        genre_result = []
        id_result = ""
        if "artists" in json and  "items" in json["artists"]:
            result_list = json["artists"]["items"]
            if len(result_list) > 0:
                genre_result = result_list[0]["genres"]
                id_result = result_list[0]["id"]

        df.at[index, "genres"] = genre_result
        df.at[index, "artistId"] = id_result

add_genres_to_artists(my_music)
my_music

Unnamed: 0,endTime,artistName,trackName,msPlayed,genres,artistId
1200,2019-11-12 22:34,Converge,Hell to Pay,271773,"[boston hardcore, chaotic hardcore, mathcore, ...",7kHzfxMLtVHHb523s43rY1
20105,2020-09-29 17:34,IDLES,Kill Them With Kindness,229579,"[bristol indie, modern alternative rock, moder...",75mafsNqNE1WSEVxIKuY5C
9065,2020-04-30 18:35,RXYZYXR,Polar Knights (Instrumental),294028,"[djent, instrumental progressive metal, thall]",4KScLR4NpZbGiXzj9FSA85
11555,2020-07-01 16:31,The Human Abstract,Digital Veil,210360,"[deathcore, djent, melodic metalcore, progress...",2SDGIFzEh9xmE5zDKcMRkj
10719,2020-06-11 15:19,Kid Cudi,Flight At First Sight/Advanced,116948,"[hip hop, ohio hip hop, rap]",0fA0VVWsXO9YnASrzqfmYu
...,...,...,...,...,...,...
3180,2019-12-27 13:30,Freddie Gibbs,Shitsville,7617,"[alternative hip hop, hip hop, indiana hip hop...",0Y4inQK6OespitzD6ijMwb
16546,2020-06-22 14:36,Protest The Hero,From the Sky,374960,"[canadian metal, canadian post-hardcore, djent...",6z3BjfmgvDUIHaJ0UPTtrQ
8099,2020-03-26 13:02,Glaswegians,orwellian,910042,[],7ezriLBaunXh3OVnmjLG4L
6633,2020-03-06 15:47,BROCKHAMPTON,CANNON,59701,"[boy band, hip hop, rap]",1Bl6wpkWCQ4KVgnASpvzzA


In [326]:
# Filter out any artist that doesn't have genres
my_music = my_music[my_music["genres"].apply(len).gt(0)]
my_music

Unnamed: 0,endTime,artistName,trackName,msPlayed,genres,artistId
1200,2019-11-12 22:34,Converge,Hell to Pay,271773,"[boston hardcore, chaotic hardcore, mathcore, ...",7kHzfxMLtVHHb523s43rY1
20105,2020-09-29 17:34,IDLES,Kill Them With Kindness,229579,"[bristol indie, modern alternative rock, moder...",75mafsNqNE1WSEVxIKuY5C
9065,2020-04-30 18:35,RXYZYXR,Polar Knights (Instrumental),294028,"[djent, instrumental progressive metal, thall]",4KScLR4NpZbGiXzj9FSA85
11555,2020-07-01 16:31,The Human Abstract,Digital Veil,210360,"[deathcore, djent, melodic metalcore, progress...",2SDGIFzEh9xmE5zDKcMRkj
10719,2020-06-11 15:19,Kid Cudi,Flight At First Sight/Advanced,116948,"[hip hop, ohio hip hop, rap]",0fA0VVWsXO9YnASrzqfmYu
...,...,...,...,...,...,...
6094,2020-02-28 23:09,Death Grips,I've Seen Footage,202853,"[alternative hip hop, escape room, experimenta...",5RADpgYLOuS2ZxDq7ggYYH
3180,2019-12-27 13:30,Freddie Gibbs,Shitsville,7617,"[alternative hip hop, hip hop, indiana hip hop...",0Y4inQK6OespitzD6ijMwb
16546,2020-06-22 14:36,Protest The Hero,From the Sky,374960,"[canadian metal, canadian post-hardcore, djent...",6z3BjfmgvDUIHaJ0UPTtrQ
6633,2020-03-06 15:47,BROCKHAMPTON,CANNON,59701,"[boy band, hip hop, rap]",1Bl6wpkWCQ4KVgnASpvzzA


Get test data from : https://github.com/mdeff/fma . Use the raw_artists file found in fma_metadata.zip

In [327]:
path_to_artists_csv = os.path.expanduser("~") + "/Downloads/fma_metadata/raw_artists.csv"
artists_df = pd.read_csv(path_to_artists_csv, index_col=0, usecols=['artist_name', 'artist_id'])
artists_df = artists_df.rename(columns={"artist_name": "artistName"})
artists_df = artists_df.sample(800)

In [328]:
add_genres_to_artists(artists_df)
artists_df

Unnamed: 0_level_0,artistName,genres,artistId
artist_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
385,Adept,"[melodic metalcore, metalcore, screamo, swedis...",1TsgX8DlUvpk9mrUqxmOZu
7198,Moolen,"[glam metal, hard rock, metal, rock, sleaze rock]",0cc6vw3VN8YlIcvr1v7tBL
20589,K3N,"[chicago rap, rap]",5K4W6rqBFWDnAN6FQUkS6x
20509,Ford Hanford,[],
20388,LEE OTHER,[],23woqvFe3BOc8ifi2z8IHF
...,...,...,...
4377,Jesus du Dos,[],
12527,Nebe-Quartett,[vaudeville],06sCDDkD5MMazN3juL8XHq
10313,Hal McGee,[],1pn43stRVM96FRKglA7ltP
9345,angil,"[belgian pop, variete francaise]",3QVolfxko2UyCOtexhVTli


In [329]:
# Filter out any artist that doesn't have genres or fewer than 2 genres
filtered_artists_df = artists_df[artists_df["genres"].apply(len).gt(0)]
filtered_artists_df

Unnamed: 0_level_0,artistName,genres,artistId
artist_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
385,Adept,"[melodic metalcore, metalcore, screamo, swedis...",1TsgX8DlUvpk9mrUqxmOZu
7198,Moolen,"[glam metal, hard rock, metal, rock, sleaze rock]",0cc6vw3VN8YlIcvr1v7tBL
20589,K3N,"[chicago rap, rap]",5K4W6rqBFWDnAN6FQUkS6x
16913,Agan,[electro house],7vk5e3vY1uw9plTHJAMwjN
13498,Zea,"[avant-garde metal, swiss metal]",6yCjbLFZ9qAnWfsy9ujm5Y
...,...,...,...
6291,Dark Time Sunshine,"[abstract hip hop, seattle hip hop, undergroun...",5cfC9eCiLaXJJFqJRnj0gW
1165,Quintron,[garage punk blues],7oYKm5OvA9Z2TZZWCoqMz1
16536,COW,"[alternative country, classic canadian rock, c...",3CYSRCHfilgR8DSbkCMp5j
12527,Nebe-Quartett,[vaudeville],06sCDDkD5MMazN3juL8XHq


In [330]:
combined = pd.concat([my_music, filtered_artists_df], axis=0)
combined

Unnamed: 0,endTime,artistName,trackName,msPlayed,genres,artistId
1200,2019-11-12 22:34,Converge,Hell to Pay,271773.0,"[boston hardcore, chaotic hardcore, mathcore, ...",7kHzfxMLtVHHb523s43rY1
20105,2020-09-29 17:34,IDLES,Kill Them With Kindness,229579.0,"[bristol indie, modern alternative rock, moder...",75mafsNqNE1WSEVxIKuY5C
9065,2020-04-30 18:35,RXYZYXR,Polar Knights (Instrumental),294028.0,"[djent, instrumental progressive metal, thall]",4KScLR4NpZbGiXzj9FSA85
11555,2020-07-01 16:31,The Human Abstract,Digital Veil,210360.0,"[deathcore, djent, melodic metalcore, progress...",2SDGIFzEh9xmE5zDKcMRkj
10719,2020-06-11 15:19,Kid Cudi,Flight At First Sight/Advanced,116948.0,"[hip hop, ohio hip hop, rap]",0fA0VVWsXO9YnASrzqfmYu
...,...,...,...,...,...,...
6291,,Dark Time Sunshine,,,"[abstract hip hop, seattle hip hop, undergroun...",5cfC9eCiLaXJJFqJRnj0gW
1165,,Quintron,,,[garage punk blues],7oYKm5OvA9Z2TZZWCoqMz1
16536,,COW,,,"[alternative country, classic canadian rock, c...",3CYSRCHfilgR8DSbkCMp5j
12527,,Nebe-Quartett,,,[vaudeville],06sCDDkD5MMazN3juL8XHq


In [331]:
combined['Key_words'] = ""
combined['Key_words'] = combined['Key_words'].astype(object)

for index, row in combined.iterrows():
    genres = ' '.join(row['genres'])
    
    # instantiating Rake, by default it uses english stopwords from NLTK
    # and discards all puntuation characters as well
    r = Rake()

    # extracting the words by passing the text
    r.extract_keywords_from_text(genres)

    # getting the dictionary whith key words as keys and their scores as values
    key_words_dict_scores = r.get_word_degrees()
    
    # assigning the key words to the new column for the corresponding artist
    combined.at[index, 'Key_words'] = ' '.join(list(key_words_dict_scores.keys()))

combined.set_index('artistName', inplace = True)
combined

Unnamed: 0_level_0,endTime,trackName,msPlayed,genres,artistId,Key_words
artistName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Converge,2019-11-12 22:34,Hell to Pay,271773.0,"[boston hardcore, chaotic hardcore, mathcore, ...",7kHzfxMLtVHHb523s43rY1,hardcore screamo doom metal post boston chaoti...
IDLES,2020-09-29 17:34,Kill Them With Kindness,229579.0,"[bristol indie, modern alternative rock, moder...",75mafsNqNE1WSEVxIKuY5C,bristol indie modern alternative rock
RXYZYXR,2020-04-30 18:35,Polar Knights (Instrumental),294028.0,"[djent, instrumental progressive metal, thall]",4KScLR4NpZbGiXzj9FSA85,djent instrumental progressive metal thall
The Human Abstract,2020-07-01 16:31,Digital Veil,210360.0,"[deathcore, djent, melodic metalcore, progress...",2SDGIFzEh9xmE5zDKcMRkj,deathcore djent melodic metalcore progressive
Kid Cudi,2020-06-11 15:19,Flight At First Sight/Advanced,116948.0,"[hip hop, ohio hip hop, rap]",0fA0VVWsXO9YnASrzqfmYu,hip hop ohio rap
...,...,...,...,...,...,...
Dark Time Sunshine,,,,"[abstract hip hop, seattle hip hop, undergroun...",5cfC9eCiLaXJJFqJRnj0gW,abstract hip hop seattle underground rap
Quintron,,,,[garage punk blues],7oYKm5OvA9Z2TZZWCoqMz1,garage punk blues
COW,,,,"[alternative country, classic canadian rock, c...",3CYSRCHfilgR8DSbkCMp5j,alternative country classic canadian rock folk...
Nebe-Quartett,,,,[vaudeville],06sCDDkD5MMazN3juL8XHq,vaudeville


In [332]:
count = CountVectorizer()
count_matrix = count.fit_transform(combined['Key_words'])

# generating the cosine similarity matrix
cosine_sim = cosine_similarity(count_matrix, count_matrix)

In [354]:
def formulate_bottom_two_artists(df, score_series, my_music_artists):
    recommended_artists = []
    i = 0
    list_df = list(df.index)
    while len(recommended_artists) < 2:
        artist_idx = score_series.index[i]
        artist = list_df[artist_idx]
        artistId = list(df.loc[[artist]]["artistId"])[0]
        
        # Make sure to not recommend artists that are part of my_music
        if artist not in my_music_artists:
            recommended_artists.append({"name": artist, "spotifyId": artistId})
        i += 1
    return recommended_artists

# Given artist name, and dataframes, return the bottom two artists to recommend
def recommend_artists(df, artist_name, cosine_sim, my_music_artists):
    
    # creating a Series for the movie titles so they are associated to an ordered numerical
    # list I will use in the function to match the indexes
    indices = pd.Series(df.index)
    
    # initializing the empty list of recommended movies
    recommended_artists = []
    
    # gettin the index of the artist_name that matches the one provided as input
    idx = indices[indices == artist_name].index[0]

    # creating a Series with the similarity scores in ascending order
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = True)

    return formulate_bottom_two_artists(df, score_series, my_music_artists)
    

my_music_artists = list(my_music["artistName"])
recommend_artists(combined, 'JPEGMAFIA', cosine_sim, my_music_artists)

[{'name': 'Samara Lubelski', 'spotifyId': '7m0vo8YcqFNXFOoKycpOES'},
 {'name': 'Jawbone', 'spotifyId': '0p2DpCY7eufSqZpW1mAZij'}]

In [358]:
recommendation_artists = set()
recommendations = list()
sample = my_music.sample(20)
for index, row in sample.iterrows():
    recommended = recommend_artists(combined, row["artistName"], cosine_sim, my_music_artists)
    for item in recommended:
        if item["name"] not in recommendation_artists:
            recommendations.append(item)
            recommendation_artists.add(item["name"])
recommendations 

[{'name': 'Jawbone', 'spotifyId': '0p2DpCY7eufSqZpW1mAZij'},
 {'name': 'And.', 'spotifyId': '3jK9MiCrA42lLAdMGUZpwa'},
 {'name': 'Koban', 'spotifyId': '1q7T9rFQ2a2ukA1PU51fo3'},
 {'name': 'GV', 'spotifyId': '14jz3w0nEENe6ewnpSqRvf'},
 {'name': 'angil', 'spotifyId': '3QVolfxko2UyCOtexhVTli'},
 {'name': 'K3N', 'spotifyId': '5K4W6rqBFWDnAN6FQUkS6x'},
 {'name': 'Failure', 'spotifyId': '3grvcGPaLhfrD5CYsecr4j'},
 {'name': 'Chamber Music Society of Lincoln Center',
  'spotifyId': '7D5qegbCcQwEnPuRVOlB62'},
 {'name': 'Organoid', 'spotifyId': '0FjF4Knsp7DEVujXASJBdO'},
 {'name': 'ZMG', 'spotifyId': '6tuPdaFPIytg3l2f51L7Hw'},
 {'name': 'blithe field', 'spotifyId': '4effiSDHnV1sU1FaLSWIRS'},
 {'name': 'The Blend', 'spotifyId': '6SieHXbnzLxi9SVCO1Iv8P'},
 {'name': 'Cursillistas', 'spotifyId': '24HffIPc0fRXaYOzQhIMuc'},
 {'name': 'Wavves', 'spotifyId': '6bUJpbekaIlq2fT5FMV2mQ'},
 {'name': 'Shogun', 'spotifyId': '6diiyZ15AVZoSxzAyjIYJT'},
 {'name': '회사AUTO', 'spotifyId': '3rWJqI55ysYgFzbR5ZhX5s'},


In [385]:
def get_spotify_top_track(artist_id):
    url = 'https://api.spotify.com/v1/artists/{}/top-tracks'.format(artist_id)
    params = {'market': 'US'}
    r = requests.get(url, 
                     headers={"Authorization": "Bearer {}".format(spotify_auth)},
                     params=params)
    if r.status_code == 401:
        raise Exception(r.json())
    return r.json()


Find the top track of each recommended artist and save the spotify track id

In [386]:
list_of_top_track_ids = []
for item in recommendations:
    list_of_top_track_ids.append(get_spotify_top_track(item["spotifyId"])["tracks"][0]["uri"])
    
list_of_top_track_ids

['spotify:track:34UOSXZVpd5oRnCeUHvAPN',
 'spotify:track:2IFFKj9orAsQOOS0JRhHAW',
 'spotify:track:70pkwEEG7nDTXBFqoGQkH0',
 'spotify:track:35q7X4tARttH2r0ByoJOJt',
 'spotify:track:3F1P0QzdXtBz0MXy7KIO5w',
 'spotify:track:4fzsfWzRhPawzqhX8Qt9F3',
 'spotify:track:50iBJRkGjjht3RiwD0VqLA',
 'spotify:track:0hCbtOo9aFH3RDybCUi07Z',
 'spotify:track:7sfNMu2SJCHE7Tf1WHz9cg',
 'spotify:track:5lN3gM5Q6rcHzkY6TFAslJ',
 'spotify:track:1ifq1Ig1498noPWG2IMOVe',
 'spotify:track:0IY0VdkmEf4oQQPS4GmPGN',
 'spotify:track:6KxZIG1dny5wX701ZhMZEh',
 'spotify:track:6sChW7Z0IwSvvBR6m4dMws',
 'spotify:track:6lOnmMUBxANDriVh2kQqkG',
 'spotify:track:68Lzr6nZ7V2JQKaFH2wCuS',
 'spotify:track:6uYM1xsg69mzynbikvv5Rq',
 'spotify:track:1PRBkBQmQZAlGMJ6KvxG4A',
 'spotify:track:7tHo8zOzTRa3tAPqdNPmH8',
 'spotify:track:1hE4ybk6fEKqYRDeLpwpLt',
 'spotify:track:4U8xPuXwq9MpBqgrprIEg5',
 'spotify:track:1vcaDCPU3B9UZFxAF9NeVN',
 'spotify:track:1LfGP4l3u94cu57cR4khzq',
 'spotify:track:1x6xY7mgpXFRnY8mPgXbx9',
 'spotify:track:

In [377]:
def create_spotify_playlist():
    url = 'https://api.spotify.com/v1/me'
    r = requests.get(url, 
                     headers={"Authorization": "Bearer {}".format(spotify_auth)},
                     params=params)
    if r.status_code == 401:
        raise Exception(r.json())
    user_id = r.json()["id"]
    
    url = "https://api.spotify.com/v1/users/{}/playlists".format(user_id)
    body = {"name": "Recommendation Engine Created",
            "description": "Some music I'm sure I won't enjoy",
            "public": "true"}
    r = requests.post(url,
                      headers={"Authorization": "Bearer {}".format(spotify_auth)},
                      json=body)
    
    return r.json()

In [None]:
create_spotify_playlist()

In [379]:
playlist_id = "4TSQ7pxlk2couFmdV3VJME"

In [392]:
def add_track_to_playlist(playlist_id, tracklist):
    body = json.dumps({"uris": tracklist})       
    
    url = "https://api.spotify.com/v1/playlists/{}/tracks".format(playlist_id)

    r = requests.post(url,
                      headers={"Authorization": "Bearer {}".format(spotify_auth)},
                      data=body)
    
    return r.json()

In [393]:
add_track_to_playlist(playlist_id, list_of_top_track_ids)

{'snapshot_id': 'MywzNmFhZDhmNjAxNjQ0Mzg0ZTU3NmZjYjVjOTJiMWUwZjEzYzU5Yzkz'}