In [1]:
# import libraries: standard library first, then third-party packages
import base64
import json
import os
import webbrowser
from getpass import getpass
from urllib.parse import urlencode

import pandas as pd
import requests
import spacy
from bs4 import BeautifulSoup
from nrclex import NRCLex
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# client info: Spotify app credentials are taken from the environment and, when
# a variable is not set, asked for with a hidden prompt, so the secret is never
# stored in the notebook itself.
# NOTE: any client secret that has ever been saved in a shared notebook should
# be rotated in the Spotify developer dashboard.
client_id = os.environ.get("SPOTIFY_CLIENT_ID") or getpass("Spotify client ID: ")
client_secret = os.environ.get("SPOTIFY_CLIENT_SECRET") or getpass("Spotify client secret: ")

In [4]:
# send the user to Spotify's consent page to obtain an access code
def access_code():
    """Open Spotify's authorization page in the default web browser.

    The page is addressed with the module-level ``client_id``, requests a
    ``code`` response, names ``http://localhost:7777/callback`` as redirect
    URI and asks for the scopes listed below. Nothing is returned.
    """
    query_string = urlencode({
        "client_id": client_id,
        "response_type": "code",
        "redirect_uri": "http://localhost:7777/callback",
        "scope": "user-top-read user-library-read user-read-recently-played playlist-modify-private",
    })
    authorize_url = "https://accounts.spotify.com/authorize?" + query_string
    webbrowser.open(authorize_url)

# exchange access code for access token
def access_token(code):
    """Exchange an authorization code for a Spotify access token.

    Args:
        code: the access code obtained after approving the authorization
            page opened by ``access_code``.

    Returns:
        The ``access_token`` string from the token endpoint's JSON response.

    Raises:
        requests.HTTPError: if the token endpoint answers with an error
            status (for example a mistyped or already used code). Without
            this check the failure would only show up as a ``KeyError`` on
            the missing "access_token" key.
    """
    token_data = {
        "grant_type": "authorization_code",
        "code": code,
        # must be the same redirect URI that was sent with the authorization request
        "redirect_uri": "http://localhost:7777/callback"
    }

    # requests builds the "Authorization: Basic ..." header from the
    # (client_id, client_secret) tuple and form-encodes the dict payload,
    # so neither has to be assembled by hand.
    r = requests.post(
        "https://accounts.spotify.com/api/token",
        data=token_data,
        auth=(client_id, client_secret),
    )
    r.raise_for_status()

    return r.json()["access_token"]

# build the headers sent with every request made on behalf of the user
def headers(token):
    """Return the request headers for ``token``.

    The dict carries a bearer ``Authorization`` value built from ``token``
    and a JSON ``Content-Type``.
    """
    bearer_value = "Bearer " + token
    return {
        "Authorization": bearer_value,
        "Content-Type": "application/json",
    }

# normalize data between 0 and 1
def normalize(df):
    """Min-max scale every column except ``"id"`` into the range [0, 1].

    Each column is shifted by its minimum and divided by its range
    (max - min), so columns with negative values are scaled correctly too.
    A column whose values are all equal has no range to divide by and
    becomes all zeros. Missing values are ignored when the minimum and
    maximum are computed and stay missing in the result.

    Args:
        df: DataFrame whose columns, apart from an optional ``"id"`` column,
            can be converted to float.

    Returns:
        A new DataFrame with the scaled columns; ``df`` itself is not modified.
    """
    normalized = df.copy()
    for column in normalized.columns:
        if column == "id":
            continue
        values = normalized[column].astype(float)
        low = values.min()
        span = values.max() - low
        shifted = values - low
        # span is 0 for a constant column and NaN for an all-missing one;
        # in both cases the shifted values are already the final result
        normalized[column] = shifted / span if span > 0 else shifted
    return normalized

# replace all none values with mean of column
def fill(df):
    """Replace missing values in every column with that column's mean.

    Columns are cast to float first, so a column that holds only ``None``
    (object dtype) is handled like any other column. Such a column has no
    mean to fall back on and is filled with 0.0, which keeps the result
    free of missing values.

    Args:
        df: DataFrame whose columns can all be converted to float.

    Returns:
        A new DataFrame without missing values; ``df`` itself is not modified.
    """
    filled = df.copy()
    for column in filled.columns:
        values = filled[column].astype(float)
        column_mean = values.mean()
        if pd.isna(column_mean):
            # nothing observed in this column at all
            column_mean = 0.0
        filled[column] = values.fillna(column_mean)
    return filled

# combine audio features and mood scores into one cleaned feature table
def organize(data, data2):
    """Build the feature table used for the similarity computation.

    ``data`` is turned into a frame, stripped of the listed metadata columns
    and passed through ``normalize``; ``data2`` is turned into a frame,
    stripped of its "anticip" column and passed through ``fill``. The two
    frames are then joined column-wise and indexed by the "id" column.

    Args:
        data: records for the audio-feature frame (must provide "id").
        data2: records for the mood frame, in the same order as ``data``.

    Returns:
        The combined DataFrame indexed by "id".
    """
    audio_frame, mood_frame = pd.DataFrame(data), pd.DataFrame(data2)

    metadata_columns = ["type", "uri", "track_href", "analysis_url", "duration_ms", "time_signature", "mode"]
    audio_frame = audio_frame.drop(columns = metadata_columns)
    mood_frame = mood_frame.drop(columns = "anticip")

    audio_frame = normalize(audio_frame)
    mood_frame = fill(mood_frame)

    return pd.concat([audio_frame, mood_frame], axis = 1).set_index("id")

# gather audio and mood features for the user's recently played songs
def recently_played(limit, user_headers):
    """Return the feature table for the user's recently played tracks.

    Requests the recently-played list with ``limit`` as query parameter,
    then, for every returned item, looks up the track, derives its mood via
    ``get_mood`` from the first artist's name and the track name, and fetches
    its audio features. The collected rows are combined by ``organize``.

    Args:
        limit: value sent as the ``limit`` query parameter.
        user_headers: request headers carrying the user's bearer token.

    Returns:
        The DataFrame produced by ``organize``.
    """
    query = {"limit": limit}
    history = requests.get(
        "https://api.spotify.com/v1/me/player/recently-played",
        params = query,
        headers = user_headers,
    ).json()

    audio_rows, mood_rows = [], []
    for entry in history["items"]:
        track_id = entry["track"]["id"]
        track = requests.get("https://api.spotify.com/v1/tracks/" + track_id, headers = user_headers).json()
        mood_rows.append(get_mood(track["artists"][0]["name"], track["name"]))
        audio_rows.append(
            requests.get(
                "https://api.spotify.com/v1/audio-features/" + track_id,
                params = query,
                headers = user_headers,
            ).json()
        )

    return organize(audio_rows, mood_rows)

# get data on songs matching a genre and/or artist filter
def songs_in_genre(limit, user_headers, genre = "", artist = ""):
    """Search Spotify for tracks and return their feature table.

    Args:
        limit: value sent as the ``limit`` query parameter of the search.
        user_headers: request headers carrying the user's bearer token.
        genre: optional genre filter; left out of the query when empty.
        artist: optional artist filter; left out of the query when empty.

    Returns:
        The DataFrame produced by ``organize`` for the tracks found.

    Raises:
        ValueError: if both ``genre`` and ``artist`` are empty, because
            there is nothing to search for.
    """
    # Field filters are written as plain text ("genre:rock artist:queen").
    # requests percent-encodes query parameters itself, so encoding them by
    # hand here would be encoded a second time and no longer form a valid filter.
    filters = []
    if genre:
        filters.append(f"genre:{genre}")
    if artist:
        filters.append(f"artist:{artist}")
    if not filters:
        raise ValueError("songs_in_genre needs a genre, an artist, or both")

    user_params = {
        "q": " ".join(filters),
        "type": "track",
        "market": "US",
        "limit": limit
    }
    genre_info = requests.get("https://api.spotify.com/v1/search", params = user_params, headers = user_headers).json()

    data = []
    data2 = []

    for item in genre_info["tracks"]["items"]:
        song_id = item["id"]
        # search results already carry the track name and its artists, so no
        # extra per-track lookup is needed before scoring the mood
        data2.append(get_mood(item["artists"][0]["name"], item["name"]))
        # the search parameters are not forwarded: they only apply to the search request
        track_features = requests.get("https://api.spotify.com/v1/audio-features/" + song_id, headers = user_headers).json()
        data.append(track_features)

    return organize(data, data2)

# compute similarities
def similarity(features1, features2, top_n = 10):
    """Rank the rows of ``features1`` by their total similarity to ``features2``.

    The cosine similarity between every row of ``features1`` and every row
    of ``features2`` is computed, and the values of each ``features1`` row
    are summed into one cumulative score.

    Args:
        features1: candidate feature table; its index labels identify the rows.
        features2: reference feature table with the same columns.
        top_n: how many of the best-scoring rows to return (default 10).

    Returns:
        A list of ``(cumulative_score, index_label)`` tuples sorted from
        highest to lowest score, at most ``top_n`` long.
    """
    similarities = cosine_similarity(features1, features2)

    cumulative_similarities = sorted(
        ((sum(row), label) for row, label in zip(similarities, features1.index)),
        reverse = True,
    )

    return cumulative_similarities[:top_n]

# look up the details of every recommended track
def recommend(cumulative_similarities, user_headers):
    """Fetch uri, artist, album and title for each ranked track.

    Args:
        cumulative_similarities: sequence of items whose element ``[1]`` is
            a track id (as produced by ``similarity``).
        user_headers: request headers carrying the user's bearer token.

    Returns:
        A list of ``(uri, first artist name, album name, track name)``
        tuples in the same order as the input.
    """
    def describe(track_id):
        track = requests.get("https://api.spotify.com/v1/tracks/" + track_id, headers = user_headers).json()
        return (track["uri"], track["artists"][0]["name"], track["album"]["name"], track["name"])

    return [describe(entry[1]) for entry in cumulative_similarities]

# look up the id of the authorized user
def user_info(user_headers):
    """Return the ``id`` field of the profile served by ``/v1/me``.

    Args:
        user_headers: request headers carrying the user's bearer token.
    """
    response = requests.get("https://api.spotify.com/v1/me", headers = user_headers)
    profile = response.json()
    return profile["id"]

# create the playlist that will hold the recommendations
def create_playlist(user_id, name, description, user_headers):
    """Create a non-public playlist for ``user_id`` and return its id.

    Args:
        user_id: id of the user who will own the playlist.
        name: playlist name.
        description: playlist description.
        user_headers: request headers carrying the user's bearer token.

    Returns:
        The ``id`` field of the JSON response.
    """
    payload = {
        "name": name,
        "description": description,
        "public": False,
    }
    body = json.dumps(payload)
    endpoint = f"https://api.spotify.com/v1/users/{user_id}/playlists"
    response = requests.post(endpoint, data = body, headers = user_headers)
    return response.json()["id"]

# send the recommended tracks to a playlist
def update_playlist(playlist_id, user_headers, recommendations):
    """PUT the uris of ``recommendations`` to the playlist's tracks endpoint.

    Args:
        playlist_id: id of the playlist to update.
        user_headers: request headers carrying the user's bearer token.
        recommendations: sequence of items whose element ``[0]`` is a track
            uri (as produced by ``recommend``).

    The response is not inspected and nothing is returned.
    NOTE(review): the request uses PUT; confirm whether replacing the
    playlist's items (rather than appending to them) is intended.
    """
    track_uris = []
    for recommendation in recommendations:
        track_uris.append(recommendation[0])

    body = json.dumps({"uris": track_uris})
    requests.put("https://api.spotify.com/v1/playlists/" + playlist_id + "/tracks", data = body, headers = user_headers)

# fetch a song's lyrics from its genius.com page
def scrape_lyrics(artist_name, song_name):
    """Return the lyrics text found on the song's Genius page, or ``None``.

    The page address is ``https://genius.com/<artist>-<song>-lyrics`` with
    the spaces in both names replaced by dashes. Two containers are looked
    up in the page: a ``div`` with class "lyrics" and a ``div`` with the
    generated class name below; the text of the first one found is returned.

    Args:
        artist_name: artist name as a string.
        song_name: song title as a string.

    Returns:
        The container's text, or ``None`` when neither container is present.
    """
    slug_parts = []
    for raw_name in (artist_name, song_name):
        if ' ' in raw_name:
            slug_parts.append(str(raw_name.replace(' ','-')))
        else:
            slug_parts.append(str(raw_name))

    page = requests.get('https://genius.com/' + slug_parts[0] + '-' + slug_parts[1] + '-' + 'lyrics')
    soup = BeautifulSoup(page.text, 'html.parser')

    classic_container = soup.find("div", class_="lyrics")
    generated_container = soup.find("div", class_="Lyrics__Container-sc-1ynbvzw-1 kUgSbL")

    container = classic_container or generated_container
    if not container:
        return None
    return container.get_text()

# attach lyrics onto data frame
def lyrics_onto_frame(df1, artist_name):
    """Add a 'lyrics' column holding the scraped lyrics of every track.

    Args:
        df1: DataFrame with a 'track' column of song titles; it is modified
            in place.
        artist_name: artist whose songs are listed in ``df1``.

    Returns:
        ``df1`` with the new 'lyrics' column (``None`` where no lyrics were
        found).
    """
    # Assign the whole column at once so each value lands on the row of its
    # track by position; writing through .loc with a running counter would
    # hit the wrong rows (or add new ones) when the index is not 0..n-1.
    df1['lyrics'] = [scrape_lyrics(artist_name, track) for track in df1['track']]
    return df1

# load the spaCy pipeline once and reuse it for every song
def _load_nlp():
    """Return the shared ``en_core_web_sm`` pipeline, loading it on first use."""
    if not hasattr(_load_nlp, "pipeline"):
        _load_nlp.pipeline = spacy.load("en_core_web_sm")
    return _load_nlp.pipeline

# get mood of a song based on lyrics
def get_mood(artist_name, song_name):
    """Score the emotions expressed in a song's lyrics.

    The lyrics are scraped with ``scrape_lyrics``, lemmatized with spaCy and
    passed to ``NRCLex``.

    Args:
        artist_name: artist name used to locate the lyrics.
        song_name: song title used to locate the lyrics.

    Returns:
        The ``affect_frequencies`` dict reported by ``NRCLex``, or, when no
        lyrics were found, a dict with the keys listed below all set to
        ``None``.
    """
    lyrics = scrape_lyrics(artist_name, song_name)
    if lyrics is None:
        return {'fear': None,
                'anger': None,
                'anticip': None,
                'trust': None,
                'surprise': None,
                'positive': None,
                'negative': None,
                'sadness': None,
                'disgust': None,
                'joy': None,
                'anticipation': None}

    # lemmatize lyrics; the pipeline is cached because loading it for every
    # song is by far the most expensive step and does not depend on the song
    doc = _load_nlp()(lyrics)
    lemmatized_lyrics = " ".join(token.lemma_ for token in doc)
    emotion = NRCLex(lemmatized_lyrics)

    # get relative mood frequencies
    return emotion.affect_frequencies

In [5]:
# authorize program to access your Spotify profile: this opens Spotify's
# authorization page in your web browser; the access code needed by the next
# cell is read from the URL you end up on afterwards
access_code()

In [6]:
# enter access code: copy it from the browser's address bar, where it is all
# the text after 'http://localhost:7777/callback?code='
code = input("Access Code: ")

In [7]:
# exchange the access code entered above for an access token
token = access_token(code)

# build the request headers (bearer token) reused by every later request
user_headers = headers(token)

# get your user_id, needed to create a playlist under your account
user_id = user_info(user_headers)

In [8]:
# create a new, non-public recommendations playlist and keep its id so the
# recommendations can be written to it later
name = input("Give your playlist a cool name: ")
description = input("What's your playlist about?")
playlist_id = create_playlist(user_id, name, description, user_headers)

In [9]:
# candidate pool: features of tracks found by a search limited to 30 results
# and filtered by genre and artist
search = songs_in_genre(30, user_headers, "hip-hop", "the weeknd")

# reference set: features of your recently played songs (limit 10)
recent = recently_played(10, user_headers)

  df[column] = df[column].fillna(df[column].mean())


In [13]:
# inspect which feature columns the candidate table ended up with
search.columns

Index(['danceability', 'energy', 'key', 'loudness', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'fear', 'anger', 'trust', 'surprise', 'positive', 'negative', 'sadness',
       'disgust', 'joy', 'anticipation'],
      dtype='object')

In [12]:
# generate recommendations and update playlist: rank the candidates by their
# similarity to the recently played songs, look up the best-ranked tracks and
# write their uris to the playlist created above
similarities = similarity(search, recent)
recommendations = recommend(similarities, user_headers)
update_playlist(playlist_id, user_headers, recommendations)