In [1]:
from dotenv import load_dotenv
import os
import base64
from requests import post, get
import json
import datetime

load_dotenv()
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")

def getToken():
    auth_string = client_id + ":" + client_secret
    auth_bytes = auth_string.encode("utf-8")
    auth_base64 = str(base64.b64encode(auth_bytes), "utf-8")

    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": "Basic " + auth_base64,
        "Content-Type": "application/x-www-form-urlencoded"
    }

    data = {"grant_type": "client_credentials"}

    result = post(url, headers=headers, data=data)
    json_results = json.loads(result.content)
    token = json_results["access_token"]
    return token

def get_auth_header(token):
    return {"Authorization": "Bearer " + token}

def search_for_artist(token, artist_name):
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    query = "?q={0}&type=artist&limit=1".format(artist_name)
    query_url = url + query
    result = get(query_url, headers=headers)
    json_result = json.loads(result.content)["artists"]["items"]
    if len(json_result) == 0:
        print("No artist with this name exists")
    
    return json_result[0]

def get_songs_by_artist(token, artist_id):
    url = "https://api.spotify.com/v1/artists/{}/top-tracks?country=US".format(artist_id)
    headers = get_auth_header(token)
    result = get(url, headers=headers)
    json_result = json.loads(result.content)["tracks"]
    return json_result

def get_songs_from_playlist(token, playlist_id):
    url = "https://api.spotify.com/v1/playlists/{}/tracks".format(playlist_id)
    headers = get_auth_header(token)
    result = get(url, headers=headers)
    json_result = json.loads(result.content)["items"]

    return json_result

def get_year_from_song(token, song_id):
    url = "https://api.spotify.com/v1/tracks/{}".format(song_id)
    headers = get_auth_header(token)
    result = get(url , headers=headers)
    json_result = json.loads(result.content)["album"]["release_date"]
    return json_result

def get_song_info_id(token, song_id):
    url = "https://api.spotify.com/v1/tracks/{}".format(song_id)
    headers = get_auth_header(token)
    result = get(url , headers=headers)
    json_result = json.loads(result.content)
    return json_result

# Function to calculate weighted popularity scores based on release date
def calculate_weighted_popularity(release_date):
    # Convert the release date to datetime object
    release_date = datetime.datetime.strptime(release_date, '%Y-%m-%d')

    # Calculate the time span between release date and today's date
    time_span = datetime.datetime.now() - release_date

    # Calculate the weighted popularity score based on time span (e.g., more recent releases have higher weight)
    weight = 1 / (time_span.days + 1)
    return weight

token = getToken()
get_year_from_song(token, "1rP5gAqMlm8d6UnfseuzSm")

'2020-11-13'

In [2]:
import csv
import pandas as pd
import matplotlib as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

#df = pd.read_csv("datasets/dataset.csv")
df = pd.read_csv("datasets/data.csv")


# COLUMNS:
# ['Unnamed: 0', 'track_id', 'artists', 'album_name', 'track_name',
#        'popularity', 'duration_ms', 'explicit', 'danceability', 'energy',
#        'key', 'loudness', 'mode', 'speechiness', 'acousticness',
#        'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature',
#        'track_genre']

# df = df.dropna()
# df.drop(columns="Unnamed: 0", inplace=True)
# df = df.drop_duplicates('track_id')

df = df.drop_duplicates('id')




#numerical_features = ['explicit', 'popularity', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence']
numerical_features = ['valence', 'acousticness', 'danceability', 'energy', 'explicit', 'instrumentalness', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']

scaled_df = df
scaler = MinMaxScaler()
scaled_df[numerical_features] = scaler.fit_transform(df[numerical_features])
df.columns



Index(['valence', 'year', 'acousticness', 'artists', 'danceability',
       'duration_ms', 'energy', 'explicit', 'id', 'instrumentalness', 'key',
       'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date',
       'speechiness', 'tempo'],
      dtype='object')

In [3]:
# def generate_recommended_songs(track_name, num_recommended):
#     if track_name not in scaled_df["track_name"].values:
#         print("Song doesn't exist")
#         return

#     track_index = scaled_df.index[scaled_df["track_name"] == track_name]

#     scores = cosine_similarity(scaled_df.loc[track_index][numerical_features], scaled_df[numerical_features])

#     similar_song_indices = scores.argsort()[0][::-1][1:num_recommended + 1]

#     # Get the names of the most similar songs based on content-based filtering
#     content_based_recommendations = scaled_df.iloc[similar_song_indices][["track_name", "artists"]]
#     print(content_based_recommendations)

def generate_recommended_songs(track_name, num_recommended):
    if track_name not in scaled_df["name"].values:
        print("Song doesn't exist")
        return

    track_index = scaled_df.index[scaled_df["name"] == track_name]

    scores = cosine_similarity(scaled_df.loc[track_index][numerical_features], scaled_df[numerical_features])

    similar_song_indices = scores.argsort()[0][::-1][1:num_recommended + 1]

    # Get the names of the most similar songs based on content-based filtering
    content_based_recommendations = scaled_df.iloc[similar_song_indices][["name", "artists"]]
    print(content_based_recommendations)



print(generate_recommended_songs("HIGHEST IN THE ROOM", 5))


                                             name                     artists
19432                   No Guidance (feat. Drake)    ['Chris Brown', 'Drake']
92358  The Plan - From the Motion Picture "TENET"            ['Travis Scott']
18612                                   The Hills              ['The Weeknd']
18853               Come and See Me (feat. Drake)  ['PARTYNEXTDOOR', 'Drake']
75085                                       MAYBE           ['The Kid LAROI']
None


In [4]:
#get song information and recommend song
def search_for_song_id(token, song_name):
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    format_song_name = ""
    for i in range(len(song_name)):
        if song_name[i] == " ":
            format_song_name += "+"
        else:
            format_song_name += song_name[i]
    query = "?q={0}&type=track&limit=1".format(format_song_name)
    query_url = url + query
    result = get(query_url, headers=headers)
    json_result = json.loads(result.content)["tracks"]["items"][0]
    return json_result

def getAudioFeatures(token, id):
    url = "https://api.spotify.com/v1/audio-features/{}".format(id)
    headers = get_auth_header(token)
    result = get(url, headers=headers)
    json_result = json.loads(result.content)
    return json_result


def getSongInfo(token, song_name):
    json_song_info = search_for_song_id(token, song_name)
    song_id = json_song_info["id"]
    final_dict = dict()
    song_artists = ""
    for i in range(len(json_song_info["artists"])):
        song_artists += json_song_info["artists"][i]['name'] + ";"
    song_artists = song_artists[0:-1]
    song_album_name = json_song_info["album"]["name"]
    song_duration = json_song_info["duration_ms"]
    song_popularity = json_song_info["popularity"]
    song_explicit = json_song_info["explicit"]

    song_audio_features = getAudioFeatures(token, song_id)
    # final_dict["track_id"] = song_id
    # final_dict["artists"] = song_artists
    # final_dict["album_name"] = song_album_name
    # final_dict["track_name"] = song_name
    # final_dict["popularity"] = song_popularity
    # final_dict["duration_ms"] = song_duration
    # final_dict["explicit"] = song_explicit
    # final_dict["danceability"] = song_audio_features["danceability"]
    # final_dict["energy"] = song_audio_features["energy"]
    # final_dict["key"] = song_audio_features["key"]
    # final_dict["loudness"] = song_audio_features["loudness"]
    # final_dict["mode"] = song_audio_features["mode"]
    # final_dict["speechiness"] = song_audio_features["speechiness"]
    # final_dict["acousticness"] = song_audio_features["acousticness"]
    # final_dict["instrumentalness"] = song_audio_features["instrumentalness"]
    # final_dict["liveness"] = song_audio_features["liveness"]
    # final_dict["valence"] = song_audio_features["valence"]
    # final_dict["tempo"] = song_audio_features["tempo"]
    # final_dict["time_signature"] = ""
    # final_dict["track_genre"] = ""
    
    # ['valence', 'year', 'acousticness', 'artists', 'danceability',
    #    'duration_ms', 'energy', 'explicit', 'id', 'instrumentalness', 'key',
    #    'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date',
    #    'speechiness', 'tempo']
    
    final_dict["valence"] = song_audio_features["valence"]
    final_dict["year"] = get_year_from_song(token, song_id)[0:4]
    final_dict["acousticness"] = song_audio_features["acousticness"]
    final_dict["artists"] = song_artists
    final_dict["danceability"] = song_audio_features["danceability"]
    final_dict["duration_ms"] = song_duration
    final_dict["energy"] = song_audio_features["energy"]
    final_dict["explicit"] = song_explicit
    final_dict["id"] = song_id
    final_dict["instrumentalness"] = song_audio_features["instrumentalness"]
    final_dict["key"] = song_audio_features["key"]
    final_dict["liveness"] = song_audio_features["liveness"]
    final_dict["loudness"] = song_audio_features["loudness"]
    final_dict["mode"] = song_audio_features["mode"]
    final_dict["name"] = json_song_info["name"]
    final_dict["popularity"] = song_popularity
    final_dict["release_date"] = get_year_from_song(token, song_id)
    final_dict["speechiness"] = song_audio_features["speechiness"]
    final_dict["tempo"] = song_audio_features["tempo"]

    # song_df = pd.DataFrame(final_dict, index = [0])
    # return song_df
    return final_dict

token = getToken()
print(getSongInfo(token, "HIGHEST IN THE ROOM"))
    
    

{'valence': 0.0605, 'year': '2019', 'acousticness': 0.0546, 'artists': 'Travis Scott', 'danceability': 0.598, 'duration_ms': 175720, 'energy': 0.427, 'explicit': True, 'id': '3eekarcy7kvN4yt5ZFzltW', 'instrumentalness': 5.83e-06, 'key': 7, 'liveness': 0.21, 'loudness': -8.764, 'mode': 0, 'name': 'HIGHEST IN THE ROOM', 'popularity': 80, 'release_date': '2019-10-04', 'speechiness': 0.0317, 'tempo': 76.469}


In [5]:
# def recommendSongs(token, track_name, num_recommended):
#     new_df = df
#     if track_name not in df["track_name"].values:
#         song_df = getSongInfo(token, track_name)
#         new_df.loc[-1] = song_df
    
#     scaler = MinMaxScaler()
#     scaled_df = new_df
#     scaled_df[numerical_features] = scaler.fit_transform(df[numerical_features])
    

#     track_index = scaled_df.index[scaled_df["track_name"] == track_name]

#     print(scaled_df.loc[track_index])


#     scores = cosine_similarity(scaled_df.loc[track_index][numerical_features], scaled_df[numerical_features])

#     similar_song_indices = scores.argsort()[0][::-1][1:num_recommended + 1]

#     # Get the names of the most similar songs based on content-based filtering
#     content_based_recommendations = scaled_df.iloc[similar_song_indices][["track_name", "artists"]]

#     return content_based_recommendations
def recommendSongs(token, track_name, num_recommended):
    new_df = df.copy()
    if getSongInfo(token, track_name)["name"] not in new_df["name"].values:
        song_df = getSongInfo(token, track_name)
        new_df.loc[-1] = song_df
        #print("hello")

    
    scaler = MinMaxScaler()
    scaled_df = new_df
    scaled_df[numerical_features] = scaler.fit_transform(new_df[numerical_features])
    
    track_index = scaled_df.index[scaled_df["name"] == getSongInfo(token, track_name)["name"]]


    scores = cosine_similarity(scaled_df.loc[track_index][numerical_features], scaled_df[numerical_features])

    similar_song_indices = scores.argsort()[0][::-1][1:num_recommended + 1]

    # Get the names of the most similar songs based on content-based filtering
    content_based_recommendations = scaled_df.iloc[similar_song_indices][["name", "artists"]]

    return content_based_recommendations

print(recommendSongs(token, "Up  Nav", 5))    

                                      name                   artists
80551                       Long, Long Ago             ['Sam Cooke']
80747                                Sandy                  ['Dion']
112029  Circle of Fourths - Alternate Take        ['Duke Ellington']
112862          El Ultimo Adios A Mi Madre                ['Ramito']
5640                     El Caballo Blanco  ['José Alfredo Jimenez']


In [6]:
def generate_playlist(token, playlist_id):
    json_songs = get_songs_from_playlist(token, playlist_id)
    tracks = []
    for i in json_songs:
        if getSongInfo(token, i["track"]["name"])["name"] in df["name"].values:
            track_index = df.index[df["name"] == i["track"]["name"]]
            tracks.append(df.loc[track_index])
        else:
            song_dict = getSongInfo(token, str(i["track"]["name"]) + " " + str(i["track"]["artists"][0]["name"]))
            song_df = pd.DataFrame(song_dict, index=[0])
            tracks.append(song_df)
    
    list_recommended_songs = []
    for i in range(len(tracks)):
        current_song_df = tracks[i]
        list_recommended_songs.append(recommendSongs(token, current_song_df.iloc[0]["name"], 1))

    return list_recommended_songs
    


    
print(generate_playlist(token, "1bZNUAiWc34L35puG3ppKF"))
#1bZNUAiWc34L35puG3ppKF


[              name          artists
19625  Tyler Herro  ['Jack Harlow'],                            name                       artists
108319  High (feat. Elton John)  ['Young Thug', 'Elton John'],             name          artists
18844  Gassed Up  ['Nebu Kiniza'],                           name             artists
74722  Off Deez (with J. Cole)  ['JID', 'J. Cole'],              name       artists
164286  Flashback  ['Ministry']]
