In [183]:
from dotenv import load_dotenv
import os
import base64
from requests import post, get
import json


load_dotenv()
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")

def getToken():
    auth_string = client_id + ":" + client_secret
    auth_bytes = auth_string.encode("utf-8")
    auth_base64 = str(base64.b64encode(auth_bytes), "utf-8")

    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": "Basic " + auth_base64,
        "Content-Type": "application/x-www-form-urlencoded"
    }

    data = {"grant_type": "client_credentials"}

    result = post(url, headers=headers, data=data)
    json_results = json.loads(result.content)
    token = json_results["access_token"]
    return token

def get_auth_header(token):
    return {"Authorization": "Bearer " + token}

def search_for_artist(token, artist_name):
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    query = "?q={0}&type=artist&limit=1".format(artist_name)
    query_url = url + query
    result = get(query_url, headers=headers)
    json_result = json.loads(result.content)["artists"]["items"]
    if len(json_result) == 0:
        print("No artist with this name exists")
    
    return json_result[0]

def get_songs_by_artist(token, artist_id):
    url = "https://api.spotify.com/v1/artists/{}/top-tracks?country=US".format(artist_id)
    headers = get_auth_header(token)
    result = get(url, headers=headers)
    json_result = json.loads(result.content)["tracks"]
    return json_result

def get_songs_from_playlist(token, playlist_id):
    url = "https://api.spotify.com/v1/playlists/{}/tracks".format(playlist_id)
    headers = get_auth_header(token)
    result = get(url, headers=headers)
    json_result = json.loads(result.content)["items"]

    for idx, song in enumerate(json_result):
        print("{0}. {1}".format(idx+1, song["track"]["name"]))

def get_year_from_song(token, song_id):
    url = "https://api.spotify.com/v1/tracks/{}".format(song_id)
    headers = get_auth_header(token)
    result = get(url , headers=headers)
    json_result = json.loads(result.content)["album"]["release_date"]
    print(json_result)

token = getToken()
get_year_from_song(token, "1rP5gAqMlm8d6UnfseuzSm")

2020-11-13


In [177]:
import csv
import pandas as pd
import matplotlib as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

df = pd.read_csv("datasets/dataset.csv")


# COLUMNS:
# ['Unnamed: 0', 'track_id', 'artists', 'album_name', 'track_name',
#        'popularity', 'duration_ms', 'explicit', 'danceability', 'energy',
#        'key', 'loudness', 'mode', 'speechiness', 'acousticness',
#        'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature',
#        'track_genre']

df = df.dropna()
df.drop(columns="Unnamed: 0", inplace=True)
df = df.drop_duplicates('track_id')

numerical_features = ['explicit', 'popularity', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence']

scaled_df = df
scaler = MinMaxScaler()
scaled_df[numerical_features] = scaler.fit_transform(df[numerical_features])
df.columns



Index(['track_id', 'artists', 'album_name', 'track_name', 'popularity',
       'duration_ms', 'explicit', 'danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'time_signature', 'track_genre'],
      dtype='object')

In [178]:
def generate_recommended_songs(track_name, num_recommended):
    if track_name not in scaled_df["track_name"].values:
        print("Song doesn't exist")
        return

    track_index = scaled_df.index[scaled_df["track_name"] == track_name]

    scores = cosine_similarity(scaled_df.loc[track_index][numerical_features], scaled_df[numerical_features])

    similar_song_indices = scores.argsort()[0][::-1][1:num_recommended + 1]

    # Get the names of the most similar songs based on content-based filtering
    content_based_recommendations = scaled_df.iloc[similar_song_indices][["track_name", "artists"]]
    print(content_based_recommendations)



print(generate_recommended_songs("HIGHEST IN THE ROOM", 5))


                      track_name            artists
2053                       Creep          Radiohead
53301                    5% TINT       Travis Scott
33167                   16 Lines           Lil Peep
81450                  The Hills         The Weeknd
20400  No Guidance (feat. Drake)  Chris Brown;Drake
None


In [179]:
#get song information and recommend song

def search_for_song_id(token, song_name):
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    format_song_name = ""
    for i in range(len(song_name)):
        if song_name[i] == " ":
            format_song_name += "+"
        else:
            format_song_name += song_name[i]
    query = "?q={0}&type=track&limit=1".format(format_song_name)
    query_url = url + query
    result = get(query_url, headers=headers)
    json_result = json.loads(result.content)["tracks"]["items"][0]
    return json_result

def getAudioFeatures(token, id):
    url = "https://api.spotify.com/v1/audio-features/{}".format(id)
    headers = get_auth_header(token)
    result = get(url, headers=headers)
    json_result = json.loads(result.content)
    return json_result


def getSongInfo(token, song_name):
    json_song_info = search_for_song_id(token, song_name)
    song_id = json_song_info["id"]
    final_dict = dict()
    song_artists = ""
    for i in range(len(json_song_info["artists"])):
        song_artists += json_song_info["artists"][i]['name'] + ";"
    song_artists = song_artists[0:-1]
    song_album_name = json_song_info["album"]["name"]
    song_duration = json_song_info["duration_ms"]
    song_popularity = json_song_info["popularity"]
    song_explicit = json_song_info["explicit"]

    song_audio_features = getAudioFeatures(token, song_id)
    final_dict["track_id"] = song_id
    final_dict["artists"] = song_artists
    final_dict["album_name"] = song_album_name
    final_dict["track_name"] = song_name
    final_dict["popularity"] = song_popularity
    final_dict["duration_ms"] = song_duration
    final_dict["explicit"] = song_explicit
    final_dict["danceability"] = song_audio_features["danceability"]
    final_dict["energy"] = song_audio_features["energy"]
    final_dict["key"] = song_audio_features["key"]
    final_dict["loudness"] = song_audio_features["loudness"]
    final_dict["mode"] = song_audio_features["mode"]
    final_dict["speechiness"] = song_audio_features["speechiness"]
    final_dict["acousticness"] = song_audio_features["acousticness"]
    final_dict["instrumentalness"] = song_audio_features["instrumentalness"]
    final_dict["liveness"] = song_audio_features["liveness"]
    final_dict["valence"] = song_audio_features["valence"]
    final_dict["tempo"] = song_audio_features["tempo"]
    final_dict["time_signature"] = ""
    final_dict["track_genre"] = ""

    # song_df = pd.DataFrame(final_dict, index = [0])
    # return song_df
    return final_dict

token = getToken()
print(getSongInfo(token, "HIGHEST IN THE ROOM"))
    
    

{'track_id': '3eekarcy7kvN4yt5ZFzltW', 'artists': 'Travis Scott', 'album_name': 'HIGHEST IN THE ROOM', 'track_name': 'HIGHEST IN THE ROOM', 'popularity': 84, 'duration_ms': 175720, 'explicit': True, 'danceability': 0.598, 'energy': 0.427, 'key': 7, 'loudness': -8.764, 'mode': 0, 'speechiness': 0.0317, 'acousticness': 0.0546, 'instrumentalness': 5.83e-06, 'liveness': 0.21, 'valence': 0.0605, 'tempo': 76.469, 'time_signature': '', 'track_genre': ''}


In [180]:
def recommendSongs(token, track_name, num_recommended):
    new_df = df
    if track_name not in df["track_name"].values:
        song_df = getSongInfo(token, track_name)
        new_df.loc[-1] = song_df
    
    scaler = MinMaxScaler()
    scaled_df = new_df
    scaled_df[numerical_features] = scaler.fit_transform(df[numerical_features])
    

    track_index = scaled_df.index[scaled_df["track_name"] == track_name]

    print(scaled_df.loc[track_index])


    scores = cosine_similarity(scaled_df.loc[track_index][numerical_features], scaled_df[numerical_features])

    similar_song_indices = scores.argsort()[0][::-1][1:num_recommended + 1]

    # Get the names of the most similar songs based on content-based filtering
    content_based_recommendations = scaled_df.iloc[similar_song_indices][["track_name", "artists"]]

    return content_based_recommendations

print(recommendSongs(token, "Erase the Social", 5))    

                  track_id       artists            album_name  \
-1  4uhvMW7ly7tJil31YYscAN  Lil Uzi Vert  The Perfect LUV Tape   

          track_name  popularity  duration_ms  explicit  danceability  energy  \
-1  Erase the Social         1.0       199993       1.0         0.705   0.609   

    key  loudness  mode  speechiness  acousticness  instrumentalness  \
-1    3       0.0     1       0.0729         0.694               0.0   

    liveness  valence    tempo time_signature track_genre  
-1     0.234    0.718  140.058                             
                                track_name              artists
106284                         Sju idioter         Eddie Meduza
18699         If It Flies, Floats or Fucks  Kevin Bloody Wilson
15592   Everybody Dies in Their Nightmares              Danny G
69452                           Kolachathi      Dabzee;ANXZIETY
18553                             Classico          Tenacious D
