In [146]:
from dotenv import load_dotenv
import os
import base64
from requests import post, get
import json

# Run python-dotenv's loader before reading the variables below (presumably it
# pulls them from a local .env file -- confirm against the project setup).
load_dotenv()

# Spotify app credentials; each is None when its environment variable is unset.
client_id = os.environ.get("CLIENT_ID")
client_secret = os.environ.get("CLIENT_SECRET")

def get_token():
    """Request an app-level access token from the Spotify Accounts service.

    The module-level ``client_id`` and ``client_secret`` are sent as HTTP Basic
    credentials together with ``grant_type=client_credentials``.

    Returns:
        str: The ``access_token`` field of the JSON response.

    Raises:
        RuntimeError: If ``client_id`` or ``client_secret`` is unset or empty.
        requests.HTTPError: If the token endpoint answers with a 4xx/5xx status.
    """
    # Fail with an actionable message instead of the opaque
    # "unsupported operand type(s) for +: 'NoneType' and 'str'".
    if not client_id or not client_secret:
        raise RuntimeError(
            "CLIENT_ID and CLIENT_SECRET must be set in the environment "
            "(or in the .env file) before requesting a token."
        )

    credentials = f"{client_id}:{client_secret}".encode("utf-8")
    auth_base64 = base64.b64encode(credentials).decode("utf-8")

    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": "Basic " + auth_base64,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {"grant_type": "client_credentials"}
    result = post(url, headers=headers, data=data)
    # Surface rejected credentials as an HTTP error rather than a later
    # KeyError on "access_token".
    result.raise_for_status()
    return json.loads(result.content)["access_token"]

def get_auth_header(token):
    """Build the Authorization header dict that carries a bearer token.

    Args:
        token: Access token string.

    Returns:
        dict: ``{"Authorization": "Bearer <token>"}``.
    """
    bearer_value = " ".join(("Bearer", token))
    return {"Authorization": bearer_value}

def search_for_artist(token, artist_name):
    """Search Spotify for an artist and return the best match.

    Args:
        token: Access token used to build the bearer Authorization header.
        artist_name: Free-text artist name to search for.

    Returns:
        dict | None: The first item of ``artists.items`` in the search
        response, or ``None`` (after printing a notice) when nothing matched.

    Raises:
        requests.HTTPError: If the search endpoint answers with a 4xx/5xx status.
    """
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    # Let requests build and percent-encode the query string: names containing
    # '&', '#' or '+' would otherwise corrupt a hand-assembled URL.
    params = {"q": artist_name, "type": "artist", "limit": 1}

    result = get(url, headers=headers, params=params)
    result.raise_for_status()
    items = json.loads(result.content)["artists"]["items"]
    if not items:
        print("No artist with this name exists...")
        return None

    return items[0]

def get_songs_by_artist(token, artist_id, country="CA"):
    """Fetch an artist's top tracks.

    Args:
        token: Access token used to build the bearer Authorization header.
        artist_id: Spotify ID of the artist.
        country: Value sent as the ``country`` query parameter. Defaults to
            ``"CA"``, the value that used to be hard-coded in the URL.

    Returns:
        list: The ``tracks`` entry of the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
    """
    url = f"https://api.spotify.com/v1/artists/{artist_id}/top-tracks"
    headers = get_auth_header(token)
    result = get(url, headers=headers, params={"country": country})
    # Report HTTP failures directly instead of a KeyError on "tracks".
    result.raise_for_status()
    return json.loads(result.content)["tracks"]

def get_markets(token):
    """Call the Spotify ``/v1/markets`` endpoint and return the decoded JSON.

    Args:
        token: Access token used to build the bearer Authorization header.

    Returns:
        The parsed JSON body of the response, unmodified.
    """
    response = get(
        "https://api.spotify.com/v1/markets",
        headers=get_auth_header(token),
    )
    return json.loads(response.content)


In [147]:
# Demo: authenticate, look up one artist, and list their top tracks as a
# 1-based numbered list; then fetch the available markets.
token = get_token()
result = search_for_artist(token, "Of Monsters and Men")
artist_id = result["id"]
songs = get_songs_by_artist(token, artist_id)
for position, song in enumerate(songs, start=1):
    print(f"{position}. {song['name']}")
markets = get_markets(token)

1. Little Talks
2. Dirty Paws
3. Mountain Sound
4. King And Lionheart
5. Crystals
6. Love Love Love
7. Alligator
8. Wolves Without Teeth
9. Human
10. Empire


### PLAN
**Problem:** Given a song, recommend 5 new songs.<br><br>
**Solution:**
1. Build and train the model
    1. Use the Spotify Web API's *Get Tracks' Audio Features* to get the features of tracks.
    2. Perform exploratory data analysis to find general patterns, missing values, identify outliers, etc.
    3. Perform feature engineering to remove irrelevant data, combine data, and mutate data.
    4. Use Spotify Web API's *Get Recommendations* as labeled data.
    5. Train the model.<br><br>
2. Test the model
    1. Input a song.
    2. Use the Spotify Web API to get the track's audio features.
    3. Compare how close the model's recommended songs are to Spotify's recommended songs.<br><br>
3. Use the model
    1. Input a song.
    2. Enjoy the 5 recommended songs :)

In [148]:
# load in the data and display head
# pandas is imported here because no earlier cell in this notebook imports it;
# without this the cell raises NameError on a fresh kernel (Restart & Run All).
import pandas as pd

all_song_data = pd.read_csv("spotify_data.csv")
all_song_data.head()

Unnamed: 0.1,Unnamed: 0,artist_name,track_name,track_id,popularity,year,genre,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,0,Jason Mraz,I Won't Give Up,53QF56cjZA9RTuuMZDrSA6,68,2012,acoustic,0.483,0.303,4,-10.058,1,0.0429,0.694,0.0,0.115,0.139,133.406,240166,3
1,1,Jason Mraz,93 Million Miles,1s8tP3jP4GZcyHDsjvw218,50,2012,acoustic,0.572,0.454,3,-10.286,1,0.0258,0.477,1.4e-05,0.0974,0.515,140.182,216387,4
2,2,Joshua Hyslop,Do Not Let Me Go,7BRCa8MPiyuvr2VU3O9W0F,57,2012,acoustic,0.409,0.234,3,-13.711,1,0.0323,0.338,5e-05,0.0895,0.145,139.832,158960,4
3,3,Boyce Avenue,Fast Car,63wsZUhUZLlh1OsyrZq7sz,58,2012,acoustic,0.392,0.251,10,-9.845,1,0.0363,0.807,0.0,0.0797,0.508,204.961,304293,4
4,4,Andrew Belle,Sky's Still Blue,6nXIYClvJAfi6ujLiKqEq8,54,2012,acoustic,0.43,0.791,6,-5.419,0,0.0302,0.0726,0.0193,0.11,0.217,171.864,244320,4


In [149]:
# Drop the first column ("Unnamed: 0.1"), which is just a saved index.
# It is dropped by name rather than by position: dropping `columns[0]` removed
# one more real column every time this cell was re-run. errors="ignore" makes
# a re-run a no-op once the column is gone.
# NOTE(review): "Unnamed: 0" looks like a second saved index and is left in
# place, as before -- confirm whether it should be dropped too.
all_song_data = all_song_data.drop(columns=["Unnamed: 0.1"], errors="ignore")
all_song_data.head()

Unnamed: 0,artist_name,track_name,track_id,popularity,year,genre,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Jason Mraz,I Won't Give Up,53QF56cjZA9RTuuMZDrSA6,68,2012,acoustic,0.483,0.303,4,-10.058,1,0.0429,0.694,0.0,0.115,0.139,133.406,240166,3
1,Jason Mraz,93 Million Miles,1s8tP3jP4GZcyHDsjvw218,50,2012,acoustic,0.572,0.454,3,-10.286,1,0.0258,0.477,1.4e-05,0.0974,0.515,140.182,216387,4
2,Joshua Hyslop,Do Not Let Me Go,7BRCa8MPiyuvr2VU3O9W0F,57,2012,acoustic,0.409,0.234,3,-13.711,1,0.0323,0.338,5e-05,0.0895,0.145,139.832,158960,4
3,Boyce Avenue,Fast Car,63wsZUhUZLlh1OsyrZq7sz,58,2012,acoustic,0.392,0.251,10,-9.845,1,0.0363,0.807,0.0,0.0797,0.508,204.961,304293,4
4,Andrew Belle,Sky's Still Blue,6nXIYClvJAfi6ujLiKqEq8,54,2012,acoustic,0.43,0.791,6,-5.419,0,0.0302,0.0726,0.0193,0.11,0.217,171.864,244320,4


In [150]:
# check genres in spotify api
def get_genres(token):
    """Call Spotify's available-genre-seeds endpoint and return the decoded JSON.

    Args:
        token: Access token used to build the bearer Authorization header.

    Returns:
        The parsed JSON body of the response, unmodified.
    """
    response = get(
        "https://api.spotify.com/v1/recommendations/available-genre-seeds",
        headers=get_auth_header(token),
    )
    return json.loads(response.content)

# Genre labels from Spotify's genre seeds and from the Kaggle dataset.
spotify_genres = get_genres(token)['genres']
kaggle_genres = list(all_song_data['genre'].unique())

# Count the labels that appear on one side only, in each direction.
difference = list(set(kaggle_genres).difference(spotify_genres))
print("Genres in Kaggle dataset but not in Spotify dataset:", len(difference))
difference2 = list(set(spotify_genres).difference(kaggle_genres))
print("Genres in Spotify dataset but not in Kaggle dataset:", len(difference2))

Genres in Kaggle dataset but not in Spotify dataset: 0
Genres in Spotify dataset but not in Kaggle dataset: 44


In [151]:
# get the y values (Spotify's recommendations, used as labels) so that we can test the model

def get_recommendations(token, seed_tracks, market="CA", limit=2):
    """Fetch Spotify's track recommendations for one or more seed tracks.

    Args:
        token: Access token used to build the bearer Authorization header.
        seed_tracks: Either a string of comma-separated track IDs (as before)
            or an iterable of track-ID strings, which is joined with commas.
        market: Value sent as the ``market`` query parameter. Defaults to
            ``"CA"``, the value that used to be hard-coded in the URL.
        limit: Value sent as the ``limit`` query parameter. Defaults to ``2``,
            the value that used to be hard-coded in the URL.

    Returns:
        The parsed JSON body of the response, unmodified.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
    """
    if not isinstance(seed_tracks, str):
        seed_tracks = ",".join(seed_tracks)

    url = "https://api.spotify.com/v1/recommendations"
    # requests assembles and percent-encodes the query string from this dict.
    params = {"market": market, "limit": limit, "seed_tracks": seed_tracks}
    result = get(url, headers=get_auth_header(token), params=params)
    result.raise_for_status()
    return json.loads(result.content)
    
def search_for_track(token, track_name, artist_name):
    """Search Spotify for a track by title and artist and return the best match.

    Args:
        token: Access token used to build the bearer Authorization header.
        track_name: Track title to search for.
        artist_name: Artist name to search for alongside the title.

    Returns:
        dict | None: The first item of ``tracks.items`` in the search
        response, or ``None`` (after printing a notice) when nothing matched.

    Raises:
        requests.HTTPError: If the search endpoint answers with a 4xx/5xx status.
    """
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)
    # Separate the two terms with a space: they used to be fused together
    # ("better offjeremy zucker"), which mangled the last word of the title.
    # requests percent-encodes the value when it builds the query string.
    params = {"q": f"{track_name} {artist_name}", "type": "track", "limit": 1}

    result = get(url, headers=headers, params=params)
    result.raise_for_status()
    items = json.loads(result.content)["tracks"]["items"]
    if not items:
        print("No track with this name exists...")
        return None

    return items[0]

track = search_for_track(token, "better off", "jeremy zucker")
# search_for_track returns None (and prints a notice) when nothing matched.
if track is not None:
    print(track['name'])
    print(track['popularity'])

# eventually we want Spotify's recommendations for all 1 million tracks in the
# dataset; for now, request them for a single seed track
# NOTE(review): the seed ID is hard-coded -- presumably it is the track found
# above; confirm, or pass track['id'] instead.
recommendations = get_recommendations(token, "7bdYxWPCs46dQ0XLwySOyv")
rec_tracks = recommendations['tracks']
# A distinct loop name keeps `track` (the search result above) from being
# overwritten by the last recommendation.
for rec_track in rec_tracks:
    artist_names = ", ".join(artist['name'] for artist in rec_track['artists'])
    print(f"{rec_track['name']} - {artist_names}")

# split data into train data and test data
from sklearn.model_selection import train_test_split
#X_train, X_test, y_train, y_test = train_test_split()

better off
65
Older - Shallou, Daya
Be My Mistake - The 1975
