# Importing the libraries

In [1]:
import os
import pandas as pd
import numpy as np

import spotipy
from spotipy.oauth2 import SpotifyOAuth,SpotifyClientCredentials
import yaml
import re
from tqdm import tqdm
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Read the Spotify API credentials from a local YAML file (kept outside the
# notebook) and build the spotipy client used by every API call below.
# The context manager closes the file; the previous bare open() never did.
with open("spotify/spotify.yaml") as stream:
    spotify_details = yaml.safe_load(stream)
# NOTE(review): the two keys differ in capitalisation ('Client_id' vs
# 'client_secret'); they have to match the YAML file exactly.
auth_manager = SpotifyClientCredentials(client_id=spotify_details['Client_id'],
                                        client_secret=spotify_details['client_secret'])
sp = spotipy.client.Spotify(auth_manager=auth_manager)

# Importing the dataset

In [3]:
# Compact dtypes applied while reading the processed dataset. The genre column
# is still called 'genres' in this file; the next cell renames it.
dtypes = {'track_uri': 'object', 'artist_uri': 'object', 'album_uri': 'object', 'danceability': 'float16', 'energy': 'float16', 'key': 'float16',
               'loudness': 'float16', 'mode': 'float16', 'speechiness': 'float16', 'acousticness': 'float16', 'instrumentalness': 'float16',
               'liveness': 'float16', 'valence': 'float16', 'tempo': 'float16', 'duration_ms': 'float32', 'time_signature': 'float16',
               'Track_release_date': 'int8', 'Track_pop': 'int8', 'Artist_pop': 'int8', 'genres': 'object'}
try:
    df = pd.read_csv('data/final_processed_data.csv', dtype=dtypes)
except Exception:
    # The old handler repeated the exact same read, which could only fail the
    # same way. Report the failure and let the original error surface instead.
    print('Failed to load dataset')
    raise


In [25]:
# Align the genre column name with 'Artist_genres', the name the playlist frame
# (`test`) and the TF-IDF cells further down use.
df.rename(columns = {'genres':'Artist_genres'}, inplace = True)

In [26]:
df.head()
df.dtypes

track_uri              object
artist_uri             object
album_uri              object
danceability          float16
energy                float16
key                   float16
loudness              float16
mode                  float16
speechiness           float16
acousticness          float16
instrumentalness      float16
liveness              float16
valence               float16
tempo                 float16
duration_ms           float32
time_signature        float16
Artist_genres          object
Track_pop                int8
Artist_pop               int8
Track_release_date       int8
dtype: object

# Test

Extract playlist tracks and artist uri

In [9]:
user_det=sp.user("316yiwiyxgcdn5m5eurpnlc4dcda")

In [10]:
user_det

{'display_name': 'Mayank Shetty',
 'external_urls': {'spotify': 'https://open.spotify.com/user/316yiwiyxgcdn5m5eurpnlc4dcda'},
 'href': 'https://api.spotify.com/v1/users/316yiwiyxgcdn5m5eurpnlc4dcda',
 'id': '316yiwiyxgcdn5m5eurpnlc4dcda',
 'images': [{'url': 'https://platform-lookaside.fbsbx.com/platform/profilepic/?asid=1286400288088541&height=50&width=50&ext=1710994938&hash=AfpbjC1hCPr57EbdSXgjFt2jU7A6cxjJ_4QldqyQomDPyA',
   'height': 64,
   'width': 64},
  {'url': 'https://platform-lookaside.fbsbx.com/platform/profilepic/?asid=1286400288088541&height=300&width=300&ext=1710994938&hash=AfpwecPRNcUd9XnG9VHgNRFnigE_JKED227TWHClA13GVg',
   'height': 300,
   'width': 300}],
 'type': 'user',
 'uri': 'spotify:user:316yiwiyxgcdn5m5eurpnlc4dcda',
 'followers': {'href': None, 'total': 7}}

In [3]:
def get_IDs(user, playlist_id, client=None):
    """Collect the track IDs and first-artist IDs of every track in a playlist.

    Parameters
    ----------
    user : str
        Spotify user id passed through to ``user_playlist``.
    playlist_id : str
        Spotify playlist id.
    client : optional
        Object exposing ``user_playlist(user, playlist_id)`` and ``next(page)``
        (a spotipy client). Defaults to the notebook-level ``sp``.

    Returns
    -------
    tuple[list, list]
        ``(track_ids, artist_id)``: two parallel lists, one entry per usable
        playlist item, in playlist order.

    Notes
    -----
    Items whose ``track`` is missing, whose track has no id, or whose first
    artist has no id are skipped so that both lists stay aligned and contain
    only ids that can be sent back to the API. All pages of the playlist are
    followed, not just the first one.
    """
    api = sp if client is None else client
    track_ids = []
    artist_id = []
    playlist = api.user_playlist(user, playlist_id)
    page = playlist['tracks']
    while page:
        for item in page['items']:
            track = item.get('track')
            if not track or not track.get('id'):
                continue
            artists = track.get('artists') or []
            if not artists or not artists[0].get('id'):
                continue
            track_ids.append(track['id'])
            artist_id.append(artists[0]['id'])
        # Follow the paging cursor until the API reports no further page.
        page = api.next(page) if page.get('next') else None
    return track_ids, artist_id


# Fetch the ids for one playlist and report how many tracks / artists came back.
playlist_owner = '316yiwiyxgcdn5m5eurpnlc4dcda'
playlist_key = '37i9dQZF1DXaQm3ZVg9Z2X'
track_ids, artist_id = get_IDs(playlist_owner, playlist_key)
for collected in (track_ids, artist_id):
    print(len(collected))

50
50


Getting uri using the spotipy api and preprocessing

In [4]:
# De-duplicate while keeping first-seen playlist order. list(set(...)) returned
# the ids in an arbitrary order that can differ between kernel restarts, which
# made the batch contents (and the row order built from them) irreproducible.
artist_id_uni = list(dict.fromkeys(artist_id))
track_ids_uni = list(dict.fromkeys(track_ids))

In [20]:
# Download audio features for the unique playlist tracks, 25 ids per request.
# Each batch is kept in a list and concatenated once at the end instead of
# re-copying the growing frame on every iteration.
audio_feature_batches = []
for i in tqdm(range(0, len(track_ids_uni), 25)):
    try:
        track_feature = sp.audio_features(track_ids_uni[i:i+25])
        # NOTE(review): the response can apparently hold None placeholders for
        # ids without features; drop them so one gap does not sink the batch.
        track_feature = [row for row in (track_feature or []) if row is not None]
        if track_feature:
            audio_feature_batches.append(pd.DataFrame(track_feature))
    except Exception as e:
        # Best effort: report the failed batch and carry on with the rest.
        print(e)
        continue
# pd.concat rejects an empty list, so fall back to the empty frame the old
# code started from.
audio_features = pd.concat(audio_feature_batches, axis=0) if audio_feature_batches else pd.DataFrame()

100%|██████████| 2/2 [00:00<00:00,  6.28it/s]


In [21]:
# Download release date, popularity, first-artist id and album id for each
# unique track, 25 ids per request. Rows are gathered as dicts and turned into
# a frame once; the old version concatenated a one-row frame per track.
track_rows = []
for i in tqdm(range(0, len(track_ids_uni), 25)):
    batch_ids = track_ids_uni[i:i+25]
    try:
        track_features = sp.tracks(batch_ids)
        # Responses are matched to the requested ids by position, as before.
        for requested_id, track in zip(batch_ids, track_features['tracks']):
            if track is None:
                # Skip a missing entry instead of losing the whole batch.
                continue
            track_rows.append({
                'Track_uri': requested_id,
                'Track_release_date': track['album']['release_date'],
                'Track_pop': track["popularity"],
                'Artist_uri': track['artists'][0]['id'],
                'Album_uri': track['album']['id'],
            })
    except Exception as e:
        # Best effort: report the failed batch and carry on with the rest.
        print(e)
        continue
# Explicit columns keep the schema stable even when nothing was fetched.
track_ = pd.DataFrame(track_rows, columns=['Track_uri', 'Track_release_date', 'Track_pop',
                                           'Artist_uri', 'Album_uri'])

100%|██████████| 2/2 [00:00<00:00,  2.44it/s]


In [22]:
# Download popularity and genres for each unique artist, 25 ids per request.
# Genres become one space-separated string with spaces inside a genre replaced
# by underscores; artists without genres get the placeholder "unknown".
artist_rows = []
for i in tqdm(range(0, len(artist_id_uni), 25)):
    batch_ids = artist_id_uni[i:i+25]
    try:
        artist_features = sp.artists(batch_ids)
        # Responses are matched to the requested ids by position, as before.
        for requested_id, artist in zip(batch_ids, artist_features['artists']):
            if artist is None:
                # Skip a missing entry instead of losing the whole batch.
                continue
            artist_genres = artist["genres"]
            if artist_genres:
                genre_text = " ".join(genre.replace(' ', '_') for genre in artist_genres)
            else:
                genre_text = "unknown"
            artist_rows.append({
                'Artist_uri': requested_id,
                'Artist_pop': artist["popularity"],
                'genres': genre_text,
            })
    except Exception as e:
        # Best effort: report the failed batch and carry on with the rest.
        print(e)
        continue
# Explicit columns keep the schema stable even when nothing was fetched.
artist_ = pd.DataFrame(artist_rows, columns=['Artist_uri', 'Artist_pop', 'genres'])

100%|██████████| 1/1 [00:00<00:00,  4.72it/s]


In [23]:
test=pd.DataFrame(track_,columns=['Track_uri','Artist_uri','Album_uri'])

In [24]:
test.rename(columns = {'Track_uri':'track_uri','Artist_uri':'artist_uri','Album_uri':'album_uri'}, inplace = True)

In [25]:
audio_features.drop(columns=['type','uri','track_href','analysis_url'],axis=1,inplace=True)

In [26]:
# Assemble the playlist frame by attaching, in order: audio features (keyed on
# the track id), track metadata (keyed on the track id) and artist metadata
# (keyed on the artist id). All joins are outer joins, so rows missing on
# either side survive with NaNs.
# After the second merge `test` already carries an 'Artist_uri' column from
# track_, so the third merge yields 'Artist_uri_x' / 'Artist_uri_y'; those and
# the other duplicate key columns are dropped a few cells below.
test = pd.merge(test,audio_features, left_on = "track_uri", right_on= "id",how = 'outer')
test = pd.merge(test,track_, left_on = "track_uri", right_on= "Track_uri",how = 'outer')
test = pd.merge(test,artist_, left_on = "artist_uri", right_on= "Artist_uri",how = 'outer')

In [12]:
del audio_features,track_,artist_

In [27]:
test.rename(columns = {'genres':'Artist_genres'}, inplace = True)

In [28]:
test.drop(columns=['Track_uri','Artist_uri_x','Artist_uri_y','Album_uri','id'],axis=1,inplace=True)

In [29]:
test

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,Artist_genres
0,2w3eXFL86RgxGtDr8bzwgK,4gzpq5DPGxSnKTe4SA8HAU,4E7bV0pzG0LciBSWTszra6,0.566,0.459,1,-9.488,1,0.0289,0.327,0.000253,0.202,0.151,116.916,285345,4,2005-06-07,65,88,permanent_wave pop
1,6V6goat94tTJOWXXKZstNX,4gzpq5DPGxSnKTe4SA8HAU,0CE9VXSH70pz4BQzMPm9gO,0.352,0.711,5,-6.412,1,0.0334,0.157,0.701,0.0683,0.422,73.161,274587,4,2017-07-13,59,88,permanent_wave pop
2,4hf0hL4kWyjWztZzVsM39V,4gzpq5DPGxSnKTe4SA8HAU,0RHX9XECH8IVI3LNgWDpmQ,0.611,0.556,9,-5.761,1,0.0288,0.175,7.1e-05,0.0359,0.252,126.532,297307,4,2002-08-08,67,88,permanent_wave pop
3,0u35Dpz37TY2M2j20RUdMf,4gzpq5DPGxSnKTe4SA8HAU,0RHX9XECH8IVI3LNgWDpmQ,0.262,0.482,5,-7.75,0,0.0294,0.0364,0.000189,0.11,0.063,170.968,318627,4,2002-08-08,68,88,permanent_wave pop
4,2DHgvPQD1jApRnT1DBZdrS,4gzpq5DPGxSnKTe4SA8HAU,6ZG5lRT77aJ3btmArcykra,0.495,0.525,7,-9.342,0,0.0301,0.396,0.00736,0.0961,0.249,154.466,256467,4,2000-07-10,65,88,permanent_wave pop
5,0BCPKOYdS2jbQ8iyB56Zns,4gzpq5DPGxSnKTe4SA8HAU,0RHX9XECH8IVI3LNgWDpmQ,0.577,0.749,5,-7.215,0,0.0279,0.599,0.0115,0.183,0.255,130.97,307880,4,2002-08-08,85,88,permanent_wave pop
6,1ZqHjApl3pfzwjweTfMi0g,4gzpq5DPGxSnKTe4SA8HAU,1CEODgTmTwLyabvwd7HBty,0.33,0.58,1,-7.875,0,0.0374,0.0614,0.00129,0.115,0.11,76.093,222653,4,2008-05-26,70,88,permanent_wave pop
7,0R8P9KfGJCDULmlEoBagcO,4gzpq5DPGxSnKTe4SA8HAU,6ZG5lRT77aJ3btmArcykra,0.565,0.546,11,-7.496,0,0.0314,0.189,0.0015,0.17,0.195,139.757,273427,4,2000-07-10,76,88,permanent_wave pop
8,7KolrFGhfDi1JTSgQBT5sI,4gzpq5DPGxSnKTe4SA8HAU,0RHX9XECH8IVI3LNgWDpmQ,0.277,0.797,0,-5.66,1,0.0385,0.586,0.341,0.221,0.26,177.004,238333,3,2002-08-08,59,88,permanent_wave pop
9,75JFxkI2RXiU7L9VXzMkle,4gzpq5DPGxSnKTe4SA8HAU,0RHX9XECH8IVI3LNgWDpmQ,0.557,0.442,5,-7.224,1,0.0243,0.731,1.5e-05,0.11,0.213,146.277,309600,4,2002-08-08,89,88,permanent_wave pop


In [30]:
test.dropna(axis=0,inplace=True)

Dividing popularity into buckets of 5 and release year into buckets of 50 years.

In [32]:
# Coarsen both popularity scores into integer steps of 5 (truncating division).
for pop_col in ('Track_pop', 'Artist_pop'):
    test[pop_col] = (test[pop_col] / 5).astype('int64')
# Keep only the year part of the release date, then bucket it in steps of 50.
release_year = test['Track_release_date'].str.split('-').str[0].astype('int16')
test['Track_release_date'] = (release_year / 50).astype('int64')

Changing datatypes

In [33]:
# Down-cast the playlist frame to the same compact dtypes used for the main dataset.
half_precision_cols = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                       'time_signature']
small_int_cols = ['Track_release_date', 'Track_pop', 'Artist_pop']
for cols, target in ((half_precision_cols, 'float16'),
                     (['duration_ms'], 'float32'),
                     (small_int_cols, 'int8')):
    test[cols] = test[cols].astype(target)

In [18]:
df=pd.read_csv("Streamlit/data/streamlit_data.csv")

In [19]:
currentdf=len(df)

In [20]:
df.dropna(axis=0,inplace=True)

In [21]:
# NOTE(review): currentdf was taken before the dropna above, so this difference
# is <= 0 and counts rows removed by dropna rather than newly found tracks.
# Confirm what the message is meant to report.
print('{} New Tracks Found'.format(len(df)-currentdf))

0 New Tracks Found


In [22]:
len(df)

113384

In [23]:
# Keep only candidate rows whose track id does not appear in the playlist frame
# (presumably so tracks already in the playlist are never recommended back).
df = df[~df['track_uri'].isin(test['track_uri'].values)]

In [34]:
test.head()

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,Artist_genres
0,2w3eXFL86RgxGtDr8bzwgK,4gzpq5DPGxSnKTe4SA8HAU,4E7bV0pzG0LciBSWTszra6,0.565918,0.458984,1.0,-9.484375,1.0,0.0289,0.326904,0.000253,0.202026,0.151001,116.9375,285345.0,4.0,40,13,17,permanent_wave pop
1,6V6goat94tTJOWXXKZstNX,4gzpq5DPGxSnKTe4SA8HAU,0CE9VXSH70pz4BQzMPm9gO,0.352051,0.710938,5.0,-6.410156,1.0,0.033386,0.156982,0.701172,0.068298,0.422119,73.1875,274587.0,4.0,40,11,17,permanent_wave pop
2,4hf0hL4kWyjWztZzVsM39V,4gzpq5DPGxSnKTe4SA8HAU,0RHX9XECH8IVI3LNgWDpmQ,0.61084,0.556152,9.0,-5.761719,1.0,0.028793,0.175049,7.1e-05,0.035889,0.251953,126.5625,297307.0,4.0,40,13,17,permanent_wave pop
3,0u35Dpz37TY2M2j20RUdMf,4gzpq5DPGxSnKTe4SA8HAU,0RHX9XECH8IVI3LNgWDpmQ,0.261963,0.481934,5.0,-7.75,0.0,0.029404,0.036407,0.000189,0.109985,0.062988,171.0,318627.0,4.0,40,13,17,permanent_wave pop
4,2DHgvPQD1jApRnT1DBZdrS,4gzpq5DPGxSnKTe4SA8HAU,6ZG5lRT77aJ3btmArcykra,0.495117,0.524902,7.0,-9.34375,0.0,0.030106,0.395996,0.007359,0.09613,0.249023,154.5,256467.0,4.0,40,13,17,permanent_wave pop


In [35]:
# Turn each genre string into a list of genre tokens (the column stays a list
# until it is dropped in the next cell).
test['Artist_genres'] = test['Artist_genres'].str.split(" ")
# Fit TF-IDF on the playlist's genres only, keeping the 3 most frequent terms.
# NOTE(review): the default tokenizer also splits on '-', which is why a genre
# such as 'lo-fi_jazzhop' shows up as the two features 'lo' and 'fi_jazzhop'.
tfidf = TfidfVectorizer(max_features=3) #max_features=5 
genre_text = test['Artist_genres'].str.join(" ")
tfidf_matrix = tfidf.fit_transform(genre_text)
genre_df = pd.DataFrame(
    tfidf_matrix.toarray(),
    columns=[f"genre|{term}" for term in tfidf.get_feature_names_out()],
)

In [36]:
genre_df=genre_df.astype('float16')
test.drop(columns=['Artist_genres'],axis=1,inplace=True)

In [37]:
test = pd.concat([test.reset_index(drop=True), genre_df.reset_index(drop=True)],axis = 1)

In [38]:
test.head()

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,genre|permanent_wave,genre|pop
0,2w3eXFL86RgxGtDr8bzwgK,4gzpq5DPGxSnKTe4SA8HAU,4E7bV0pzG0LciBSWTszra6,0.565918,0.458984,1.0,-9.484375,1.0,0.0289,0.326904,...,0.202026,0.151001,116.9375,285345.0,4.0,40,13,17,0.713867,0.700195
1,6V6goat94tTJOWXXKZstNX,4gzpq5DPGxSnKTe4SA8HAU,0CE9VXSH70pz4BQzMPm9gO,0.352051,0.710938,5.0,-6.410156,1.0,0.033386,0.156982,...,0.068298,0.422119,73.1875,274587.0,4.0,40,11,17,0.713867,0.700195
2,4hf0hL4kWyjWztZzVsM39V,4gzpq5DPGxSnKTe4SA8HAU,0RHX9XECH8IVI3LNgWDpmQ,0.61084,0.556152,9.0,-5.761719,1.0,0.028793,0.175049,...,0.035889,0.251953,126.5625,297307.0,4.0,40,13,17,0.713867,0.700195
3,0u35Dpz37TY2M2j20RUdMf,4gzpq5DPGxSnKTe4SA8HAU,0RHX9XECH8IVI3LNgWDpmQ,0.261963,0.481934,5.0,-7.75,0.0,0.029404,0.036407,...,0.109985,0.062988,171.0,318627.0,4.0,40,13,17,0.713867,0.700195
4,2DHgvPQD1jApRnT1DBZdrS,4gzpq5DPGxSnKTe4SA8HAU,6ZG5lRT77aJ3btmArcykra,0.495117,0.524902,7.0,-9.34375,0.0,0.030106,0.395996,...,0.09613,0.249023,154.5,256467.0,4.0,40,13,17,0.713867,0.700195


In [31]:
test.isna().sum().sum()

0

# df

In [33]:
# Project the candidate tracks onto the genre vocabulary learned from the
# playlist (transform only, no refit), so both frames share the same
# 'genre|...' columns.
df['Artist_genres'] = df['Artist_genres'].str.split(" ")
candidate_genre_text = df['Artist_genres'].str.join(" ")
tfidf_matrix = tfidf.transform(candidate_genre_text)
genre_df = pd.DataFrame(
    tfidf_matrix.toarray(),
    columns=[f"genre|{term}" for term in tfidf.get_feature_names_out()],
)

In [34]:
genre_df

Unnamed: 0,genre|permanent_wave,genre|pop
0,0.000000,0.00000
1,0.000000,0.00000
2,0.000000,0.00000
3,0.000000,0.00000
4,0.000000,0.00000
...,...,...
113346,0.714005,0.70014
113347,0.714005,0.70014
113348,0.714005,0.70014
113349,0.714005,0.70014


In [37]:
genre_df=genre_df.astype('float16')
df.drop(columns=['Artist_genres'],axis=1,inplace=True)

In [40]:
len(genre_df)

113351

In [38]:
df = pd.concat([df.reset_index(drop=True), genre_df.reset_index(drop=True)],axis = 1)

In [39]:
df.columns

Index(['Unnamed: 0', 'track_uri', 'artist_uri', 'album_uri', 'danceability',
       'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms',
       'time_signature', 'Track_release_date', 'Track_pop', 'Artist_pop',
       'genre|permanent_wave', 'genre|pop'],
      dtype='object')

In [37]:
test.columns

Index(['track_uri', 'artist_uri', 'album_uri', 'danceability', 'energy', 'key',
       'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness',
       'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature',
       'Track_release_date', 'Track_pop', 'Artist_pop', 'genre|fi_jazzhop',
       'genre|lo', 'genre|unknown'],
      dtype='object')

In [178]:
test.head()

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,genre|permanent_wave,genre|pop
0,0ZlVUhjO8c0bOx1D2Btznf,4gzpq5DPGxSnKTe4SA8HAU,2FeyIYDDAQqcOJKOKhvHdr,0.405748,0.833984,0.636364,0.835386,1.0,0.057165,0.000114,...,0.373047,0.162964,0.726351,0.063589,0.8,1.0,0.647059,0.85,0.713867,0.700195
1,6P2Y4KnF2x8uwZV2cZWA8t,4gzpq5DPGxSnKTe4SA8HAU,0G0WNcM706ASd6n7UxXuKu,0.292573,0.262939,0.181818,0.784946,1.0,0.030867,0.654473,...,0.103027,0.073608,0.564295,0.11642,0.8,1.0,0.647059,0.85,0.713867,0.700195
2,23khhseCLQqVMCIT1WMAns,4gzpq5DPGxSnKTe4SA8HAU,2G4AUqfwxcV1UdQjm2ouYr,0.746754,0.481934,0.545455,0.816408,0.0,0.041779,0.026703,...,0.106018,0.352051,0.394584,0.053112,0.8,1.0,0.823529,0.85,0.713867,0.700195
3,0FMjqbY3aWo1QDbo3GwXib,4gzpq5DPGxSnKTe4SA8HAU,2G4AUqfwxcV1UdQjm2ouYr,0.501131,0.501953,0.454545,0.810166,1.0,0.026917,0.012754,...,0.067993,0.039398,0.412004,0.040147,0.6,1.0,0.705882,0.85,0.713867,0.700195
4,0qksx8mV28lztYIZ1om8ml,4gzpq5DPGxSnKTe4SA8HAU,6ZG5lRT77aJ3btmArcykra,0.400806,0.803223,0.363636,0.859982,1.0,0.05063,0.03352,...,0.068787,0.285889,0.492768,0.056749,0.6,1.0,0.764706,0.85,0.713867,0.700195


# pred

In [41]:
# Remove the placeholder 'genre|unknown' TF-IDF column from both frames when it
# exists. Each frame is checked on its own: the old try/except gave up at the
# first missing column, so a miss on df left test's column in place.
unknown_genre_col = 'genre|unknown'
unknown_missing = False
for frame in (df, test):
    if unknown_genre_col in frame.columns:
        frame.drop(columns=[unknown_genre_col], inplace=True)
    else:
        unknown_missing = True
if unknown_missing:
    print('genre|unknown not found')

genre|unknown not found


In [179]:
test.columns

Index(['track_uri', 'artist_uri', 'album_uri', 'danceability', 'energy', 'key',
       'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness',
       'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature',
       'Track_release_date', 'Track_pop', 'Artist_pop', 'genre|permanent_wave',
       'genre|pop'],
      dtype='object')

In [None]:
test.iloc[:,3:]

In [42]:
# Discard the 'Unnamed: 0' column in one step (presumably the index that an
# earlier to_csv wrote into the file).
df.drop(columns=["Unnamed: 0"], inplace=True)

In [43]:
len(df.columns)

21

## One Hot Encoding
I was first using OneHotEncoder for "Track_release_date", "Track_pop", and "Artist_pop," but I found no difference in the final result other than high memory usage.

In [36]:
""" ohe = OneHotEncoder(handle_unknown='ignore')
dummies = pd.DataFrame(ohe.fit_transform(test[['Track_release_date', 'Track_pop', 'Artist_pop']]).toarray(), index=test.index,dtype=int)
column_name = ohe.get_feature_names_out(['Track_release_date', 'Track_pop', 'Artist_pop'])
dummies.columns=column_name
test = pd.concat([test.drop(['Track_release_date', 'Track_pop', 'Artist_pop'], axis=1), dummies], axis=1) """

" ohe = OneHotEncoder(handle_unknown='ignore')\ndummies = pd.DataFrame(ohe.fit_transform(test[['Track_release_date', 'Track_pop', 'Artist_pop']]).toarray(), index=test.index,dtype=int)\ncolumn_name = ohe.get_feature_names_out(['Track_release_date', 'Track_pop', 'Artist_pop'])\ndummies.columns=column_name\ntest = pd.concat([test.drop(['Track_release_date', 'Track_pop', 'Artist_pop'], axis=1), dummies], axis=1) "

In [37]:
""" ohe2 = OneHotEncoder(categories=ohe.categories_,handle_unknown='ignore')
dummies = pd.DataFrame(ohe2.fit_transform(df[['Track_release_date', 'Track_pop', 'Artist_pop']]).toarray(), index=df.index, dtype=int)
column_name = ohe2.get_feature_names_out(['Track_release_date', 'Track_pop', 'Artist_pop'])
dummies.columns=column_name
df=pd.concat([df.drop(['Track_release_date', 'Track_pop', 'Artist_pop'], axis=1), dummies], axis=1)
 """

" ohe2 = OneHotEncoder(categories=ohe.categories_,handle_unknown='ignore')\ndummies = pd.DataFrame(ohe2.fit_transform(df[['Track_release_date', 'Track_pop', 'Artist_pop']]).toarray(), index=df.index, dtype=int)\ncolumn_name = ohe2.get_feature_names_out(['Track_release_date', 'Track_pop', 'Artist_pop'])\ndummies.columns=column_name\ndf=pd.concat([df.drop(['Track_release_date', 'Track_pop', 'Artist_pop'], axis=1), dummies], axis=1)\n "

In [38]:
#df.info(memory_usage = "deep")

In [39]:
#test.loc[:,test.columns.str.startswith('genre')]=test.loc[:,test.columns.str.startswith('genre')].astype('bool')
#df.loc[:,df.columns.str.startswith('genre')]=df.loc[:,df.columns.str.startswith('genre')].astype('bool')


## Scaling

In [56]:
# sc=MinMaxScaler()
# df.iloc[:,3:19]=sc.fit_transform(df.iloc[:,3:19])
# pickle.dump(sc, open('data/sc.sav', 'wb'))

In [44]:
# Load the previously saved scaler. The context manager closes the file handle,
# which the bare open() call left dangling.
# NOTE: unpickling can execute arbitrary code, so only load a scaler file that
# was produced by this project.
with open('Streamlit/data/sc.sav', 'rb') as scaler_file:
    sc = pickle.load(scaler_file)

In [45]:
# Scale the 16 numeric feature columns of the playlist frame (positions 3..18:
# danceability through Artist_pop) with the loaded scaler, presumably the
# MinMaxScaler from the commented-out fitting cell above.
# Positional slicing assumes the three id columns come first; re-running this
# cell scales the already-scaled values a second time.
test.iloc[:,3:19]=sc.transform(test.iloc[:,3:19])

In [79]:
# Apply the same scaler to the candidate frame's numeric columns (positions
# 3..18) so that df and the playlist frame are on one scale.
# Not idempotent: running the cell twice transforms the values twice.
df.iloc[:,3:19]=sc.transform(df.iloc[:,3:19])

In [46]:
test.iloc[:5,3:]

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,genre|permanent_wave,genre|pop
0,0.430211,0.731934,0.818182,0.84887,1.0,0.041177,0.001948,0.01032,0.171021,0.333008,0.498311,0.044729,0.8,1.0,0.764706,0.85,0.713867,0.700195
1,0.211523,0.416992,0.272727,0.819155,1.0,0.035149,0.164599,0.001965,0.112976,0.124023,0.583298,0.055106,0.8,1.0,0.941176,0.85,0.713867,0.700195
2,0.572791,0.458984,0.090909,0.807294,1.0,0.030042,0.328217,0.000253,0.202026,0.151001,0.493824,0.053175,0.8,1.0,0.705882,0.85,0.713867,0.700195
3,0.225731,0.438965,1.0,0.791812,1.0,0.032437,0.448817,0.014127,0.925781,0.119019,0.700486,0.046942,0.8,1.0,0.647059,0.85,0.713867,0.700195
4,0.746754,0.481934,0.545455,0.816408,0.0,0.041779,0.026703,0.622338,0.106018,0.352051,0.394584,0.053112,0.8,1.0,0.823529,0.85,0.713867,0.700195


In [47]:
# Collapse the playlist into a single-row "playlist vector" by summing every
# column. The three id columns are summed too, which just concatenates their
# strings (visible in the output); only columns from position 3 onwards are
# used by the similarity cells below.
playvec=pd.DataFrame(test.sum(axis=0)).T
playvec

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,genre|permanent_wave,genre|pop
0,2U8g9wVcUu9wsg6i7sFSv87LVHVU3tWfcxj5aiPFEW4Q2w...,4gzpq5DPGxSnKTe4SA8HAU4gzpq5DPGxSnKTe4SA8HAU4g...,2R7iJz5uaHjLEVnMkloO184E7bV0pzG0LciBSWTszra64E...,23.505578,30.411499,26.272727,41.497305,35.0,2.0889,11.854809,...,11.721375,12.016418,26.21648,2.48956,39.200001,49.5,37.235295,42.400001,34.96875,35.3125


In [48]:
# Alternative playlist vector: the per-column mean instead of the sum.
# Earlier attempt, kept for reference:
# playvec2=pd.DataFrame(test.mean(axis=0)).T
# playvec2
# Start from an empty frame that has the same columns as 'test'.
playvec2 = pd.DataFrame(columns=test.columns)
test2=test.copy()
# Blank out the three id columns on the copy so mean() has no strings to handle.
test2.iloc[:,:3]=None
# test
# Earlier attempts at filling the frame, kept for reference:
# playvec2.loc[:, :3] = ''
# playvec2.loc[0,3:]=pd.DataFrame(test.iloc[:,3:].mean()).T
# Append the single row of column means; the id columns come out as NaN.
playvec2=pd.concat([playvec2,pd.DataFrame(test2.mean()).T],axis=0)
# Other discarded variants of the append step:
# playvec2 = playvec2.append(test.mean(), ignore_index=True)
# playvec2=pd.concat(playvec2,test.mean())
playvec2

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,genre|permanent_wave,genre|pop
0,,,,0.470112,0.60823,0.525455,0.829946,0.7,0.041778,0.237096,...,0.234427,0.240328,0.52433,0.049791,0.784,0.99,0.744706,0.848,0.699219,0.706055


## Similarity and Recommending

In [80]:
df2=df.copy()

In [50]:
playvec.iloc[0:1,18:]

Unnamed: 0,Artist_pop,genre|permanent_wave,genre|pop
0,42.400001,34.96875,35.3125


In [82]:
# Score every candidate against the summed playlist vector, one cosine
# similarity per feature family:
#   sim  - audio features (positions 3..15)
#   sim2 - release-date / popularity buckets (positions 16..18)
#   sim3 - genre TF-IDF columns
#   sim4 - plain average of the three
df['sim'] = cosine_similarity(df.iloc[:, 3:16], playvec.iloc[:, 3:16])
df['sim2'] = cosine_similarity(df.iloc[:, 16:19], playvec.iloc[:, 16:19])
# Pick the genre columns by name. The old slice iloc[:, 19:-4] was only correct
# when sim3/sim4 were already present from an earlier run of this cell, and it
# broke again once further sim* columns were appended.
genre_cols = [col for col in df.columns if str(col).startswith('genre|')]
df['sim3'] = cosine_similarity(df[genre_cols], playvec[genre_cols])
df['sim4'] = (df['sim'] + df['sim2'] + df['sim3']) / 3
df = df.sort_values(['sim'], ascending=False, kind='stable')
# At most 5 tracks per artist, then the best 50 overall.
qq = df.groupby('artist_uri').head(5).track_uri.head(50)
aa = sp.tracks(list(qq))
# Build the result table from whatever came back instead of assuming exactly
# 50 tracks. Column 0 is the rank, as before.
# Optional extra fields (disabled in the original):
# external_urls.spotify and album.images[1].url.
Fresult = pd.DataFrame(
    [
        {0: rank, 'track_name': track['name'], 'artist_name': track['artists'][0]['name']}
        for rank, track in enumerate(aa['tracks'])
        if track is not None
    ],
    columns=[0, 'track_name', 'artist_name'],
)
Fresult

Unnamed: 0,0,track_name,artist_name
0,0,Gravity,The Infamous Stringdusters
0,1,Scream,Tank
0,2,Dozen Roses & a Six-Pack,Cole Swindell
0,3,Hurricane,Jimmy Needham
0,4,I Wish It Would Rain Down - 2016 Remaster,Phil Collins
0,5,Where You Are Tonight,Kip Moore
0,6,Arrows (feat. Macklemore & Ryan Lewis),Fences
0,7,White Trash,Chris Janson
0,8,Are You Happy Now,Rascal Flatts
0,9,I Still Miss You,Hawk Nelson


In [102]:
df['sim5']=df['sim']/df['sim'].max()

In [109]:
df['sim6']= (5*df['sim5']+3*df['sim2']+df['sim3'])/9

In [54]:
# Rank the candidates by the averaged similarity (sim4) and look up display
# names for the top picks.
df = df.sort_values(['sim4'], ascending=False, kind='stable')
# At most 5 tracks per artist, then the best 50 overall.
qq = df.groupby('artist_uri').head(5).track_uri.head(50)
aa = sp.tracks(list(qq))
# Build the table in one go from the tracks actually returned. The old loop
# indexed 50 entries unconditionally and concatenated a frame per row.
# Optional extra fields (disabled in the original):
# external_urls.spotify and album.images[1].url.
Fresult = pd.DataFrame(
    [
        {0: rank, 'track_name': track['name'], 'artist_name': track['artists'][0]['name']}
        for rank, track in enumerate(aa['tracks'])
        if track is not None
    ],
    columns=[0, 'track_name', 'artist_name'],
)
Fresult

Unnamed: 0,0,track_name,artist_name
0,0,Viva La Vida,Coldplay
0,1,A Sky Full of Stars,Coldplay
0,2,Adventure of a Lifetime,Coldplay
0,3,Don't Panic,Coldplay
0,4,Christmas Lights,Coldplay
0,5,Desert Rose,Sting
0,6,Don't You Want Me,The Human League
0,7,Brand New Day,Sting
0,8,Fortress Around Your Heart,Sting
0,9,Practical Arrangement,Sting


In [83]:
df.head()

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,time_signature,Track_release_date,Track_pop,Artist_pop,genre|permanent_wave,genre|pop,sim,sim2,sim3,sim4
91717,3wBzbRG2OnmKl9jYyR6umu,7yTltkMBvChBkA86Tz8WfW,0yXaxoA7bBeXc31NQxiv93,0.467611,0.613,0.636364,0.828459,1.0,0.02817,0.213855,...,0.8,1.0,0.294118,0.35,0.0,0.0,0.988423,0.912232,0.0,0.633552
27507,67AiSvsRAPIt7JL1KDlvBq,4mwXUEKaW4ftbncf9Hi58l,3jliDnhfFupNVHNdOI4sKh,0.637652,0.725,0.636364,0.857225,1.0,0.038981,0.316265,...,0.8,1.0,0.235294,0.55,0.0,0.0,0.988192,0.932686,0.0,0.640293
19185,3Ne1RRcRpIn2RBYyuidXcS,1mfDfLsMxYcOOZkzBxvSVW,2mlPEYhK1KwDD45CCCWxaH,0.468623,0.723,0.727273,0.894781,1.0,0.030561,0.252008,...,0.8,1.0,0.352941,0.6,0.0,0.0,0.987366,0.963074,0.0,0.650147
59321,16LVMgMrnpCCgQyWMlXaMg,4yMGs8CtlMVF7RV2XU539m,50F9ClKFmuVCI1EU1aoEyK,0.536437,0.674,0.636364,0.866846,1.0,0.032952,0.258032,...,0.8,1.0,0.176471,0.3,0.0,0.0,0.987174,0.865514,0.0,0.617563
19562,4F2t297QK40XFcIHqkmLHs,4lxfqrEsLX6N1N4OCSkILp,2sS9l8uc3d3UsK9unJyrD8,0.516194,0.667,0.727273,0.883514,1.0,0.030561,0.293173,...,0.8,0.5,0.705882,0.75,0.0,0.0,0.986974,0.963867,0.0,0.65028


In [91]:
# Same scoring as for df, but against the mean playlist vector (playvec2) and
# on the untouched copy df2.
df2['sim'] = cosine_similarity(df2.iloc[:, 3:16], playvec2.iloc[:, 3:16])
df2['sim2'] = cosine_similarity(df2.iloc[:, 16:19], playvec2.iloc[:, 16:19])
# Pick the genre columns by name. The old slice iloc[:, 19:-4] depended on how
# many sim* columns earlier runs had already appended to the frame.
genre_cols2 = [col for col in df2.columns if str(col).startswith('genre|')]
df2['sim3'] = cosine_similarity(df2[genre_cols2], playvec2[genre_cols2])
df2['sim4'] = (df2['sim'] + df2['sim2'] + df2['sim3']) / 3
df2 = df2.sort_values(['sim'], ascending=False, kind='stable')
# At most 5 tracks per artist, then the best 50 overall.
qq = df2.groupby('artist_uri').head(5).track_uri.head(50)
aa = sp.tracks(list(qq))
# Build the table from the tracks actually returned instead of assuming 50.
# Optional extra fields (disabled in the original):
# external_urls.spotify and album.images[1].url.
Fresult2 = pd.DataFrame(
    [
        {0: rank, 'track_name': track['name'], 'artist_name': track['artists'][0]['name']}
        for rank, track in enumerate(aa['tracks'])
        if track is not None
    ],
    columns=[0, 'track_name', 'artist_name'],
)
Fresult2

Unnamed: 0,0,track_name,artist_name
0,0,Interlude: Talk Radio,Gorillaz
0,1,Overture,Icon For Hire
0,2,Dungeons and Dragons Theme (Instrumental Version),Cult Hit Tributes
0,3,The Cube - Scene,Logic
0,4,The Letter,Outkast
0,5,The Westward Procession,Hillsong Worship
0,6,Doctor Who Theme - TV Version,Murray Gold
0,7,Stay Alive - Interlude,J.PERIOD
0,8,All Blues - Studio Sequence,Miles Davis
0,9,Electric Zoo,Spongebob Squarepants


In [92]:
df2.iloc[0:1,16:19]

Unnamed: 0,Track_release_date,Track_pop,Artist_pop
3409,40,1,15


In [93]:

playvec2.iloc[:,16:19]

Unnamed: 0,Track_release_date,Track_pop,Artist_pop
0,1.0,0.593529,0.417


In [94]:
playvec.iloc[:,16:19]

Unnamed: 0,Track_release_date,Track_pop,Artist_pop
0,100.0,59.352943,41.700001


In [95]:
cosine_similarity(df2.iloc[0:1,16:19],playvec.iloc[:,16:19])

array([[0.88745157]])

In [99]:
Fresult.head()

Unnamed: 0,0,track_name,artist_name
0,0,Interlude: Talk Radio,Gorillaz
0,1,Overture,Icon For Hire
0,2,Dungeons and Dragons Theme (Instrumental Version),Cult Hit Tributes
0,3,The Cube - Scene,Logic
0,4,The Letter,Outkast


In [105]:
df.head()

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,Track_release_date,Track_pop,Artist_pop,genre|fi_jazzhop,genre|lo,sim,sim2,sim3,sim4,sim5
3409,5f0FCcrkvyhiRT5wynS0XN,3AA28KZvwAUcZuOKwyblJQ,0NvirtaDCaZU5PAW1O5FDE,0.263,0.935,0.0,-7.934,1.0,0.143,0.778,...,40,1,15,0.0,0.0,0.014329,0.887452,0.0,0.300594,1.0
49930,5SVWMBJhwrBDYLioEOmDBw,1Jy0lTKAQDnTklKzF0g2o7,034rIpQ6gBG6lASW3nnuNT,0.207,0.658,11.0,-8.45,1.0,0.0636,0.0396,...,40,1,10,0.0,0.0,0.014,0.87856,0.0,0.29752,0.977037
97634,1jNb6NlxXc6BmFMEHMIyEM,5jAQe0gOlmlyNKyfd5d2jH,6OT1X94BRhcaDj5CKswYCX,0.296,0.707,8.0,-8.3,1.0,0.0421,0.00231,...,40,1,5,0.0,0.0,0.013978,0.856738,0.0,0.290239,0.975512
10822,30S46V7RcyGXUModoqW69b,4xRYI6VqpkE3UwrDrAZL8L,5dOpbgAmJeyoakKQ0QLWkR,0.453,0.245,1.0,-21.58,1.0,0.302,0.117,...,40,1,14,0.0,0.0,0.013896,0.886621,0.0,0.300172,0.969753
532,0RBlLGGoMx0AxCsO4XyjGa,1G9G7WwrXka3Z1r7aIDjI7,1UsmQ3bpJTyK6ygoOOjG1r,0.697,0.11,0.0,-23.55,1.0,0.917,0.167,...,40,1,14,0.0,0.0,0.013879,0.886621,0.0,0.300167,0.968594


In [100]:
df2.head()

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,time_signature,Track_release_date,Track_pop,Artist_pop,genre|fi_jazzhop,genre|lo,sim,sim2,sim3,sim4
3409,5f0FCcrkvyhiRT5wynS0XN,3AA28KZvwAUcZuOKwyblJQ,0NvirtaDCaZU5PAW1O5FDE,0.263,0.935,0.0,-7.934,1.0,0.143,0.778,...,3.0,40,1,15,0.0,0.0,0.014329,0.887452,0.0,0.300594
49930,5SVWMBJhwrBDYLioEOmDBw,1Jy0lTKAQDnTklKzF0g2o7,034rIpQ6gBG6lASW3nnuNT,0.207,0.658,11.0,-8.45,1.0,0.0636,0.0396,...,4.0,40,1,10,0.0,0.0,0.014,0.87856,0.0,0.29752
97634,1jNb6NlxXc6BmFMEHMIyEM,5jAQe0gOlmlyNKyfd5d2jH,6OT1X94BRhcaDj5CKswYCX,0.296,0.707,8.0,-8.3,1.0,0.0421,0.00231,...,4.0,40,1,5,0.0,0.0,0.013978,0.856738,0.0,0.290239
10822,30S46V7RcyGXUModoqW69b,4xRYI6VqpkE3UwrDrAZL8L,5dOpbgAmJeyoakKQ0QLWkR,0.453,0.245,1.0,-21.58,1.0,0.302,0.117,...,1.0,40,1,14,0.0,0.0,0.013896,0.886621,0.0,0.300172
532,0RBlLGGoMx0AxCsO4XyjGa,1G9G7WwrXka3Z1r7aIDjI7,1UsmQ3bpJTyK6ygoOOjG1r,0.697,0.11,0.0,-23.55,1.0,0.917,0.167,...,5.0,40,1,14,0.0,0.0,0.013879,0.886621,0.0,0.300167


In [101]:
Fresult2.head()

Unnamed: 0,0,track_name,artist_name
0,0,Interlude: Talk Radio,Gorillaz
0,1,Overture,Icon For Hire
0,2,Dungeons and Dragons Theme (Instrumental Version),Cult Hit Tributes
0,3,The Cube - Scene,Logic
0,4,The Letter,Outkast


In [86]:
# Score the candidates against the summed playlist vector, selecting the
# popularity/date and genre columns by name so that previously appended sim*
# columns cannot shift the slices.
df['sim'] = cosine_similarity(df.iloc[:, 3:16], playvec.iloc[:, 3:16])
# Columns starting with capital 'T' or 'A': the Track_* / Artist_* buckets.
df_meta_mask = df.columns.str.startswith('T') | df.columns.str.startswith('A')
playvec_meta_mask = playvec.columns.str.startswith('T') | playvec.columns.str.startswith('A')
df['sim2'] = cosine_similarity(df.loc[:, df_meta_mask], playvec.loc[:, playvec_meta_mask])
df['sim3'] = cosine_similarity(df.loc[:, df.columns.str.startswith('genre')],
                               playvec.loc[:, playvec.columns.str.startswith('genre')])
df['sim4'] = (df['sim'] + df['sim2'] + df['sim3']) / 3
df = df.sort_values(['sim4'], ascending=False, kind='stable')
# genra>audio>pop
# At most 5 tracks per artist, then the best 50 overall.
qq = df.groupby('artist_uri').head(5).track_uri.head(50)
aa = sp.tracks(list(qq))
# Build the table from the tracks actually returned. The old loop indexed 50
# entries unconditionally and concatenated a frame per row.
# Optional extra fields (disabled in the original):
# external_urls.spotify and album.images[1].url.
Fresult = pd.DataFrame(
    [
        {0: rank, 'track_name': track['name'], 'artist_name': track['artists'][0]['name']}
        for rank, track in enumerate(aa['tracks'])
        if track is not None
    ],
    columns=[0, 'track_name', 'artist_name'],
)
Fresult

Unnamed: 0,0,track_name,artist_name
0,0,Adventure of a Lifetime,Coldplay
0,1,In My Place,Coldplay
0,2,Christmas Lights,Coldplay
0,3,Shiver,Coldplay
0,4,God Put a Smile upon Your Face,Coldplay
0,5,Don't You Want Me,The Human League
0,6,Steppin' Out,Joe Jackson
0,7,Human,The Human League
0,8,Human - Edit,The Human League
0,9,Brand New Day,Sting


In [87]:
# Baseline for comparison: ask Spotify's own recommender for 2 tracks per
# sliding window of playlist seeds, stopping once 50 rows are collected.
spotify_rows = []
for i in range(len(test) - 1):
    if len(spotify_rows) >= 50:
        break
    # NOTE(review): the window starts at position 1, so the first playlist
    # track is never used as a seed, and windows near the end hold fewer than
    # 4 seeds. Confirm this is intended.
    ff = sp.recommendations(seed_tracks=list(test.track_uri[1+i:5+i]), limit=2)
    # Iterate over what was actually returned; the old range(2) raised an
    # IndexError whenever fewer than two tracks came back.
    for z, rec in enumerate(ff['tracks'][:2]):
        spotify_rows.append({
            0: z + (2*i) + 1,
            'track_name': rec['name'],
            'artist_name': rec['artists'][0]['name'],
        })
# Optional extra fields (disabled in the original): id,
# external_urls.spotify and album.images[1].url.
Spotifyresult = pd.DataFrame(spotify_rows, columns=[0, 'track_name', 'artist_name'])
Spotifyresult

Unnamed: 0,0,track_name,artist_name
0,1,Jaded,Aerosmith
0,2,Stay on These Roads,a-ha
0,3,See You Again (feat. Charlie Puth),Wiz Khalifa
0,4,Empire,Of Monsters and Men
0,5,Love Not War (The Tampa Beat),Jason Derulo
0,6,Sweet Creature,Harry Styles
0,7,Still Fighting It,Ben Folds
0,8,A Lack of Color,Death Cab for Cutie
0,9,Burning the heather,Pet Shop Boys
0,10,Among The Living,The Thorns


In [69]:
df.iloc[0:1,3:16]

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
113344,0.486,0.617,5.0,-7.113,0.0,0.0287,0.0954,3e-06,0.109,0.417,138.0,242373.0,4.0


In [59]:
cosine_similarity(df.iloc[0:1,3:16],playvec.iloc[0:1,3:16])

array([[0.02831392]])

## Resaving scaler after modifying dataset

In [21]:
import pandas as pd
import pickle

In [22]:
from sklearn.preprocessing import MinMaxScaler

In [75]:
# Column -> dtype mapping used when reading the Streamlit CSV. Columns are
# grouped by dtype but kept in file order, so the column-name list can be
# derived directly from the mapping's insertion order.
dtypes = {
    **dict.fromkeys(['track_uri', 'artist_uri', 'album_uri'], 'object'),
    **dict.fromkeys(
        ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
         'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo'],
        'float16',
    ),
    'duration_ms': 'float32',
    'time_signature': 'float16',
    **dict.fromkeys(['Track_release_date', 'Track_pop', 'Artist_pop'], 'int8'),
    'Artist_genres': 'object',
}
# Positional column names, in exactly the order the keys were inserted above.
col_name = list(dtypes)

In [76]:
# Load the Streamlit dataset: skip the file's first line and apply our own
# column names and dtypes instead.
df = pd.read_csv(
    "Streamlit/data/streamlit_data.csv",
    skiprows=1,
    names=col_name,
    dtype=dtypes,
)

In [77]:
# Inspect the first loaded row to check that the columns were parsed under the
# expected names.
df.iloc[0]

track_uri                                        0UaMYEvWZi0ZqiDOoHU3YI
artist_uri                                       2wIVse2owClT7go1WT98tk
album_uri                                        6vV5UrXcfyQD1wu4Qo2I9K
danceability                                                   0.903809
energy                                                         0.812988
key                                                                 4.0
loudness                                                      -7.105469
mode                                                                0.0
speechiness                                                    0.120972
acousticness                                                   0.031097
instrumentalness                                               0.006969
liveness                                                       0.047089
valence                                                        0.810059
tempo                                                          1

In [26]:
# Print each column name next to its positional index, as a lookup aid for the
# .iloc slices used in the following cells.
for position, column in enumerate(df.columns):
    print(f"{position} - {column}")

0 - track_uri
1 - artist_uri
2 - album_uri
3 - danceability
4 - energy
5 - key
6 - loudness
7 - mode
8 - speechiness
9 - acousticness
10 - instrumentalness
11 - liveness
12 - valence
13 - tempo
14 - duration_ms
15 - time_signature
16 - Track_release_date
17 - Track_pop
18 - Artist_pop
19 - Artist_genres


In [83]:
# Per-column minimum of the numeric feature columns (positions 3-18). The
# output below shows raw ranges (e.g. loudness down to -60.0), i.e. the values
# the scaler will map to 0.
df.iloc[:,3:19].min()

danceability             0.0
energy                   0.0
key                      0.0
loudness               -60.0
mode                     0.0
speechiness              0.0
acousticness             0.0
instrumentalness         0.0
liveness                 0.0
valence                  0.0
tempo                    0.0
duration_ms           4853.0
time_signature           0.0
Track_release_date      38.0
Track_pop                1.0
Artist_pop               0.0
dtype: float32

In [27]:
# Fit a fresh MinMaxScaler on the numeric feature columns (positions 3-18,
# danceability ... Artist_pop), replace those columns with their scaled values
# and persist the fitted scaler.
#
# The columns are assigned by label rather than through `.iloc[:, 3:19] = ...`:
# they were loaded as float16 / float32 / int8, and a positional write of
# float64 results either downcasts in place or relies on silent upcasting,
# depending on the pandas version. Label assignment always swaps in the
# float64 columns.
#
# NOTE(review): this cell is not idempotent - running it twice refits the
# scaler on already-scaled data and overwrites the saved file with it.
# NOTE(review): the scaler is written to 'data/sc.sav' but a later cell loads
# 'Streamlit/data/sc.sav' - confirm the file is copied or the path is intended.
sc=MinMaxScaler()
feature_cols=df.columns[3:19]
df[feature_cols]=sc.fit_transform(df[feature_cols])
# Context manager so the file is flushed and closed as soon as the dump is done.
with open('data/sc.sav', 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)

In [29]:
# Re-inspect the first row after scaling; in the output below the feature
# values (e.g. key, loudness) now fall inside [0, 1].
df.iloc[0]

track_uri                                        0UaMYEvWZi0ZqiDOoHU3YI
artist_uri                                       2wIVse2owClT7go1WT98tk
album_uri                                        6vV5UrXcfyQD1wu4Qo2I9K
danceability                                                   0.914978
energy                                                         0.812988
key                                                            0.363636
loudness                                                       0.845309
mode                                                                0.0
speechiness                                                    0.125761
acousticness                                                   0.031219
instrumentalness                                               0.006983
liveness                                                       0.047089
valence                                                        0.810059
tempo                                                          0

## Cosine Similarity between audio features

In [60]:
# Fetch the audio features of one hard-coded track id through the spotipy
# client `sp`.
tracks=sp.audio_features(["1mea3bSkSGXuIRvnydlB5b"])

In [61]:
# Wrap the API response in a DataFrame (presumably one row per returned track;
# a single id was requested here).
audio_features=pd.DataFrame(tracks)

In [62]:
# List the fields returned by the API. Besides the audio features, the output
# below includes metadata columns (type, id, uri, track_href, analysis_url).
audio_features.columns

Index(['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms',
       'time_signature'],
      dtype='object')

In [63]:
# Reload the previously fitted scaler from disk.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load scaler files produced by this project.
# Context manager so the file handle is closed right after loading instead of
# being left open.
with open('Streamlit/data/sc.sav','rb') as scaler_file:
    sc = pickle.load(scaler_file)


In [64]:
# Strip the metadata columns that are not audio features; 'uri' is kept so the
# track can still be identified.
audio_feat_dropped = audio_features.drop(
    columns=['type', 'id', 'track_href', 'analysis_url'],
)

In [65]:
# Add zero-filled placeholders for the three columns that the audio-features
# API response does not provide, then show the resulting column list.
for placeholder in ("Track_release_date", "Artist_pop", "Track_pop"):
    audio_feat_dropped[placeholder] = 0
audio_feat_dropped.columns

Index(['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'uri', 'duration_ms', 'time_signature', 'Track_release_date',
       'Artist_pop', 'Track_pop'],
      dtype='object')

In [66]:
# Reorder the columns: the thirteen audio features first, then the three
# placeholder columns, with 'uri' last so it can be sliced off with [:-1].
scaler_column_order = (
    ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
     'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
     'duration_ms', 'time_signature']
    + ['Track_release_date', 'Track_pop', 'Artist_pop']
    + ['uri']
)
audio_feat_dropped = audio_feat_dropped[scaler_column_order]

In [67]:
# Scale every column except the trailing 'uri' with the loaded scaler.
#
# The columns are assigned by label instead of `.iloc[:, :-1] = ...`: the
# placeholder columns were created as integer zeros, and writing float results
# into integer columns positionally relies on silent upcasting that newer
# pandas versions deprecate. Label assignment replaces the columns outright.
feature_cols=audio_feat_dropped.columns[:-1]
audio_feat_dropped[feature_cols]=sc.transform(audio_feat_dropped[feature_cols])

In [68]:
# Display the scaled track row.
# NOTE(review): in the output below Track_release_date and Track_pop come out
# negative - presumably because the placeholder value 0 lies below the minimum
# the scaler was fitted on; these columns are dropped again further down.
audio_feat_dropped

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,uri
0,0.491903,0.617,0.454545,0.845159,0.0,0.029834,0.095783,3e-06,0.109,0.417,0.582834,0.045028,0.8,-19.0,-0.058824,0.0,spotify:track:1mea3bSkSGXuIRvnydlB5b


In [70]:
# Check the column order after scaling ('uri' is still the last column).
audio_feat_dropped.columns

Index(['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'duration_ms', 'time_signature', 'Track_release_date', 'Track_pop',
       'Artist_pop', 'uri'],
      dtype='object')

In [71]:
# Keep only the thirteen audio-feature columns plus 'uri'; this discards the
# three placeholder columns (Track_release_date, Track_pop, Artist_pop).
audio_feat_dropped=audio_feat_dropped[['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature',
       'uri']]

In [72]:
# Scaled audio features of the fetched track, without the trailing 'uri'
# column - the vector used for the similarity computation below.
audio_feat_dropped.iloc[0:1,:-1]

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,0.491903,0.617,0.454545,0.845159,0.0,0.029834,0.095783,3e-06,0.109,0.417,0.582834,0.045028,0.8


In [77]:
# The same thirteen columns for the first row of df, for comparison.
# NOTE(review): the output below shows raw, unscaled values, so df at the time
# this ran appears not to be the scaled frame - execution order is unclear.
df.iloc[0:1,3:16]

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
113344,0.486,0.617,5.0,-7.113,0.0,0.0287,0.0954,3e-06,0.109,0.417,138.0,242373.0,4.0


In [73]:
# Cosine similarity between the scaled audio features of the fetched track and
# the first row of playvec (column positions 3-15).
# NOTE(review): playvec is defined elsewhere in the notebook.
cosine_similarity(audio_feat_dropped.iloc[0:1,:-1],playvec.iloc[0:1,3:16])

array([[0.90171789]])

In [78]:
# The same comparison using the first row of df instead of the scaled track
# features.
# NOTE(review): the df row displayed above holds raw values, which presumably
# explains the much lower score than for the scaled vector - confirm.
cosine_similarity(df.iloc[0:1,3:16],playvec.iloc[0:1,3:16])

array([[0.02831392]])