In [13]:
import numpy as np
import pandas as pd
spotify = pd.read_csv('data.csv')

In [14]:
spotify = spotify.tail(50).reset_index(drop=True)
spotify.head(5)

Unnamed: 0,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,valence,year
0,0.119,['Tiagz'],0.96,128926,0.412,0,42EG9CclEoBnRq7X5B7O9j,0.0,1,0.0903,-11.281,1,My Heart Went Oops,72,29-01-2020,0.607,121.027,0.49,2020
1,0.171,"['Blueface', 'DaBaby']",0.914,128040,0.583,1,5eViBefGU8Fm9OAeJVZJjW,0.0,7,0.139,-7.912,0,Obama (feat. DaBaby),68,13-03-2020,0.452,134.882,0.588,2020
2,0.76,['Giveon'],0.64,260776,0.355,0,6qBFSepqLCuh5tehehc1bd,7e-05,10,0.114,-7.757,0,LIKE I WANT YOU,68,27-03-2020,0.065,119.513,0.437,2020
3,0.0962,['Ellie Goulding'],0.535,191333,0.648,0,6smYfKpqsvpmqBXkLjYGJo,0.0,10,0.118,-4.479,1,Power,73,21-05-2020,0.159,162.084,0.511,2020
4,0.474,['Mac Miller'],0.677,298440,0.33,0,2Yv2mHzr5AQavVdwQjEokV,0.000871,2,0.064,-11.198,1,Hand Me Downs,68,17-01-2020,0.149,68.985,0.336,2020


In [15]:
li = ['release_date','popularity','mode','key','explicit','duration_ms','id','liveness','valence','speechiness','instrumentalness','year']
for i in li:
    spotify.drop(i,axis=1,inplace=True)

In [16]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
prep = ['acousticness','danceability','energy','loudness','tempo']
for i in prep:
    scaler.fit(spotify[[i]])
    spotify[i] = scaler.transform(spotify[[i]])

In [17]:
selected_features = ['loudness', 'tempo', 'energy', 'danceability', 'acousticness']
feature_vectors = []
song_names = []

for index, song in spotify.iterrows():
    song_name = song['name']
    song_features = [song[feature] for feature in selected_features]
    feature_vector = np.array(song_features)
    feature_vectors.append(feature_vector)
    song_names.append(song_name)

In [18]:
from sklearn.metrics.pairwise import cosine_similarity

num_songs = len(feature_vectors)
similarities = np.zeros((num_songs, num_songs))

def calculate_energy_similarity(song1, song2):
    energy_diff = abs(song1['energy'] - song2['energy'])
    return 1.0 / (1.0 + energy_diff)

for i in range(num_songs):
    for j in range(num_songs):
        if(i == j):
            similarities[i][j] = 1.0
        else:
            similarity_score = calculate_energy_similarity(spotify.iloc[i], spotify.iloc[j])
            similarities[i][j] = similarity_score

In [19]:
def recommend_songs(input_song_name, spotify, similarities, num_recommendations=5):
    input_song_row = spotify[spotify['name'] == input_song_name]

    if input_song_row.empty:
        return []

    input_energy_level = input_song_row.iloc[0]['energy']
    similar_songs = []
    for i, similarity_score in enumerate(similarities[input_song_row.index[0]]):
        if i == input_song_row.index[0]:
            continue

        if abs(spotify.iloc[i]['energy'] - input_energy_level) < 0.1:
            similar_songs.append(spotify.iloc[i]['name'])

        if len(similar_songs) >= num_recommendations:
            break

    return similar_songs

In [20]:
import pickle
pickle.dump(similarities,open("similarity.pkl","wb"))
pickle.dump(spotify,open("spotify.pkl","wb"))

In [21]:
recommended_songs = recommend_songs("More Hearts Than Mine", spotify, similarities)
recommended_songs

['My Heart Went Oops',
 'Friday 13th (feat. Octavian)',
 'Cornelia Street - Live From Paris',
 'Welcome to Chilis',
 'Demon Time (Lil Yachty feat. Draft Day)']