In [71]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.neighbors import NearestNeighbors
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [72]:
# Load the song dataset; the CSV is read from a path relative to the
# notebook's working directory.
# NOTE(review): the cell output echoes this statement, which looks like a
# pandas warning raised while parsing (presumably a mixed-dtype column) --
# confirm, and if so pass an explicit `dtype=` mapping to read_csv.
data = pd.read_csv('genres_v2.csv')

  data = pd.read_csv('genres_v2.csv')


In [73]:
# Build the feature matrix and the target vector.
# Features: every float64/int64 column of `data`, keeping only rows that have
# no missing value in any of those columns.
X = (
    data
    .select_dtypes(include=['float64', 'int64'])
    .dropna()
)
# Target: the 'genre' column, restricted to the row labels that survived the
# dropna() above so that X and y stay aligned.
y = data['genre'].loc[X.index]

In [74]:
# Hold out part of the rows for testing (test_size=0.3); the fixed
# random_state makes the shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [75]:
# Standardize the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [76]:
# Recommend songs whose feature vectors are closest to a given song
def get_recommendations(input, n_recommendations=5):
    """Return the names of the songs nearest to ``input`` in feature space.

    The song is looked up by exact match on ``data['song_name']``; when several
    rows share the title, the first one that has a row in ``X`` is used.
    Neighbours are searched in the module-level feature frame ``X`` with
    ``NearestNeighbors`` and its default metric.

    Row labels are used throughout (never positions): ``X`` is built with
    ``dropna()``, so position ``k`` of ``X`` is not guaranteed to be row ``k``
    of ``data``.

    NOTE(review): ``X`` is the raw (unscaled) feature frame, so columns with a
    large numeric range will dominate the distance. ``X_train_scaled`` exists
    in the notebook but is not used here -- confirm whether neighbours should
    be computed on standardized features instead.

    Parameters
    ----------
    input : str
        Title of the query song, as stored in ``data['song_name']``.
    n_recommendations : int, default 5
        Number of neighbours to request besides the query row itself.

    Returns
    -------
    list
        Song names of the nearest rows, closest first, with every entry whose
        name equals ``input`` removed. It can therefore hold fewer than
        ``n_recommendations`` names (same-titled neighbours), or one more when
        the query row itself is not among the returned neighbours.

    Raises
    ------
    ValueError
        If ``n_recommendations`` is smaller than 1.
    IndexError
        If no row named ``input`` has a complete feature vector in ``X``.
    """
    if n_recommendations < 1:
        raise ValueError("n_recommendations must be at least 1")

    # Labels of every row carrying the requested title ...
    title_mask = (data['song_name'] == input).to_numpy()
    matching_labels = data.index[title_mask]
    # ... restricted to rows that still exist in X (dropna() may have removed some).
    usable_labels = matching_labels[matching_labels.isin(X.index)]
    if len(usable_labels) == 0:
        raise IndexError(
            f"No song named {input!r} with a complete feature row was found"
        )
    query_label = usable_labels[0]

    # Keep the query as a one-row DataFrame so it has the columns the model was fitted on.
    query_features = X.loc[[query_label]]

    # One extra neighbour is requested because the query row is normally its
    # own nearest neighbour; never ask for more rows than X contains.
    nn = NearestNeighbors(n_neighbors=min(n_recommendations + 1, len(X)))
    nn.fit(X)
    _, neighbor_positions = nn.kneighbors(query_features)

    # kneighbors() returns positions within X; translate them to row labels
    # before looking the names up in `data`.
    neighbor_labels = X.index[neighbor_positions[0]]
    recommended_songs = data.loc[neighbor_labels, 'song_name']

    # Drop the query song itself (and any neighbour sharing its title).
    return recommended_songs[recommended_songs != input].tolist()




In [77]:
# Test recommendations
# Smoke test: request the neighbours of one known track and print them.
# NOTE(review): the module-level name `input` shadows the built-in input()
# for the rest of the session; consider renaming it once it is confirmed that
# no other cell relies on this variable.
input = 'XO Tour Llif3'
# `recommendations` is reused by the intra-list similarity cell further down.
recommendations = get_recommendations(input)
print('Recommendations for', input, ':')
print(recommendations)

Recommendations for XO Tour Llif3 :
['Pyro (leak 2019)', "Don't Bang My Line (feat. Night Lovell)", 'Mud', 'Fractals', 'Atlantis']




In [83]:
# Calculate the intra-list similarity of a list of recommended songs
def calculate_intra_list_similarity(recommendations, verbose=True):
    """Return the mean pairwise cosine similarity of the recommended songs.

    Each song is looked up by exact match on ``data['song_name']``; when
    several rows share a title, the first one that has a row in ``X`` is used.
    Feature vectors are read from the module-level frame ``X`` by row label
    (never by position): ``X`` is built with ``dropna()``, so positions in
    ``X`` and ``data`` are not guaranteed to line up.

    NOTE(review): ``X`` holds raw (unscaled) features, so the cosine similarity
    is presumably dominated by the largest-magnitude columns -- confirm whether
    standardized features should be used for this metric.

    Parameters
    ----------
    recommendations : iterable of str
        Song titles as stored in ``data['song_name']``.
    verbose : bool, default True
        When true, print the similarity of every pair.

    Returns
    -------
    float
        Average cosine similarity over all unordered pairs, or NaN when fewer
        than two songs are given (no pair exists to average over).

    Raises
    ------
    IndexError
        If a title has no row with a complete feature vector in ``X``.
    """
    song_names = []
    labels = []
    for song in recommendations:
        title_mask = (data['song_name'] == song).to_numpy()
        matching_labels = data.index[title_mask]
        # Only rows that survived dropna() have a feature vector in X.
        usable_labels = matching_labels[matching_labels.isin(X.index)]
        if len(usable_labels) == 0:
            raise IndexError(
                f"No song named {song!r} with a complete feature row was found"
            )
        song_names.append(song)
        labels.append(usable_labels[0])

    n_songs = len(labels)
    if n_songs < 2:
        # Avoids the division by zero below; the metric is undefined here.
        return float('nan')

    # One call yields the full similarity matrix; each pair is read from it
    # instead of being recomputed for the sum and again for the printout.
    pairwise = cosine_similarity(X.loc[labels])

    similarity = 0
    for i in range(n_songs):
        for j in range(i + 1, n_songs):
            pair_similarity = pairwise[i][j]
            similarity += pair_similarity
            if verbose:
                print("Similarity between ", song_names[i], " and ", song_names[j], " is ", pair_similarity)

    # Number of unordered pairs among n songs is n * (n - 1) / 2.
    return similarity / (n_songs * (n_songs - 1) / 2)
       

In [84]:
# calculate intra-list similarity for song recommendations
# Uses the `recommendations` list produced by the recommendation test cell;
# that cell must have been run first.
# NOTE(review): every pairwise score printed below is ~1.0, which presumably
# reflects the unscaled features rather than genuinely identical songs --
# confirm before interpreting this value.
intraListSimilarity = calculate_intra_list_similarity(recommendations)
print("Intra-list similarity for song recommendations: ", intraListSimilarity)


Similarity between  Pyro (leak 2019)  and  Don't Bang My Line (feat. Night Lovell)  is  0.9999999391205271
Similarity between  Pyro (leak 2019)  and  Mud  is  0.9999995984964225
Similarity between  Pyro (leak 2019)  and  Fractals  is  0.9999991310863483
Similarity between  Pyro (leak 2019)  and  Atlantis  is  0.9999990368607039
Similarity between  Don't Bang My Line (feat. Night Lovell)  and  Mud  is  0.9999998500419613
Similarity between  Don't Bang My Line (feat. Night Lovell)  and  Fractals  is  0.999999529877572
Similarity between  Don't Bang My Line (feat. Night Lovell)  and  Atlantis  is  0.9999994599295797
Similarity between  Mud  and  Fractals  is  0.9999999108391253
Similarity between  Mud  and  Atlantis  is  0.999999879017465
Similarity between  Fractals  and  Atlantis  is  0.9999999975751512
Intra-list similarity for song recommendations:  0.9999996332844855
