In [173]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

In [174]:
# Read the track table from the CSV file in the working directory
tracks_csv = 'tracks.csv'
df = pd.read_csv(tracks_csv)

### Data Preprocessing

In [175]:
# Derive the decade ("era") of each track: take the year that precedes the
# first '-' in release_date and round it down to a multiple of ten
release_year = df['release_date'].str.split('-').str[0].astype(int)
df['era'] = release_year // 10 * 10

In [176]:
# Z-score the numeric columns in place (zero mean, unit variance per column)
numerical_features = [
    'popularity', 'duration_ms', 'danceability', 'energy', 'key', 'loudness',
    'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'tempo',
]
scaler = StandardScaler().fit(df[numerical_features])
df[numerical_features] = scaler.transform(df[numerical_features])

In [177]:
# Discard the columns that play no part in the steps below
columns_to_drop = [
    'explicit',
    'mode',
    'valence',
    'id_artists',
    'release_date',
    'time_signature',
]
df.drop(columns_to_drop, axis='columns', inplace=True)

In [178]:
# Drop rows that repeat an earlier row in every column except 'id' and
# 'popularity'; the first occurrence is the one that survives
dedup_columns = df.columns.difference(['id', 'popularity'])
df.drop_duplicates(subset=dedup_columns, keep='first', inplace=True)

In [179]:
# Preview the first five rows of the preprocessed table
df.head(n=5)

Unnamed: 0,id,name,popularity,duration_ms,artists,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,tempo,era
0,35iwgR4jXetI318WEWsa1Q,Carve,-1.17416,-0.815233,['Uli'],0.490096,-0.385182,-1.483654,-0.615393,1.924128,0.642528,2.362779,-0.341434,-0.457392,1920
1,021ht4sdgPcrDgSk7JTbKY,Capítulo 2.16 - Banquero Anarquista,-1.500768,-1.042088,['Fernando Pessoa'],0.791115,-1.107625,-1.483654,-2.34411,4.736917,0.995129,-0.42512,-0.35771,-0.552876,1920
2,07A5yehtSnoedViJAZkNnc,Vivo para Quererte - Remasterizado,-1.500768,-0.382618,['Ignacio Corsini'],-0.780204,-1.449,-1.199517,-2.156266,-0.298309,1.559864,-0.343432,-0.010498,0.401596,1920
3,08FmqUhxtyLTn6pAh6bk45,El Prisionero - Remasterizado,-1.500768,-0.420026,['Ignacio Corsini'],-1.460507,-1.776084,0.50531,-3.488663,-0.302756,1.56273,3.014787,-0.596418,1.730782,1920
4,08y9GfoqCWfOGsKdwojr5e,Lady of the Evening,-1.500768,-0.529308,['Dick Haymes'],-0.972856,-1.52442,-0.631241,-1.315289,-0.366127,1.54553,0.062013,0.526596,-0.51219,1920


In [180]:
# Define the feature columns used for recommendation and apply per-feature weights

# 'era' holds a raw decade (e.g. 1920) while every other feature below was
# standardized earlier. Left unscaled, the weighted era component is hundreds
# of times larger than the rest and nearly constant, so the cosine metric
# barely reacts to it despite its weight being the largest. Put it on the same
# z-score scale in a separate column so the readable 'era' column is preserved.
era_std = df['era'].std(ddof=0) or 1.0  # a constant column would otherwise divide by zero
df['era_scaled'] = (df['era'] - df['era'].mean()) / era_std

# features[i] is weighted by feature_weights[i]; keep the two lists aligned
features = ['danceability', 'energy', 'key', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'tempo', 'era_scaled', 'popularity']
feature_weights = [8, 4, 2, 8, 3, 7, 7, 2, 8, 10, 7]

# Rescale the weights so they sum to 1 (the total is computed once, not per element)
total_weight = sum(feature_weights)
normalized_weights = [weight / total_weight for weight in feature_weights]

X = df[features]
X_weighted = X * normalized_weights

In [181]:
# Build the neighbour index: exhaustive (brute-force) search under cosine
# distance over the weighted feature matrix, 11 neighbours by default
nn_model = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=11).fit(X_weighted)

# Define a function to get music recommendations for a given song ID
def get_recommendations(song_id, n_recommendations=10):
    """Return the tracks most similar to ``song_id``.

    Relies on the notebook-level ``df``, ``features``, ``normalized_weights``
    and the fitted ``nn_model``.

    Parameters
    ----------
    song_id : str
        Value to look up in ``df['id']``.
    n_recommendations : int, default 10
        Maximum number of tracks to return.

    Returns
    -------
    pandas.DataFrame or None
        Up to ``n_recommendations`` rows of ``df`` ordered from nearest to
        farthest, never including the query track itself; ``None`` when
        ``song_id`` is not present in ``df``.

    Raises
    ------
    ValueError
        If ``n_recommendations`` is smaller than 1.
    """
    if n_recommendations < 1:
        raise ValueError("n_recommendations must be at least 1")

    # Look up the features of the given song based on its ID
    matches = df.loc[df['id'] == song_id, features]
    if matches.empty:
        return None  # Return None if the song ID is not found in the dataset

    # Keep a one-row DataFrame (not a bare array) so the query carries the same
    # column names the model was fitted with; if the ID matches several rows
    # only the first is used as the query.
    query_weighted = matches.iloc[[0]] * normalized_weights

    # The query track is part of the fitted data and comes back as its own
    # nearest neighbour, so ask for one extra and filter it out afterwards.
    # Never request more neighbours than the model holds samples.
    n_neighbors = min(n_recommendations + 1, nn_model.n_samples_fit_)
    _, indices = nn_model.kneighbors(query_weighted, n_neighbors)

    # kneighbors returns row positions in the fitted matrix, hence iloc
    neighbours = df.iloc[indices[0]]
    recommended_songs = neighbours[neighbours['id'] != song_id].head(n_recommendations)
    return recommended_songs

In [183]:
# Recommend tracks for one example song and write them to a CSV file
song_id = '7lPN2DXiMsVn7XUKtOW1CS'
recommendations = get_recommendations(song_id)

# get_recommendations returns None for an unknown ID; fail with a clear
# message rather than an AttributeError from calling .to_csv on None
if recommendations is None:
    raise ValueError(f"Song ID {song_id!r} was not found in the dataset; no recommendations written")

recommendations.to_csv('recommendations.csv', index=False)