In [1]:
import os
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import pairwise_distances, mean_squared_error, classification_report
import seaborn as sns 
import matplotlib.pyplot as plt 
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler


In [2]:
# Load your CSV file (replace 'your_dataset.csv' with the actual file name)
df = pd.read_csv('songs.csv')

# Scale 'Views' values to be between 0 and 1
scaler = MinMaxScaler()
df['Views_scaled'] = scaler.fit_transform(df[['Views']])


# Create numerical indices for artists and tracks
df['Artist_ID'] = pd.Categorical(df['Artist']).codes
df['Track_ID'] = pd.Categorical(df['Track']).codes

# Use 'Views_scaled' for building the user-song matrix
user_song_matrix = df.pivot_table(index='Artist_ID', columns='Track_ID', values='Views_scaled', fill_value=0)


# Handle NaN values in the user-song matrix
user_song_matrix = user_song_matrix.fillna(0)


In [3]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(user_song_matrix, test_size=0.2, random_state=42)

# Calculate cosine similarity between users
user_similarity = cosine_similarity(train_data)

In [4]:
# Function for prediction
def predict(ratings, similarity, epsilon=1e-9):
    mean_user_rating = ratings.mean(axis=1)
    ratings_diff = (ratings - mean_user_rating[:, np.newaxis])
    pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / (np.abs(similarity).sum(axis=1)[:, np.newaxis] + epsilon)
    return pred

In [5]:
# Making predictions and evaluating using Mean Squared Error (MSE) Method
user_prediction = predict(train_data.values, user_similarity)
common_indices = np.where(test_data.values > 0)
user_prediction = np.nan_to_num(user_prediction)

mse = mean_squared_error(test_data.values[common_indices], user_prediction[common_indices])
rmse = np.sqrt(mse)
print(f'Mean Squared Error: {mse}')
print(f'Root Mean Squared Error: {rmse}')

Mean Squared Error: 0.001319220136204168
Root Mean Squared Error: 0.036321070141230254


In [6]:
# Recommend tracks for a given artist
def recommend_tracks(artist_id, user_prediction, df, n=5):
    sorted_indices = np.argsort(user_prediction[artist_id])[::-1]
    recommended_tracks = []
    for i in range(n):
        track_name = df['Track'].iloc[sorted_indices[i]]
        recommended_tracks.append(track_name)
    return recommended_tracks

In [7]:
# Function to get artist ID by name
def get_artist_id_by_name(artist_name, df):
    artist_id = df[df['Artist'] == artist_name]['Artist_ID'].iloc[0] if artist_name in df['Artist'].values else None
    return artist_id

# Example usage:
user_input_artist_name = input("Enter the name of artist: ")
artist_id = get_artist_id_by_name(user_input_artist_name, df)

if artist_id is not None:
    print(f"The numerical ID for {user_input_artist_name} is: {artist_id}")
    # Now you can use artist_id with iloc or other operations
    artist_name_from_id = df[df['Artist_ID'] == artist_id]['Artist'].iloc[0]
    
    # Recommend tracks for the specified artist
    recommended_tracks = recommend_tracks(artist_id, user_prediction, df)
    print(f'Recommended tracks for artist | {artist_name_from_id} | (ID {artist_id}): ')
    for i, track_name in enumerate(recommended_tracks, 1):
        print(f'{i}. {track_name}')

else:
    print(f"Artist {user_input_artist_name} not found in the dataset.")

The numerical ID for Nirvana is: 1383
Recommended tracks for artist | Nirvana | (ID 1383): 
1. Raja Ji Khoon Kaida
2. Bag End
3. Is This Love
4. Soy Yo - Sing 2 Mix
5. Nee Tholaindhaayo
