In [1]:
import pandas as pd
import numpy as np
# Seed NumPy's global random generator so that later draws from it are
# repeatable from one kernel run to the next.
np.random.seed(10)

In [2]:
# Load the simulated listening data and discard the 'Unnamed: 0' column
# that came along with the CSV, then show which columns remain.
user_data = (
    pd.read_csv('../datasets/simulated_user_dataset.csv')
    .drop(columns='Unnamed: 0')
)
print(user_data.columns)

Index(['duration (ms)', 'danceability', 'energy', 'loudness', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'spec_rate', 'labels', 'uri', 'user_id', 'group_no'],
      dtype='object')


In [3]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import MinMaxScaler

def prepare_data_for_svd(df):
    """
    Build a Surprise dataset of (user, item, rating) rows from track features.

    Every row of ``df`` is treated as one user/track interaction. The rating
    for that interaction is the mean of the row's min-max-normalized numeric
    features.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'user_id', 'uri' and every column listed in
        ``feature_columns`` below. ``df`` itself is not modified.

    Returns
    -------
    The object returned by ``Dataset.load_from_df`` for the
    ('user_id', 'uri', 'interaction_score') frame with a (0, 1) rating scale.
    """
    # Numerical features that are averaged into the interaction value
    feature_columns = [
        'danceability', 'energy', 'loudness', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'duration (ms)', 'spec_rate', 'labels'
    ]

    # Rescale every feature column to the [0, 1] range
    scaler = MinMaxScaler()
    df_normalized = pd.DataFrame(
        scaler.fit_transform(df[feature_columns]),
        columns=feature_columns
    )

    # Interaction score (you can modify this based on your needs):
    # the row-wise mean of the normalized features.
    interaction_score = df_normalized.mean(axis=1)

    # The score lives on the normalized frame, not on ``df``; selecting it
    # from ``df`` raised KeyError. Attach it positionally (``to_numpy``) to a
    # copy of the id columns so the caller's frame is untouched and a
    # non-default index on ``df`` cannot misalign the rows.
    ratings = df[['user_id', 'uri']].assign(
        interaction_score=interaction_score.to_numpy()
    )

    # Ratings are averages of [0, 1] values, so they stay within (0, 1)
    reader = Reader(rating_scale=(0, 1))

    return Dataset.load_from_df(ratings, reader)

def train_svd_model(data, n_factors=100, n_epochs=20, lr_all=0.005, reg_all=0.02):
    """
    Fit an SVD model on an 80% training split of ``data``.

    Parameters
    ----------
    data : dataset accepted by ``train_test_split``
    n_factors, n_epochs, lr_all, reg_all :
        Forwarded unchanged to the ``SVD`` constructor.

    Returns
    -------
    tuple
        ``(fitted_model, testset)`` where ``testset`` is the held-out 20%.
    """
    # Fixed random_state keeps the train/test partition the same on every run
    train_part, held_out = train_test_split(data, test_size=0.2, random_state=42)

    svd_params = {
        'n_factors': n_factors,
        'n_epochs': n_epochs,
        'lr_all': lr_all,
        'reg_all': reg_all,
    }
    svd = SVD(**svd_params)
    svd.fit(train_part)

    return svd, held_out

def evaluate_model(model, testset):
    """
    Score a fitted model on a held-out test set.

    Calls ``model.test(testset)`` and compares each prediction's true rating
    (``r_ui``) with its estimate (``est``).

    Returns
    -------
    dict
        ``{'RMSE': root-mean-squared error, 'MAE': mean absolute error}``.
    """
    # One residual per prediction; both metrics are derived from this vector
    residuals = np.array([float(p.r_ui - p.est) for p in model.test(testset)])

    root_mean_sq = np.sqrt(np.mean(residuals ** 2))
    mean_abs = np.mean(np.abs(residuals))

    return {'RMSE': root_mean_sq, 'MAE': mean_abs}

def get_recommendations(model, user_id, df, n_recommendations=5):
    """
    Get top N song recommendations for a specific user.

    Parameters
    ----------
    model : object exposing ``predict(user_id, song_id)`` whose result has
        an ``est`` attribute (the estimated rating).
    user_id : the user to recommend for, compared against ``df['user_id']``.
    df : pandas.DataFrame
        Interaction table with 'user_id' and 'uri' columns.
    n_recommendations : int, default 5
        Maximum number of predictions to return.

    Returns
    -------
    list
        Up to ``n_recommendations`` prediction objects for songs the user
        has no row for in ``df``, ordered by ``est`` descending. Songs with
        equal ``est`` keep the order in which they first appear in ``df``.
    """
    # Unique songs, in order of first appearance in df
    all_songs = df['uri'].unique()

    # Songs the user has already interacted with
    user_songs = set(df.loc[df['user_id'] == user_id, 'uri'])

    # Filter in place instead of taking a set difference: set iteration order
    # for strings changes between interpreter runs, which made the result
    # non-reproducible whenever several songs shared the same estimate.
    songs_to_predict = [song for song in all_songs if song not in user_songs]

    # Make predictions
    predictions = [model.predict(user_id, song_id) for song_id in songs_to_predict]

    # sorted() is stable, so ties keep the deterministic candidate order
    ranked = sorted(predictions, key=lambda pred: pred.est, reverse=True)

    # Return top N recommendations
    return ranked[:n_recommendations]

# Example usage
def main(df):
    """
    Run the whole pipeline once: build the dataset, train, evaluate, recommend.

    Prints the RMSE and MAE measured on the held-out split, then returns
    ``(model, recommendations)`` where the recommendations are for the user
    found in the first row of ``df``.
    """
    # Dataset construction feeds straight into training; keep the hold-out split
    model, testset = train_svd_model(prepare_data_for_svd(df))

    # Report hold-out accuracy
    metrics = evaluate_model(model, testset)
    print("Model Performance:")
    print(f"RMSE: {metrics['RMSE']:.4f}")
    print(f"MAE: {metrics['MAE']:.4f}")

    # Demonstrate recommendations using the user in the first row
    first_user = df['user_id'].iloc[0]
    return model, get_recommendations(model, first_user, df)

# Cross-validation function for parameter tuning
def perform_cross_validation(data, param_grid):
    """
    Grid-search SVD hyper-parameters with 5-fold cross-validation.

    Parameters
    ----------
    data : dataset accepted by ``cross_validate``
    param_grid : mapping
        Must provide iterables under 'n_factors', 'n_epochs' and 'lr_all'.
        May additionally provide 'reg_all'; when it is absent the SVD
        default regularization is used and no 'reg_all' column is reported.

    Returns
    -------
    pandas.DataFrame
        One row per parameter combination with the parameter values and the
        mean test 'RMSE' and 'MAE' across the folds.
    """
    # Local import keeps this block self-contained
    from itertools import product

    # 'reg_all' is tunable in train_svd_model, so honour it here when given;
    # the [None] placeholder means "leave SVD's default alone".
    tune_reg = 'reg_all' in param_grid
    reg_values = param_grid['reg_all'] if tune_reg else [None]

    results = []

    # product() iterates in the same order as the equivalent nested loops
    for n_factors, n_epochs, lr, reg in product(
        param_grid['n_factors'],
        param_grid['n_epochs'],
        param_grid['lr_all'],
        reg_values,
    ):
        svd_kwargs = {
            'n_factors': n_factors,
            'n_epochs': n_epochs,
            'lr_all': lr,
        }
        if tune_reg:
            svd_kwargs['reg_all'] = reg

        model = SVD(**svd_kwargs)

        cv_results = cross_validate(model, data, measures=['RMSE', 'MAE'],
                                    cv=5, verbose=False)

        # Report exactly the parameters that were passed, then the fold means
        row = dict(svd_kwargs)
        row['RMSE'] = cv_results['test_rmse'].mean()
        row['MAE'] = cv_results['test_mae'].mean()
        results.append(row)

    return pd.DataFrame(results)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/opt/miniconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/opt/miniconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/opt/miniconda3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File "/opt/miniconda3/lib/python3.12/s

ImportError: numpy.core.multiarray failed to import (auto-generated because you didn't call 'numpy.import_array()' after cimporting numpy; use '<void>numpy._import_array' to disable if you are certain you don't need it).