In [2]:
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so that later np.random calls in this notebook
# (e.g. the random user pick in the demo) give the same result on every run.
np.random.seed(10)

In [5]:
# Load the simulated user dataset; the path is relative to the notebook's
# working directory.
user_data = pd.read_csv('../datasets/simulated_user_dataset.csv')
# Preview the first rows to check which columns were read.
print(user_data.head())

   Unnamed: 0  duration (ms)  danceability  energy  loudness  speechiness  \
0      114604       246022.0         0.709   0.965    -2.686       0.2110   
1      199817       217187.0         0.552   0.463    -9.976       0.0292   
2          32       266668.0         0.570   0.368    -9.128       0.0329   
3       82712       170418.0         0.629   0.832    -5.633       0.0442   
4       41910       312413.0         0.510   0.887    -5.569       0.0799   

   acousticness  instrumentalness  liveness  valence    tempo     spec_rate  \
0        0.0320          0.025800     0.118    0.699  120.065  8.576469e-07   
1        0.4550          0.000166     0.103    0.498   79.312  1.344464e-07   
2        0.0514          0.000000     0.108    0.136  139.937  1.233744e-07   
3        0.0547          0.203000     0.221    0.120  123.975  2.593623e-07   
4        0.0194          0.000000     0.225    0.255  134.029  2.557512e-07   

   labels                                   uri  user_id  grou

In [6]:
# Drop column 'Unnamed: 0' (presumably an index saved with the CSV - confirm).
# errors='ignore' keeps this cell safe to re-run: once the column is gone,
# a second drop is a no-op instead of raising KeyError.
user_data = user_data.drop(columns='Unnamed: 0', errors='ignore')
print(user_data.columns)

Index(['duration (ms)', 'danceability', 'energy', 'loudness', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'spec_rate', 'labels', 'uri', 'user_id', 'group_no'],
      dtype='object')


In [8]:
from scipy.sparse.linalg import svds
from sklearn.preprocessing import MinMaxScaler

def create_user_item_matrix(df, feature_columns=None):
    """
    Step 1: Create the user-item matrix
    - Rows represent users
    - Columns represent songs (items)
    - Values represent the interaction score

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'user_id', 'uri' and every column in `feature_columns`.
        The frame is NOT modified; the score is computed on a derived copy.
    feature_columns : sequence of str, optional
        Columns averaged into the interaction score. Defaults to the audio
        feature set used throughout this notebook.

    Returns
    -------
    pandas.DataFrame
        Index is 'user_id', columns are 'uri', values are the interaction
        score. (user, song) pairs that never occur are filled with 0.

    Raises
    ------
    KeyError
        If a required column is missing from `df`.
    """
    if feature_columns is None:
        # NOTE(review): 'labels' is averaged like a continuous feature; if it
        # is a categorical cluster id its scaled value carries no ordering -
        # confirm that this is intended.
        feature_columns = [
            'danceability', 'energy', 'loudness', 'speechiness',
            'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
            'duration (ms)', 'spec_rate', 'labels'
        ]
    feature_columns = list(feature_columns)

    # Normalize the features to 0-1 scale so each contributes equally
    scaler = MinMaxScaler()
    normalized_features = scaler.fit_transform(df[feature_columns])

    # Interaction score = average of the normalized features. It is attached
    # to a new two-column frame so the caller's DataFrame is left untouched.
    scored = df[['user_id', 'uri']].assign(
        interaction_score=np.mean(normalized_features, axis=1)
    )

    # Pivot to users x songs. pivot_table averages duplicate (user, song)
    # rows by default; absent pairs become 0, which the recommendation step
    # treats as "no interaction".
    user_item_matrix = scored.pivot_table(
        index='user_id',
        columns='uri',
        values='interaction_score',
        fill_value=0
    )

    return user_item_matrix

In [12]:

def perform_svd(matrix, k=10):
    """
    Step 2: Perform SVD on the matrix
    - Decompose matrix into U, Sigma, and V matrices
    - k is the number of latent factors

    Parameters
    ----------
    matrix : pandas.DataFrame
        User-item matrix (anything exposing a 2-D `.values` array).
    k : int, default 10
        Requested number of latent factors. `svds` only accepts
        1 <= k < min(matrix.shape), so larger requests are reduced to
        min(matrix.shape) - 1 instead of failing.

    Returns
    -------
    U : ndarray, shape (n_rows, k_used)
    sigma : ndarray, shape (k_used, k_used)
        Diagonal matrix of singular values.
    Vt : ndarray, shape (k_used, n_cols)
    matrix_mean : ndarray, shape (n_rows,)
        Per-row mean removed before factorising; add it back when
        reconstructing.

    Raises
    ------
    ValueError
        If k < 1 or the matrix is too small (min dimension < 2) for `svds`.
    """
    # Convert to numpy array; svds needs a floating-point matrix
    matrix_numpy = matrix.values
    if not np.issubdtype(matrix_numpy.dtype, np.inexact):
        matrix_numpy = matrix_numpy.astype(float)

    # Largest number of factors svds can extract from this matrix
    max_k = min(matrix_numpy.shape) - 1
    if max_k < 1:
        raise ValueError(
            f"matrix of shape {matrix_numpy.shape} is too small for truncated SVD"
        )
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    k = min(k, max_k)

    # Center the matrix (subtract each row's mean)
    matrix_mean = np.mean(matrix_numpy, axis=1)
    matrix_centered = matrix_numpy - matrix_mean.reshape(-1, 1)

    # Perform SVD (the order of the returned singular values is not relied on)
    U, sigma, Vt = svds(matrix_centered, k=k)

    # Convert sigma to diagonal matrix
    sigma = np.diag(sigma)

    return U, sigma, Vt, matrix_mean

def reconstruct_matrix(U, sigma, Vt, matrix_mean):
    """
    Step 3: Rebuild the score matrix from its SVD factors
    - Form the product U * sigma * V^T
    - Shift each row by its entry in matrix_mean
    """
    # Low-rank product, evaluated left to right
    rebuilt = (U @ sigma) @ Vt

    # Restore the per-row offset; the column-vector shape makes each mean
    # broadcast across its own row
    rebuilt += matrix_mean.reshape(-1, 1)

    return rebuilt

def get_recommendations(predictions, user_item_matrix, user_id, n_recommendations=5):
    """
    Step 4: Generate recommendations for a user
    - Find songs the user hasn't interacted with
    - Rank them by predicted score

    Parameters
    ----------
    predictions : ndarray
        Predicted scores, laid out like `user_item_matrix` (one row per user,
        one column per song).
    user_item_matrix : pandas.DataFrame
        Users on the index, songs on the columns; a stored value of 0 is
        treated as "no interaction".
    user_id : label
        Index label of the user to recommend for.
    n_recommendations : int, default 5
        Maximum number of songs to return. Values <= 0 yield an empty result;
        values larger than the number of candidate songs return all of them.

    Returns
    -------
    recommended_songs : pandas.Index
        Song labels, best predicted score first.
    scores : ndarray
        Predicted score for each recommended song, in the same order.

    Raises
    ------
    KeyError
        If `user_id` is not in the matrix index.
    """
    # Row position of the user, shared by the matrix and the predictions
    user_idx = user_item_matrix.index.get_loc(user_id)
    user_predictions = predictions[user_idx]

    # Column positions of songs the user hasn't interacted with
    user_songs = user_item_matrix.iloc[user_idx].values
    unlistened_songs = np.flatnonzero(user_songs == 0)

    # Clamp the request. Slicing with [-n:] would return *everything* for
    # n == 0 and an arbitrary tail for negative n, so bound it explicitly.
    n_top = max(0, min(int(n_recommendations), unlistened_songs.size))

    # Rank candidates by predicted score, highest first, and keep the top N
    ranked = np.argsort(user_predictions[unlistened_songs])[::-1][:n_top]
    recommended_song_indices = unlistened_songs[ranked]
    recommended_songs = user_item_matrix.columns[recommended_song_indices]

    return recommended_songs, user_predictions[recommended_song_indices]

In [14]:
def main_svd_pipeline(df, n_factors=10):
    """
    End-to-end SVD workflow: build the matrix, factorise it, rebuild scores.

    Progress and the shape of each intermediate result are printed along the
    way. Returns the user-item matrix together with the reconstructed
    prediction matrix.
    """
    # Step 1: pivot the raw rows into a users x songs table
    print("Creating user-item matrix...")
    interactions = create_user_item_matrix(df)
    print(f"Matrix shape: {interactions.shape}")

    # Step 2: truncated SVD with n_factors latent dimensions
    print("\nPerforming SVD...")
    left_vectors, sigma_diag, right_vectors_t, row_means = perform_svd(
        interactions, k=n_factors
    )
    print(f"U shape: {left_vectors.shape}")
    print(f"Sigma shape: {sigma_diag.shape}")
    print(f"V^T shape: {right_vectors_t.shape}")

    # Step 3: multiply the factors back together to obtain predicted scores
    print("\nGenerating predictions...")
    predicted_scores = reconstruct_matrix(
        left_vectors, sigma_diag, right_vectors_t, row_means
    )

    return interactions, predicted_scores

# Example usage
def demonstrate_recommendations(df):
    """
    Show the recommender in action for one randomly chosen user.

    Runs the SVD pipeline on `df`, picks a user with NumPy's global RNG and
    returns that user's recommended songs with their predicted scores.
    """
    # Build the user-item matrix and its predicted scores
    user_item_matrix, predictions = main_svd_pipeline(df)

    # Draw a random row position and look up the matching user id
    n_users = user_item_matrix.shape[0]
    sample_user = user_item_matrix.index[np.random.randint(n_users)]
    print(f"\nGetting recommendations for user {sample_user}...")

    # Songs and scores for that user, using the default recommendation count
    return get_recommendations(predictions, user_item_matrix, sample_user)

In [15]:
# Run the end-to-end demo on the loaded data. The recommended songs and their
# predicted scores are kept in variables; this cell does not display them.
recommended_songs, pred_scores = demonstrate_recommendations(user_data)

Creating user-item matrix...
Matrix shape: (100, 82752)

Performing SVD...
U shape: (100, 10)
Sigma shape: (10, 10)
V^T shape: (10, 82752)

Generating predictions...
