In [10]:
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so the np.random.randint draw used in
# demonstrate_recommendations to pick a sample user is repeatable.
np.random.seed(10)

In [11]:
# Load the simulated user dataset (the path is relative to the notebook's
# working directory) and print its first five rows as a quick sanity check.
user_data = pd.read_csv('../datasets/simulated_user_dataset.csv')
print(user_data.head())

   duration (ms)  danceability  energy  loudness  speechiness  acousticness  \
0       324240.0      0.407733   0.714  0.664428     0.311258      0.473896   
1       285753.0      0.177466   0.101  0.115246     0.313907      0.997992   
2       180570.0      0.429153   0.872  0.632533     0.287417      0.002651   
3       191153.0      0.500910   0.816  0.665492     0.582207      0.068976   
4       207416.0      0.627289   0.759  0.625890     0.198013      0.457831   

   instrumentalness  liveness  valence     tempo     spec_rate  labels  \
0          0.000000  0.881797    0.216  0.400413  1.449544e-07     0.0   
1          0.382000  0.307329    0.122  0.624017  1.658775e-07     0.0   
2          0.000529  0.416076    0.690  0.731116  2.403500e-07     2.0   
3          0.000000  0.401891    0.885  0.848131  4.754654e-07     1.0   
4          0.721000  0.225532    0.738  0.515581  1.441547e-07     0.0   

                                    uri  user_id  group_no  
0  spotify:track:0l

In [12]:
from scipy.sparse.linalg import svds

def create_user_item_matrix(df):
    """
    Step 1: Create the user-item matrix
    - Rows represent users
    - Columns represent songs (items)
    - Values represent the interaction score

    The interaction score of a row is the plain arithmetic mean of the
    feature columns listed below, taken exactly as they are stored in ``df``
    (no rescaling happens here). If a user has several rows for the same
    song, the pivot averages their scores; user/song pairs that never occur
    are filled with 0.

    The input frame is NOT modified: the score is computed on the side
    rather than written back as an ``interaction_score`` column, so calling
    this function has no effect on the caller's data.

    NOTE(review): in the sample rows printed by this notebook,
    'duration (ms)' holds values around 3e5 while the other features are
    at most ~2, so the mean is dominated by track duration.
    Confirm whether the features are meant to be rescaled before averaging.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``user_id``, ``uri`` and every feature column below.

    Returns
    -------
    pandas.DataFrame
        Matrix indexed by ``user_id`` with one column per ``uri``.

    Raises
    ------
    KeyError
        If a required column is missing from ``df``.
    """
    feature_columns = [
        'danceability', 'energy', 'loudness', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'duration (ms)', 'spec_rate', 'labels'
    ]

    # Row-wise mean of the features as stored (missing values are skipped).
    interaction_score = df[feature_columns].mean(axis=1)

    # Build a narrow working frame instead of adding a column to `df`;
    # only these three columns are needed by the pivot.
    interactions = df[['user_id', 'uri']].assign(
        interaction_score=interaction_score
    )

    # Create the user-item matrix; 0 marks "no interaction"
    user_item_matrix = interactions.pivot_table(
        index='user_id',
        columns='uri',
        values='interaction_score',
        fill_value=0
    )

    return user_item_matrix

In [13]:

def perform_svd(matrix, k=10):
    """
    Step 2: Truncated SVD of the row-centered matrix
    - Each row has its own mean subtracted before factorisation
    - k is the number of latent factors passed to scipy's svds

    Returns a 4-tuple (U, sigma, Vt, row_means) where sigma is already a
    k x k diagonal matrix and row_means is the 1-D vector of per-row means
    that was removed (needed later to undo the centering).
    """
    dense = matrix.values

    # Per-row mean, broadcast down the columns to center every row
    row_means = dense.mean(axis=1)
    centered = dense - row_means[:, np.newaxis]

    left_vectors, singular_values, right_vectors_t = svds(centered, k=k)

    # svds yields the singular values as a 1-D vector; expand to a diagonal
    return left_vectors, np.diag(singular_values), right_vectors_t, row_means

def reconstruct_matrix(U, sigma, Vt, matrix_mean):
    """
    Step 3: Rebuild the score matrix from its factors
    - Form the product U * sigma * V^T (evaluated left to right)
    - Shift every row by its entry in matrix_mean to undo the centering

    Returns the resulting 2-D array of predicted scores.
    """
    low_rank = U @ sigma @ Vt

    # One mean per row, shaped as a column so it broadcasts across songs
    mean_column = matrix_mean.reshape(-1, 1)
    np.add(low_rank, mean_column, out=low_rank)

    return low_rank

def get_recommendations(predictions, user_item_matrix, user_id, n_recommendations=5):
    """
    Step 4: Generate recommendations for a user
    - Find songs the user hasn't interacted with
    - Rank them by predicted score

    Parameters
    ----------
    predictions : 2-D array
        Predicted scores, indexed positionally like ``user_item_matrix``
        (row = user, column = song).
    user_item_matrix : pandas.DataFrame
        User-item matrix; a stored value of exactly 0 is treated as
        "no interaction".
    user_id :
        Label of the user in ``user_item_matrix.index``.
    n_recommendations : int, default 5
        Maximum number of songs to return. Zero or a negative value yields
        an empty result. Fewer songs are returned when the user has fewer
        un-interacted songs than requested.

    Returns
    -------
    (pandas.Index, numpy.ndarray)
        Song labels ordered from highest to lowest predicted score, and the
        matching predicted scores.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in ``user_item_matrix.index``.
    """
    # Get user index
    user_idx = user_item_matrix.index.get_loc(user_id)
    song_names = user_item_matrix.columns

    # Get user's predictions
    user_predictions = predictions[user_idx]

    # Column positions of songs the user hasn't interacted with
    user_songs = user_item_matrix.iloc[user_idx].values
    unlistened_songs = np.flatnonzero(user_songs == 0)

    # Get predictions for unlistened songs
    unlistened_predictions = user_predictions[unlistened_songs]

    # Rank descending first, then truncate. Slicing with `[-n:]` would return
    # *every* candidate for n == 0 (because -0 == 0), so clamp and use `[:n]`.
    n_wanted = max(n_recommendations, 0)
    top_n_idx = np.argsort(unlistened_predictions)[::-1][:n_wanted]
    recommended_song_indices = unlistened_songs[top_n_idx]
    recommended_songs = song_names[recommended_song_indices]

    return recommended_songs, user_predictions[recommended_song_indices]

In [14]:
def main_svd_pipeline(df, n_factors=10):
    """
    Chain the three pipeline stages and report progress on stdout.

    Builds the user-item matrix from ``df``, factorises it with
    ``n_factors`` latent factors, and rebuilds the predicted-score matrix.

    Returns the pair (user_item_matrix, predictions).
    """
    # Stage 1 - user-item matrix
    print("Creating user-item matrix...")
    user_item_matrix = create_user_item_matrix(df)
    print(f"Matrix shape: {user_item_matrix.shape}")

    # Stage 2 - factorisation; keep the full result tuple for stage 3
    print("\nPerforming SVD...")
    factors = perform_svd(user_item_matrix, k=n_factors)
    U, sigma, Vt = factors[:3]
    print(f"U shape: {U.shape}")
    print(f"Sigma shape: {sigma.shape}")
    print(f"V^T shape: {Vt.shape}")

    # Stage 3 - perform_svd's return order matches reconstruct_matrix's
    # parameters (U, sigma, Vt, matrix_mean), so it can be splatted directly
    print("\nGenerating predictions...")
    return user_item_matrix, reconstruct_matrix(*factors)

# Example usage
def demonstrate_recommendations(df):
    """
    End-to-end demo: run the SVD pipeline on ``df``, pick one user at random
    (via NumPy's global RNG) and return that user's recommendations.

    Returns the pair (recommended_songs, pred_scores) produced by
    get_recommendations with its default number of recommendations.
    """
    user_item_matrix, predictions = main_svd_pipeline(df)

    # Draw a random row position, then map it to the user's label
    n_users = user_item_matrix.shape[0]
    sample_user = user_item_matrix.index[np.random.randint(n_users)]
    print(f"\nGetting recommendations for user {sample_user}...")

    return get_recommendations(predictions, user_item_matrix, sample_user)

In [15]:
# Run the whole pipeline on the loaded data and keep the recommendations
# (song labels and their predicted scores) for one randomly chosen user.
recommended_songs, pred_scores = demonstrate_recommendations(user_data)

Creating user-item matrix...
Matrix shape: (100, 83654)

Performing SVD...
U shape: (100, 10)
Sigma shape: (10, 10)
V^T shape: (10, 83654)

Generating predictions...

Getting recommendations for user 10...


In [16]:
# Display the recommended track URIs, highest predicted score first.
recommended_songs

Index(['spotify:track:3hcUREHAHIjauQzwrxmJIk',
       'spotify:track:37lnHXhscBSkFhkXFwOdaG',
       'spotify:track:5B8pMIEuBMiuWkxSHIgICi',
       'spotify:track:7GVRfHKoJJ47FE5DJI0zVS',
       'spotify:track:5IBen7C8lk7y8memBXqFTa'],
      dtype='object', name='uri')