# SVD Recommender System

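This notebook implements a recommender system using truncated Singular Value Decomposition (SVD): it loads user–item interactions into a sparse matrix, factorizes that matrix with `scipy.sparse.linalg.svds`, and writes the top-scoring items each user has not yet interacted with to an output file.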

In [6]:
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
import time

In [7]:
def load_data(file_path):
    """Load a user-item interaction file into a binary sparse matrix.

    Every non-blank line must consist of whitespace-separated integers: a
    user id followed by the ids of the items that user interacted with.
    Blank lines and lines that carry a user id but no item ids are skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A ``scipy.sparse.csr_matrix`` of shape
        ``(max_user_id + 1, max_item_id + 1)`` in which entry ``(u, i)`` is 1
        when item ``i`` is listed for user ``u`` and 0 otherwise. An item
        listed several times for the same user still yields 1.

    Raises:
        ValueError: If a token on a line cannot be parsed as an integer.
    """
    print("Loading data...")
    rows = []
    cols = []

    max_user_id = 0
    max_item_id = 0

    with open(file_path, 'r') as f:
        for line in f:
            parts = list(map(int, line.split()))
            # A usable line needs a user id plus at least one item id.
            if len(parts) < 2:
                continue
            user_id, items = parts[0], parts[1:]

            max_user_id = max(max_user_id, user_id)
            max_item_id = max(max_item_id, max(items))
            rows.extend([user_id] * len(items))
            cols.extend(items)

    data = np.ones(len(rows), dtype=np.int64)
    X = csr_matrix((data, (rows, cols)), shape=(max_user_id + 1, max_item_id + 1))
    # The CSR constructor sums duplicate (row, col) pairs, so a repeated item
    # would be stored as 2, 3, ...; reset every stored value to keep X binary.
    X.data[:] = 1
    print(f"Data loaded. Shape: {X.shape}, Non-zeros: {X.nnz}")
    return X

In [8]:
def compute_svd(X, k=50):
    """Compute a rank-k truncated SVD of a sparse matrix.

    Args:
        X: Sparse matrix of shape (n_users, n_items).
        k: Number of singular triplets to compute. ``svds`` cannot return
            ``min(X.shape)`` or more components, so a larger ``k`` is reduced
            to ``min(X.shape) - 1`` and a notice is printed.

    Returns:
        Tuple ``(U, sigma, Vt)`` where ``U`` has shape (n_users, k), ``sigma``
        is a (k, k) diagonal matrix with the singular values in descending
        order, and ``Vt`` has shape (k, n_items). Here ``k`` is the effective
        (possibly reduced) number of components.

    Raises:
        ValueError: If ``X`` is too small to admit even one component.
    """
    max_rank = min(X.shape) - 1
    if max_rank < 1:
        raise ValueError(
            f"Cannot compute a truncated SVD for a matrix of shape {X.shape}"
        )
    if k > max_rank:
        print(f"Requested k={k} is too large for shape {X.shape}; using k={max_rank}.")
        k = max_rank

    print(f"Computing SVD with k={k}...")
    start_time = time.time()

    # svds needs a floating-point (or complex) matrix; leave such input as is
    # and upcast anything else (e.g. the integer interaction counts).
    if X.dtype.char not in 'fdFD':
        X = X.astype(np.float64)

    # Compute SVD
    # U: (n_users, k), sigma: (k,), Vt: (k, n_items)
    U, sigma, Vt = svds(X, k=k)

    # Sort explicitly by singular value (largest first) instead of assuming
    # a particular order in what svds returns.
    order = np.argsort(sigma)[::-1]
    U = U[:, order]
    sigma = sigma[order]
    Vt = Vt[order, :]

    sigma = np.diag(sigma)

    print(f"SVD computed. U: {U.shape}, Sigma: {sigma.shape}, Vt: {Vt.shape}")
    print(f"Time taken: {time.time() - start_time:.2f}s")

    return U, sigma, Vt

In [9]:
def generate_recommendations(X, U, sigma, Vt, output_file, top_k=20, batch_size=1000):
    """Score all items for every user and write the best unseen ones to a file.

    Scores are the rows of ``U @ sigma @ Vt``. Items a user already has a
    stored entry for in ``X`` are excluded. One line is written per row of
    ``X`` in the form ``"<user_id> <item> <item> ..."`` with items ordered by
    descending score.

    Args:
        X: Sparse user-item interaction matrix, shape (n_users, n_items).
        U: User factors, shape (n_users, k).
        sigma: Diagonal matrix of singular values, shape (k, k).
        Vt: Item factors, shape (k, n_items).
        output_file: Path of the text file to (over)write.
        top_k: Maximum number of items to recommend per user. A user gets
            fewer when fewer unseen items exist.
        batch_size: Number of users scored per dense block; bounds the size
            of the temporary (batch_size, n_items) score array.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    print("Generating recommendations...")
    X = X.tocsr()
    n_users = X.shape[0]
    n_items = Vt.shape[1]
    # np.argpartition raises when asked for more entries than a row holds.
    n_select = min(top_k, n_items)

    # Pre-compute U * sigma for efficiency
    U_sigma = np.dot(U, sigma)

    with open(output_file, 'w') as f:
        for start_idx in range(0, n_users, batch_size):
            end_idx = min(start_idx + batch_size, n_users)
            n_batch = end_idx - start_idx

            # Compute scores: (batch_size, k) * (k, n_items) -> (batch_size, n_items)
            scores_batch = np.dot(U_sigma[start_idx:end_idx], Vt)

            # Mask training items for the whole batch at once: expand the CSR
            # row pointer into one batch-local row index per stored entry.
            user_interactions = X[start_idx:end_idx]
            seen_rows = np.repeat(np.arange(n_batch), np.diff(user_interactions.indptr))
            scores_batch[seen_rows, user_interactions.indices] = -np.inf

            if n_select > 0:
                # np.argpartition is faster than a full sort for top-k
                top_items_indices = np.argpartition(scores_batch, -n_select, axis=1)[:, -n_select:]

                # Sort the selected items by score, best first
                rows = np.arange(n_batch)[:, None]
                top_scores = scores_batch[rows, top_items_indices]
                sort_ind = np.argsort(top_scores, axis=1)[:, ::-1]
                final_recs = top_items_indices[rows, sort_ind]
                final_scores = top_scores[rows, sort_ind]
            else:
                final_recs = np.empty((n_batch, 0), dtype=np.int64)
                final_scores = np.empty((n_batch, 0))

            for i, u_id in enumerate(range(start_idx, end_idx)):
                # A score of -inf marks an already-seen item that only made
                # the cut because the user has fewer than top_k unseen items.
                recs = final_recs[i][~np.isneginf(final_scores[i])]
                f.write(' '.join([str(u_id), *map(str, recs)]) + "\n")

            if start_idx % 5000 == 0:
                print(f"Processed {start_idx} users...")

In [10]:
# End-to-end run: load interactions, factorize, write recommendations.
# NOTE(review): both paths are absolute paths on one developer's machine, so
# this cell will not run elsewhere as-is; consider a configurable data dir.
input_file = '/Users/riteshsingh/Documents/SJSU/Recommender System/projectrec/train-2.txt'
output_file = '/Users/riteshsingh/Documents/SJSU/Recommender System/projectrec/output1.txt'

# Build the sparse user-item matrix from the training file.
X = load_data(input_file)
# Factorize with 50 components.
U, sigma, Vt = compute_svd(X, k=50)
# Write per-user recommendations (default top_k) to output_file.
generate_recommendations(X, U, sigma, Vt, output_file)
print("Done!")

Loading data...
Data loaded. Shape: (52643, 91605), Non-zeros: 2380730
Computing SVD with k=50...
SVD computed. U: (52643, 50), Sigma: (50, 50), Vt: (50, 91605)
Time taken: 2.12s
Generating recommendations...
Processed 0 users...
Processed 5000 users...
Processed 10000 users...
Processed 15000 users...
Processed 20000 users...
Processed 25000 users...
Processed 30000 users...
Processed 35000 users...
Processed 40000 users...
Processed 45000 users...
Processed 50000 users...
Done!
