# In [None]:
import numpy as np
import scipy.sparse as sparse
from sklearn.decomposition import TruncatedSVD
import matplotlib.pyplot as plt
from pathlib import Path

def _load_user_item_matrix(user_item_matrix_path):
    """Load the user-item matrix from disk and return it in CSR format.

    Two on-disk layouts are accepted:

    * a NumPy file holding a pickled 0-d object array that wraps a SciPy
      sparse matrix (read with ``np.load(...).item()``), and
    * a real ``.npz`` archive such as the one written by
      ``scipy.sparse.save_npz`` (read with ``scipy.sparse.load_npz``).
    """
    # SECURITY: allow_pickle=True unpickles the file contents, which can run
    # arbitrary code. Only load files that come from a trusted source.
    loaded = np.load(user_item_matrix_path, allow_pickle=True)
    if hasattr(loaded, "files"):
        # np.load returned an NpzFile archive, which has no .item(); release
        # the handle and let SciPy rebuild the sparse matrix instead.
        loaded.close()
        matrix = sparse.load_npz(user_item_matrix_path)
    else:
        matrix = loaded.item()

    # Normalise to CSR so that ``indptr`` always describes rows (users).
    return matrix.tocsr()


def _top_similar_items(item_user_matrix, idx, top_k=5):
    """Return the ``top_k`` items most Jaccard-similar to item ``idx``.

    Similarity is computed on the sets of stored column indices of each row
    of the CSR ``item_user_matrix``. Rows without stored entries and ``idx``
    itself are skipped. The result is a list of ``(item_index, similarity)``
    tuples sorted by descending similarity (ties keep ascending item order).
    """
    indptr = item_user_matrix.indptr
    indices = item_user_matrix.indices

    # Build the reference set once instead of once per compared item.
    target_users = set(indices[indptr[idx]:indptr[idx + 1]].tolist())

    similarities = []
    for i in range(item_user_matrix.shape[0]):
        start, stop = indptr[i], indptr[i + 1]
        if i == idx or start == stop:
            continue
        other_users = set(indices[start:stop].tolist())
        union = len(target_users | other_users)
        sim = len(target_users & other_users) / union if union > 0 else 0
        similarities.append((i, sim))

    similarities.sort(key=lambda pair: pair[1], reverse=True)
    return similarities[:top_k]


def analyze_zero_vectors(user_item_matrix_path, n_components=610):
    """Analyze why certain item vectors become zero after SVD transformation.

    The matrix stored at ``user_item_matrix_path`` is treated as
    users x items. It is transposed to items x users, reduced with
    ``TruncatedSVD`` and every item whose reduced vector is exactly zero is
    reported together with its original interactions and its most similar
    items. When zero vectors are found, the decomposition is repeated with
    other settings to see whether the zero vectors persist.

    Args:
        user_item_matrix_path: Path of the stored sparse user-item matrix.
        n_components: Number of SVD components to keep.

    Returns:
        A tuple ``(zero_vector_indices, item_user_matrix)``: the indices of
        the all-zero rows of the transformed item matrix, and the CSR
        item-user matrix that was decomposed.
    """
    user_item_matrix = _load_user_item_matrix(user_item_matrix_path)

    print(f"Original matrix shape: {user_item_matrix.shape}")
    # nnz / total cells is the density (fraction of stored entries), so it is
    # labelled as such rather than as "sparsity".
    print(f"Matrix density (fraction of non-zero entries): {user_item_matrix.nnz / (user_item_matrix.shape[0] * user_item_matrix.shape[1]):.6f}")

    # Check for empty rows or columns
    item_counts = np.diff(user_item_matrix.tocsc().indptr)
    user_counts = np.diff(user_item_matrix.indptr)

    empty_items = np.where(item_counts == 0)[0]
    empty_users = np.where(user_counts == 0)[0]

    print(f"Items with no interactions: {len(empty_items)}")
    print(f"Users with no interactions: {len(empty_users)}")

    # Get item-user matrix
    item_user_matrix = user_item_matrix.T.tocsr()

    # Run SVD
    svd_item = TruncatedSVD(n_components=n_components, random_state=42)
    item_matrix = svd_item.fit_transform(item_user_matrix)

    # Find zero vectors
    zero_vector_mask = np.all(item_matrix == 0, axis=1)
    zero_vector_indices = np.where(zero_vector_mask)[0]
    print(f"Found {len(zero_vector_indices)} zero vectors in the transformed item matrix")
    print(f"Zero vector indices: {zero_vector_indices}")

    # Analyze these items
    if zero_vector_indices.size > 0:
        for idx in zero_vector_indices:
            # Check original interactions
            item_row = item_user_matrix[idx]
            print(f"Item {idx} has {item_row.nnz} interactions in original data")

            # Get the actual interaction values
            row_data = item_row.toarray().flatten()
            nonzero_values = row_data[row_data != 0]
            if nonzero_values.size == 0:
                # An item without any non-zero interaction is the typical
                # source of a zero vector; min()/max() on an empty array
                # would raise, so report it explicitly instead.
                print("  Non-zero values: none (item has no non-zero interactions)")
            else:
                print(f"  Non-zero values: min={nonzero_values.min():.6f}, max={nonzero_values.max():.6f}, mean={nonzero_values.mean():.6f}")

            # Check for similar items
            print(f"  Top similar items: {_top_similar_items(item_user_matrix, idx)}")

        # Try a different SVD algorithm or parameters
        print("\nTrying different SVD parameters...")

        # Try with randomized SVD algorithm
        # NOTE(review): 'randomized' appears to be TruncatedSVD's default
        # algorithm, so with random_state=42 this presumably repeats the first
        # fit; consider algorithm='arpack' if a different solver is intended.
        svd_item_randomized = TruncatedSVD(n_components=n_components, algorithm='randomized', random_state=42)
        item_matrix_randomized = svd_item_randomized.fit_transform(item_user_matrix)
        zero_vectors_randomized = np.all(item_matrix_randomized == 0, axis=1)
        print(f"Zero vectors with randomized algorithm: {np.sum(zero_vectors_randomized)}")

        # Try with different random states
        for rs in [0, 10, 100, 1000]:
            svd_item_rs = TruncatedSVD(n_components=n_components, random_state=rs)
            item_matrix_rs = svd_item_rs.fit_transform(item_user_matrix)
            zero_vectors_rs = np.all(item_matrix_rs == 0, axis=1)
            zero_count_rs = np.sum(zero_vectors_rs)
            print(f"Zero vectors with random_state={rs}: {zero_count_rs}")

            if zero_count_rs > 0:
                zero_indices = np.where(zero_vectors_rs)[0]
                print(f"  Indices: {zero_indices}")

    return zero_vector_indices, item_user_matrix

# Usage example:


# In [None]:
# Run the diagnosis on the stored matrix; keep both the indices of the
# zero vectors and the item-user matrix for further inspection.
zero_indices, item_user_matrix = analyze_zero_vectors(
    user_item_matrix_path="/path/to/user_item_matrix.npz",
)