# 🧮 Tensorus Tutorial 3: Tensor Decompositions - Breaking Complex Data Into Simple Pieces

## 🎯 Learning Objectives
- **Understand** what tensor decompositions are and why they're powerful
- **Master** 7 different decomposition methods (CP, Tucker, TT, TR, t-SVD, BTD, HOSVD)
- **Apply** decompositions for compression, denoising, and analysis
- **Optimize** large tensors for storage and computation
- **Implement** real-world scientific and ML applications

**⏱️ Duration:** 25 minutes | **🎓 Level:** Advanced

---

## 🤔 What are Tensor Decompositions?

Think of tensor decompositions as **"smart factorization"** - breaking complex multi-dimensional data into simpler, more manageable pieces.

### 🔍 Why Decompose Tensors?

| Benefit | Description | Real Example |
|---------|-------------|-------------|
| **🗜️ Compression** | Reduce storage by 10-100x | 4K video → compressed streams |
| **🔍 Pattern Discovery** | Find hidden structures | Market trends in financial data |
| **🧹 Denoising** | Remove unwanted noise | Clean medical imaging data |
| **⚡ Speed** | Faster computations | Accelerate neural network inference |
| **🧠 Understanding** | Interpret complex relationships | Social network analysis |

### 🛠️ Tensorus Decomposition Arsenal:

| Method | Best For | Compression | Complexity |
|--------|----------|-------------|------------|
| **CP** | Sparse data, interpretability | High | Low |
| **Tucker** | Balanced compression | Medium | Medium |
| **TT** | Very high dimensions | Very High | Medium |
| **TR** | Circular/periodic data | High | Medium |
| **t-SVD** | Video/temporal data | Medium | Low |
| **BTD** | Block-structured data | Variable | High |
| **HOSVD** | Scientific computing | Low | Low |

**🚀 Tensorus is the ONLY database with all 7 methods built-in!**

In [None]:
# 🛠️ Setup: Advanced tensor decomposition toolkit
import torch
import numpy as np
import requests
import json
import time
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass
import warnings
warnings.filterwarnings('ignore')

# Set style for better plots
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

@dataclass
class DecompositionResult:
    """Outcome of one tensor decomposition run plus size, accuracy and timing metadata.

    The field notes below describe how ``TensorDecomposer.cp_decomposition``
    fills the record; other producers are presumably expected to follow the
    same conventions (verify against each producer).
    """
    # Short label of the method that produced the factors, e.g. "CP".
    method: str
    # Factor tensors of the decomposition. For CP this is one 2-D matrix per
    # tensor mode with ``rank`` columns.
    factors: List[torch.Tensor]
    # original_size / compressed_size; larger means stronger compression.
    compression_ratio: float
    # Relative error: norm(original - reconstruction) / norm(original).
    reconstruction_error: float
    # Wall-clock time spent obtaining the factors, in seconds.
    computation_time: float
    # Size of the input tensor in bytes, counted at 4 bytes per element.
    original_size: int
    # Combined size of all factors in bytes, counted at 4 bytes per element.
    compressed_size: int

class TensorDecomposer:
    """Advanced tensor decomposition toolkit.

    Uses the Tensorus HTTP API when a server answers the health check and
    otherwise falls back to a local placeholder so the tutorial still runs
    ("demo mode").

    Args:
        api_url: Base URL of the Tensorus API.
        request_timeout: Seconds to wait for a decomposition request before
            giving up and using the local placeholder instead.
    """

    # Seconds to wait for the /health probe.
    HEALTH_TIMEOUT_S = 3
    # Sizes are reported as if every element were stored as float32.
    BYTES_PER_ELEMENT = 4

    def __init__(self, api_url: str = "http://127.0.0.1:7860", request_timeout: float = 30.0):
        self.api_url = api_url
        self.request_timeout = request_timeout
        self.server_available = self._test_connection()

    def _test_connection(self) -> bool:
        """Return True when ``GET {api_url}/health`` answers with HTTP 200."""
        try:
            response = requests.get(f"{self.api_url}/health", timeout=self.HEALTH_TIMEOUT_S)
        except requests.RequestException:
            # Unreachable, refused, timed out, malformed URL, ... -> demo mode.
            return False
        return response.status_code == 200

    def cp_decomposition(self, tensor: torch.Tensor, rank: int) -> "DecompositionResult":
        """CP (CANDECOMP/PARAFAC) Decomposition - Best for sparse, interpretable data.

        Args:
            tensor: Floating-point tensor with at least one mode and at least
                one element.
            rank: Number of rank-1 components; must be >= 1.

        Returns:
            A ``DecompositionResult`` holding the factors, the compression
            ratio, the relative reconstruction error and the elapsed time.

        Raises:
            ValueError: If ``rank`` is not positive or ``tensor`` is empty or
                0-dimensional (either would make the compression ratio
                undefined).
        """
        if rank < 1:
            raise ValueError(f"rank must be a positive integer, got {rank}")
        if tensor.dim() == 0 or tensor.numel() == 0:
            raise ValueError("tensor must have at least one mode and one element")

        start_time = time.perf_counter()

        # Prefer the real server-side decomposition; any failure there falls
        # back to the local placeholder so the caller always gets a result.
        factors = self._request_cp_factors(tensor, rank) if self.server_available else None
        if factors is None:
            factors = self._simulate_cp_decomposition(tensor, rank)

        # Calculate metrics
        computation_time = time.perf_counter() - start_time
        original_size = tensor.numel() * self.BYTES_PER_ELEMENT
        compressed_size = sum(f.numel() for f in factors) * self.BYTES_PER_ELEMENT
        compression_ratio = original_size / compressed_size

        # Reconstruct and calculate the relative error. For an all-zero input
        # the relative error is undefined, so report the absolute residual.
        reconstructed = self._reconstruct_cp(factors)
        residual_norm = torch.norm(tensor - reconstructed).item()
        tensor_norm = torch.norm(tensor).item()
        reconstruction_error = residual_norm / tensor_norm if tensor_norm > 0 else residual_norm

        return DecompositionResult(
            method="CP",
            factors=factors,
            compression_ratio=compression_ratio,
            reconstruction_error=reconstruction_error,
            computation_time=computation_time,
            original_size=original_size,
            compressed_size=compressed_size
        )

    def _request_cp_factors(self, tensor: torch.Tensor, rank: int) -> Optional[List[torch.Tensor]]:
        """Ask the Tensorus API for CP factors; return None on any failure.

        A response counts as a failure when the request errors or times out,
        the HTTP status is not successful, the body is not the expected JSON,
        or the returned factors do not fit the shape of ``tensor``.
        """
        payload = {
            "tensor_data": tensor.tolist(),
            "method": "cp",
            "rank": rank
        }
        try:
            response = requests.post(
                f"{self.api_url}/api/v1/decompose",
                json=payload,
                timeout=self.request_timeout,
            )
            response.raise_for_status()
            factors = [torch.tensor(f) for f in response.json()["factors"]]
        except (requests.RequestException, ValueError, KeyError, TypeError, RuntimeError):
            return None
        return factors if self._factors_fit(tensor, factors) else None

    @staticmethod
    def _factors_fit(tensor: torch.Tensor, factors: List[torch.Tensor]) -> bool:
        """Check there is one (mode_size, rank) matrix per tensor mode with a shared rank."""
        if not factors or len(factors) != tensor.dim():
            return False
        if any(f.dim() != 2 for f in factors):
            return False
        rank = factors[0].shape[1]
        return all(f.shape == (size, rank) for f, size in zip(factors, tensor.shape))

    def _simulate_cp_decomposition(self, tensor: torch.Tensor, rank: int) -> List[torch.Tensor]:
        """Simulate CP decomposition for demo purposes.

        NOTE: this is a placeholder, not a fit. It returns small random
        (mode_size, rank) matrices that only have the right shapes, so the
        resulting reconstruction error will be close to 1.
        """
        return [torch.randn(mode_size, rank) * 0.1 for mode_size in tensor.shape]

    def _reconstruct_cp(self, factors: List[torch.Tensor]) -> torch.Tensor:
        """Reconstruct the full tensor from CP factors.

        Computes ``sum_r  a_r (outer) b_r (outer) c_r ...`` where ``a_r`` is
        column ``r`` of the first factor, ``b_r`` of the second, and so on.

        Args:
            factors: One (mode_size, rank) matrix per mode, all with the same
                number of columns.

        Returns:
            Tensor of shape ``(factors[0].shape[0], factors[1].shape[0], ...)``.

        Raises:
            ValueError: If ``factors`` is empty.
        """
        if not factors:
            raise ValueError("at least one factor matrix is required")

        rank = factors[0].shape[1]
        shape = [f.shape[0] for f in factors]

        reconstructed = torch.zeros(shape, device=factors[0].device)
        for r in range(rank):
            # Outer product of all factor vectors for rank r, kept flattened
            # in row-major order so one reshape restores the full tensor.
            component = factors[0][:, r]
            for factor in factors[1:]:
                component = torch.outer(component, factor[:, r]).reshape(-1)

            # Out-of-place add lets dtype promotion happen (e.g. float64 factors).
            reconstructed = reconstructed + component.reshape(shape)

        return reconstructed

# Build the decomposer used by the rest of the tutorial, then show a
# start-up banner that reports whether a server was reached.
decomposer = TensorDecomposer()

print(
    "🧮 TENSOR DECOMPOSITIONS TUTORIAL",
    "=" * 50,
    f"📡 Server Status: {'✅ Connected' if decomposer.server_available else '⚠️ Demo Mode'}",
    "🚀 Ready to break down complex tensors!",
    "\n🎯 Today: Master 7 decomposition methods!",
    sep="\n",
)

## 🔍 Part 1: CP Decomposition - The Foundation

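**CP (CANDECOMP/PARAFAC)** is the most fundamental tensor decomposition. It approximates a tensor as a sum of `R` rank-1 tensors, where each rank-1 tensor is the outer product of one vector per mode and `R` is the rank you choose.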

### 🎯 When to Use CP:
- **Sparse data** with clear patterns
- **Interpretable results** needed
- **High compression** ratios desired
- **Chemometrics, psychometrics, signal processing**

In [None]:
# 🔍 CP DECOMPOSITION DEMONSTRATION
print("🔍 CP DECOMPOSITION - The Foundation Method")
print("=" * 50)

# Create a synthetic 3D tensor (customer × product × time period)
print("\n📊 Creating Sample Tensor: Customer Purchase Data")
customers, products, time_periods = 50, 30, 12
original_tensor = torch.randn(customers, products, time_periods) * 10 + 50
original_tensor = torch.relu(original_tensor)  # Clamp negatives to zero (values end up non-negative)

print(f"📦 Original tensor shape: {original_tensor.shape}")
print(f"💾 Original size: {original_tensor.numel() * 4 / 1024:.1f} KB")
print(f"📊 Data range: [{original_tensor.min():.1f}, {original_tensor.max():.1f}]")


def _quality_label(error: float) -> str:
    """Map a relative reconstruction error to a coarse quality label."""
    for threshold, label in ((0.1, "Excellent"), (0.2, "Good"), (0.4, "Fair")):
        if error < threshold:
            return label
    return "Poor"


# Test different ranks
ranks_to_test = [5, 10, 15, 20]
cp_results = []

print("\n🧮 Testing CP Decomposition with Different Ranks:")
print("-" * 80)
print(f"{'Rank':<6} {'Compression':<12} {'Error':<10} {'Time (ms)':<10} {'Quality':<10}")
print("-" * 80)

for rank in ranks_to_test:
    result = decomposer.cp_decomposition(original_tensor, rank)
    cp_results.append(result)

    quality = _quality_label(result.reconstruction_error)

    # Format each cell first, then pad it to the header's column width so the
    # rows stay aligned whatever the number of digits.
    compression = f"{result.compression_ratio:.1f}x"
    print(f"{rank:<6} {compression:<12} {result.reconstruction_error:<10.3f} "
          f"{result.computation_time * 1000:<10.1f} {quality:<10}")

# Find optimal rank: lowest error, with a small penalty for weak compression.
# Each result is paired with its rank so the winning rank is known directly.
# Looking it up with cp_results.index(...) would compare DecompositionResult
# objects field by field, which ends up evaluating `tensor == tensor` on
# factor matrices of different shapes and raises.
optimal_rank, optimal_result = min(
    zip(ranks_to_test, cp_results),
    key=lambda pair: pair[1].reconstruction_error + 0.1 / pair[1].compression_ratio,
)
# A relative error above 1 would otherwise print a negative "accuracy".
accuracy_pct = max(0.0, 1 - optimal_result.reconstruction_error) * 100

print(f"\n🎯 Optimal Configuration:")
print(f"   🏆 Best rank: {optimal_rank}")
print(f"   📦 Compression: {optimal_result.compression_ratio:.1f}x smaller")
print(f"   🎯 Accuracy: {accuracy_pct:.1f}%")
print(f"   ⚡ Speed: {optimal_result.computation_time*1000:.1f}ms")

# Summarize factor matrices
mode_names = ("Customers", "Products", "Time Periods")
print("\n📊 Factor Matrix Analysis:")
for i, factor in enumerate(optimal_result.factors):
    # Any mode beyond the third reuses the last label.
    mode_name = mode_names[min(i, len(mode_names) - 1)]
    near_zero_pct = (factor.abs() < 0.01).float().mean().item() * 100
    print(f"   Factor {i+1}: {factor.shape} - {mode_name}")
    print(f"            Range: [{factor.min().item():.3f}, {factor.max().item():.3f}]")
    print(f"            Sparsity: {near_zero_pct:.1f}% near-zero")

print("\n💡 CP Decomposition Insights:")
print("   ✅ Excellent for sparse, structured data")
print("   ✅ Highly interpretable factors")
print("   ✅ Great compression ratios")
print("   ⚠️  May struggle with dense, noisy data")
print("   ⚠️  Rank selection is critical")