# Sparse Advanced Operations
- Graph algorithms, Sparse eigensolvers, Advanced decompositions
- Real examples: PageRank, Network analysis, Large-scale PCA

In [None]:
# Numerical arrays plus SciPy's sparse containers used throughout this notebook.
import numpy as np
from scipy import sparse
# Sparse linear algebra; the cells below call eigsh, eigs and svds from it.
from scipy.sparse import linalg as splinalg
# Graph routines on sparse adjacency matrices; the cells below call
# shortest_path and connected_components from it.
from scipy.sparse import csgraph
print('Sparse advanced operations module loaded')

## Sparse Eigenvalue Problems

**Challenge**: Find eigenvalues of huge matrices
**Solution**: Iterative methods (Arnoldi, Lanczos)
**Functions**: `eigs` (general matrices), `eigsh` (real symmetric / Hermitian matrices)

In [None]:
print('Sparse Eigenvalue Solvers\n')

# Assemble the n×n tridiagonal test matrix: 2 on the main diagonal and -1 on
# the first sub- and super-diagonals, stored in CSR format.
n = 1000
diag = np.full(n, 2.0)
off_diag = np.full(n - 1, -1.0)
A = sparse.diags(
    [off_diag, diag, off_diag],
    offsets=[-1, 0, 1],
    format='csr',
)

density_pct = A.nnz / (n * n) * 100
print(f'Matrix size: {n}×{n}')
print(f'Non-zeros: {A.nnz:,}')
print(f'Density: {density_pct:.4f}%\n')

# Ask the symmetric solver for the k algebraically largest eigenvalues.
k = 6
eigenvalues, eigenvectors = splinalg.eigsh(A, k=k, which='LA')

# Walk the returned eigenvalues back-to-front, numbering them from 1.
print(f'Computed {k} largest eigenvalues:')
for rank, value in enumerate(reversed(eigenvalues), start=1):
    print(f'  λ_{rank} = {value:.6f}')

print(f'\nEigenvector shape: {eigenvectors.shape}')
print('Method: Lanczos iteration (much faster than dense methods)')

## Graph Algorithms

**scipy.sparse.csgraph**: Compressed sparse graph algorithms
**Algorithms**: Shortest paths, MST, connected components

In [None]:
print('\nGraph Algorithms\n')

# Edge list for a small weighted graph: each row is (node_a, node_b, weight).
edges = np.array([
    [0, 1, 4],
    [0, 2, 2],
    [1, 2, 1],
    [1, 3, 5],
    [2, 3, 8],
    [2, 4, 10],
    [3, 4, 2],
])
n_nodes = 5

# Build the symmetric adjacency matrix in one shot: every edge is stored in
# both directions so the graph is undirected.
src, dst, weight = edges.T
rows = np.concatenate([src, dst])
cols = np.concatenate([dst, src])
vals = np.concatenate([weight, weight]).astype(float)
G = sparse.coo_matrix((vals, (rows, cols)), shape=(n_nodes, n_nodes)).tocsr()

print(f'Graph: {n_nodes} nodes, {len(edges)} edges\n')

# All-pairs shortest path lengths using Dijkstra's algorithm.
dist_matrix = csgraph.shortest_path(G, method='D')

# Report the distances out of node 0.
print('Shortest paths (Dijkstra):')
for target, distance in enumerate(dist_matrix[0]):
    print(f'  From node 0 to {target}: {distance:.0f}')

## Real Example: PageRank Algorithm

**Problem**: Rank web pages by importance
**Method**: Dominant eigenvector of the damped, column-stochastic transition matrix

In [None]:
print('\nPageRank Algorithm\n')

# Create web graph (link structure)
n_pages = 10
np.random.seed(42)

# Random weighted links (sparse). Columns are normalised below, so column j
# holds the out-links of page j: links[i, j] != 0 means "page j links to i".
links = sparse.random(n_pages, n_pages, density=0.2, format='lil')
links.setdiag(0)  # No self-links

print(f'Web graph: {n_pages} pages')
print(f'Links: {links.nnz}\n')

# Normalize columns (transition probabilities). Pages without out-links
# ("dangling" pages) keep an all-zero column in P; their probability mass is
# redistributed uniformly inside the operator below.
col_sums = np.array(links.sum(axis=0)).flatten()
dangling = col_sums == 0
col_sums[dangling] = 1  # avoid dividing by zero for dangling pages
P = links.tocsc()
# In CSC layout the entries of column j are data[indptr[j]:indptr[j+1]], so
# repeating each column sum by the column's entry count lines the divisors up.
P.data /= np.repeat(col_sums, np.diff(P.indptr))

# PageRank with damping
damping = 0.85


def _google_matvec(x):
    """Apply the damped, column-stochastic PageRank matrix to the vector x.

    Equivalent to multiplying by
        damping * (P + uniform columns for dangling pages)
        + (1 - damping) / n_pages * ones((n_pages, n_pages))
    without ever forming the dense rank-one terms.
    """
    x = np.asarray(x).ravel()
    # Mass that is spread uniformly: the teleport share of every page plus the
    # damped share of dangling pages (which have nowhere else to go).
    uniform_mass = (1 - damping) * x.sum() + damping * x[dangling].sum()
    return damping * (P @ x) + uniform_mass / n_pages


# M is a matrix-free operator, so the teleportation term stays implicit and
# the sparsity of P is preserved.
M = splinalg.LinearOperator((n_pages, n_pages), matvec=_google_matvec, dtype=float)

# Find dominant eigenvector. M is column-stochastic, so the PageRank vector is
# its *right* eigenvector (M r = r); the transpose would only yield the
# constant vector.
eigenvalues, eigenvectors = splinalg.eigs(M, k=1, which='LM')
# The eigenvector is only defined up to scale/sign: take magnitudes and
# normalise so the scores form a probability distribution.
pagerank = np.abs(eigenvectors[:, 0].real)
pagerank = pagerank / pagerank.sum()

print('PageRank scores:')
ranked = sorted(enumerate(pagerank), key=lambda x: x[1], reverse=True)
for i, (page, score) in enumerate(ranked[:5]):
    print(f'  {i+1}. Page {page}: {score:.4f}')

print('\nHigher score = more important page')

## Connected Components

**Problem**: Find separate subgraphs
**Use**: Network analysis, clustering

In [None]:
print('\nConnected Components\n')

# Create graph with 3 components. Each pair below is one undirected edge and
# is written into the adjacency matrix in both directions.
component_edges = [
    (0, 1), (1, 2),          # Component 1: nodes 0-2
    (3, 4), (4, 5), (5, 6),  # Component 2: nodes 3-6
    (7, 8), (8, 9),          # Component 3: nodes 7-9
]
G = sparse.lil_matrix((10, 10))
for a, b in component_edges:
    G[a, b] = 1
    G[b, a] = 1

G = G.tocsr()

# labels[i] is the component id assigned to node i.
n_components, labels = csgraph.connected_components(G, directed=False)

# Collect node ids per component as plain Python ints. Printing a list of
# NumPy integer scalars shows `np.int64(...)` wrappers under NumPy 2, whereas
# `.tolist()` prints cleanly on every NumPy version.
components = [np.flatnonzero(labels == comp).tolist() for comp in range(n_components)]

print(f'Number of components: {n_components}\n')
for comp, nodes in enumerate(components):
    print(f'Component {comp+1}: nodes {nodes}')

print('\nApplications: Community detection, network partitioning')

## Sparse SVD

**Purpose**: Dimensionality reduction for large sparse matrices
**Use**: Text mining, recommender systems

In [None]:
print('\nSparse SVD\n')

# Term-document matrix (sparse). The generator is seeded explicitly so this
# cell gives the same matrix regardless of which cells ran before it.
n_terms = 1000
n_docs = 500
tf_idf = sparse.random(n_terms, n_docs, density=0.01, format='csr', random_state=42)

print(f'Term-document matrix: {n_terms}×{n_docs}')
print(f'Non-zeros: {tf_idf.nnz:,}')
print(f'Density: {tf_idf.nnz/(n_terms*n_docs)*100:.2f}%\n')

# Compute top k singular values
k = 10
u, s, vt = splinalg.svds(tf_idf, k=k)

# Sort explicitly for display instead of relying on the order in which svds
# happens to return the singular values; u, s and vt are left untouched.
singular_values_desc = np.sort(s)[::-1]

print(f'Computed {k} singular values:')
for i, sv in enumerate(singular_values_desc):
    print(f'  σ_{i+1} = {sv:.4f}')

# Rank-k document coordinates: each row is one document expressed in the k
# latent dimensions (right singular vectors scaled by their singular values).
doc_embedding = vt.T * s

print(f'\nReduced representation: {doc_embedding.shape[0]}×{doc_embedding.shape[1]}')
print('Applications: Latent Semantic Analysis, topic modeling')

## Real Example: Social Network Analysis

**Problem**: Find influential users in a social network
**Method**: Centrality measures

In [None]:
print('\nSocial Network Analysis\n')

np.random.seed(42)
n_users = 100

# Random social network. A stored entry at (i, j) is read as "user i follows
# user j"; the random values themselves are just placeholders.
follows = sparse.random(n_users, n_users, density=0.05, format='lil')
follows.setdiag(0)
follows = follows.tocsr()

print(f'Social network: {n_users} users')
print(f'Connections: {follows.nnz}\n')

# Degree centrality (number of connections). sparse.random fills the matrix
# with floats in [0, 1), so summing `follows` directly would add up weights,
# not count edges. Reduce it to a 0/1 adjacency pattern first.
adjacency = (follows != 0).astype(np.int64)
in_degree = np.asarray(adjacency.sum(axis=0)).ravel()   # followers: column counts
out_degree = np.asarray(adjacency.sum(axis=1)).ravel()  # following: row counts
total_degree = in_degree + out_degree

# Stable sort on the negated degree: highest degree first, ties broken by the
# lower user id so the ranking is deterministic.
top_users = np.argsort(-total_degree, kind='stable')[:5]

print('Top 5 most connected users:')
for i, user in enumerate(top_users):
    print(f'  {i+1}. User {user}: {int(total_degree[user])} connections')
    print(f'      (followers: {int(in_degree[user])}, following: {int(out_degree[user])})')

print('\nMetrics computed efficiently on sparse graph!')

## Summary

### Sparse Eigensolvers:
```python
# Symmetric
eigenvalues, eigenvectors = splinalg.eigsh(A, k=10, which='LA')

# General
eigenvalues, eigenvectors = splinalg.eigs(A, k=10, which='LM')
```

### Graph Algorithms:
```python
# Shortest paths
dist = csgraph.shortest_path(G, method='D')

# Connected components
n_comp, labels = csgraph.connected_components(G, directed=False)

# Minimum spanning tree
mst = csgraph.minimum_spanning_tree(G)
```

### Sparse SVD:
```python
u, s, vt = splinalg.svds(A, k=10)
```

### Applications:
- **PageRank**: Web page ranking
- **Social networks**: Influence analysis
- **Recommender systems**: Collaborative filtering
- **Text mining**: Topic modeling (LSA)
- **Network optimization**: Shortest paths, MST