# Sigma Optimization for Kernel PCA
This notebook tests a range of sigma values for kernel PCA.
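For each sigma, the relative reconstruction error is computed by applying the inverse transform to the projected data and comparing the result to the original data (norm of the difference divided by the norm of the original).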
Use the plot and printed outputs to choose the sigma that minimizes the error.
Record that sigma in your configuration file.

### Loading packages and data, and defining sigma optimization function

In [None]:
import yaml
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import KernelPCA
import utils.pca as pca
import utils.data_ingestion as di

def load_config(config_file="config.yaml"):
    """
    Load the notebook configuration from a YAML file.

    Parameters:
      config_file (str): Path to the YAML file. Defaults to "config.yaml".

    Returns:
      The parsed YAML document, exactly as returned by yaml.safe_load.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's default text encoding.
    with open(config_file, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    return config

# Pull the data-related settings out of the YAML configuration.
config = load_config()
data_cfg = config["data"]
twix_file = data_cfg["twix_file"]

# Read the TWIX file, then extract the k-space data from the returned scans.
scans = di.read_twix_file(twix_file, include_scans=[-1], parse_pmu=False)
kspace = di.extract_image_data(scans)

# Length of the first k-space axis divided (floor) by the configured frame count.
n_phase_encodes_per_frame = kspace.shape[0] // data_cfg["n_frames"]

In [None]:
def optimize_sigma_kpca(kspace, n_phase_encodes_per_frame, sigma_candidates, n_components=None, kernel="rbf"):
    """
    Optimize the sigma parameter for KernelPCA by minimizing the reconstruction error.

    The k-space data is reshaped to one row per frame and converted to a real
    representation (real parts followed by imaginary parts along the feature axis).
    For every candidate sigma a KernelPCA model is fitted, the data is projected and
    mapped back, and the relative reconstruction error
    ||X - X_recon|| / ||X|| is recorded and printed.

    Parameters:
      kspace (np.ndarray): Complex k-space data (n_phase x coils x freq_encodes). Must be 3-D.
      n_phase_encodes_per_frame (int): Number of phase encodes per frame. Must divide
        the first dimension of kspace exactly.
      sigma_candidates (iterable of float): Candidate sigma values.
      n_components (int or None): Number of kernel PCA components (passed to KernelPCA).
      kernel (str): Kernel type (passed to KernelPCA).

    Returns:
      best_sigma (float or None): Sigma with lowest relative reconstruction error
        (None if no candidate produced an error below infinity).
      best_error (float): The corresponding error.
      best_kpca (KernelPCA): The fitted model for best_sigma.
      best_X_kpca (np.ndarray): Transformed data using best sigma.
      orig_feature_dim (int): Number of complex features per frame, i.e. before the
        real/imaginary split doubles the feature count.
      errors (list): Relative reconstruction error for each candidate, in input order.

    Raises:
      ValueError: If the number of phase encodes in kspace is not a multiple of
        n_phase_encodes_per_frame.
    """
    # Unpacking three values also enforces that kspace is 3-D.
    n_phase, _, _ = kspace.shape
    n_frames, leftover = divmod(n_phase, n_phase_encodes_per_frame)
    if leftover:
        # Without this check the reshape below can still succeed whenever the total
        # element count happens to be divisible by n_frames, silently producing rows
        # that straddle frame boundaries.
        raise ValueError(
            f"kspace has {n_phase} phase encodes, which is not a multiple of "
            f"n_phase_encodes_per_frame={n_phase_encodes_per_frame}"
        )

    # One row per frame; every sample of the frame becomes a feature.
    X = kspace.reshape(n_frames, -1)
    orig_feature_dim = X.shape[1]
    # KernelPCA is fed real-valued input: [Re(X) | Im(X)].
    X_real = np.hstack((np.real(X), np.imag(X)))
    # Denominator of the relative error is the same for every candidate.
    X_norm = np.linalg.norm(X_real)

    best_sigma = None
    best_error = np.inf
    best_kpca = None
    best_X_kpca = None
    errors = []

    for sigma in sigma_candidates:
        # Gaussian-width parameterisation of the kernel coefficient.
        gamma = 1.0 / (2 * sigma**2)
        # fit_inverse_transform=True is what makes inverse_transform available.
        kpca = KernelPCA(n_components=n_components, kernel=kernel, gamma=gamma, fit_inverse_transform=True)
        X_kpca = kpca.fit_transform(X_real)
        X_recon_real = kpca.inverse_transform(X_kpca)
        error = np.linalg.norm(X_real - X_recon_real) / X_norm
        errors.append(error)
        print(f"Sigma: {sigma:.4f}, Reconstruction Error: {error:.4f}")
        # Strict comparison: on ties the earliest candidate is kept.
        if error < best_error:
            best_error = error
            best_sigma = sigma
            best_kpca = kpca
            best_X_kpca = X_kpca

    return best_sigma, best_error, best_kpca, best_X_kpca, orig_feature_dim, errors

### Optimize sigma

In [None]:
# Candidate sigmas: 20 log-spaced values from 10**-1.2 to 10**-0.5 (adjust the range as needed).
sigma_candidates = np.logspace(start=-1.2, stop=-0.5, num=20)

# Sweep all candidates; the result holds the winner plus the per-candidate error list.
sweep_result = optimize_sigma_kpca(
    kspace=kspace,
    n_phase_encodes_per_frame=n_phase_encodes_per_frame,
    sigma_candidates=sigma_candidates,
)
best_sigma, best_error, best_kpca, best_X_kpca, orig_feature_dim, errors = sweep_result

print(f"Best Sigma: {best_sigma:.4f} with error {best_error:.4f}")

In [None]:
# Reconstruction error against sigma, with a logarithmic sigma axis.
fig, ax = plt.subplots(figsize=(8, 5))
ax.semilogx(sigma_candidates, errors, marker="o")
ax.set_xlabel("Sigma")
ax.set_ylabel("Relative Reconstruction Error")
ax.set_title("Sigma Optimization for Kernel PCA")
ax.grid(True)
plt.show()