# Cover Parameters Tuning
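This notebook runs a parameter sweep over the TDA graph cover parameters (`perc_overlap` and `n_cubes`) for every combination of feature set and filter function. Each configuration is evaluated on bootstrap samples using the modularity of a Louvain community detection; the mean, standard deviation and signal-to-noise ratio of the modularity scores are saved as heatmaps.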

## 1. Imports & Setup

In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

from tda.tda_graph import TDAGraph
from tuning.heatmap_creation import Heatmap
from tuning.tuning_utils import *
from community_detection.community import *
from sklearn.preprocessing import StandardScaler

# Seed used for reproducibility. This cell only stores the value; it is passed
# explicitly to get_bootstrap_samples() and Community() in the tuning loop.
seed_value = 42

## 2. Configuration
Edit the paths and parameters below before running the notebook.

In [None]:
# ── File paths ──────────────────────────────────────────────────────────────
# The three CSV inputs are read with pd.read_csv in the "Load Data" cell.
# DEMO_PATH must provide the columns SEX, AGE and EDU, which are selected there.
# OUTPUT_DIR is the directory prefix of every saved heatmap PNG.
FMRI_PATH      = "path/to/fmri.csv"
ENV_PATH       = "path/to/env.csv"
DEMO_PATH      = "path/to/demo_vars.csv"
OUTPUT_DIR     = "path/to/output"

# ── Clustering / distance ────────────────────────────────────────────────────
# Forwarded unchanged to TDAGraph as metric, cluster_algorithm, min_samples
# and eps. METRIC is also embedded in the output file names.
METRIC         = "chebyshev"
CLUSTERER      = "dbscan"
MIN_SAMPLES    = 3
EPS            = 8.0

# ── Bootstrap ────────────────────────────────────────────────────────────────
# Forwarded to get_bootstrap_samples as n_bootstrap and ratio.
# NOTE(review): ratio is presumably the fraction of rows drawn per sample —
# confirm against tuning.tuning_utils.
N_BOOTSTRAP    = 10
BOOTSTRAP_RATIO = 0.7

# ── Parameter grid ───────────────────────────────────────────────────────────
# Every (perc_overlap, n_cubes) pair is evaluated; perc_overlap indexes the
# rows and n_cubes the columns of the result matrices.
perc_overlap = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
n_cubes      = [10, 20, 30, 40, 50, 60]

# ── Experiment dimensions ────────────────────────────────────────────────────
# feature_sets entries must be keys understood by build_feature_matrix
# ('all', 'fmri', 'env'); filter_funcs entries are passed to TDAGraph as
# filter_func.
feature_sets = ['all', 'fmri', 'env']
filter_funcs = ['umap', 'pca', 'tsne', 'isomap']

# When True, the SEX/AGE/EDU columns are appended to every feature set.
# NOTE: the output file names do not encode this flag, so runs with different
# settings that share OUTPUT_DIR write to the same file names.
demographic  = True

## 3. Load Data

In [None]:
# Read the three input tables, in order, from the configured CSV paths.
fmri_df, env_df, demo_df = (
    pd.read_csv(csv_path) for csv_path in (FMRI_PATH, ENV_PATH, DEMO_PATH)
)

# Only these demographic columns are used as features.
new_demo_df = demo_df[['SEX', 'AGE', 'EDU']]

# Quick sanity check of the (rows, columns) of each table.
print(f"fMRI shape   : {fmri_df.shape}")
print(f"Env shape    : {env_df.shape}")
print(f"Demo shape   : {new_demo_df.shape}")

## 4. Helper — Build Feature Matrix
Assembles the right columns depending on `feature` and `demographic` settings, then applies `StandardScaler`.

In [None]:
def build_feature_matrix(feature: str, demographic: bool) -> pd.DataFrame:
    """Build the standardized feature table for one feature set.

    Args:
        feature: Which tables to use: ``'all'`` (fMRI + environment),
            ``'fmri'`` or ``'env'``. Any other value raises ``KeyError``.
        demographic: When truthy, ``new_demo_df`` is appended as the last
            block of columns.

    Returns:
        The selected tables concatenated column-wise and passed through
        ``StandardScaler.fit_transform``. Column names are kept; the result
        carries a default integer index.
    """
    # fMRI / environment tables that make up each feature set.
    base_tables = {
        'all':  (fmri_df, env_df),
        'fmri': (fmri_df,),
        'env':  (env_df,),
    }

    selected = list(base_tables[feature])
    if demographic:
        selected.append(new_demo_df)

    combined = pd.concat(selected, axis=1)
    standardized = StandardScaler().fit_transform(combined)
    return pd.DataFrame(standardized, columns=combined.columns)

## 5. Parameter Tuning Loop

In [None]:
# Parameter sweep: for every (filter function, feature set) pair, evaluate each
# (perc_overlap, n_cubes) cover configuration on the bootstrap samples and save
# three heatmaps (SNR, mean and standard deviation of the modularity).

# Axis labels for the heatmaps: string form of the swept values.
perc_overlap_labels = [str(v) for v in perc_overlap]
n_cubes_labels      = [str(v) for v in n_cubes]

# Configurations whose mean modularity does not exceed this value get an SNR
# score of 0.
MODULARITY_FLOOR = 0.3

for filter_ in filter_funcs:
    for feature in feature_sets:
        print(f"\n{'='*60}")
        print(f"Filter: {filter_}  |  Feature set: {feature}")
        print(f"{'='*60}")

        # ── Prepare data ────────────────────────────────────────────
        data_scaled = build_feature_matrix(feature, demographic)

        # ── Bootstrap samples ────────────────────────────────────────
        bootstrap_samples = get_bootstrap_samples(
            df=data_scaled,
            n_bootstrap=N_BOOTSTRAP,
            ratio=BOOTSTRAP_RATIO,
            seed_value=seed_value
        )

        # ── Result matrices ──────────────────────────────────────────
        # Rows follow perc_overlap, columns follow n_cubes.
        grid_shape = (len(perc_overlap), len(n_cubes))
        results_snr = np.zeros(grid_shape)
        results_avg = np.zeros(grid_shape)
        results_std = np.zeros(grid_shape)

        # ── Grid search ──────────────────────────────────────────────
        for i, overlap in enumerate(perc_overlap):
            for j, cubes in enumerate(n_cubes):
                print(f"  overlap={overlap}, n_cubes={cubes}", end="  →  ")

                modularity_list = []
                # Indexed access is kept on purpose: the container type
                # returned by get_bootstrap_samples is not visible here.
                for k in range(len(bootstrap_samples)):
                    sample = bootstrap_samples[k]

                    tdagraph = TDAGraph(
                        X=sample,
                        filter_func=filter_,
                        n_cubes=cubes,
                        perc_overlap=overlap,
                        cluster_algorithm=CLUSTERER,
                        min_samples=MIN_SAMPLES,
                        metric=METRIC,
                        eps=EPS,
                    )
                    graph = tdagraph.create_graph()

                    # Only the first element of create_graph()'s result is
                    # handed to the community detection.
                    community = Community(
                        graph[0],
                        algorithm='louvain',
                        seed_value=seed_value,
                        dataset=pd.DataFrame(sample)
                    )
                    community.community_detection(k=10)
                    modularity_list.append(community.compute_modularity())

                avg_mod = np.mean(modularity_list)
                std_mod = np.std(modularity_list)

                if avg_mod <= MODULARITY_FLOOR:
                    snr_mod = 0
                elif std_mod == 0:
                    # Identical modularity on every sample: mean/std is
                    # undefined. Report NaN instead of letting NumPy return
                    # inf silently (warnings are suppressed in this notebook).
                    snr_mod = np.nan
                else:
                    snr_mod = avg_mod / std_mod

                results_snr[i, j] = snr_mod
                results_avg[i, j] = avg_mod
                results_std[i, j] = std_mod

                print(f"avg={avg_mod:.4f}  std={std_mod:.4f}  SNR={snr_mod:.4f}")

        # ── Save heatmaps ────────────────────────────────────────────
        # One PNG per score matrix; the tag identifies filter, metric and
        # feature set.
        tag = f"{filter_}_{METRIC}_{feature}"

        for matrix, label in [
            (results_snr, "snr_composite_score"),
            (results_avg, "avg_modularity_score"),
            (results_std, "std_modularity_score"),
        ]:
            heatmap = Heatmap(
                matrix=matrix,
                cmap='viridis',
                perc_overlap_values=perc_overlap_labels,
                n_cubes_values=n_cubes_labels,
            )
            heatmap.save(output_path=f"{OUTPUT_DIR}/{label}_heatmap_{tag}.png")
            print(f"  Saved: {label}_heatmap_{tag}.png")

print("\nDone!")