# Brain Tumor MRI Classification: Validation + XAI (Notebook)

This Colab runs the released pipeline end-to-end using the embedded demo dataset (10 images per class) stored in this repo under `data/images`.

**Pipeline:**
- Create stratified 64/16/20 train/validation/test splits
- Extract features (MobileNetV2 + EfficientNetV2B0, global average pooling (GAP) + concatenation)
- Train KNN (k=5, Euclidean, distance weights)
- Evaluate (confusion matrix, class metrics)
- Display results

**No Google Drive or Kaggle required by default.**


### 1. Idempotent repo setup (clone or update)

In [None]:
%%bash
# Idempotent repo setup: clone on the first run, otherwise force the existing
# checkout to match origin/main exactly.
set -e  # abort the cell on the first failing command
cd /content

REPO_DIR="brain-tumor-hybrid-fusion-knn"

if [ -d "$REPO_DIR/.git" ]; then
  echo "Repo already cloned – pulling latest main..."
  cd "$REPO_DIR"
  git fetch origin
  # WARNING: the next two commands are destructive. They discard local commits
  # and edits to tracked files, and delete untracked (non-ignored) files and
  # directories in the checkout.
  git reset --hard origin/main
  git clean -fd
else
  echo "Cloning fresh repo..."
  git clone https://github.com/mainajajere/brain-tumor-hybrid-fusion-knn.git "$REPO_DIR"
  cd "$REPO_DIR"
fi

echo "Repo ready at $(pwd)"

### 2. Install dependencies + GPU check

In [None]:
!pip install -q tensorflow==2.17.0 scikit-learn==1.4.2 matplotlib==3.8.4 seaborn==0.13.2
!pip install -q opencv-python-headless==4.9.0.80 Pillow==10.3.0 numpy==1.26.4 pandas==2.1.4
!pip install -q pyyaml==6.0.1 tqdm==4.66.4 shap==0.46.0

import os, sys, subprocess, pathlib

REPO = pathlib.Path('/content/brain-tumor-hybrid-fusion-knn')
os.chdir(REPO)
sys.path.insert(0, str(REPO))

os.makedirs(REPO/'outputs', exist_ok=True)
os.makedirs(REPO/'results', exist_ok=True)

print('Repo ready at', REPO)

result = subprocess.run(["bash", "-lc", "nvidia-smi -L"], capture_output=True, text=True)
if result.returncode == 0:
    print(result.stdout.strip())
    print('GPU available')
else:
    print('No GPU detected (CPU mode)')

### 3. Verify dataset

In [None]:
import os

DATA_ROOT = '/content/brain-tumor-hybrid-fusion-knn/data/images'
CLASSES = ['glioma','meningioma','pituitary','notumor']


def _count_files(class_dir):
    """Return the number of visible regular files directly inside *class_dir*.

    A missing directory counts as 0. Hidden entries (names starting with '.')
    and subdirectories are ignored so that placeholders such as `.gitkeep`
    cannot make an empty class folder look populated.
    """
    if not os.path.isdir(class_dir):
        return 0
    with os.scandir(class_dir) as entries:
        return sum(
            1 for entry in entries
            if entry.is_file() and not entry.name.startswith('.')
        )


print('Dataset root:', DATA_ROOT)

# Scan each class directory once and reuse the counts for both the report
# and the final verdict.
counts = {c: _count_files(os.path.join(DATA_ROOT, c)) for c in CLASSES}
for c, n in counts.items():
    print(f'{c}: {n} images' if n > 0 else f'{c}: MISSING')

# Every class must contain at least one file, otherwise stop the notebook here.
if all(n > 0 for n in counts.values()):
    print('Dataset verified')
else:
    raise SystemExit('Dataset missing')

### 4. Write config + set working directory

In [None]:
import yaml
import os

# Move into the repo root BEFORE touching any relative path, so that
# 'configs/config.yaml' is always created inside the repo (where later cells
# look for it via `--config configs/config.yaml`) regardless of the kernel's
# current working directory.
os.chdir('/content/brain-tumor-hybrid-fusion-knn')

# Pipeline configuration. DATA_ROOT and CLASSES come from the dataset
# verification cell. Key order is preserved in the YAML (sort_keys=False).
cfg = {
    'data': {
        'root_dir': DATA_ROOT,
        'classes': CLASSES,
        'image_size': [224, 224],
        'seed': 42,
        # 20% held out for test; 'val_from_train' is presumably the fraction of
        # the remaining training data used for validation (64/16/20 overall).
        'split': {'test': 0.20, 'val_from_train': 0.20},
    },
    'augment': {'rotation': 0.055, 'zoom': 0.10, 'translate': 0.10, 'hflip': True, 'contrast': 0.15},
    'train': {'batch_size': 32, 'epochs': 50, 'optimizer': 'adam', 'lr': 0.001, 'dropout': 0.5},
    'fusion': {'type': 'late', 'pooling': 'gap', 'concat': True},
    'knn': {'n_neighbors': 5, 'metric': 'euclidean', 'weights': 'distance'},
    'cv': {'n_folds': 5, 'stratify': True},
    'xai': {'shap_background_per_class': 25},
}

os.makedirs('configs', exist_ok=True)
# Explicit encoding keeps the written file independent of the locale default.
with open('configs/config.yaml', 'w', encoding='utf-8') as f:
    yaml.safe_dump(cfg, f, sort_keys=False)
print('Config written: configs/config.yaml')

print('Working directory:', os.getcwd())

### 4.5 Make scripts a package

In [None]:
# Ensure scripts/__init__.py exists so that `python -m scripts.<module>` works.
# Append mode creates the file when absent and leaves existing content untouched.
with open('scripts/__init__.py', 'a'):
    pass
print("scripts/__init__.py created")

### 5. Verify scripts + run split check

In [None]:
import os
print("=== Scripts ===")
for s in ['check_split_counts.py', 'run_full_pipeline.py']:
    p = f'scripts/{s}'
    print(f'{p} - exists ({os.path.getsize(p)} bytes)' if os.path.exists(p) else f'{p} - MISSING')

print("\n=== Split check ===")
os.chdir('/content/brain-tumor-hybrid-fusion-knn')
!PYTHONPATH=/content/brain-tumor-hybrid-fusion-knn python -m scripts.check_split_counts --config configs/config.yaml

### 6. Run full pipeline

In [None]:
# Run the full pipeline script as a module, passing configs/config.yaml.
# NOTE: `os` is not imported in this cell; it relies on an earlier cell having imported it.
print("=== Full pipeline ===")
# Relative paths used below (configs/, scripts/) resolve against the repo root.
os.chdir('/content/brain-tumor-hybrid-fusion-knn')
# PYTHONPATH points at the repo root so `python -m scripts.run_full_pipeline` can find the package.
!PYTHONPATH=/content/brain-tumor-hybrid-fusion-knn python -m scripts.run_full_pipeline --config configs/config.yaml

### 7. Show results

In [None]:
from IPython.display import Image, display
import os

print("=== Outputs ===")
for p in ['outputs/figures/confusion_matrix.png', 'outputs/figures/class_metrics.png', 'outputs/results/summary.txt']:
    print(f'\n{p}')
    if p.endswith('.png') and os.path.exists(p):
        display(Image(filename=p))
    elif os.path.exists(p):
        print(open(p).read())
    else:
        print('Not generated')

if os.path.exists('outputs'):
    print(f'\nFiles in outputs/:')
    !find outputs -type f | head -15