In [1]:
!nvidia-smi

Fri Nov 28 18:12:15 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.65.06              Driver Version: 580.65.06      CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A40                     On  |   00000000:17:00.0 Off |                    0 |
|  0%   32C    P8             21W /  300W |       0MiB /  46068MiB |      0%   E. Process |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA A40                     On  |   00

In [1]:
!echo $CUDA_VISIBLE_DEVICES

0,1


### Training SimCLR using ResNet backbone

In [1]:
from simclr import SimCLR
import yaml
from data_aug.dataset_wrapper import DataSetWrapper
import os, glob
import pandas as pd
import argparse
import sys
import gc

# Restrict CUDA to GPUs 0 and 1 for everything launched from this kernel.
# NOTE(review): this runs after `from simclr import SimCLR`; it presumably only
# takes effect if CUDA has not been initialized by that import — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

def main():
    """Train SimCLR using the settings in ``config.yaml`` on two GPUs.

    Reads ``config.yaml`` from the current working directory, overrides its
    GPU entries, builds the ``DataSetWrapper`` from the ``batch_size`` and
    ``dataset`` sections of the config, and runs ``SimCLR.train()``.
    """
    # Filter out ALL Jupyter kernel arguments so argparse below only sees
    # the script name.
    sys.argv = [sys.argv[0]]

    parser = argparse.ArgumentParser()
    parser.add_argument('--magnification', type=str, default='20x')
    # NOTE(review): the parsed arguments are not used anywhere below.
    args = parser.parse_args()

    # Use a context manager so the config file handle is closed immediately
    # instead of being left open for the duration of training.
    with open("config.yaml", "r") as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    # Override GPU settings to train on two GPUs.
    config['n_gpu'] = 2
    # Indices are relative to CUDA_VISIBLE_DEVICES: when you set
    # CUDA_VISIBLE_DEVICES='2,3', they become 0 and 1.
    # NOTE(review): this is a string, not a list — confirm how SimCLR parses it.
    config['gpu_ids'] = "[0,1]"

    dataset = DataSetWrapper(config['batch_size'], **config['dataset'])
    trainer = SimCLR(dataset, config)
    trainer.train()
    

# Entry point: start training when this cell / script is executed directly.
if __name__ == "__main__":
    main()

Please install apex for mixed precision training from: https://github.com/NVIDIA/apex
Running on: cuda




Feature extractor: resnet18
✓ No SimCLR checkpoint found. Using ImageNet pretrained ResNet backbone.

TRAINING CONFIGURATION
Total epochs:        5
Steps per epoch:     860
Total steps:         4300
Batch size:          128
Log every:           100 steps


EPOCH [1/5]
[Epoch 1/5] Step 0/4300 (0.0%) | Loss: 5.410 | ETA: 517.6 min
[Epoch 1/5] Step 100/4300 (2.3%) | Loss: 3.730 | ETA: 49.1 min
[Epoch 1/5] Step 200/4300 (4.7%) | Loss: 3.670 | ETA: 46.1 min
[Epoch 1/5] Step 300/4300 (7.0%) | Loss: 3.686 | ETA: 44.2 min
[Epoch 1/5] Step 400/4300 (9.3%) | Loss: 3.648 | ETA: 42.7 min
[Epoch 1/5] Step 500/4300 (11.6%) | Loss: 3.667 | ETA: 41.4 min
[Epoch 1/5] Step 600/4300 (14.0%) | Loss: 3.646 | ETA: 40.1 min
[Epoch 1/5] Step 700/4300 (16.3%) | Loss: 3.693 | ETA: 38.9 min
[Epoch 1/5] Step 800/4300 (18.6%) | Loss: 3.656 | ETA: 37.8 min

Epoch 1 completed in 9.28 minutes
[1/5] Validation Loss: 3.663
✓ Model saved (best validation loss)

EPOCH [2/5]


  _warn_get_lr_called_within_step(self)


[Epoch 2/5] Step 900/4300 (20.9%) | Loss: 3.635 | ETA: 40.9 min
[Epoch 2/5] Step 1000/4300 (23.3%) | Loss: 3.630 | ETA: 39.3 min
[Epoch 2/5] Step 1100/4300 (25.6%) | Loss: 3.675 | ETA: 37.7 min
[Epoch 2/5] Step 1200/4300 (27.9%) | Loss: 3.671 | ETA: 36.3 min
[Epoch 2/5] Step 1300/4300 (30.2%) | Loss: 3.634 | ETA: 34.9 min
[Epoch 2/5] Step 1400/4300 (32.6%) | Loss: 3.651 | ETA: 33.5 min
[Epoch 2/5] Step 1500/4300 (34.9%) | Loss: 3.641 | ETA: 32.2 min
[Epoch 2/5] Step 1600/4300 (37.2%) | Loss: 3.651 | ETA: 30.9 min
[Epoch 2/5] Step 1700/4300 (39.5%) | Loss: 3.628 | ETA: 29.6 min

Epoch 2 completed in 9.27 minutes

EPOCH [3/5]
[Epoch 3/5] Step 1800/4300 (41.9%) | Loss: 3.684 | ETA: 28.5 min
[Epoch 3/5] Step 1900/4300 (44.2%) | Loss: 3.619 | ETA: 27.3 min
[Epoch 3/5] Step 2000/4300 (46.5%) | Loss: 3.628 | ETA: 26.1 min
[Epoch 3/5] Step 2100/4300 (48.8%) | Loss: 3.642 | ETA: 25.0 min
[Epoch 3/5] Step 2200/4300 (51.2%) | Loss: 3.619 | ETA: 23.8 min
[Epoch 3/5] Step 2300/4300 (53.5%) | Loss: 

### Test the trained model weights on 10 random patches

In [6]:
#!/usr/bin/env python3
"""
Test SimCLR trained model - extract features from sample patches
"""
import torch
import torch.nn.functional as F
from models.resnet_simclr import ResNetSimCLR
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
import os
from glob import glob

# ============================================================
# CONFIG
# ============================================================
# Checkpoint file passed to load_trained_model(); a relative path, so it is
# resolved against the notebook's current working directory.
CHECKPOINT_PATH = "runs/Nov29_15-00-34_scc-214/checkpoints/model.pth"
# Root folder scanned by test_model(): each sub-directory is searched for *.jpeg files.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
TILES_DIR = "/projectnb/ec500kb/projects/Project_1_Team_1/PANDA_DATA_MANNY/tiles_03"
NUM_TEST_PATCHES = 10  # Test on 10 random patches

# ============================================================
# LOAD MODEL
# ============================================================
def load_trained_model(checkpoint_path):
    """Load the trained SimCLR ResNet-18 weights and return the model in eval mode.

    Args:
        checkpoint_path: Path to a saved ``state_dict``. Keys may carry the
            ``module.`` prefix that ``nn.DataParallel`` adds; it is stripped.

    Returns:
        A ``ResNetSimCLR`` instance with the checkpoint weights loaded, in
        evaluation mode, on the GPU when CUDA is available and on the CPU
        otherwise.
    """
    from collections import OrderedDict

    print("Loading trained SimCLR model...")

    # Initialize model (same as training)
    model = ResNetSimCLR(base_model="resnet18", out_dim=512)

    # Deserialize onto the CPU: load_state_dict copies the tensors into the
    # model anyway, so loading straight to 'cuda' would hold a second copy of
    # the weights on the GPU and would fail outright on a machine without CUDA.
    # NOTE: torch.load unpickles the file — only load checkpoints you trust.
    state_dict = torch.load(checkpoint_path, map_location='cpu')

    # Remove the 'module.' prefix that DataParallel prepends to every key.
    prefix = 'module.'
    new_state_dict = OrderedDict(
        (key[len(prefix):] if key.startswith(prefix) else key, value)
        for key, value in state_dict.items()
    )
    model.load_state_dict(new_state_dict)

    # Set to evaluation mode, then move the model to its final device once.
    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    print(f"✓ Model loaded from {checkpoint_path}")
    return model

# ============================================================
# FEATURE EXTRACTION
# ============================================================
def extract_features(model, image_path):
    """Run one image patch through the model and return its outputs as NumPy arrays.

    Args:
        model: Model whose forward pass returns a pair ``(h, z)``
            (h = features, z = projections).
        image_path: Path to the image file to embed.

    Returns:
        Tuple ``(h, z)`` converted to NumPy arrays on the CPU.
    """
    # Resize to 224x224 and convert to a tensor.
    # NOTE(review): no mean/std normalization is applied — confirm this
    # matches the preprocessing used during training.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    # Put the input on the same device as the model instead of assuming CUDA.
    device = next(model.parameters()).device

    # The context manager closes the underlying file as soon as the pixels
    # have been converted, so handles do not pile up across many patches.
    with Image.open(image_path) as img:
        img_tensor = transform(img.convert('RGB')).unsqueeze(0).to(device)  # Add batch dimension

    # Extract features (no gradients needed)
    with torch.no_grad():
        h, z = model(img_tensor)  # h = features, z = projections

    return h.cpu().numpy(), z.cpu().numpy()


# ============================================================
# TEST
# ============================================================
def _list_patches(tiles_dir):
    """Return the sorted ``*.jpeg`` paths found in each sub-directory of ``tiles_dir``."""
    patches = []
    # Sorted so the listing (and therefore a seeded sample) is reproducible.
    for wsi_dir in sorted(os.listdir(tiles_dir)):
        wsi_path = os.path.join(tiles_dir, wsi_dir)
        if os.path.isdir(wsi_path):
            patches.extend(sorted(glob(os.path.join(wsi_path, "*.jpeg"))))
    return patches


def _pairwise_cosine_similarities(features):
    """Return the cosine similarity of every unordered pair of rows in ``features``."""
    from scipy.spatial.distance import cosine

    count = len(features)
    return np.array([
        1 - cosine(features[i], features[j])
        for i in range(count)
        for j in range(i + 1, count)
    ])


def test_model(seed=None):
    """Test the trained model on a random sample of patches and print a report.

    Loads the model from ``CHECKPOINT_PATH``, samples up to
    ``NUM_TEST_PATCHES`` patches from ``TILES_DIR``, prints per-patch and
    aggregate feature statistics, and reports the pairwise cosine similarity
    of the extracted features.

    Args:
        seed: Optional seed for the patch sampler. ``None`` (the default)
            draws a different sample on every call; pass an integer to make
            the selection reproducible.

    Raises:
        ValueError: If no ``*.jpeg`` patches are found or
            ``NUM_TEST_PATCHES`` is smaller than 1.
    """
    banner = "=" * 70

    print(banner)
    print("SIMCLR MODEL TEST")
    print(banner)

    # Load model
    model = load_trained_model(CHECKPOINT_PATH)

    # Get sample patches
    print(f"\nFinding test patches in {TILES_DIR}...")
    all_patches = _list_patches(TILES_DIR)

    # Fail with a clear message instead of an opaque np.vstack error later on.
    if not all_patches:
        raise ValueError(f"No *.jpeg patches found under {TILES_DIR}")
    if NUM_TEST_PATCHES < 1:
        raise ValueError("NUM_TEST_PATCHES must be at least 1")

    # Sample indices directly rather than shuffling the whole list.
    rng = np.random.default_rng(seed)
    sample_size = min(NUM_TEST_PATCHES, len(all_patches))
    chosen = rng.choice(len(all_patches), size=sample_size, replace=False)
    test_patches = [all_patches[idx] for idx in chosen]

    print(f"✓ Found {len(all_patches)} total patches")
    print(f"✓ Testing on {len(test_patches)} random patches")
    print()

    # Extract features
    print("Extracting features...")
    print("-" * 70)

    all_features = []
    all_projections = []

    for position, patch_path in enumerate(test_patches, start=1):
        patch_name = os.path.basename(patch_path)

        features, projections = extract_features(model, patch_path)
        all_features.append(features)
        all_projections.append(projections)

        # Print stats
        print(f"[{position}/{len(test_patches)}] {patch_name}")
        print(f"  Features shape:    {features.shape}")
        print(f"  Projections shape: {projections.shape}")
        print(f"  Feature norm:      {np.linalg.norm(features):.3f}")
        print(f"  Feature mean:      {features.mean():.3f} ± {features.std():.3f}")
        print()

    # Convert to arrays (one row per patch)
    all_features = np.vstack(all_features)
    all_projections = np.vstack(all_projections)

    # Analysis
    print(banner)
    print("FEATURE ANALYSIS")
    print(banner)
    print(f"Features shape:    {all_features.shape}")
    print(f"Projections shape: {all_projections.shape}")
    print()

    print("Feature statistics:")
    print(f"  Mean:     {all_features.mean():.4f}")
    print(f"  Std:      {all_features.std():.4f}")
    print(f"  Min:      {all_features.min():.4f}")
    print(f"  Max:      {all_features.max():.4f}")
    print()

    # Test similarity (patches should have diverse features)
    print("Feature diversity check:")
    if len(all_features) < 2:
        # With a single patch there are no pairs; min()/max() on an empty
        # array would raise, so stop here with an explicit message.
        print("  Skipped: pairwise similarity needs at least 2 patches")
        print(banner)
        return

    similarities = _pairwise_cosine_similarities(all_features)
    mean_similarity = similarities.mean()
    print("  Pairwise cosine similarity:")
    print(f"    Mean: {mean_similarity:.3f}")
    print(f"    Std:  {similarities.std():.3f}")
    print(f"    Min:  {similarities.min():.3f}")
    print(f"    Max:  {similarities.max():.3f}")
    print()

    # Interpretation
    print(banner)
    print("INTERPRETATION")
    print(banner)

    if mean_similarity < 0.5:
        print("✓ GOOD: Features are diverse (low similarity)")
        print("  → Model learned to distinguish different patches")
    elif mean_similarity < 0.8:
        print("✓ OK: Features have moderate diversity")
        print("  → Model provides useful representations")
    else:
        print("⚠ WARNING: Features are very similar")
        print("  → Model may not have learned well")

    print()
    print("Model is ready to use for feature extraction! ✓")
    print(banner)


# ============================================================
# MAIN
# ============================================================
# Entry point: run the model test when this cell / script is executed directly.
if __name__ == "__main__":
    test_model()


SIMCLR MODEL TEST
Loading trained SimCLR model...
Feature extractor: resnet18
✓ Model loaded from runs/Nov29_15-00-34_scc-214/checkpoints/model.pth

Finding test patches in /projectnb/ec500kb/projects/Project_1_Team_1/PANDA_DATA_MANNY/tiles_03...
✓ Found 120115 total patches
✓ Testing on 10 random patches

Extracting features...
----------------------------------------------------------------------
[1/10] 26_13.jpeg
  Features shape:    (512,)
  Projections shape: (512,)
  Feature norm:      23.271
  Feature mean:      0.519 ± 0.888

[2/10] 6_8.jpeg
  Features shape:    (512,)
  Projections shape: (512,)
  Feature norm:      21.493
  Feature mean:      0.573 ± 0.757

[3/10] 7_4.jpeg
  Features shape:    (512,)
  Projections shape: (512,)
  Feature norm:      24.652
  Feature mean:      0.738 ± 0.802

[4/10] 6_10.jpeg
  Features shape:    (512,)
  Projections shape: (512,)
  Feature norm:      21.034
  Feature mean:      0.545 ± 0.753

[5/10] 21_16.jpeg
  Features shape:    (512,)
  Pro