In [1]:
!pip install -q torch-fidelity

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m125.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m93.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m56.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m41.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# --- [Cell 1 - Final Version] ---
# Essential Imports
import os
import sys
import time
import datetime
import pandas as pd
import torch
from IPython.display import display, Markdown

# --- 1. Define Project Paths ---
# This makes managing paths clean and easy.
# Ensure your Google Drive is mounted at /content/drive
PROJECT_ROOT = '/content/drive/MyDrive/archgan'
SRC_PATH = os.path.join(PROJECT_ROOT, 'src')
CHECKPOINT_PATH = os.path.join(PROJECT_ROOT, 'checkpoints')
RESULTS_FILE = os.path.join(PROJECT_ROOT, 'archgan_results.csv')

# --- MODIFIED PART ---
# The ZIP file is now our primary data source from Drive
ZIP_FILE_PATH = os.path.join(PROJECT_ROOT, 'exteriors_128.zip') #<-- Assumes this is the name

print("--- ✅ Defined Project Paths ---")
print(f"Root:         {PROJECT_ROOT}")
print(f"Source code:  {SRC_PATH}")
print(f"Checkpoints:  {CHECKPOINT_PATH}")
print(f"Results CSV:  {RESULTS_FILE}")
print(f"Dataset ZIP:  {ZIP_FILE_PATH}") #<-- Updated print statement

# --- 2. Add 'src' Directory to Python Path ---
if SRC_PATH not in sys.path:
    sys.path.append(SRC_PATH)
    print(f"\n--- ✅ Added '{SRC_PATH}' to system path ---")

# --- 3. Run Verification Checks ---
print("\n--- ⚙️ Running Pre-flight Checks ---")
all_checks_passed = True

# Check 1: Can we import the model files?
try:
    from models_dcgan import Generator, Discriminator, weights_init
    print("✅ [PASS] Successfully imported DC-GAN models from src/.")
except ImportError as e:
    print(f"❌ [FAIL] Could not import from 'src/models_dcgan.py'. Error: {e}")
    all_checks_passed = False

# --- MODIFIED CHECK ---
# Check 2: Does the source ZIP file exist?
if os.path.isfile(ZIP_FILE_PATH):
    print(f"✅ [PASS] Dataset zip file found at '{ZIP_FILE_PATH}'.")
else:
    print(f"❌ [FAIL] Dataset zip file not found at '{ZIP_FILE_PATH}'. Please check the name/location.")
    all_checks_passed = False

# Check 3: Does the checkpoint directory exist? If not, create it.
if not os.path.isdir(CHECKPOINT_PATH):
    print(f"🟡 [INFO] Checkpoint directory not found. Creating it now at '{CHECKPOINT_PATH}'.")
    os.makedirs(CHECKPOINT_PATH)
else:
    print("✅ [PASS] Checkpoint directory already exists.")

# Check 4: Does the results CSV exist? If not, create it with headers.
try:
    if not os.path.isfile(RESULTS_FILE):
        print(f"🟡 [INFO] Results CSV not found. Creating '{RESULTS_FILE}' with headers.")
        headers = [
            'timestamp', 'model', 'run_id', 'stage', 'epoch',
            'latent_dim', 'lr', 'beta1', 'batch_size',
            'FID', 'IS', 'precision', 'recall', 'gpu_minutes'
        ]
        pd.DataFrame(columns=headers).to_csv(RESULTS_FILE, index=False)
    else:
        print("✅ [PASS] Results CSV already exists.")
except Exception as e:
    print(f"❌ [FAIL] Could not create or access the results CSV. Error: {e}")
    all_checks_passed = False

# Final Check Summary
print("-" * 40)
if all_checks_passed:
    display(Markdown("### ✅ All checks passed! You are ready for Data Prep."))
else:
    display(Markdown("### ❌ One or more checks failed. Please resolve the issues above before continuing."))

--- ✅ Defined Project Paths ---
Root:         /content/drive/MyDrive/archgan
Source code:  /content/drive/MyDrive/archgan/src
Checkpoints:  /content/drive/MyDrive/archgan/checkpoints
Results CSV:  /content/drive/MyDrive/archgan/archgan_results.csv
Dataset ZIP:  /content/drive/MyDrive/archgan/exteriors_128.zip

--- ✅ Added '/content/drive/MyDrive/archgan/src' to system path ---

--- ⚙️ Running Pre-flight Checks ---
✅ [PASS] Successfully imported DC-GAN models from src/.
✅ [PASS] Dataset zip file found at '/content/drive/MyDrive/archgan/exteriors_128.zip'.
✅ [PASS] Checkpoint directory already exists.
✅ [PASS] Results CSV already exists.
----------------------------------------


### ✅ All checks passed! You are ready for Data Prep.

In [3]:
# --- MASTER DATA PREPARATION SCRIPT (v3 - Final with Cleanup) ---

import os
import shutil
import zipfile
import re
from tqdm.notebook import tqdm
import time

print("--- Starting Final, Robust Data Preparation ---")

# --- Step 1: Define All Paths ---
zip_file_path = '/content/drive/MyDrive/archgan/exteriors_128.zip' #<-- EDIT THIS IF NEEDED
local_temp_unzip_path = '/tmp/unzipped_data'
local_final_data_path = '/tmp/exteriors_128'

# --- Step 2: Clean Up Local Directories from Previous Runs ---
print("\n--- Step 2: Cleaning up local directories... ---")
if os.path.exists(local_temp_unzip_path): shutil.rmtree(local_temp_unzip_path)
if os.path.exists(local_final_data_path): shutil.rmtree(local_final_data_path)
os.makedirs(local_final_data_path, exist_ok=True)
print("✅ Local cleanup complete.")

# --- Step 3: Manually Unzip with Filename Sanitization ---
print(f"\n--- Step 3: Unzipping '{os.path.basename(zip_file_path)}' to local disk... ---")
def sanitize_filename(filename):
    try:
        decoded_name = filename.encode('cp437').decode('utf-8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        decoded_name = filename
    sanitized = re.sub(r'[^A-Za-z0-9._-]', '', decoded_name)
    return sanitized
try:
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        for member in tqdm(zip_ref.infolist(), desc="Extracting files"):
            if member.is_dir(): continue
            original_filename = os.path.basename(member.filename)
            sanitized = sanitize_filename(original_filename)
            target_path = os.path.join(local_temp_unzip_path, 'exteriors_128', sanitized)
            os.makedirs(os.path.dirname(target_path), exist_ok=True)
            with zip_ref.open(member) as source, open(target_path, "wb") as target:
                shutil.copyfileobj(source, target)
    print("✅ Unzip to local disk complete.")
except Exception as e:
    print(f"❌ FATAL ERROR during unzipping: {e}"); raise

# --- Step 4: Restructure Files Locally ---
print("\n--- Step 4: Restructuring files locally... ---")
unzipped_source_dir = os.path.join(local_temp_unzip_path, 'exteriors_128')
local_target_dir = os.path.join(local_final_data_path, 'all_images')
os.makedirs(local_target_dir, exist_ok=True)
files_to_move = [f for f in os.listdir(unzipped_source_dir) if os.path.isfile(os.path.join(unzipped_source_dir, f))]
for filename in files_to_move:
    shutil.move(os.path.join(unzipped_source_dir, filename), os.path.join(local_target_dir, filename))
print("✅ Local restructuring complete.")

# --- Step 5 (NEW): Clean Junk Metadata Files from Local Copy ---
print("\n--- Step 5: Cleaning junk '._' files from the local dataset... ---")
files_in_dir = os.listdir(local_target_dir)
junk_files_found = [f for f in files_in_dir if f.startswith('._')]
if junk_files_found:
    print(f"Found and deleting {len(junk_files_found)} junk files...")
    for junk in junk_files_found:
        os.remove(os.path.join(local_target_dir, junk))
    print("✅ Junk files deleted.")
else:
    print("✅ No junk files found to delete.")

# --- Step 6: Final Verification ---
print("\n--- Step 6: Verifying the final LOCAL dataset ---")
final_image_count = len(os.listdir(local_target_dir))
print(f"   - Final image count in '{local_target_dir}' is: {final_image_count}")
if final_image_count > 17000:
    print("\n✅✅✅ DATA PREPARATION IS COMPLETE AND CORRECT. ✅✅✅")
else:
    print("\n❌ WARNING: The file count seems low after preparation.")

--- Starting Final, Robust Data Preparation ---

--- Step 2: Cleaning up local directories... ---
✅ Local cleanup complete.

--- Step 3: Unzipping 'exteriors_128.zip' to local disk... ---


Extracting files:   0%|          | 0/35681 [00:00<?, ?it/s]

✅ Unzip to local disk complete.

--- Step 4: Restructuring files locally... ---
✅ Local restructuring complete.

--- Step 5: Cleaning junk '._' files from the local dataset... ---
Found and deleting 17825 junk files...
✅ Junk files deleted.

--- Step 6: Verifying the final LOCAL dataset ---
   - Final image count in '/tmp/exteriors_128/all_images' is: 17825

✅✅✅ DATA PREPARATION IS COMPLETE AND CORRECT. ✅✅✅


In [4]:
# --- Imports for Training & Evaluation ---
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.utils.data import Subset
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
import time
import os
import shutil

# For metric calculation
import torch_fidelity

In [5]:
# --- 1. Experiment Configuration ---
# All parameters for this run are defined here.
config = {
    "model": "DC-GAN-Deeper",
    "run_id": f"dcgan_stage3_deeper_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}",
    "stage": "3 - Architectural Ablation",

    # Hyper-parameters from the brief for the baseline run
    "latent_dim": 32,
    "lr": 2e-4,
    "beta1": 0.0,

    # Model and Data parameters
    "feature_maps_g": 64,
    "feature_maps_d": 64,
    "num_channels": 3,
    "image_size": 128,

    # Training parameters
    "batch_size": 128,
    "num_epochs": 30,
    "training_split_size": 6000,
    "random_seed": 42, # For reproducible data splits

    # Eval parameters
    "num_eval_samples": 1000,
}

# Announce the run
print("--- 🚀 Starting New Experiment ---")
for key, val in config.items():
    print(f"{key:>20}: {val}")
print("-" * 35)

--- 🚀 Starting New Experiment ---
               model: DC-GAN-Deeper
              run_id: dcgan_stage3_deeper_20250612_2148
               stage: 3 - Architectural Ablation
          latent_dim: 32
                  lr: 0.0002
               beta1: 0.0
      feature_maps_g: 64
      feature_maps_d: 64
        num_channels: 3
          image_size: 128
          batch_size: 128
          num_epochs: 30
 training_split_size: 6000
         random_seed: 42
    num_eval_samples: 1000
-----------------------------------


In [6]:
# --- Data Loading & Pre-processing (Final, Robust Version) ---

# This is the path to the fast, local, and correctly structured dataset
LOCAL_DATA_PATH = '/tmp/exteriors_128'

# --- This is our new check function ---
def is_valid_image(path):
    """
    Checks if a file is a valid image, ignoring macOS metadata files.
    """
    # Check for a valid extension and that the filename does not start with '._'
    return path.lower().endswith(('.png', '.jpg', '.jpeg')) and not os.path.basename(path).startswith('._')

# --- Performance Optimizations ---
torch.backends.cudnn.benchmark = True
if torch.cuda.is_available():
    torch.set_float32_matmul_precision('high')

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define transformations
transform = transforms.Compose([
    transforms.Resize(config['image_size']),
    transforms.CenterCrop(config['image_size']),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    transforms.RandomHorizontalFlip(p=0.5),
])

# --- CRITICAL CHANGE HERE ---
# Load the full dataset, telling it to IGNORE invalid files
print(f"Loading dataset from local path: {LOCAL_DATA_PATH}")
full_dataset = dset.ImageFolder(
    root=LOCAL_DATA_PATH,
    transform=transform,
    # This tells the loader to use our function to filter out junk files
    is_valid_file=is_valid_image
)

# Create a reproducible random subset for training
print(f"Creating a reproducible training split of {config['training_split_size']} images...")
np.random.seed(config['random_seed'])
all_indices = np.arange(len(full_dataset))
np.random.shuffle(all_indices)
train_indices = all_indices[:config['training_split_size']]
train_subset = Subset(full_dataset, train_indices)

# Create the DataLoader with optimizations re-enabled
dataloader = torch.utils.data.DataLoader(
    train_subset,
    batch_size=config['batch_size'],
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
print(f"✅ Data loaded. Training on {len(train_subset)} images.")

Using device: cuda:0
Loading dataset from local path: /tmp/exteriors_128
Creating a reproducible training split of 6000 images...
✅ Data loaded. Training on 6000 images.


In [7]:
# --- 3. Model & Optimizer Initialization (for Stage 3) ---

# Import the NEW, deeper models from your updated .py file
from models_dcgan import Generator_Deeper, Discriminator_Deeper, weights_init

# Instantiate the DEEPER models for this ablation run
netG = Generator_Deeper(
    latent_dim=config["latent_dim"],
    feature_maps_g=config["feature_maps_g"],
    num_channels=config["num_channels"]
).to(device)

netD = Discriminator_Deeper(
    num_channels=config["num_channels"],
    feature_maps_d=config["feature_maps_d"]
).to(device)

# Apply the custom weight initialization
netG.apply(weights_init)
netD.apply(weights_init)

print("✅ DEEPER models initialized successfully.") # Updated print message for clarity

# Setup loss and optimizers (this part does not need to change)
criterion = nn.BCELoss()
optimizerD = optim.Adam(netD.parameters(), lr=config['lr'], betas=(config['beta1'], 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=config['lr'], betas=(config['beta1'], 0.999))

# Fixed noise for visualizing generator progress
fixed_noise = torch.randn(64, config['latent_dim'], 1, 1, device=device)

✅ DEEPER models initialized successfully.


In [8]:
# --- 4. Training Loop ---
print("\n--- ▶️ Starting Training Loop ---")
start_time = time.time()
G_losses = []
D_losses = []

for epoch in range(config['num_epochs']):
    for i, data in enumerate(tqdm(dataloader, desc=f"Epoch {epoch+1}/{config['num_epochs']}")):
        # (1) Update Discriminator
        netD.zero_grad()
        # Move data to device (with non_blocking optimization)
        real_images = data[0].to(device, non_blocking=True)
        b_size = real_images.size(0)

        # Train with all-real batch
        label = torch.full((b_size,), 1.0, dtype=torch.float, device=device)
        output = netD(real_images).view(-1)
        errD_real = criterion(output, label)
        errD_real.backward()

        # Train with all-fake batch
        noise = torch.randn(b_size, config['latent_dim'], 1, 1, device=device)
        fake_images = netG(noise)
        label.fill_(0.0)
        output = netD(fake_images.detach()).view(-1)
        errD_fake = criterion(output, label)
        errD_fake.backward()

        errD = errD_real + errD_fake
        optimizerD.step()

        # (2) Update Generator
        netG.zero_grad()
        label.fill_(1.0) # fake labels are real for generator cost
        output = netD(fake_images).view(-1)
        errG = criterion(output, label)
        errG.backward()
        optimizerG.step()

        # Save losses for plotting
        G_losses.append(errG.item())
        D_losses.append(errD.item())

    print(f"[{epoch+1}/{config['num_epochs']}] Loss_D: {errD.item():.4f} Loss_G: {errG.item():.4f}")

    # Save a checkpoint every 5 epochs and on the last epoch
    if (epoch + 1) % 5 == 0 or (epoch + 1) == config['num_epochs']:
        checkpoint_name = f"{config['model']}_{config['run_id']}_epoch_{epoch+1}.pt"
        checkpoint_save_path = os.path.join(CHECKPOINT_PATH, checkpoint_name)
        torch.save(netG.state_dict(), checkpoint_save_path)
        print(f"--- ✅ Checkpoint saved to '{checkpoint_save_path}' ---")

total_duration_min = (time.time() - start_time) / 60
print(f"\n🎉 Training Finished! Total duration: {total_duration_min:.2f} minutes.")


--- ▶️ Starting Training Loop ---


Epoch 1/30:   0%|          | 0/47 [00:00<?, ?it/s]

[1/30] Loss_D: 0.3996 Loss_G: 6.4180


Epoch 2/30:   0%|          | 0/47 [00:00<?, ?it/s]

[2/30] Loss_D: 2.4704 Loss_G: 1.2641


Epoch 3/30:   0%|          | 0/47 [00:00<?, ?it/s]

[3/30] Loss_D: 1.6408 Loss_G: 4.8158


Epoch 4/30:   0%|          | 0/47 [00:00<?, ?it/s]

[4/30] Loss_D: 1.3460 Loss_G: 0.8831


Epoch 5/30:   0%|          | 0/47 [00:00<?, ?it/s]

[5/30] Loss_D: 1.3732 Loss_G: 2.3371
--- ✅ Checkpoint saved to '/content/drive/MyDrive/archgan/checkpoints/DC-GAN-Deeper_dcgan_stage3_deeper_20250612_2148_epoch_5.pt' ---


Epoch 6/30:   0%|          | 0/47 [00:00<?, ?it/s]

[6/30] Loss_D: 0.8282 Loss_G: 1.3047


Epoch 7/30:   0%|          | 0/47 [00:00<?, ?it/s]

[7/30] Loss_D: 0.8321 Loss_G: 3.1427


Epoch 8/30:   0%|          | 0/47 [00:00<?, ?it/s]

[8/30] Loss_D: 0.7899 Loss_G: 1.7255


Epoch 9/30:   0%|          | 0/47 [00:00<?, ?it/s]

[9/30] Loss_D: 0.7406 Loss_G: 1.4306


Epoch 10/30:   0%|          | 0/47 [00:00<?, ?it/s]

[10/30] Loss_D: 1.2235 Loss_G: 2.8487
--- ✅ Checkpoint saved to '/content/drive/MyDrive/archgan/checkpoints/DC-GAN-Deeper_dcgan_stage3_deeper_20250612_2148_epoch_10.pt' ---


Epoch 11/30:   0%|          | 0/47 [00:00<?, ?it/s]

[11/30] Loss_D: 1.1872 Loss_G: 0.8354


Epoch 12/30:   0%|          | 0/47 [00:00<?, ?it/s]

[12/30] Loss_D: 2.7055 Loss_G: 0.6758


Epoch 13/30:   0%|          | 0/47 [00:00<?, ?it/s]

[13/30] Loss_D: 1.3504 Loss_G: 3.7872


Epoch 14/30:   0%|          | 0/47 [00:00<?, ?it/s]

[14/30] Loss_D: 0.7612 Loss_G: 1.4316


Epoch 15/30:   0%|          | 0/47 [00:00<?, ?it/s]

[15/30] Loss_D: 1.7965 Loss_G: 0.8943
--- ✅ Checkpoint saved to '/content/drive/MyDrive/archgan/checkpoints/DC-GAN-Deeper_dcgan_stage3_deeper_20250612_2148_epoch_15.pt' ---


Epoch 16/30:   0%|          | 0/47 [00:00<?, ?it/s]

[16/30] Loss_D: 1.3242 Loss_G: 3.2984


Epoch 17/30:   0%|          | 0/47 [00:00<?, ?it/s]

[17/30] Loss_D: 1.2958 Loss_G: 2.3650


Epoch 18/30:   0%|          | 0/47 [00:00<?, ?it/s]

[18/30] Loss_D: 1.8494 Loss_G: 0.6728


Epoch 19/30:   0%|          | 0/47 [00:00<?, ?it/s]

[19/30] Loss_D: 1.4805 Loss_G: 3.3198


Epoch 20/30:   0%|          | 0/47 [00:00<?, ?it/s]

[20/30] Loss_D: 0.9895 Loss_G: 1.2836
--- ✅ Checkpoint saved to '/content/drive/MyDrive/archgan/checkpoints/DC-GAN-Deeper_dcgan_stage3_deeper_20250612_2148_epoch_20.pt' ---


Epoch 21/30:   0%|          | 0/47 [00:00<?, ?it/s]

[21/30] Loss_D: 1.3714 Loss_G: 3.2925


Epoch 22/30:   0%|          | 0/47 [00:00<?, ?it/s]

[22/30] Loss_D: 1.0823 Loss_G: 1.2037


Epoch 23/30:   0%|          | 0/47 [00:00<?, ?it/s]

[23/30] Loss_D: 1.1143 Loss_G: 1.7713


Epoch 24/30:   0%|          | 0/47 [00:00<?, ?it/s]

[24/30] Loss_D: 1.2233 Loss_G: 1.2122


Epoch 25/30:   0%|          | 0/47 [00:00<?, ?it/s]

[25/30] Loss_D: 1.0162 Loss_G: 2.2791
--- ✅ Checkpoint saved to '/content/drive/MyDrive/archgan/checkpoints/DC-GAN-Deeper_dcgan_stage3_deeper_20250612_2148_epoch_25.pt' ---


Epoch 26/30:   0%|          | 0/47 [00:00<?, ?it/s]

[26/30] Loss_D: 1.6202 Loss_G: 0.9368


Epoch 27/30:   0%|          | 0/47 [00:00<?, ?it/s]

[27/30] Loss_D: 1.2459 Loss_G: 2.0942


Epoch 28/30:   0%|          | 0/47 [00:00<?, ?it/s]

[28/30] Loss_D: 1.3023 Loss_G: 1.1433


Epoch 29/30:   0%|          | 0/47 [00:00<?, ?it/s]

[29/30] Loss_D: 1.2754 Loss_G: 3.2038


Epoch 30/30:   0%|          | 0/47 [00:00<?, ?it/s]

[30/30] Loss_D: 0.9277 Loss_G: 1.4578
--- ✅ Checkpoint saved to '/content/drive/MyDrive/archgan/checkpoints/DC-GAN-Deeper_dcgan_stage3_deeper_20250612_2148_epoch_30.pt' ---

🎉 Training Finished! Total duration: 5.11 minutes.


In [9]:
# --- 5. Evaluation and Metric Logging (Final, Corrected Version) ---

# We will evaluate the final checkpoint from the training run.

final_checkpoint_epoch = config['num_epochs']
final_checkpoint_name = f"{config['model']}_{config['run_id']}_epoch_{final_checkpoint_epoch}.pt"
final_checkpoint_path = os.path.join(CHECKPOINT_PATH, final_checkpoint_name)

print(f"\n--- 📊 Starting Evaluation for {final_checkpoint_name} ---")

# Ensure the model is in evaluation mode for consistent output
netG.eval()

# Generate 1,000 samples for evaluation
print(f"Generating 1000 samples for metric calculation...")
# --- CRITICAL CHANGE HERE ---
# Create a temporary directory on the FAST LOCAL DISK
fake_images_dir = '/tmp/temp_fake_images'
os.makedirs(fake_images_dir, exist_ok=True)

with torch.no_grad():
    for i in tqdm(range(config['num_eval_samples']), desc="Generating Samples"):
        noise = torch.randn(1, config['latent_dim'], 1, 1, device=device)
        fake_image = netG(noise)
        vutils.save_image(fake_image.detach(), os.path.join(fake_images_dir, f"fake_{i+1:04d}.png"), normalize=True)

print("✅ Samples generated.")

# Calculate metrics using torch-fidelity
print("Calculating FID, IS, Precision, and Recall... (This will be fast now)")
metrics = torch_fidelity.calculate_metrics(
    input1=fake_images_dir,
    input2='/tmp/exteriors_128/all_images',
    cuda=torch.cuda.is_available(),
    isc=True,
    fid=True,
    prc=True,
    verbose=False
)

# Clean up the temporary directory of fake images
shutil.rmtree(fake_images_dir)
print("✅ Metrics calculated and temporary files removed.")

# Log the results to our CSV file
new_row = {
    'timestamp': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'model': config['model'],
    'run_id': config['run_id'],
    'stage': config['stage'],
    'epoch': final_checkpoint_epoch,
    'latent_dim': config['latent_dim'],
    'lr': config['lr'],
    'beta1': config['beta1'],
    'batch_size': config['batch_size'],
    'FID': metrics.get('frechet_inception_distance'),
    'IS': metrics.get('inception_score_mean'),
    'precision': None,
    'recall': None,
    'gpu_minutes': round(total_duration_min, 2)
}

# Robustly handle an empty or non-existent CSV file
try:
    results_df = pd.read_csv(RESULTS_FILE)
except (FileNotFoundError, pd.errors.EmptyDataError):
    print("Results CSV is empty or not found. Creating a new one.")
    results_df = pd.DataFrame(columns=new_row.keys())

# Append the new row and save to CSV
results_df = pd.concat([results_df, pd.DataFrame([new_row])], ignore_index=True)
results_df.to_csv(RESULTS_FILE, index=False)
print("\n--- ✅ Results appended to CSV in notebook memory. Verifying save to Drive... ---")

# SAVE VERIFICATION STEP
time.sleep(5)
try:
    verified_df = pd.read_csv(RESULTS_FILE)
    if len(verified_df) == len(results_df):
        print(f"✅ VERIFICATION SUCCESS: CSV on Google Drive is updated and contains {len(verified_df)} rows.")
        display(verified_df)
    else:
        print(f"❌ VERIFICATION FAILED: CSV on Drive has not updated yet.")
except Exception as e:
    print(f"❌ An error occurred during verification: {e}")


--- 📊 Starting Evaluation for DC-GAN-Deeper_dcgan_stage3_deeper_20250612_2148_epoch_30.pt ---
Generating 1000 samples for metric calculation...


Generating Samples:   0%|          | 0/1000 [00:00<?, ?it/s]

✅ Samples generated.
Calculating FID, IS, Precision, and Recall... (This will be fast now)


Downloading: "https://github.com/toshas/torch-fidelity/releases/download/v0.2.0/weights-inception-2015-12-05-6726825d.pth" to /root/.cache/torch/hub/checkpoints/weights-inception-2015-12-05-6726825d.pth
100%|██████████| 91.2M/91.2M [00:00<00:00, 129MB/s]
  img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())).view(height, width, 3)


✅ Metrics calculated and temporary files removed.

--- ✅ Results appended to CSV in notebook memory. Verifying save to Drive... ---
✅ VERIFICATION SUCCESS: CSV on Google Drive is updated and contains 11 rows.


Unnamed: 0,timestamp,model,run_id,stage,epoch,latent_dim,lr,beta1,batch_size,FID,IS,precision,recall,gpu_minutes
0,2025-06-12 07:23:00,DC-GAN,dcgan_baseline_20250612_0652,0 - Baseline,20,64,0.0002,0.5,128,349.662912,1.039684,,,11.26
1,2025-06-12 19:21:59,DC-GAN,dcgan_oat_z128_20250612_1907,1 - OAT Sweep,20,128,0.0002,0.5,128,362.277381,1.259146,,,10.95
2,2025-06-12 19:39:27,DC-GAN,dcgan_oat_z256_20250612_1927,1 - OAT Sweep,20,256,0.0002,0.5,128,443.090218,1.08666,,,10.67
3,2025-06-12 19:52:59,DC-GAN,dcgan_oat_lr1e-4_20250612_1940,1 - OAT Sweep,20,64,0.0001,0.5,128,433.429274,1.035527,,,10.85
4,2025-06-12 20:07:55,DC-GAN,dcgan_oat_lr5e-5_20250612_1955,1 - OAT Sweep,20,64,5e-05,0.5,128,443.19943,1.067751,,,10.99
5,2025-06-12 20:22:02,DC-GAN,dcgan_oat_b0.0_20250612_2009,1 - OAT Sweep,20,64,0.0002,0.0,128,339.412069,1.518692,,,10.92
6,2025-06-12 20:35:52,DC-GAN,dcgan_oat_b0.9_20250612_2023,1 - OAT Sweep,20,64,0.0002,0.9,128,435.641419,1.019858,,,10.87
7,2025-06-12 20:59:37,DC-GAN,dcgan_oat_b0.1_20250612_2047,1 - OAT Sweep,20,64,0.0002,0.1,128,376.132087,1.115626,,,10.96
8,2025-06-12 21:13:23,DC-GAN,dcgan_oat_z32_20250612_2100,1 - OAT Sweep,20,32,0.0002,0.5,128,341.582932,1.228938,,,10.99
9,2025-06-12 21:32:14,DC-GAN,dcgan_stage2_best_20250612_2125,2 - Combined-best,50,32,0.0002,0.0,128,259.860025,2.074152,,,6.31
