# Preparing the Environment


## Install Essential Packages

The `super-image` library is built on top of **Hugging Face**'s `transformers` and `datasets` libraries.

In [2]:
!pip install super-image datasets transformers -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.9/95.9 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m68.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m36.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m48.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## Imports

In [3]:
import torch
from torch.utils.data import Dataset, DataLoader

from transformers import Seq2SeqTrainingArguments
from super_image import Trainer
from super_image.models import EdsrModel

In [4]:
import os
from pathlib import Path
from typing import List, Union, Dict

import numpy as np

## Paths and Directories

In [5]:
from google.colab import drive

drive.mount('/content/drive')

# --- Verification ---
!ls /content/drive/MyDrive # Should list the contents of your "My Drive" folder

Mounted at /content/drive
'Colab Notebooks'   Education	        IFTTT   Neuromarketing-EHIA
 datasets	   'Google AI Studio'   manga


In [6]:
# Root folder on the mounted Drive; every other path below is derived from it.
ROOT_PATH = Path('/content/drive/MyDrive/datasets/sen2venus')

# --- Raw TACO files: one "<site>.taco" file per selected subset ---
TACO_RAW_DIR = ROOT_PATH.joinpath('TACO_raw_data')
TACO_RAW_DIR.mkdir(parents=True, exist_ok=True)
print(f"Data will be saved to: {TACO_RAW_DIR}")

SELECTED_SUBSETS = ["SUDOUE-4", "SUDOUE-5", "SUDOUE-6"]
TACO_FILE_PATHS = [TACO_RAW_DIR.joinpath(f"{site_name}.taco") for site_name in SELECTED_SUBSETS]


# --- Normalized shards, split into train / val / test sub-folders ---
NORMALIZED_SETS_DIR = ROOT_PATH.joinpath('normalized_sets')
NORMALIZED_SETS_DIR.mkdir(parents=True, exist_ok=True)
print(f"Normalaized datest will be saved to:\n\t {NORMALIZED_SETS_DIR}")

TRAIN_SAVE_DIR = NORMALIZED_SETS_DIR.joinpath('train')
TRAIN_SAVE_DIR.mkdir(parents=True, exist_ok=True)
print(f"Train data will be saved to:\n\t {TRAIN_SAVE_DIR}")

VAL_SAVE_DIR = NORMALIZED_SETS_DIR.joinpath('val')
VAL_SAVE_DIR.mkdir(parents=True, exist_ok=True)
print(f"Validation data will be saved to:\n\t {VAL_SAVE_DIR}")

TEST_SAVE_DIR = NORMALIZED_SETS_DIR.joinpath('test')
TEST_SAVE_DIR.mkdir(parents=True, exist_ok=True)
print(f"Test data will be saved to:\n\t {TEST_SAVE_DIR}")

# --- Fine-tuning artifacts ---
# Checkpoints and logs go here; the folder is needed both to resume training
# and to store the final model.
FINETUNR_SAVE_DIR = ROOT_PATH.joinpath('edsr_finetune')
FINETUNR_SAVE_DIR.mkdir(parents=True, exist_ok=True)
print(f"Finetuning data including checkpoints and logs will be saved to:\n\t{FINETUNR_SAVE_DIR}")

Data will be saved to: /content/drive/MyDrive/datasets/sen2venus/TACO_raw_data
Normalaized datest will be saved to:
	 /content/drive/MyDrive/datasets/sen2venus/normalized_sets
Train data will be saved to:
	 /content/drive/MyDrive/datasets/sen2venus/normalized_sets/train
Validation data will be saved to:
	 /content/drive/MyDrive/datasets/sen2venus/normalized_sets/val
Test data will be saved to:
	 /content/drive/MyDrive/datasets/sen2venus/normalized_sets/test
Finetuning data including checkpoints and logs will be saved to:
	/content/drive/MyDrive/datasets/sen2venus/edsr_finetune


# Step 1: Define PyTorch Datasets & Dataloaders

In [7]:
class PreNormalizedDataset(Dataset):
    """
    Map-style dataset over pre-processed, sharded tensor files (``*.pt``) on disk.

    Every shard is expected to be an indexable sequence of samples, each sample
    being a mapping with the keys ``'lr'`` and ``'hr'``. All shards except the
    last one must contain as many samples as the first shard; a global index is
    translated into ``(shard, offset)`` under that assumption.

    Only one shard is held in memory at a time. The cache therefore helps for
    sequential access; with random access (e.g. a shuffling sampler) a shard may
    have to be re-read from disk for most samples.

    Args:
        shard_dir: Directory that contains the ``*.pt`` shard files.

    Raises:
        ValueError: If no ``*.pt`` file is found or the first shard is empty.
    """
    def __init__(self, shard_dir: Union[str, Path]):
        self.shard_dir = Path(shard_dir)
        # NOTE: paths are sorted lexicographically, so "shard_10" sorts before
        # "shard_2". Shard file names should be zero-padded for the last
        # (possibly shorter) shard to really come last.
        self.shard_paths: List[Path] = sorted(self.shard_dir.glob("*.pt"))

        if not self.shard_paths:
            raise ValueError(f"No shard files ('*.pt') found in {self.shard_dir}")

        # NOTE(review): torch.load relies on pickle; only use shard files from a trusted source.
        # The first shard defines the nominal shard size used for index arithmetic.
        self.shard_size = len(torch.load(self.shard_paths[0]))
        if self.shard_size == 0:
            # Guard against a ZeroDivisionError in __getitem__.
            raise ValueError(f"First shard is empty: {self.shard_paths[0]}")

        # Only the last shard may be shorter. With a single shard, first == last,
        # so the file does not need to be read a second time.
        if len(self.shard_paths) == 1:
            last_shard_size = self.shard_size
        else:
            last_shard_size = len(torch.load(self.shard_paths[-1]))
        self.length = (len(self.shard_paths) - 1) * self.shard_size + last_shard_size

        # Single-slot cache: the most recently loaded shard and its index.
        self._cache = None
        self._cached_shard_index = -1
        print(f"Initialized dataset from {self.shard_dir} with {self.length} samples across {len(self.shard_paths)} shards.")


    def __len__(self):
        """Return the total number of samples across all shards."""
        return self.length

    def __getitem__(self, idx) -> Dict[str, torch.Tensor]:
        """
        Return the sample at global position ``idx``.

        Negative indices count from the end, as with a list.

        Returns:
            A dict with ``'pixel_values'`` (the sample's ``'lr'`` entry) and
            ``'labels'`` (the sample's ``'hr'`` entry), returned exactly as
            stored in the shard.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        requested = idx
        if idx < 0:
            # Resolve against the dataset length first; applying // and % to a
            # negative index would address the wrong shard/offset.
            idx = idx + self.length
        if not 0 <= idx < self.length:
            raise IndexError(
                f"Index {requested} is out of range for dataset of length {self.length}"
            )

        shard_index = idx // self.shard_size
        index_in_shard = idx % self.shard_size

        # Reload only when the requested sample lives in a different shard.
        if shard_index != self._cached_shard_index:
            self._cache = torch.load(self.shard_paths[shard_index])
            self._cached_shard_index = shard_index

        # The shard layout is coupled with the TACORGBDataset class (not shown
        # here): each item is a squeezed dictionary with the keys 'lr' and 'hr'.
        squeezed_sample = self._cache[index_in_shard]
        return {
            'pixel_values': squeezed_sample['lr'],
            'labels': squeezed_sample['hr'],
            }

## Dataloader Instantiation

In [9]:
# Training split: read the shards from TRAIN_SAVE_DIR and serve them in
# shuffled mini-batches of 16 samples.
train_dataset = PreNormalizedDataset(shard_dir=TRAIN_SAVE_DIR)
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)

Initialized dataset from /content/drive/MyDrive/datasets/sen2venus/normalized_sets/train with 4436 samples across 5 shards.


In [10]:
# --- Single sample: the dataset yields a dict holding the LR input and the HR target ---
print("---Verifying  dataset output format:")
sample_output = train_dataset[0]
print(sample_output.keys())
print("LR shape:", sample_output['pixel_values'].shape)
print("HR shape:", sample_output['labels'].shape)


# --- One collated batch: the DataLoader stacks the dict entries key by key ---
print("---Verifying  batch shape:")
batch = next(iter(train_loader))
lr_batch, hr_batch = batch['pixel_values'], batch['labels']

print("Verification successful!")
print(
    f"LR batch shape: {lr_batch.shape}",
    f"HR batch shape: {hr_batch.shape}",
    f"LR batch dtype: {lr_batch.dtype}",
    f"HR batch dtype: {hr_batch.dtype}",
    sep="\n",
)

---Verifying  dataset output format:
dict_keys(['pixel_values', 'labels'])
LR shape: torch.Size([3, 128, 128])
HR shape: torch.Size([3, 256, 256])
---Verifying  batch shape:
Verification successful!
LR batch shape: torch.Size([16, 3, 128, 128])
HR batch shape: torch.Size([16, 3, 256, 256])
LR batch dtype: torch.float32
HR batch dtype: torch.float32


In [11]:
# Validation split: fixed order (no shuffling), mini-batches of 16 samples.
val_dataset = PreNormalizedDataset(shard_dir=VAL_SAVE_DIR)
val_loader = DataLoader(dataset=val_dataset, batch_size=16, shuffle=False)

Initialized dataset from /content/drive/MyDrive/datasets/sen2venus/normalized_sets/val with 554 samples across 1 shards.


In [None]:
# Held-out test split: fixed order (no shuffling), mini-batches of 16 samples.
test_dataset = PreNormalizedDataset(shard_dir=TEST_SAVE_DIR)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)

Initialized dataset from /content/drive/MyDrive/datasets/sen2venus/normalized_sets/test with 556 samples across 1 shards.
Loaded 556 test samples.


# Step 2: Load the Pre-trained EDSR Model

**Objectives**:



1.   Loading a well-known, **pre-trained** architecture (edsr-base) specifically configured for **2x super-resolution**.
2.   Confirming that the model accepts data batches and produces outputs of the correct shape ([16, 3, 256, 256]).

## 2.1 Instantiate and Inspect the pre-trained EDSR model

In [12]:
# The data pairs are 128x128 (LR) -> 256x256 (HR), so the upscaling factor is 2.
scale = 2
model_id = 'eugenesiow/edsr-base'

# 'from_pretrained' downloads the model configuration and the weights that
# belong to the requested scale factor.
model = EdsrModel.from_pretrained(model_id, scale=scale)

# Print the layer hierarchy for a quick visual inspection.
print("Model architecture loaded successfully:", model, sep="\n")

config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

pytorch_model_2x.pt:   0%|          | 0.00/5.51M [00:00<?, ?B/s]

https://huggingface.co/eugenesiow/edsr-base/resolve/main/pytorch_model_2x.pt
Model architecture loaded successfully:
DataParallel(
  (module): EdsrModel(
    (sub_mean): MeanShift(3, 3, kernel_size=(1, 1), stride=(1, 1))
    (add_mean): MeanShift(3, 3, kernel_size=(1, 1), stride=(1, 1))
    (head): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (body): Sequential(
      (0): ResBlock(
        (body): Sequential(
          (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): ReLU(inplace=True)
          (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        )
      )
      (1): ResBlock(
        (body): Sequential(
          (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): ReLU(inplace=True)
          (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        )
      )
      (2): ResBlock(
        (body): Sequential(
       

## 2.2 Sanity Check: Pass one batch of data through the model

In [None]:
# Forward-pass sanity check: push one training batch through the model and make
# sure the prediction has exactly the shape of the HR target before any
# training time is spent.
print("\nPerforming a forward pass sanity check...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Remember the current mode so this check does not leave the model in eval
# mode as a side effect.
was_training = model.training
model.eval()

try:
    with torch.no_grad():
        # Get a single batch from the training dataloader.
        batch = next(iter(train_loader))
        # Only the input has to live on the model's device; the target is
        # used for a shape comparison only.
        lr_batch = batch['pixel_values'].to(device)
        hr_batch = batch['labels']

        predictions = model(lr_batch)
finally:
    model.train(was_training)

print(f"Running on device: {device}")
print(f"Model Input Shape (LR): {lr_batch.shape}")
print(f"Model Output Shape (Predictions): {predictions.shape}")
print(f"Target Shape (HR): {hr_batch.shape}")

# Report success only after the shapes have actually been compared.
assert predictions.shape == hr_batch.shape, "Model output shape does not match target HR shape!"
print("Sanity check successful!")
print("Output shape matches target shape. Ready for training.")


Performing a forward pass sanity check...


# Step 3: Configure and Launch the Trainer

## Trainer Config

In [1]:
# 1. Checkpoints and logs are written below FINETUNR_SAVE_DIR (created in the
#    "Paths and Directories" section).

# 2. Define Training Arguments
# These arguments control every aspect of the training loop.
# NOTE(review): the arguments come from `transformers`, while the Trainer comes
# from `super_image`. Confirm that this Trainer honours these options (and
# accepts dict-style samples), or switch to the arguments class shipped with
# `super_image`.
training_args = Seq2SeqTrainingArguments(
    # Passed as a plain string, consistent with logging_dir below.
    output_dir=str(FINETUNR_SAVE_DIR),

    # --- Core Training Parameters ---
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,

    # --- Optimizer and Scheduler ---
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    warmup_steps=500,

    # --- Evaluation and Saving Strategy ---
    # Both strategies must match for load_best_model_at_end to work.
    eval_strategy="epoch", # Run evaluation at the end of each epoch.
    save_strategy="epoch", # Save a checkpoint at the end of each epoch.

    # --- Checkpoint Management ---
    save_total_limit=3,          # Limit the number of checkpoints kept on disk
    load_best_model_at_end=True, # Reload the best checkpoint once training ends
    metric_for_best_model="eval_loss", # Use validation loss to determine the best model.
    greater_is_better=False,     # A lower validation loss is better (made explicit)

    # --- Logging and Reporting ---
    logging_dir=f"{FINETUNR_SAVE_DIR}/logs",
    logging_steps=100, # Log training loss every 100 steps.
    report_to="tensorboard",     # Enable TensorBoard logging

    # --- Technical Parameters ---
    fp16=torch.cuda.is_available(), # Use mixed precision when a GPU is available
    push_to_hub=False,
)


# 3. Instantiate the Trainer
# This object orchestrates the entire training process.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

ModuleNotFoundError: No module named 'super_image'

## Start Training

In [None]:
# 4. Launch fine-tuning.
# A progress bar is shown while training runs. Colab displays the training loss
# and, at the end of each epoch, the validation loss.
print("Starting model fine-tuning...")
trainer.train()
# To continue an interrupted run, call trainer.train(resume_from_checkpoint=True) instead.

# 5. Persist the final model.
# Saves the model currently held by the trainer; this is the best-performing
# one provided load_best_model_at_end took effect.
print("Training complete. Saving the best model.")
best_model_dir = f"{FINETUNR_SAVE_DIR}/best_model"
trainer.save_model(best_model_dir)