In [None]:
from google.colab import drive
import os
import zipfile

# 1. Make Google Drive visible under /content/drive
drive.mount('/content/drive')

# 2. Unpack the image archive stored on Drive into extract_path
zip_path = '/content/drive/MyDrive/Classes/AML/asos_images_raw.zip'
extract_path = '/content/dataset'

print("Unzipping data... this takes about 30-60 seconds...")
with zipfile.ZipFile(zip_path, mode='r') as archive:
    # Extracts every archive member below extract_path.
    archive.extractall(path=extract_path)

print("Data ready!")

Mounted at /content/drive
Unzipping data... this takes about 30-60 seconds...
Data ready!


In [None]:
%%html
<!-- Renders an auto-playing, looping audio element whose source is a file of silence. -->
<!-- NOTE(review): presumably used to keep the browser tab / Colab session active during long runs; confirm. -->
<audio src="https://raw.githubusercontent.com/anars/blank-audio/master/10-minutes-of-silence.mp3" autoplay loop controls />

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm
import torch.amp
import concurrent.futures
from sklearn.model_selection import train_test_split

# --- 1. WINNING CONFIGURATION ---
# Based on hyperparameter tuning
BATCH_SIZE = 128
LEARNING_RATE = 0.0012  # Winner
HIDDEN_DIM = 1024       # Winner (width of the first head layer)
NUM_LAYERS = 2          # Winner (number of hidden layers in the head)
DROPOUT_RATE = 0.3      # Winner
WEIGHT_DECAY = 1e-4

# Training Duration (Longer for production)
EPOCHS_STAGE_1 = 20     # head-only training
EPOCHS_STAGE_2 = 20     # fine-tuning with part of the backbone unfrozen
LR_FINE_TUNE = 1e-5     # Low LR for unfreezing

# Fall back to CPU when no GPU is attached, matching the Model 2 configuration
# cell, instead of failing later when the model is moved to a missing device.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAVE_DIR = '/content/drive/MyDrive/Classes/AML'
IMG_DIR = '/content/dataset/asos_images_raw'
CLEAN_CSV_PATH = '/content/asos_final_clean.csv'

In [None]:
# --- 2. DATA LOADERS ---
class ASOSMemDataset(Dataset):
    """In-memory image/price dataset.

    Each row of ``dataframe`` is resolved to ``<img_dir>/<sku>.jpg``. The image
    is opened, converted to RGB, resized to 224x224 and cached as a PIL image
    together with ``log1p(price)``. Rows that fail to load are skipped
    (best-effort) and the number of skipped rows is reported.

    Args:
        dataframe: Frame providing ``sku`` and ``price`` columns.
        img_dir: Directory holding the ``<sku>.jpg`` files.

    Attributes:
        metadata: Copy of ``dataframe`` with a fresh 0..n-1 index (not filtered).
        images: Cached PIL images, in dataframe order.
        prices: ``log1p(price)`` for each cached image.
        valid_indices: Positions in ``metadata`` of the cached samples, so
            ``images[k]`` belongs to row ``metadata.iloc[valid_indices[k]]``.
    """

    def __init__(self, dataframe, img_dir):
        self.metadata = dataframe.copy().reset_index(drop=True)
        self.img_dir = img_dir
        n_rows = len(self.metadata)
        self.images = [None] * n_rows
        self.prices = [None] * n_rows

        print(f"Caching {n_rows} images...")

        def load_single_image(idx):
            """Return (idx, RGB 224x224 image, raw price), or (idx, None, None) on failure."""
            try:
                row = self.metadata.iloc[idx]
                sku = str(int(row['sku']))
                img_path = os.path.join(self.img_dir, f"{sku}.jpg")
                with Image.open(img_path) as img:
                    return idx, img.convert('RGB').resize((224, 224)), float(row['price'])
            except Exception:
                # Best-effort: an unreadable/missing image or malformed row is
                # skipped. `Exception` (not a bare except) keeps
                # KeyboardInterrupt/SystemExit from being swallowed.
                return idx, None, None

        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(load_single_image, i) for i in range(n_rows)]
            for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), unit="img"):
                idx, img, price = future.result()
                if img is not None:
                    # Results are stored by row position, so completion order
                    # does not affect the final sample order.
                    self.images[idx] = img
                    self.prices[idx] = np.log1p(price)

        # Drop the failed rows; images and prices stay aligned because both
        # slots are only ever filled together.
        self.valid_indices = [i for i, img in enumerate(self.images) if img is not None]
        self.images = [self.images[i] for i in self.valid_indices]
        self.prices = [self.prices[i] for i in self.valid_indices]

        skipped = n_rows - len(self.images)
        if skipped:
            # Make silent data loss visible instead of shrinking the dataset quietly.
            print(f"Warning: skipped {skipped} of {n_rows} rows that could not be loaded")

    def __len__(self):
        """Number of successfully cached samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(PIL image, log1p(price) as a float32 tensor)`` for sample ``idx``."""
        return self.images[idx], torch.tensor(self.prices[idx], dtype=torch.float32)

In [None]:
# Transforms
# Training pipeline: light augmentation (flip, rotation, brightness/contrast
# jitter) followed by tensor conversion and per-channel normalisation.
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(contrast=0.1, brightness=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Evaluation pipeline: no augmentation, same tensor conversion and normalisation.
val_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

class TransformedDataset(Dataset):
    """Wrap a dataset of ``(image, price)`` pairs and apply ``transform`` to the image.

    Args:
        base_dataset: Indexable dataset yielding ``(image, price)``.
        transform: Callable applied to the image on every access.
    """

    def __init__(self, base_dataset, transform):
        self.base, self.transform = base_dataset, transform

    def __len__(self):
        """Same length as the wrapped dataset."""
        return len(self.base)

    def __getitem__(self, idx):
        """Return ``(transform(image), price)``; the price is passed through untouched."""
        raw_image, target = self.base[idx]
        return self.transform(raw_image), target

# Split Data
# Keep only rows priced below 1000, then carve off 10% for test and 11.11% of
# the remainder for validation (fixed seed for a repeatable split).
df = pd.read_csv(CLEAN_CSV_PATH, on_bad_lines='skip', engine='python')
df = df.loc[df['price'] < 1000]
train_val_df, test_df = train_test_split(df, test_size=0.10, random_state=42)
train_df, val_df = train_test_split(train_val_df, test_size=0.1111, random_state=42)

# Cache each split in memory (train, then val, then test).
raw_train, raw_val, raw_test = (
    ASOSMemDataset(split_df, IMG_DIR) for split_df in (train_df, val_df, test_df)
)

# Only the training loader shuffles and uses the augmenting transforms.
train_loader, val_loader, test_loader = (
    DataLoader(TransformedDataset(raw_split, split_tfm), batch_size=BATCH_SIZE, shuffle=do_shuffle, num_workers=0)
    for raw_split, split_tfm, do_shuffle in (
        (raw_train, train_transforms, True),
        (raw_val, val_transforms, False),
        (raw_test, val_transforms, False),
    )
)

Caching 23976 images...


100%|██████████| 23976/23976 [21:53<00:00, 18.25img/s]


Caching 2997 images...


100%|██████████| 2997/2997 [02:41<00:00, 18.57img/s]


Caching 2998 images...


100%|██████████| 2998/2998 [02:42<00:00, 18.43img/s]


In [None]:
def build_model():
    """Build the image-only price regressor.

    Loads ``convnext_tiny`` with its default weights, freezes every existing
    parameter, and replaces ``classifier[2]`` with a funnel-shaped MLP
    (Linear -> BatchNorm1d -> ReLU -> Dropout per hidden layer, then a final
    Linear to one output). Layer count, widths and dropout come from the
    module-level ``NUM_LAYERS``, ``HIDDEN_DIM`` and ``DROPOUT_RATE``.

    Returns:
        The model moved to ``DEVICE``; only the new head is trainable.
    """
    net = models.convnext_tiny(weights='DEFAULT')
    # Freeze the pretrained weights; the head created below is trainable by default.
    for frozen_param in net.parameters():
        frozen_param.requires_grad = False

    # Funnel Logic (From Alibaba / Stanford Ideas): each hidden layer is half
    # as wide as the previous one, e.g. 1024 -> 512.
    hidden_widths = [int(HIDDEN_DIM / (2**depth)) for depth in range(NUM_LAYERS)]

    head = []
    in_width = net.classifier[2].in_features
    for out_width in hidden_widths:
        head += [
            nn.Linear(in_width, out_width),
            nn.BatchNorm1d(out_width),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
        ]
        in_width = out_width
    head.append(nn.Linear(in_width, 1))  # single regression output

    net.classifier[2] = nn.Sequential(*head)
    return net.to(DEVICE)

# Model, loss and AMP gradient scaler shared by both training stages.
model = build_model()
criterion = nn.MSELoss()
scaler = torch.amp.GradScaler('cuda')

Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to /root/.cache/torch/hub/checkpoints/convnext_tiny-983f1562.pth


100%|██████████| 109M/109M [00:00<00:00, 237MB/s] 


In [None]:
# --- STAGE 1: HEAD TRAINING ---
# Only model.classifier parameters are handed to the optimizer. The scheduler
# halves the LR when validation loss plateaus (factor=0.5, patience=3).
print(f"\n=== STAGE 1: Training Head ({EPOCHS_STAGE_1} Epochs) ===")
optimizer = optim.AdamW(model.classifier.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

for epoch in range(EPOCHS_STAGE_1):
    model.train()
    loop = tqdm(train_loader, desc=f"S1 Ep {epoch+1}")
    for img, price in loop:
        img, price = img.to(DEVICE), price.to(DEVICE)
        optimizer.zero_grad()
        with torch.amp.autocast('cuda'):
            out = model(img)
            # squeeze(1) removes only the output-feature axis, (B, 1) -> (B,).
            # A bare squeeze() would also drop the batch axis when B == 1 and
            # make the prediction shape disagree with `price`.
            loss = criterion(out.squeeze(1), price)
        # Mixed-precision step: scale the loss, step through the scaler, update the scale.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        loop.set_postfix(loss=loss.item())

    # Validation (loss is in log1p-price space, averaged over batches)
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for img, price in val_loader:
            img, price = img.to(DEVICE), price.to(DEVICE)
            with torch.amp.autocast('cuda'):
                out = model(img)
                val_loss += criterion(out.squeeze(1), price).item()

    val_loss /= len(val_loader)
    print(f"  >>> Val Loss: {val_loss:.4f}")
    scheduler.step(val_loss)

# Save Stage 1 Checkpoint (weights after the final epoch)
torch.save(model.state_dict(), os.path.join(SAVE_DIR, 'asos_stage1_done.pth'))

# --- STAGE 2: FINE-TUNING ---
print(f"\n=== STAGE 2: Unfreezing Backbone ({EPOCHS_STAGE_2} Epochs) ===")
# Unfreeze last 2 blocks of ConvNeXt (features[6] and features[7])
for param in model.features[6].parameters(): param.requires_grad = True
for param in model.features[7].parameters(): param.requires_grad = True

# Low LR for stability; a fresh optimizer is built over all model parameters.
optimizer = optim.AdamW(model.parameters(), lr=LR_FINE_TUNE, weight_decay=WEIGHT_DECAY)
best_mae = float('inf')

for epoch in range(EPOCHS_STAGE_2):
    model.train()
    loop = tqdm(train_loader, desc=f"S2 Ep {epoch+1}")
    for img, price in loop:
        img, price = img.to(DEVICE), price.to(DEVICE)
        optimizer.zero_grad()
        with torch.amp.autocast('cuda'):
            out = model(img)
            # squeeze(1): (B, 1) -> (B,) while keeping the batch axis even for B == 1.
            loss = criterion(out.squeeze(1), price)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        loop.set_postfix(loss=loss.item())

    # Full Validation (Real $)
    model.eval()
    val_preds, val_targets = [], []
    with torch.no_grad():
        for img, price in val_loader:
            img, price = img.to(DEVICE), price.to(DEVICE)
            with torch.amp.autocast('cuda'):
                out = model(img)
            # squeeze(1) always yields a 1-D array. A bare squeeze() returns a
            # 0-d array for a single-sample batch, which list.extend cannot iterate.
            val_preds.extend(out.squeeze(1).float().cpu().numpy())
            val_targets.extend(price.cpu().numpy())

    # Undo the log1p target transform; predictions are clipped to [0, 10] in
    # log space before exponentiating to keep expm1 bounded.
    real_preds = np.expm1(np.clip(val_preds, 0, 10))
    real_targets = np.expm1(val_targets)
    val_mae = np.mean(np.abs(real_preds - real_targets))

    print(f"  >>> Val MAE: ${val_mae:.2f}")

    # Checkpoint only when validation MAE improves.
    if val_mae < best_mae:
        best_mae = val_mae
        torch.save(model.state_dict(), os.path.join(SAVE_DIR, 'asos_final_best.pth'))
        print("  >>> New Best Model Saved!")

print("Training Complete!")


=== STAGE 1: Training Head (20 Epochs) ===


S1 Ep 1: 100%|██████████| 188/188 [01:19<00:00,  2.37it/s, loss=0.61]


  >>> Val Loss: 0.3233


S1 Ep 2: 100%|██████████| 188/188 [01:05<00:00,  2.87it/s, loss=0.349]


  >>> Val Loss: 0.3198


S1 Ep 3: 100%|██████████| 188/188 [01:05<00:00,  2.85it/s, loss=0.213]


  >>> Val Loss: 0.2691


S1 Ep 4: 100%|██████████| 188/188 [01:05<00:00,  2.88it/s, loss=0.261]


  >>> Val Loss: 0.2748


S1 Ep 5: 100%|██████████| 188/188 [01:05<00:00,  2.87it/s, loss=0.227]


  >>> Val Loss: 0.2894


S1 Ep 6: 100%|██████████| 188/188 [01:05<00:00,  2.88it/s, loss=0.586]


  >>> Val Loss: 0.2999


S1 Ep 7: 100%|██████████| 188/188 [01:05<00:00,  2.86it/s, loss=0.311]


  >>> Val Loss: 0.2968


S1 Ep 8: 100%|██████████| 188/188 [01:06<00:00,  2.84it/s, loss=0.291]


  >>> Val Loss: 0.2571


S1 Ep 9: 100%|██████████| 188/188 [01:05<00:00,  2.88it/s, loss=0.241]


  >>> Val Loss: 0.2745


S1 Ep 10: 100%|██████████| 188/188 [01:05<00:00,  2.86it/s, loss=0.301]


  >>> Val Loss: 0.2512


S1 Ep 11: 100%|██████████| 188/188 [01:05<00:00,  2.86it/s, loss=0.423]


  >>> Val Loss: 0.2606


S1 Ep 12: 100%|██████████| 188/188 [01:06<00:00,  2.84it/s, loss=0.13]


  >>> Val Loss: 0.2525


S1 Ep 13: 100%|██████████| 188/188 [01:05<00:00,  2.85it/s, loss=0.226]


  >>> Val Loss: 0.2638


S1 Ep 14: 100%|██████████| 188/188 [01:05<00:00,  2.88it/s, loss=0.291]


  >>> Val Loss: 0.2802


S1 Ep 15: 100%|██████████| 188/188 [01:06<00:00,  2.85it/s, loss=0.326]


  >>> Val Loss: 0.2586


S1 Ep 16: 100%|██████████| 188/188 [01:05<00:00,  2.86it/s, loss=0.256]


  >>> Val Loss: 0.2624


S1 Ep 17: 100%|██████████| 188/188 [01:06<00:00,  2.83it/s, loss=0.123]


  >>> Val Loss: 0.2527


S1 Ep 18: 100%|██████████| 188/188 [01:05<00:00,  2.87it/s, loss=0.319]


  >>> Val Loss: 0.2574


S1 Ep 19: 100%|██████████| 188/188 [01:05<00:00,  2.87it/s, loss=0.218]


  >>> Val Loss: 0.2630


S1 Ep 20: 100%|██████████| 188/188 [01:05<00:00,  2.87it/s, loss=0.321]


  >>> Val Loss: 0.2500

=== STAGE 2: Unfreezing Backbone (20 Epochs) ===


S2 Ep 1: 100%|██████████| 188/188 [01:08<00:00,  2.73it/s, loss=0.136]


  >>> Val MAE: $15.80
  >>> New Best Model Saved!


S2 Ep 2: 100%|██████████| 188/188 [01:05<00:00,  2.89it/s, loss=0.29]


  >>> Val MAE: $15.83


S2 Ep 3: 100%|██████████| 188/188 [01:04<00:00,  2.92it/s, loss=0.155]


  >>> Val MAE: $15.82


S2 Ep 4: 100%|██████████| 188/188 [01:04<00:00,  2.91it/s, loss=0.242]


  >>> Val MAE: $15.83


S2 Ep 5: 100%|██████████| 188/188 [01:05<00:00,  2.86it/s, loss=0.194]


  >>> Val MAE: $15.79
  >>> New Best Model Saved!


S2 Ep 6: 100%|██████████| 188/188 [01:04<00:00,  2.90it/s, loss=0.207]


  >>> Val MAE: $15.80


S2 Ep 7: 100%|██████████| 188/188 [01:04<00:00,  2.91it/s, loss=0.203]


  >>> Val MAE: $15.82


S2 Ep 8: 100%|██████████| 188/188 [01:05<00:00,  2.87it/s, loss=0.18]


  >>> Val MAE: $15.77
  >>> New Best Model Saved!


S2 Ep 9: 100%|██████████| 188/188 [01:05<00:00,  2.87it/s, loss=0.198]


  >>> Val MAE: $15.80


S2 Ep 10: 100%|██████████| 188/188 [01:04<00:00,  2.90it/s, loss=0.174]


  >>> Val MAE: $15.81


S2 Ep 11: 100%|██████████| 188/188 [01:04<00:00,  2.91it/s, loss=0.193]


  >>> Val MAE: $15.81


S2 Ep 12: 100%|██████████| 188/188 [01:05<00:00,  2.87it/s, loss=0.243]


  >>> Val MAE: $15.80


S2 Ep 13: 100%|██████████| 188/188 [01:05<00:00,  2.88it/s, loss=0.17]


  >>> Val MAE: $15.76
  >>> New Best Model Saved!


S2 Ep 14: 100%|██████████| 188/188 [01:05<00:00,  2.88it/s, loss=0.0891]


  >>> Val MAE: $15.83


S2 Ep 15: 100%|██████████| 188/188 [01:05<00:00,  2.88it/s, loss=0.176]


  >>> Val MAE: $15.85


S2 Ep 16: 100%|██████████| 188/188 [01:04<00:00,  2.90it/s, loss=0.216]


  >>> Val MAE: $15.89


S2 Ep 17: 100%|██████████| 188/188 [01:05<00:00,  2.89it/s, loss=0.221]


  >>> Val MAE: $15.83


S2 Ep 18: 100%|██████████| 188/188 [01:04<00:00,  2.89it/s, loss=0.213]


  >>> Val MAE: $15.84


S2 Ep 19: 100%|██████████| 188/188 [01:04<00:00,  2.91it/s, loss=0.183]


  >>> Val MAE: $15.81


S2 Ep 20: 100%|██████████| 188/188 [01:05<00:00,  2.88it/s, loss=0.129]


  >>> Val MAE: $15.81
Training Complete!


In [None]:
from google.colab import files

# Location on Drive where the training cell wrote the best checkpoint
model_path = '/content/drive/MyDrive/Classes/AML/asos_final_best.pth'

# Bail out with a hint when the checkpoint is missing; otherwise hand the file
# to Colab's download helper.
if not os.path.exists(model_path):
    print("File not found! Check your Drive folder.")
else:
    print(f"Downloading {model_path}...")
    files.download(model_path)

Downloading /content/drive/MyDrive/Classes/AML/asos_final_best.pth...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Model 2 (Mid-fusion)

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm
import torch.amp
import concurrent.futures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Install additional libraries
!pip install -q category_encoders

from category_encoders import TargetEncoder

# --- CONFIGURATION ---
# Fusion head shape
HIDDEN_DIM, NUM_LAYERS, DROPOUT_RATE = 1024, 2, 0.3

# Optimisation settings
BATCH_SIZE = 128
LEARNING_RATE, WEIGHT_DECAY = 0.0012, 1e-4

# Training Duration (same epoch budget for both stages)
EPOCHS_STAGE_1 = EPOCHS_STAGE_2 = 20
LR_FINE_TUNE = 1e-5

# Paths
SAVE_DIR = '/content/drive/MyDrive/Classes/AML'
IMG_DIR = '/content/dataset/asos_images_raw'
CLEAN_CSV_PATH = '/content/asos_final_clean.csv'

# Use the GPU when one is available, otherwise run on CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

print(f"Device: {DEVICE}")

Device: cuda


In [None]:
# Load data (malformed CSV lines are skipped) and keep rows priced below 1000
df = pd.read_csv(CLEAN_CSV_PATH, on_bad_lines='skip', engine='python')
df = df.loc[df['price'] < 1000]

# Create train/val/test splits: 10% test first, then 11.11% of the remainder
# for validation, both with a fixed seed.
SPLIT_SEED = 42
train_val_df, test_df = train_test_split(df, test_size=0.10, random_state=SPLIT_SEED)
train_df, val_df = train_test_split(train_val_df, test_size=0.1111, random_state=SPLIT_SEED)

n_train, n_val, n_test = len(train_df), len(val_df), len(test_df)
print(f"Train: {n_train}")
print(f"Val:   {n_val}")
print(f"Test:  {n_test}")

Train: 23976
Val:   2997
Test:  2998


In [None]:
def prepare_metadata_features(train_df, val_df, test_df):
    """Build float32 metadata matrices for the train/val/test splits.

    The ``brand`` column is target-encoded against ``price`` (encoder fitted on
    the training split only). ``item_category``, ``main_material``,
    ``fit_type`` and ``color_simple`` are one-hot encoded, with the val/test
    columns aligned to the training columns (categories unseen in training are
    dropped, missing ones filled with 0).

    Args:
        train_df, val_df, test_df: Split dataframes containing the columns above.

    Returns:
        Tuple ``(X_meta_train, X_meta_val, X_meta_test)`` of float32 numpy
        arrays whose rows follow the row order of the corresponding dataframe.
    """
    # NOTE(review): the brand encoding is fitted on raw `price`, whereas the
    # datasets train on log1p(price), and the encoded column is not rescaled —
    # confirm this is intended.

    # --- 1. TARGET ENCODING (Brand) ---
    brand_encoder = TargetEncoder(cols=['brand'])
    brand_frames = [brand_encoder.fit_transform(train_df['brand'], train_df['price'])]
    for held_out in (val_df, test_df):
        brand_frames.append(brand_encoder.transform(held_out['brand']))

    # --- 2. ONE-HOT ENCODING (Low cardinality features) ---
    low_cardinality_cols = ['item_category', 'main_material', 'fit_type', 'color_simple']
    onehot_frames = [
        pd.get_dummies(split[low_cardinality_cols], drop_first=False)
        for split in (train_df, val_df, test_df)
    ]

    # Align val/test columns to the training layout
    reference_columns = onehot_frames[0].columns
    for position in (1, 2):
        onehot_frames[position] = onehot_frames[position].reindex(columns=reference_columns, fill_value=0)

    # --- 3. COMBINE FEATURES ---
    X_meta_train, X_meta_val, X_meta_test = (
        pd.concat([brand_part, onehot_part], axis=1).astype(np.float32).values
        for brand_part, onehot_part in zip(brand_frames, onehot_frames)
    )

    print(f"✓ Train metadata shape: {X_meta_train.shape}, dtype: {X_meta_train.dtype}")
    print(f"✓ Val metadata shape:   {X_meta_val.shape}, dtype: {X_meta_val.dtype}")
    print(f"✓ Test metadata shape:  {X_meta_test.shape}, dtype: {X_meta_test.dtype}")

    return X_meta_train, X_meta_val, X_meta_test

# Generate metadata features; their column count sizes the fusion head input
meta_arrays = prepare_metadata_features(train_df, val_df, test_df)
X_meta_train, X_meta_val, X_meta_test = meta_arrays
metadata_feature_dim = X_meta_train.shape[1]
print(f"✓ Metadata feature dimension: {metadata_feature_dim}")

✓ Train metadata shape: (23976, 97), dtype: float32
✓ Val metadata shape:   (2997, 97), dtype: float32
✓ Test metadata shape:  (2998, 97), dtype: float32
✓ Metadata feature dimension: 97


In [None]:
# Image transforms
# Training: flip / rotation / mild colour jitter, then tensor conversion and
# per-channel normalisation.
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(contrast=0.1, brightness=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation / test: deterministic, no augmentation.
val_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

print("✓ Transforms defined")

✓ Transforms defined


In [None]:
class MidFusionModel(nn.Module):
    """Mid-fusion price regressor combining image and tabular features.

    Pipeline:
        1. ``backbone.features`` (ConvNeXt-Tiny) produces a feature map.
        2. Global average pooling + flatten gives one vector per image.
        3. That vector is concatenated with the metadata vector.
        4. A funnel MLP (``fusion_head``) maps the result to one value.

    Args:
        metadata_dim: Number of metadata features per sample.
        hidden_dim: Width of the first hidden layer; each further layer halves it.
        num_layers: Number of hidden layers in the fusion head.
        dropout_rate: Dropout probability after each hidden layer.
    """

    def __init__(self, metadata_dim, hidden_dim=1024, num_layers=2, dropout_rate=0.3):
        super().__init__()

        # Image backbone (ConvNeXt) with its default pretrained weights
        self.backbone = models.convnext_tiny(weights='DEFAULT')

        # Record the classifier's input width before the classifier is removed
        self.image_feature_dim = self.backbone.classifier[2].in_features

        # The stock classifier is replaced by a pass-through; forward() only
        # calls `backbone.features` and pools the result itself.
        # NOTE(review): torchvision's ConvNeXt classifier appears to begin with
        # a LayerNorm2d (classifier[0]); it is bypassed here, unlike in the
        # image-only model — confirm this is intended.
        self.backbone.classifier = nn.Identity()
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        # Fusion Head (Funnel Architecture): Linear -> BN -> ReLU -> Dropout per
        # hidden layer, then a single-output Linear.
        hidden_widths = [int(hidden_dim / (2**depth)) for depth in range(num_layers)]
        head = []
        in_width = self.image_feature_dim + metadata_dim
        for out_width in hidden_widths:
            head += [
                nn.Linear(in_width, out_width),
                nn.BatchNorm1d(out_width),
                nn.ReLU(),
                nn.Dropout(dropout_rate),
            ]
            in_width = out_width
        head.append(nn.Linear(in_width, 1))

        self.fusion_head = nn.Sequential(*head)

    def forward(self, images, metadata):
        """Predict one value per sample.

        Args:
            images: Image batch fed to ``backbone.features``.
            metadata: ``(batch_size, metadata_dim)`` tensor.

        Returns:
            Tensor of shape ``(batch_size, 1)``.
        """
        # features -> global average pool -> flatten to one vector per image
        pooled = self.avgpool(self.backbone.features(images))
        image_vec = torch.flatten(pooled, 1)

        # Fuse along the feature axis and regress
        fused = torch.cat([image_vec, metadata], dim=1)
        return self.fusion_head(fused)

    def freeze_backbone(self):
        """Disable gradients for every backbone parameter (stage 1 training)."""
        for weight in self.backbone.parameters():
            weight.requires_grad = False

    def unfreeze_backbone_layers(self, num_blocks=2):
        """Re-enable gradients for the last ``num_blocks`` entries of ``backbone.features``.

        Entries are taken from the end (7, 6, 5, 4), so at most four can be unfrozen.
        """
        for block_idx in (7, 6, 5, 4)[:num_blocks]:
            for weight in self.backbone.features[block_idx].parameters():
                weight.requires_grad = True


class ASOSMemDatasetWithMeta(Dataset):
    """In-memory dataset returning ``(image, metadata, log1p(price))`` triples.

    Images are read from ``<img_dir>/<sku>.jpg``, converted to RGB, resized to
    224x224 and cached as PIL images. Rows whose image or fields cannot be
    loaded are skipped (best-effort), and the metadata rows are filtered with
    the same indices so the three sequences stay aligned.
    """

    def __init__(self, dataframe, img_dir, metadata_features):
        """
        Args:
            dataframe: Original dataframe with SKUs and prices
            img_dir: Directory containing images
            metadata_features: numpy array of metadata features (aligned with dataframe)

        Raises:
            ValueError: If ``metadata_features`` does not have one row per
                dataframe row.
        """
        self.metadata_df = dataframe.copy().reset_index(drop=True)
        self.img_dir = img_dir
        self.metadata_features = metadata_features
        n_rows = len(self.metadata_df)

        # A length mismatch would otherwise pair images with the wrong metadata
        # rows (or fail much later with an opaque IndexError).
        if len(self.metadata_features) != n_rows:
            raise ValueError(
                f"metadata_features has {len(self.metadata_features)} rows "
                f"but dataframe has {n_rows}"
            )

        self.images = [None] * n_rows
        self.prices = [None] * n_rows
        self.valid_indices = []

        print(f"Caching {n_rows} images with metadata...")

        def load_single_image(idx):
            """Return (idx, RGB 224x224 image, raw price), or (idx, None, None) on failure."""
            try:
                row = self.metadata_df.iloc[idx]
                sku = str(int(row['sku']))
                img_path = os.path.join(self.img_dir, f"{sku}.jpg")
                with Image.open(img_path) as img:
                    return idx, img.convert('RGB').resize((224, 224)), float(row['price'])
            except Exception:
                # Best-effort skip; `Exception` (not a bare except) keeps
                # KeyboardInterrupt/SystemExit from being swallowed.
                return idx, None, None

        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(load_single_image, i) for i in range(n_rows)]
            for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), unit="img"):
                idx, img, price = future.result()
                if img is not None:
                    self.images[idx] = img
                    self.prices[idx] = np.log1p(price)
                    self.valid_indices.append(idx)

        # Futures complete in arbitrary order; sort so the sample order is
        # deterministic and follows the dataframe's row order.
        self.valid_indices.sort()

        # Filter to only valid samples (same indices for all three sequences)
        self.images = [self.images[i] for i in self.valid_indices]
        self.prices = [self.prices[i] for i in self.valid_indices]
        self.metadata_features = self.metadata_features[self.valid_indices]

        print(f"✓ Loaded {len(self.images)} valid samples")

    def __len__(self):
        """Number of successfully cached samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(PIL image, float32 metadata tensor, float32 log1p(price) tensor)``."""
        img = self.images[idx]
        price = torch.tensor(self.prices[idx], dtype=torch.float32)
        metadata = torch.tensor(self.metadata_features[idx], dtype=torch.float32)
        return img, metadata, price


class TransformedDatasetWithMeta(Dataset):
    """Apply an image transform on top of a dataset of ``(image, metadata, price)`` triples.

    Only the image goes through ``transform``; metadata and price are passed
    through unchanged.
    """

    def __init__(self, base_dataset, transform):
        self.base, self.transform = base_dataset, transform

    def __len__(self):
        """Same length as the wrapped dataset."""
        return len(self.base)

    def __getitem__(self, idx):
        """Return ``(transform(image), metadata, price)`` for sample ``idx``."""
        raw_image, meta_vec, target = self.base[idx]
        return self.transform(raw_image), meta_vec, target

print("✓ Model and dataset classes defined")

✓ Model and dataset classes defined


In [None]:
print("\n" + "="*60)
print("CREATING DATASETS AND DATALOADERS")
print("="*60)

# Create datasets with metadata (cached in order: train, val, test)
train_dataset_fusion, val_dataset_fusion, test_dataset_fusion = (
    ASOSMemDatasetWithMeta(split_df, IMG_DIR, split_meta)
    for split_df, split_meta in (
        (train_df, X_meta_train),
        (val_df, X_meta_val),
        (test_df, X_meta_test),
    )
)

# Create data loaders: only the training loader shuffles and augments
train_loader_fusion, val_loader_fusion, test_loader_fusion = (
    DataLoader(
        TransformedDatasetWithMeta(cached_split, split_tfm),
        batch_size=BATCH_SIZE,
        shuffle=do_shuffle,
        num_workers=0,
    )
    for cached_split, split_tfm, do_shuffle in (
        (train_dataset_fusion, train_transforms, True),
        (val_dataset_fusion, val_transforms, False),
        (test_dataset_fusion, val_transforms, False),
    )
)

print(f"✓ Train batches: {len(train_loader_fusion)}")
print(f"✓ Val batches:   {len(val_loader_fusion)}")
print(f"✓ Test batches:  {len(test_loader_fusion)}")


CREATING DATASETS AND DATALOADERS
Caching 23976 images with metadata...


100%|██████████| 23976/23976 [21:41<00:00, 18.43img/s]


✓ Loaded 23976 valid samples
Caching 2997 images with metadata...


100%|██████████| 2997/2997 [02:42<00:00, 18.40img/s]


✓ Loaded 2997 valid samples
Caching 2998 images with metadata...


100%|██████████| 2998/2998 [02:43<00:00, 18.35img/s]


✓ Loaded 2998 valid samples
✓ Train batches: 188
✓ Val batches:   24
✓ Test batches:  24


In [None]:
print("\n" + "="*60)
print("BUILDING MID-FUSION MODEL")
print("="*60)

# Instantiate with the tuned head settings, then move to the target device
fusion_model = MidFusionModel(metadata_feature_dim, HIDDEN_DIM, NUM_LAYERS, DROPOUT_RATE)
fusion_model = fusion_model.to(DEVICE)

# Freeze backbone for stage 1
fusion_model.freeze_backbone()

# Sanity report; the frozen flag is read from the first backbone parameter
combined_dim = fusion_model.image_feature_dim + metadata_feature_dim
first_backbone_param = next(fusion_model.backbone.parameters())
print(f"✓ Model created")
print(f"  - Image feature dim: {fusion_model.image_feature_dim}")
print(f"  - Metadata dim: {metadata_feature_dim}")
print(f"  - Combined dim: {combined_dim}")
print(f"  - Backbone frozen: {not first_backbone_param.requires_grad}")

# Setup training: MSE loss plus the AMP gradient scaler
criterion = nn.MSELoss()
scaler = torch.amp.GradScaler('cuda')


BUILDING MID-FUSION MODEL
✓ Model created
  - Image feature dim: 768
  - Metadata dim: 97
  - Combined dim: 865
  - Backbone frozen: True


In [None]:
print("\n" + "="*60)
print("STAGE 1: TRAINING FUSION HEAD (Backbone Frozen)")
print("="*60)

# Only the fusion head is optimised in this stage. The scheduler halves the LR
# when validation loss plateaus (factor=0.5, patience=3).
optimizer = optim.AdamW(
    fusion_model.fusion_head.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

for epoch in range(EPOCHS_STAGE_1):
    fusion_model.train()
    epoch_loss = 0.0
    loop = tqdm(train_loader_fusion, desc=f"Stage 1 Epoch {epoch+1}/{EPOCHS_STAGE_1}")

    for images, metadata, prices in loop:
        images = images.to(DEVICE)
        metadata = metadata.to(DEVICE)
        prices = prices.to(DEVICE)

        optimizer.zero_grad()

        with torch.amp.autocast('cuda'):
            outputs = fusion_model(images, metadata)
            # squeeze(1) removes only the output-feature axis, (B, 1) -> (B,).
            # A bare squeeze() would also drop the batch axis when B == 1 and
            # make the prediction shape disagree with `prices`.
            loss = criterion(outputs.squeeze(1), prices)

        # Mixed-precision step: scale the loss, step through the scaler, update the scale.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        epoch_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    # Validation (loss is in log1p-price space, averaged over batches)
    fusion_model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, metadata, prices in val_loader_fusion:
            images = images.to(DEVICE)
            metadata = metadata.to(DEVICE)
            prices = prices.to(DEVICE)

            with torch.amp.autocast('cuda'):
                outputs = fusion_model(images, metadata)
                val_loss += criterion(outputs.squeeze(1), prices).item()

    val_loss /= len(val_loader_fusion)
    avg_train_loss = epoch_loss / len(train_loader_fusion)

    print(f"  Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f}, Val Loss = {val_loss:.4f}")
    scheduler.step(val_loss)

# Save Stage 1 (weights after the final epoch)
torch.save(fusion_model.state_dict(), os.path.join(SAVE_DIR, 'fusion_stage1.pth'))
print("✓ Stage 1 checkpoint saved")


STAGE 1: TRAINING FUSION HEAD (Backbone Frozen)


Stage 1 Epoch 1/20: 100%|██████████| 188/188 [01:11<00:00,  2.64it/s, loss=0.293]


  Epoch 1: Train Loss = 0.9451, Val Loss = 0.1858


Stage 1 Epoch 2/20: 100%|██████████| 188/188 [01:05<00:00,  2.86it/s, loss=0.499]


  Epoch 2: Train Loss = 0.2475, Val Loss = 0.1970


Stage 1 Epoch 3/20: 100%|██████████| 188/188 [01:06<00:00,  2.83it/s, loss=0.522]


  Epoch 3: Train Loss = 0.2263, Val Loss = 0.1470


Stage 1 Epoch 4/20: 100%|██████████| 188/188 [01:06<00:00,  2.83it/s, loss=0.21]


  Epoch 4: Train Loss = 0.2069, Val Loss = 0.1391


Stage 1 Epoch 5/20: 100%|██████████| 188/188 [01:06<00:00,  2.83it/s, loss=0.488]


  Epoch 5: Train Loss = 0.1931, Val Loss = 0.1767


Stage 1 Epoch 6/20: 100%|██████████| 188/188 [01:06<00:00,  2.85it/s, loss=0.216]


  Epoch 6: Train Loss = 0.1793, Val Loss = 0.1450


Stage 1 Epoch 7/20: 100%|██████████| 188/188 [01:06<00:00,  2.84it/s, loss=0.221]


  Epoch 7: Train Loss = 0.1778, Val Loss = 0.1470


Stage 1 Epoch 8/20: 100%|██████████| 188/188 [01:06<00:00,  2.83it/s, loss=0.2]


  Epoch 8: Train Loss = 0.1687, Val Loss = 0.1437


Stage 1 Epoch 9/20: 100%|██████████| 188/188 [01:06<00:00,  2.81it/s, loss=0.195]


  Epoch 9: Train Loss = 0.1495, Val Loss = 0.1398


Stage 1 Epoch 10/20: 100%|██████████| 188/188 [01:06<00:00,  2.82it/s, loss=0.138]


  Epoch 10: Train Loss = 0.1458, Val Loss = 0.1369


Stage 1 Epoch 11/20: 100%|██████████| 188/188 [01:06<00:00,  2.82it/s, loss=0.131]


  Epoch 11: Train Loss = 0.1422, Val Loss = 0.1354


Stage 1 Epoch 12/20: 100%|██████████| 188/188 [01:05<00:00,  2.85it/s, loss=0.174]


  Epoch 12: Train Loss = 0.1420, Val Loss = 0.1325


Stage 1 Epoch 13/20: 100%|██████████| 188/188 [01:06<00:00,  2.84it/s, loss=0.0874]


  Epoch 13: Train Loss = 0.1382, Val Loss = 0.1454


Stage 1 Epoch 14/20: 100%|██████████| 188/188 [01:06<00:00,  2.83it/s, loss=0.15]


  Epoch 14: Train Loss = 0.1370, Val Loss = 0.1234


Stage 1 Epoch 15/20: 100%|██████████| 188/188 [01:06<00:00,  2.83it/s, loss=0.0989]


  Epoch 15: Train Loss = 0.1362, Val Loss = 0.1370


Stage 1 Epoch 16/20: 100%|██████████| 188/188 [01:06<00:00,  2.82it/s, loss=0.103]


  Epoch 16: Train Loss = 0.1357, Val Loss = 0.1425


Stage 1 Epoch 17/20: 100%|██████████| 188/188 [01:06<00:00,  2.82it/s, loss=0.182]


  Epoch 17: Train Loss = 0.1325, Val Loss = 0.1495


Stage 1 Epoch 18/20: 100%|██████████| 188/188 [01:06<00:00,  2.82it/s, loss=0.118]


  Epoch 18: Train Loss = 0.1296, Val Loss = 0.1261


Stage 1 Epoch 19/20: 100%|██████████| 188/188 [01:06<00:00,  2.83it/s, loss=0.176]


  Epoch 19: Train Loss = 0.1220, Val Loss = 0.1218


Stage 1 Epoch 20/20: 100%|██████████| 188/188 [01:06<00:00,  2.83it/s, loss=0.126]


  Epoch 20: Train Loss = 0.1186, Val Loss = 0.1196
✓ Stage 1 checkpoint saved


In [None]:
print("\n" + "="*60)
print("STAGE 2: FINE-TUNING (Unfreezing Last 2 Blocks)")
print("="*60)

# Re-enable gradients for the last two backbone blocks, then build a fresh
# low-LR optimizer over all model parameters.
fusion_model.unfreeze_backbone_layers(num_blocks=2)

optimizer = optim.AdamW(
    fusion_model.parameters(),
    lr=LR_FINE_TUNE,
    weight_decay=WEIGHT_DECAY
)

best_mae = float('inf')

for epoch in range(EPOCHS_STAGE_2):
    fusion_model.train()
    epoch_loss = 0.0
    loop = tqdm(train_loader_fusion, desc=f"Stage 2 Epoch {epoch+1}/{EPOCHS_STAGE_2}")

    for images, metadata, prices in loop:
        images = images.to(DEVICE)
        metadata = metadata.to(DEVICE)
        prices = prices.to(DEVICE)

        optimizer.zero_grad()

        with torch.amp.autocast('cuda'):
            outputs = fusion_model(images, metadata)
            # squeeze(1): (B, 1) -> (B,) while keeping the batch axis even for B == 1.
            loss = criterion(outputs.squeeze(1), prices)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        epoch_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    # Validation with MAE in real price units
    fusion_model.eval()
    val_preds = []
    val_targets = []

    with torch.no_grad():
        for images, metadata, prices in val_loader_fusion:
            images = images.to(DEVICE)
            metadata = metadata.to(DEVICE)

            with torch.amp.autocast('cuda'):
                outputs = fusion_model(images, metadata)

            # squeeze(1) always yields a 1-D array. A bare squeeze() returns a
            # 0-d array for a single-sample batch, which list.extend cannot iterate.
            val_preds.extend(outputs.squeeze(1).float().cpu().numpy())
            val_targets.extend(prices.cpu().numpy())

    # Convert from log space back to prices; predictions are clipped to
    # [0, 10] in log space before exponentiating to keep expm1 bounded.
    real_preds = np.expm1(np.clip(val_preds, 0, 10))
    real_targets = np.expm1(val_targets)
    val_mae = np.mean(np.abs(real_preds - real_targets))
    avg_train_loss = epoch_loss / len(train_loader_fusion)

    print(f"  Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f}, Val MAE = £{val_mae:.2f}")

    # Checkpoint only when validation MAE improves.
    if val_mae < best_mae:
        best_mae = val_mae
        torch.save(fusion_model.state_dict(), os.path.join(SAVE_DIR, 'fusion_best.pth'))
        print(f"  ✓ New best model saved! MAE = £{val_mae:.2f}")

print("\n" + "="*60)
print("TRAINING COMPLETE!")
print(f"Best Validation MAE: £{best_mae:.2f}")
print("="*60)


STAGE 2: FINE-TUNING (Unfreezing Last 2 Blocks)


Stage 2 Epoch 1/20: 100%|██████████| 188/188 [01:11<00:00,  2.63it/s, loss=0.303]


  Epoch 1: Train Loss = 0.1144, Val MAE = £10.67
  ✓ New best model saved! MAE = £10.67


Stage 2 Epoch 2/20: 100%|██████████| 188/188 [01:08<00:00,  2.76it/s, loss=0.0868]


  Epoch 2: Train Loss = 0.1091, Val MAE = £10.60
  ✓ New best model saved! MAE = £10.60


Stage 2 Epoch 3/20: 100%|██████████| 188/188 [01:08<00:00,  2.74it/s, loss=0.181]


  Epoch 3: Train Loss = 0.1085, Val MAE = £10.63


Stage 2 Epoch 4/20: 100%|██████████| 188/188 [01:07<00:00,  2.79it/s, loss=0.126]


  Epoch 4: Train Loss = 0.1078, Val MAE = £10.64


Stage 2 Epoch 5/20: 100%|██████████| 188/188 [01:08<00:00,  2.76it/s, loss=0.26]


  Epoch 5: Train Loss = 0.1082, Val MAE = £10.65


Stage 2 Epoch 6/20: 100%|██████████| 188/188 [01:07<00:00,  2.78it/s, loss=0.0897]


  Epoch 6: Train Loss = 0.1046, Val MAE = £10.57
  ✓ New best model saved! MAE = £10.57


Stage 2 Epoch 7/20: 100%|██████████| 188/188 [01:08<00:00,  2.76it/s, loss=0.0813]


  Epoch 7: Train Loss = 0.1023, Val MAE = £10.64


Stage 2 Epoch 8/20: 100%|██████████| 188/188 [01:08<00:00,  2.76it/s, loss=0.0886]


  Epoch 8: Train Loss = 0.1029, Val MAE = £10.63


Stage 2 Epoch 9/20: 100%|██████████| 188/188 [01:07<00:00,  2.77it/s, loss=0.151]


  Epoch 9: Train Loss = 0.1017, Val MAE = £10.64


Stage 2 Epoch 10/20: 100%|██████████| 188/188 [01:07<00:00,  2.77it/s, loss=0.0753]


  Epoch 10: Train Loss = 0.1009, Val MAE = £10.61


Stage 2 Epoch 11/20: 100%|██████████| 188/188 [01:07<00:00,  2.78it/s, loss=0.0703]


  Epoch 11: Train Loss = 0.1004, Val MAE = £10.68


Stage 2 Epoch 12/20: 100%|██████████| 188/188 [01:07<00:00,  2.78it/s, loss=0.17]


  Epoch 12: Train Loss = 0.0991, Val MAE = £10.65


Stage 2 Epoch 13/20: 100%|██████████| 188/188 [01:07<00:00,  2.78it/s, loss=0.138]


  Epoch 13: Train Loss = 0.0980, Val MAE = £10.66


Stage 2 Epoch 14/20: 100%|██████████| 188/188 [01:08<00:00,  2.76it/s, loss=0.0841]


  Epoch 14: Train Loss = 0.0960, Val MAE = £10.65


Stage 2 Epoch 15/20: 100%|██████████| 188/188 [01:08<00:00,  2.76it/s, loss=0.0542]


  Epoch 15: Train Loss = 0.0937, Val MAE = £10.64


Stage 2 Epoch 16/20: 100%|██████████| 188/188 [01:07<00:00,  2.77it/s, loss=0.105]


  Epoch 16: Train Loss = 0.0946, Val MAE = £10.71


Stage 2 Epoch 17/20: 100%|██████████| 188/188 [01:07<00:00,  2.78it/s, loss=0.115]


  Epoch 17: Train Loss = 0.0946, Val MAE = £10.77


Stage 2 Epoch 18/20: 100%|██████████| 188/188 [01:07<00:00,  2.77it/s, loss=0.136]


  Epoch 18: Train Loss = 0.0935, Val MAE = £10.66


Stage 2 Epoch 19/20: 100%|██████████| 188/188 [01:07<00:00,  2.77it/s, loss=0.0707]


  Epoch 19: Train Loss = 0.0921, Val MAE = £10.72


Stage 2 Epoch 20/20: 100%|██████████| 188/188 [01:08<00:00,  2.76it/s, loss=0.0824]


  Epoch 20: Train Loss = 0.0911, Val MAE = £10.69

TRAINING COMPLETE!
Best Validation MAE: £10.57


In [None]:
# --- FINAL EVALUATION on the held-out test set ---
# Relies on `fusion_model` and `test_loader_fusion` already existing in the
# kernel, and on the checkpoint written by the Stage 2 cell.
# NOTE(review): `mean_absolute_error`, `mean_squared_error` and `r2_score` are
# not imported in this cell -- confirm an earlier cell imports them
# (presumably from sklearn.metrics).
print("\n" + "="*60)
print("EVALUATING ON TEST SET")
print("="*60)

# Load best model. map_location pins the weights to DEVICE explicitly rather
# than to whichever device they happened to be saved from.
fusion_model.load_state_dict(
    torch.load(os.path.join(SAVE_DIR, 'fusion_best.pth'), map_location=DEVICE)
)
fusion_model.eval()

test_preds = []
test_targets = []

with torch.no_grad():
    for images, metadata, prices in tqdm(test_loader_fusion, desc="Test Inference"):
        images = images.to(DEVICE)
        metadata = metadata.to(DEVICE)

        with torch.amp.autocast('cuda'):
            outputs = fusion_model(images, metadata)

        # reshape(-1) instead of squeeze(): squeeze() would produce a 0-d
        # array for a final batch of one sample, and extend() cannot iterate it.
        test_preds.extend(outputs.reshape(-1).float().cpu().numpy())
        test_targets.extend(prices.cpu().numpy())

# Convert from log space back to real price units (reported as £ below).
# Predictions are clipped to [0, 10] in log space before expm1, mirroring the
# validation step used for model selection.
test_preds_dollar = np.expm1(np.clip(test_preds, 0, 10))
test_targets_dollar = np.expm1(test_targets)

# Calculate metrics on real prices, not on the log-space training targets.
mae = mean_absolute_error(test_targets_dollar, test_preds_dollar)
rmse = np.sqrt(mean_squared_error(test_targets_dollar, test_preds_dollar))
r2 = r2_score(test_targets_dollar, test_preds_dollar)

print("\n" + "="*60)
print("FINAL TEST SET RESULTS - MID-FUSION MODEL")
print("="*60)
print(f"MAE:  £{mae:.2f}")
print(f"RMSE: £{rmse:.2f}")
print(f"R²:   {r2:.4f}")
print("="*60)


EVALUATING ON TEST SET


Test Inference: 100%|██████████| 24/24 [00:08<00:00,  2.83it/s]


FINAL TEST SET RESULTS - MID-FUSION MODEL
MAE:  £11.03
RMSE: £19.88
R²:   0.6706



