In [36]:
import os
import re
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from PIL import Image
# Ensure you have AutoTokenizer, AutoImageProcessor
from transformers import AutoTokenizer, AutoImageProcessor, get_linear_schedule_with_warmup
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F # New import for loss function

In [37]:
from src2.utils import download_images, smape
from src2.model import MultiModalPricer

In [38]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

Using device: cuda


In [39]:
TEXT_MODEL_NAME = "distilbert-base-uncased"
IMAGE_MODEL_NAME = "google/vit-base-patch16-224-in21k"

In [40]:
# --- Optimization Hyperparameters ---
NUM_EPOCHS = 7
BATCH_SIZE = 32
VAL_SPLIT_SIZE = 0.15 # Define split size for data loading
RANDOM_STATE = 42

# Tiered Learning Rates (CRITICAL for fine-tuning)
LR_ENCODERS = 3e-5   # 0.00003: Small step for pre-trained weights
LR_HEAD = 3e-4       # 0.0003: 10x higher for new, randomly initialized layers

# Optimizer Settings
WEIGHT_DECAY = 0.01
WARMUP_RATIO = 0.05  # 5% of training steps for learning rate warm-up

# Loss Function Weighting
SMAPE_WEIGHT = 0.8   # Weight for the SMAPE component in the Hybrid Loss
HUBER_DELTA = 1.0    # Delta for the Huber Loss (nn.SmoothL1Loss)

# Model Settings
MAX_SEQ_LENGTH = 128
# Ensure the training loop patience matches the hyperparameter strategy
EARLY_STOPPING_PATIENCE = 3

In [42]:
TRAIN_CSV_PATH = 'dataset/train.csv'
TEST_CSV_PATH = 'dataset/test.csv'
TRAIN_IMAGE_DIR = 'images2/train/'
TEST_IMAGE_DIR = 'images2/test/'
MODEL_SAVE_PATH = 'best_model_01_hyper.pth'
SUBMISSION_PATH = 'test_out_01_hyper.csv'

In [43]:
# --- Custom Hybrid Loss Function (New Cell) ---

class SmoothSmapeLoss(nn.Module):
    """Calculates a differentiable approximation of the SMAPE metric on the original price scale."""
    def __init__(self, epsilon=1e-8):
        super().__init__()
        self.epsilon = epsilon
        
    def forward(self, y_pred_log, y_true_log):
        # 1. Inverse transform back to original price scale (Price = exp(log_price) - 1)
        y_pred_orig = torch.expm1(y_pred_log)
        y_true_orig = torch.expm1(y_true_log)
        
        # Prices must be non-negative
        y_pred_orig = F.relu(y_pred_orig)
        
        # Numerator: Absolute difference
        numerator = torch.abs(y_pred_orig - y_true_orig)
        
        # Denominator: Average of absolute true and predicted values + epsilon for stability
        denominator = (torch.abs(y_true_orig) + torch.abs(y_pred_orig)) / 2.0
        
        # SMAPE loss (ratio)
        loss = numerator / (denominator + self.epsilon)
        
        return torch.mean(loss)

class HybridLoss(nn.Module):
    """
    Hybrid Loss = (SMAPE_WEIGHT * SmoothSmapeLoss) + ((1 - SMAPE_WEIGHT) * HuberLoss)
    """
    def __init__(self, huber_delta=HUBER_DELTA, smape_weight=SMAPE_WEIGHT):
        super().__init__()
        self.smape_loss = SmoothSmapeLoss()
        # Huber Loss (SmoothL1Loss) applied to the stable, log-transformed data
        self.huber_loss = nn.SmoothL1Loss(reduction='mean', beta=huber_delta)
        self.smape_weight = smape_weight
        
    def forward(self, y_pred, y_true):
        smape_loss = self.smape_loss(y_pred, y_true)
        huber_loss = self.huber_loss(y_pred, y_true) 
        
        return self.smape_weight * smape_loss + (1 - self.smape_weight) * huber_loss

In [44]:
# --- Data Loading (Cell 8) ---
print("Loading data...")
train_df = pd.read_csv(TRAIN_CSV_PATH)
test_df = pd.read_csv(TEST_CSV_PATH)

# --- Feature Engineering Functions (Cell 10) ---
def extract_ipq(text):
    """Extracts Item Pack Quantity (IPQ) from text using regex."""
    if not isinstance(text, str):
        return 1.0
    # Look for patterns like "Pack of X", "IPQ: X", etc.
    match = re.search(r'(?:pack of|ipq:?|pk)\s*(\d+)', text, re.IGNORECASE)
    if match:
        return float(match.group(1))
    return 1.0 # Default to 1 if no pack size is found

# --- Apply Feature Engineering (Cell 11) ---
print("Performing feature engineering...")
train_df['ipq'] = train_df['catalog_content'].apply(extract_ipq)
test_df['ipq'] = test_df['catalog_content'].apply(extract_ipq)

# --- Target Transformation (Cell 13) ---
train_df['log_price'] = np.log1p(train_df['price'])

Loading data...
Performing feature engineering...


In [45]:
print(train_df.head())

   sample_id                                    catalog_content  \
0      33127  Item Name: La Victoria Green Taco Sauce Mild, ...   
1     198967  Item Name: Salerno Cookies, The Original Butte...   
2     261251  Item Name: Bear Creek Hearty Soup Bowl, Creamy...   
3      55858  Item Name: Judeeâ€™s Blue Cheese Powder 11.25 oz...   
4     292686  Item Name: kedem Sherry Cooking Wine, 12.7 Oun...   

                                          image_link  price  ipq  log_price  
0  https://m.media-amazon.com/images/I/51mo8htwTH...   4.89  6.0   1.773256  
1  https://m.media-amazon.com/images/I/71YtriIHAA...  13.12  4.0   2.647592  
2  https://m.media-amazon.com/images/I/51+PFEe-w-...   1.97  6.0   1.088562  
3  https://m.media-amazon.com/images/I/41mu0HAToD...  30.34  1.0   3.444895  
4  https://m.media-amazon.com/images/I/41sA037+Qv...  66.49  1.0   4.211979  


In [46]:
# --- 5. Custom PyTorch Dataset (Cell 19 - Modified) ---
class ProductDataset(Dataset):
    def __init__(self, df, image_dir, tokenizer, image_processor, is_train=True):
        self.df = df
        self.image_dir = image_dir
        self.tokenizer = tokenizer
        self.image_processor = image_processor
        self.is_train = is_train

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        
        # Text data
        text = row['catalog_content'] if isinstance(row['catalog_content'], str) else ""
        # Use MAX_SEQ_LENGTH constant
        text_inputs = self.tokenizer(text, padding='max_length', truncation=True, max_length=MAX_SEQ_LENGTH, return_tensors="pt")
        
        # Image data
        image_path = os.path.join(self.image_dir, f"{row['sample_id']}.jpg")
        try:
            image = Image.open(image_path).convert('RGB')
        except (IOError, FileNotFoundError):
            # Use a placeholder black image if the original is missing/corrupt
            image = Image.new('RGB', (224, 224), color='black')
        
        image_inputs = self.image_processor(images=image, return_tensors="pt")
        
        # Extra features
        ipq = torch.tensor(row['ipq'], dtype=torch.float32)

        # Prepare output dictionary
        data = {
            'input_ids': text_inputs['input_ids'].squeeze(0),
            'attention_mask': text_inputs['attention_mask'].squeeze(0),
            'pixel_values': image_inputs['pixel_values'].squeeze(0),
            'ipq': ipq
        }
        
        if self.is_train:
            data['target'] = torch.tensor(row['log_price'], dtype=torch.float32)
            
        return data

In [47]:
# --- 8. Initialize Model and Loss (Cell 24 - Modified) ---
model = MultiModalPricer(TEXT_MODEL_NAME, IMAGE_MODEL_NAME).to(DEVICE)

# Initialize the Hybrid Loss Function
LOSS_FN = HybridLoss(huber_delta=HUBER_DELTA, smape_weight=SMAPE_WEIGHT).to(DEVICE)

In [48]:
# --- New Cell: Tiered Optimizer and Scheduler Setup ---

# Calculate total number of training steps for the scheduler
total_batches = len(train_loader)
num_training_steps = total_batches * NUM_EPOCHS
num_warmup_steps = int(num_training_steps * WARMUP_RATIO)


# --- TIERED LEARNING RATE SETUP ---
# 1. Group parameters by desired learning rate and weight decay
param_optimizer = list(model.named_parameters())
encoder_params = []
head_params = []

for n, p in param_optimizer:
    # Check for the regression head layers (assumes names contain 'head', 'linear', or 'final')
    if any(keyword in n.lower() for keyword in ['head', 'linear', 'final']): 
        head_params.append(p)
    else:
        encoder_params.append(p)

# 2. Define grouped parameters for the optimizer
optimizer_grouped_parameters = [
    # Group 1: Encoders (Pre-trained layers - low LR)
    {'params': encoder_params, 'weight_decay': WEIGHT_DECAY, 'lr': LR_ENCODERS},
    # Group 2: Regression Head (New layers - high LR)
    {'params': head_params, 'weight_decay': WEIGHT_DECAY, 'lr': LR_HEAD},
]

# 3. Initialize AdamW Optimizer
optimizer = optim.AdamW(optimizer_grouped_parameters)

# 4. Initialize Linear Learning Rate Scheduler with Warmup
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps
)

print(f"Total training steps: {num_training_steps}")
print(f"Warmup steps: {num_warmup_steps}")
print(f"Encoder LR: {LR_ENCODERS}, Head LR: {LR_HEAD}")

Total training steps: 14770
Warmup steps: 738
Encoder LR: 3e-05, Head LR: 0.0003


In [49]:
# --- 9. Training Loop (Cell 40 - Modified) ---
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

best_val_smape = float('inf')
epochs_no_improve = 0

for epoch in range(NUM_EPOCHS):
    # -----------------------------
    # ðŸ”¹ Training Phase
    # -----------------------------
    model.train()
    running_loss = 0.0
    train_pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [Training]")
    
    for batch in train_pbar:
        # Move batch to device
        input_ids = batch['input_ids'].to(DEVICE)
        attention_mask = batch['attention_mask'].to(DEVICE)
        pixel_values = batch['pixel_values'].to(DEVICE)
        ipq = batch['ipq'].to(DEVICE)
        targets = batch['target'].to(DEVICE)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask, pixel_values, ipq)
        
        # Use the Hybrid Loss
        loss = LOSS_FN(outputs.squeeze(), targets)
        
        loss.backward()
        
        # Optional: Gradient Clipping for stability
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        
        optimizer.step()
        scheduler.step() # <<< CRITICAL: Update the learning rate
        
        running_loss += loss.item()
        train_pbar.set_postfix({'loss': running_loss / (len(train_pbar) + 1e-8)})

    avg_train_loss = running_loss / len(train_loader)

    # -----------------------------
    # ðŸ”¹ Validation Phase
    # -----------------------------
    model.eval()
    val_preds = []
    val_targets = []
    
    with torch.no_grad():
        val_pbar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [Validation]")
        for batch in val_pbar:
            input_ids = batch['input_ids'].to(DEVICE)
            attention_mask = batch['attention_mask'].to(DEVICE)
            pixel_values = batch['pixel_values'].to(DEVICE)
            ipq = batch['ipq'].to(DEVICE)
            targets = batch['target']  # stay on CPU

            outputs = model(input_ids, attention_mask, pixel_values, ipq)
            val_preds.extend(outputs.squeeze().cpu().numpy())
            val_targets.extend(targets.numpy())

    # Convert back from log scale
    val_preds_orig_scale = np.expm1(val_preds)
    val_targets_orig_scale = np.expm1(val_targets)

    val_preds_orig_scale[val_preds_orig_scale < 0] = 0
    val_smape = smape(val_targets_orig_scale, val_preds_orig_scale)

    print(f"Epoch {epoch+1}/{NUM_EPOCHS}, "
          f"Train Loss: {avg_train_loss:.4f}, "
          f"Validation SMAPE: {val_smape:.4f}%")

    # -----------------------------
    # ðŸ”¹ Checkpointing & Early Stopping
    # -----------------------------
    if val_smape < best_val_smape:
        best_val_smape = val_smape
        epochs_no_improve = 0  # reset counter
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        print(f" New best model saved with SMAPE: {best_val_smape:.4f}%")
    else:
        epochs_no_improve += 1
        print(f"No improvement for {epochs_no_improve} epoch(s).")

    # Stop if no improvement for 'patience' epochs
    if epochs_no_improve >= EARLY_STOPPING_PATIENCE:
        print(f"Early stopping triggered. Best SMAPE: {best_val_smape:.4f}%")
        break

Epoch 1/7 [Training]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2110/2110 [30:46<00:00,  1.14it/s, loss=0.6]
Epoch 1/7 [Validation]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 235/235 [01:57<00:00,  2.00it/s]


Epoch 1/7, Train Loss: 0.6000, Validation SMAPE: 57.6992%
 New best model saved with SMAPE: 57.6992%


Epoch 2/7 [Training]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2110/2110 [37:03<00:00,  1.05s/it, loss=0.476]
Epoch 2/7 [Validation]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 235/235 [02:00<00:00,  1.94it/s]


Epoch 2/7, Train Loss: 0.4760, Validation SMAPE: 50.6014%
 New best model saved with SMAPE: 50.6014%


Epoch 3/7 [Training]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2110/2110 [39:33<00:00,  1.12s/it, loss=0.422]
Epoch 3/7 [Validation]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 235/235 [01:58<00:00,  1.98it/s]


Epoch 3/7, Train Loss: 0.4218, Validation SMAPE: 47.7256%
 New best model saved with SMAPE: 47.7256%


Epoch 4/7 [Training]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2110/2110 [39:37<00:00,  1.13s/it, loss=0.371]
Epoch 4/7 [Validation]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 235/235 [02:00<00:00,  1.95it/s]


Epoch 4/7, Train Loss: 0.3713, Validation SMAPE: 46.9122%
 New best model saved with SMAPE: 46.9122%


Epoch 5/7 [Training]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2110/2110 [39:59<00:00,  1.14s/it, loss=0.328]
Epoch 5/7 [Validation]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 235/235 [01:58<00:00,  1.99it/s]


Epoch 5/7, Train Loss: 0.3275, Validation SMAPE: 46.6479%
 New best model saved with SMAPE: 46.6479%


Epoch 6/7 [Training]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2110/2110 [39:57<00:00,  1.14s/it, loss=0.29]
Epoch 6/7 [Validation]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 235/235 [01:58<00:00,  1.99it/s]


Epoch 6/7, Train Loss: 0.2898, Validation SMAPE: 46.0416%
 New best model saved with SMAPE: 46.0416%


Epoch 7/7 [Training]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2110/2110 [38:48<00:00,  1.10s/it, loss=0.26]
Epoch 7/7 [Validation]: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 235/235 [01:58<00:00,  1.99it/s]

Epoch 7/7, Train Loss: 0.2604, Validation SMAPE: 46.0416%
No improvement for 1 epoch(s).





In [50]:
# --- 10. Inference and Submission Generation (Cell 26) ---
print("Starting inference on the test set...")
# Load the best model weights
model.load_state_dict(torch.load(MODEL_SAVE_PATH))
model.to(DEVICE)
model.eval()

# --- Prediction (Cell 27) ---
test_predictions = []
with torch.no_grad():
    test_pbar = tqdm(test_loader, desc="Predicting on Test Set")
    for batch in test_pbar:
        input_ids = batch['input_ids'].to(DEVICE)
        attention_mask = batch['attention_mask'].to(DEVICE)
        pixel_values = batch['pixel_values'].to(DEVICE)
        ipq = batch['ipq'].to(DEVICE)

        outputs = model(input_ids, attention_mask, pixel_values, ipq)
        test_predictions.extend(outputs.squeeze().cpu().numpy())

# Inverse transform predictions and ensure they are positive floats
final_prices = np.expm1(test_predictions)
final_prices[final_prices < 0] = 0  # Prices must be positive

# Create submission file
submission_df = pd.DataFrame({
    'sample_id': test_df['sample_id'],
    'price': final_prices.astype(float)
})

submission_df.to_csv(SUBMISSION_PATH, index=False)
print(f"\nSubmission file created successfully at '{SUBMISSION_PATH}'")

Starting inference on the test set...


Predicting on Test Set: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2344/2344 [1:13:16<00:00,  1.88s/it]


Submission file created successfully at 'test_out_01_hyper.csv'



