In [None]:
import os
import random
import warnings

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer
warnings.filterwarnings('ignore')

class Config:
    """Namespace of constants: file locations, model choices and training hyper-parameters."""

    # ---- Reproducibility and data loading ----
    SEED = 123
    NUM_WORKERS = 4
    VAL_SPLIT = 0.15

    # ---- Input / output locations ----
    TRAIN_CSV_PATH = 'dataset/train.csv'
    TEST_CSV_PATH = 'dataset/test.csv'
    TRAIN_IMG_DIR = './src/complete_images/train_images/'
    TEST_IMG_DIR = './src/complete_images/test_images/'
    MODEL_SAVE_PATH = 'best_efficientnet_bert_model.pth'
    SUBMISSION_PATH = 'submission_efficientnet_bert.csv'

    # ---- Encoders and input sizes ----
    IMAGE_MODEL = "efficientnet_b4"  # ~19M params
    TEXT_MODEL = "bert-base-uncased"  # ~110M params
    IMAGE_SIZE = 380
    MAX_TEXT_LENGTH = 256

    # ---- Optimisation ----
    EPOCHS = 15
    BATCH_SIZE = 35
    WARMUP_EPOCHS = 4  # epochs during which train_epoch keeps both encoders frozen
    PATIENCE = 8       # early-stopping patience, in epochs without a better val SMAPE
    LEARNING_RATE_IMG = 1e-4
    LEARNING_RATE_TEXT = 2e-5
    LEARNING_RATE_HEAD = 1e-3
    WEIGHT_DECAY = 1e-4
    DROPOUT = 0.35

    # ---- Feature switches ----
    USE_AUGMENTATION = True
    USE_AMP = True

def set_seed(seed=123):
    """Seed every random number generator the notebook can draw from.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU plus every CUDA device), then puts cuDNN into deterministic mode
    with autotuning disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Python's own generator was previously left unseeded, so any library
    # code drawing from `random` differed from run to run.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Seed the RNGs once at definition time, using the seed from Config.
set_seed(Config.SEED)

def get_train_transforms():
    """Build the augmenting image pipeline applied to training samples.

    Returns:
        A ``transforms.Compose`` that resizes to a square of side
        ``Config.IMAGE_SIZE``, applies random flip / rotation / colour jitter /
        translation, converts to a tensor and normalises per channel.
    """
    side = Config.IMAGE_SIZE
    steps = [
        transforms.Resize((side, side)),
        # Random augmentations: applied only on this (training) path.
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(steps)

def get_val_transforms():
    """Build the deterministic image pipeline used when augmentation is off.

    Returns:
        A ``transforms.Compose`` that resizes to a square of side
        ``Config.IMAGE_SIZE``, converts to a tensor and normalises per channel.
    """
    side = Config.IMAGE_SIZE
    steps = [
        transforms.Resize((side, side)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(steps)

class ProductPriceDataset(Dataset):
    """Dataset yielding one image + tokenized-text sample per DataFrame row.

    Each item is a dict with keys ``image``, ``input_ids``, ``attention_mask``
    and ``sample_id``; when ``mode == 'train'`` it also carries ``price``, the
    ``log1p`` of the row's ``price`` column as a float32 tensor.

    Args:
        df: DataFrame with ``sample_id`` and ``catalog_content`` columns
            (plus ``price`` when ``mode == 'train'``). Its index is reset.
        tokenizer: Callable tokenizer invoked with ``padding``, ``truncation``,
            ``max_length`` and ``return_tensors='pt'``.
        image_dir: Directory containing ``train_<id>`` / ``test_<id>`` images.
        mode: ``'train'`` to emit targets; any other value omits ``price``.
        use_augmentation: Use the augmenting transforms (only honoured when
            ``mode == 'train'``). ``None`` means "augment iff mode is 'train'".
        testing: Selects the ``test_`` filename prefix instead of ``train_``.
    """

    # Extensions tried, in this order, when the default ".jpg" file is absent.
    _FALLBACK_EXTENSIONS = ('.png', '.jpeg', '.JPG', '.PNG')
    # Text substituted for missing catalog content.
    _MISSING_TEXT = "No description available"

    def __init__(self, df, tokenizer, image_dir, mode='train', use_augmentation=None, testing=False):
        self.df = df.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.image_dir = image_dir
        self.mode = mode
        self.testing = testing
        if use_augmentation is None:
            use_augmentation = (mode == 'train')

        if use_augmentation and mode == 'train':
            self.transform = get_train_transforms()
        else:
            self.transform = get_val_transforms()

    def __len__(self):
        return len(self.df)

    def _resolve_image_path(self, sample_id):
        """Return the first existing image path for ``sample_id``.

        Falls back to the default ``.jpg`` path when no candidate exists, so
        the caller's open attempt fails and triggers the blank-image fallback.
        """
        prefix = "test" if self.testing else "train"
        default_path = os.path.join(self.image_dir, f"{prefix}_{sample_id}.jpg")
        if os.path.exists(default_path):
            return default_path
        for ext in self._FALLBACK_EXTENSIONS:
            candidate = os.path.join(self.image_dir, f"{prefix}_{sample_id}{ext}")
            if os.path.exists(candidate):
                return candidate
        return default_path

    def _load_image(self, sample_id):
        """Load and transform the sample's image, or a grey placeholder on failure."""
        try:
            # The context manager releases the file handle as soon as the
            # pixels have been decoded by convert().
            with Image.open(self._resolve_image_path(sample_id)) as raw_image:
                image = raw_image.convert('RGB')
            return self.transform(image)
        except Exception:
            # Deliberate best-effort: a missing or corrupt image must not abort
            # a whole epoch, so substitute a uniform grey image instead.
            placeholder = Image.new('RGB', (224, 224), color=(128, 128, 128))
            tensor = self.transform(placeholder)
            print(f"Warning: Could not load image for {sample_id}, using blank image")
            return tensor

    @classmethod
    def _clean_text(cls, raw_text):
        """Convert a catalog cell to text, mapping missing values to a placeholder."""
        # Test for missing values BEFORE str(): once stringified, None/NaN/NA
        # are ordinary strings and pd.isna() can no longer detect them.
        if pd.api.types.is_scalar(raw_text) and pd.isna(raw_text):
            return cls._MISSING_TEXT
        text = str(raw_text)
        return cls._MISSING_TEXT if text == 'nan' else text

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        sample_id = row['sample_id']

        image = self._load_image(sample_id)
        text = self._clean_text(row['catalog_content'])

        encoding = self.tokenizer(
            text,
            padding='max_length',
            truncation=True,
            max_length=Config.MAX_TEXT_LENGTH,
            return_tensors='pt'
        )

        output = {
            'image': image,
            # return_tensors='pt' adds a leading batch axis of size 1; drop it
            # so the DataLoader can stack samples itself.
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'sample_id': sample_id
        }

        if self.mode == 'train':
            # Targets are stored in log1p space; predictions are mapped back
            # with expm1 by the loss / metric / inference code.
            log_price = np.log1p(float(row['price']))
            output['price'] = torch.tensor(log_price, dtype=torch.float32)

        return output

class DualEncoderPricePredictor(nn.Module):
    """Image + text regressor that predicts a product's log1p(price).

    An EfficientNet image encoder and a Hugging Face text encoder are each
    projected to a shared 512-d space. A multi-head attention layer uses the
    image vector as query and the text vector as key/value, and the three
    vectors (image, text, attended) are concatenated and fed to an MLP head
    that outputs one scalar per sample.

    Args:
        image_model_name: ``"efficientnet_b4"`` or ``"efficientnet_b3"``.
        text_model_name: Model id passed to ``AutoModel.from_pretrained``.
        dropout: Base dropout rate; projections, attention and the last head
            layer use half of it.

    Raises:
        ValueError: If ``image_model_name`` is not a supported backbone.
    """

    # Supported torchvision backbones and the width of their pooled features
    # once the classifier has been replaced by nn.Identity().
    _IMAGE_FEATURE_DIMS = {
        "efficientnet_b4": 1792,
        "efficientnet_b3": 1536,
    }

    def __init__(self, image_model_name=Config.IMAGE_MODEL, text_model_name=Config.TEXT_MODEL, dropout=Config.DROPOUT):
        super().__init__()

        # Fail fast with a clear message; previously an unknown name fell
        # through both branches and crashed later on an undefined variable.
        if image_model_name not in self._IMAGE_FEATURE_DIMS:
            supported = ", ".join(sorted(self._IMAGE_FEATURE_DIMS))
            raise ValueError(
                f"Unsupported image model '{image_model_name}'. Supported: {supported}"
            )

        print(f"Loading image model: {image_model_name}")
        self.image_encoder = getattr(models, image_model_name)(pretrained=True)
        image_feat_dim = self._IMAGE_FEATURE_DIMS[image_model_name]
        # Drop the ImageNet classifier so the encoder returns pooled features.
        self.image_encoder.classifier = nn.Identity()

        print(f"Loading text model: {text_model_name}")
        self.text_encoder = AutoModel.from_pretrained(text_model_name)
        # Read the width from the loaded model instead of assuming 768, so a
        # differently sized text encoder works; 768 remains the fallback.
        text_feat_dim = getattr(self.text_encoder.config, 'hidden_size', 768)

        common_dim = 512
        self.image_projection = nn.Sequential(
            nn.Linear(image_feat_dim, common_dim),
            nn.LayerNorm(common_dim),
            nn.ReLU(),
            nn.Dropout(dropout * 0.5)
        )

        self.text_projection = nn.Sequential(
            nn.Linear(text_feat_dim, common_dim),
            nn.LayerNorm(common_dim),
            nn.ReLU(),
            nn.Dropout(dropout * 0.5)
        )

        self.cross_attention = nn.MultiheadAttention(
            embed_dim=common_dim,
            num_heads=8,
            dropout=dropout * 0.5,
            batch_first=True
        )

        # Input is the concatenation [image_proj, text_proj, attended].
        self.fusion_head = nn.Sequential(
            nn.Linear(common_dim * 3, 768),
            nn.LayerNorm(768),
            nn.GELU(),
            nn.Dropout(dropout),

            nn.Linear(768, 384),
            nn.LayerNorm(384),
            nn.GELU(),
            nn.Dropout(dropout),

            nn.Linear(384, 192),
            nn.GELU(),
            nn.Dropout(dropout * 0.5),

            nn.Linear(192, 1)
        )

        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise the Linear layers of the projections and the head.

        The pretrained encoders and the attention layer are left untouched.
        """
        for module in [self.image_projection, self.text_projection, self.fusion_head]:
            for m in module.modules():
                if isinstance(m, nn.Linear):
                    nn.init.xavier_uniform_(m.weight)
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)

    def forward(self, images, input_ids, attention_mask):
        """Return one predicted log1p(price) per sample.

        Args:
            images: Image batch accepted by the EfficientNet encoder.
            input_ids: Token ids for the text encoder.
            attention_mask: Attention mask matching ``input_ids``.

        Returns:
            Tensor with the trailing singleton dimension squeezed away.
        """
        image_features = self.image_encoder(images)
        image_proj = self.image_projection(image_features)

        text_outputs = self.text_encoder(input_ids=input_ids, attention_mask=attention_mask)
        # Use the hidden state at position 0 as the sequence summary.
        text_features = text_outputs.last_hidden_state[:, 0, :]
        text_proj = self.text_projection(text_features)

        # Give each modality a length-1 sequence axis for the attention layer.
        image_proj_unsqueezed = image_proj.unsqueeze(1)
        text_proj_unsqueezed = text_proj.unsqueeze(1)

        # NOTE(review): with a single key/value token the attention weights
        # cannot vary between samples, so this layer effectively applies a
        # learned projection to the text vector. Kept as-is to stay compatible
        # with existing checkpoints.
        attended_features, _ = self.cross_attention(
            image_proj_unsqueezed,
            text_proj_unsqueezed,
            text_proj_unsqueezed
        )
        attended_features = attended_features.squeeze(1)

        combined = torch.cat([image_proj, text_proj, attended_features], dim=1)

        log_price = self.fusion_head(combined)
        return log_price.squeeze(-1)

class SMAPELoss(nn.Module):
    """Symmetric mean absolute percentage error on prices given in log1p space.

    Both inputs are mapped back with ``expm1`` before the error is computed.
    The result is a fraction (not multiplied by 100).

    Args:
        epsilon: Small constant added to the denominator to avoid division
            by zero.
    """

    def __init__(self, epsilon=1e-8):
        super().__init__()
        self.epsilon = epsilon

    def forward(self, pred, target):
        """Return the mean SMAPE between ``pred`` and ``target`` (both log1p prices)."""
        predicted_price = torch.expm1(pred)
        actual_price = torch.expm1(target)

        abs_error = (predicted_price - actual_price).abs()
        # Mean magnitude of the two prices, kept strictly positive by epsilon.
        scale = (actual_price.abs() + predicted_price.abs()) / 2 + self.epsilon

        return (abs_error / scale).mean()

def train_epoch(model, dataloader, optimizer, criterion, device, scaler=None, epoch=0, warmup_epochs=Config.WARMUP_EPOCHS):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    While ``epoch < warmup_epochs`` both encoders are frozen: their parameters
    get ``requires_grad = False`` and the encoders are put in eval mode. From
    ``warmup_epochs`` onwards everything trains. Gradients are clipped to a
    global norm of 1.0 on every step.

    Args:
        model: Module exposing ``image_encoder`` and ``text_encoder`` and
            callable as ``model(images, input_ids, attention_mask)``.
        dataloader: Iterable of batches with keys ``image``, ``input_ids``,
            ``attention_mask`` and ``price``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(predictions, log_prices)``.
        device: Device the batch tensors are moved to.
        scaler: Optional ``GradScaler``; mixed precision is used only when it
            is provided and ``Config.USE_AMP`` is true.
        epoch: Zero-based epoch index (drives freezing and the progress label).
        warmup_epochs: Number of initial epochs with frozen encoders.

    Returns:
        Mean of the per-batch losses, or NaN when the dataloader yields no batch.
    """
    model.train()

    freeze_encoders = epoch < warmup_epochs
    for encoder in (model.image_encoder, model.text_encoder):
        for param in encoder.parameters():
            param.requires_grad = not freeze_encoders
        if freeze_encoders:
            # requires_grad=False alone does not freeze an encoder: in train
            # mode BatchNorm running statistics still update and dropout stays
            # active. eval() keeps the pretrained features fixed during
            # warm-up; model.train() above re-enables train mode afterwards.
            encoder.eval()

    use_amp = scaler is not None and Config.USE_AMP

    total_loss = 0.0
    num_batches = 0

    progress_bar = tqdm(dataloader, desc=f"Training Epoch {epoch+1}")

    for batch in progress_bar:
        # non_blocking lets the copy overlap with compute when the loader pins
        # memory; it degrades to a normal copy otherwise.
        images = batch['image'].to(device, non_blocking=True)
        input_ids = batch['input_ids'].to(device, non_blocking=True)
        attention_mask = batch['attention_mask'].to(device, non_blocking=True)
        log_prices = batch['price'].to(device, non_blocking=True)

        optimizer.zero_grad()

        if use_amp:
            with torch.cuda.amp.autocast():
                predictions = model(images, input_ids, attention_mask)
                loss = criterion(predictions, log_prices)

            scaler.scale(loss).backward()
            # Unscale first so clipping sees the true gradient magnitudes.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
        else:
            predictions = model(images, input_ids, attention_mask)
            loss = criterion(predictions, log_prices)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

        # .item() forces a device sync, so read the value once and reuse it.
        batch_loss = loss.item()
        total_loss += batch_loss
        num_batches += 1
        progress_bar.set_postfix({'loss': f'{batch_loss:.4f}'})

    if num_batches == 0:
        # An empty loader (e.g. drop_last=True with fewer samples than one
        # batch) used to raise ZeroDivisionError here.
        return float('nan')
    return total_loss / num_batches

def validate(model, dataloader, device):
    """Evaluate ``model`` on a labelled loader and return price-space metrics.

    Predictions and targets are mapped back from log1p space with ``expm1``;
    predicted prices are floored at 0.01 before the metrics are computed.

    Args:
        model: Module callable as ``model(images, input_ids, attention_mask)``.
        dataloader: Iterable of batches with keys ``image``, ``input_ids``,
            ``attention_mask`` and ``price`` (log1p prices).
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(smape, mae, rmse)`` where ``smape`` is in percent. All three
        are NaN when the dataloader yields no batch.
    """
    model.eval()

    pred_chunks = []
    target_chunks = []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Validating"):
            images = batch['image'].to(device)
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            log_prices = batch['price'].to(device)

            with torch.cuda.amp.autocast():
                predictions = model(images, input_ids, attention_mask)

            # Autocast can hand back float16. Upcast before leaving log space:
            # expm1 in float16 quantises prices and overflows to inf above
            # ~65504, which poisons every metric.
            pred_chunks.append(predictions.float().reshape(-1).cpu().numpy())
            target_chunks.append(log_prices.float().reshape(-1).cpu().numpy())

    if not pred_chunks:
        nan = float('nan')
        return nan, nan, nan

    all_preds = np.concatenate(pred_chunks).astype(np.float64)
    all_targets = np.concatenate(target_chunks).astype(np.float64)

    preds_original = np.expm1(all_preds)
    targets_original = np.expm1(all_targets)
    # Same 0.01 price floor that predict() applies at inference time.
    preds_original = np.maximum(preds_original, 0.01)

    abs_error = np.abs(preds_original - targets_original)
    smape = np.mean(abs_error /
                    ((np.abs(targets_original) + np.abs(preds_original)) / 2 + 1e-8)) * 100

    mae = np.mean(abs_error)
    rmse = np.sqrt(np.mean((preds_original - targets_original) ** 2))

    return smape, mae, rmse

def predict(model, dataloader, device):
    """Run inference and return sample ids with predicted prices.

    Model outputs are treated as log1p prices: they are mapped back with
    ``expm1`` and floored at 0.01.

    Args:
        model: Module callable as ``model(images, input_ids, attention_mask)``.
        dataloader: Iterable of batches with keys ``image``, ``input_ids``,
            ``attention_mask`` and ``sample_id``.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(sample_ids, predictions)`` of equal-length lists. Ids that
        arrive as a tensor are returned as plain Python numbers; other id
        collections (e.g. strings) are passed through. Predictions are Python
        floats.
    """
    model.eval()

    all_sample_ids = []
    all_predictions = []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Predicting"):
            images = batch['image'].to(device)
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            sample_ids = batch['sample_id']

            with torch.cuda.amp.autocast():
                log_predictions = model(images, input_ids, attention_mask)

            # Autocast can return float16; upcast before expm1 so prices are
            # neither quantised to the float16 grid nor overflowed to inf.
            predictions = torch.expm1(log_predictions.float())
            predictions = torch.clamp(predictions, min=0.01)

            # Numeric ids are collated into a tensor; extending a list with it
            # would store 0-d tensors that end up as "tensor(123)" in a CSV.
            if torch.is_tensor(sample_ids):
                sample_ids = sample_ids.tolist()

            all_sample_ids.extend(sample_ids)
            all_predictions.extend(predictions.reshape(-1).cpu().tolist())

    return all_sample_ids, all_predictions


In [None]:
print("="*60)
print("EfficientNet-BERT Product Price Prediction")
print("Alternative Approach for Ensemble")
print("="*60)

num_gpus = torch.cuda.device_count()
print(f"Number of GPUs available: {num_gpus}")

# Use the second GPU when more than one is present, otherwise GPU 0 (or the
# CPU when CUDA is unavailable).
gpu_index = 1 if num_gpus > 1 else 0
device = torch.device(f"cuda:{gpu_index}" if torch.cuda.is_available() else "cpu")

print(f"Using device: {device}")
if device.type == 'cuda':
    # Query the device that was actually selected. The previous hard-coded
    # index 1 raised an invalid-device error on single-GPU machines.
    print(f"GPU name: {torch.cuda.get_device_name(device)}")
    print(f"GPU: {torch.cuda.get_device_name(device)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(device).total_memory / 1e9:.2f} GB")

# Load data
print(f"\nLoading training data from {Config.TRAIN_CSV_PATH}")
train_df = pd.read_csv(Config.TRAIN_CSV_PATH)
print(f"Training samples: {len(train_df)}")

print("\nPrice Statistics:")
print(f"Min: ${train_df['price'].min():.2f}")
print(f"Max: ${train_df['price'].max():.2f}")
print(f"Mean: ${train_df['price'].mean():.2f}")
print(f"Median: ${train_df['price'].median():.2f}")

# Random (non-stratified) hold-out split; random_state makes it repeatable.
train_data, val_data = train_test_split(
    train_df, 
    test_size=Config.VAL_SPLIT, 
    random_state=Config.SEED
)
print(f"\nTrain set: {len(train_data)} samples")
print(f"Validation set: {len(val_data)} samples")

print(f"\nInitializing tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained(Config.TEXT_MODEL)
model = DualEncoderPricePredictor().to(device)

# Counted before any freezing happens, so both numbers are equal at this point.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params/1e6:.2f}M")
print(f"Trainable parameters: {trainable_params/1e6:.2f}M")

print("\nCreating datasets with augmentation...")
train_dataset = ProductPriceDataset(
    train_data, tokenizer, Config.TRAIN_IMG_DIR, 
    mode='train', use_augmentation=Config.USE_AUGMENTATION
)
# The validation set also uses mode='train' so that each item carries a
# 'price' target; use_augmentation=False selects the deterministic transforms.
val_dataset = ProductPriceDataset(
    val_data, tokenizer, Config.TRAIN_IMG_DIR, 
    mode='train', use_augmentation=False
)

# drop_last=True discards the final incomplete training batch of each epoch.
train_loader = DataLoader(
    train_dataset,
    batch_size=Config.BATCH_SIZE,
    shuffle=True,
    num_workers=Config.NUM_WORKERS,
    pin_memory=True,
    drop_last=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=Config.BATCH_SIZE,
    shuffle=False,
    num_workers=Config.NUM_WORKERS,
    pin_memory=True
)

criterion = SMAPELoss()

# One parameter group per sub-module: the two pretrained encoders each get
# their own learning rate, and every newly added layer uses the head rate.
optimizer = torch.optim.AdamW([
    {'params': model.image_encoder.parameters(), 'lr': Config.LEARNING_RATE_IMG},
    {'params': model.text_encoder.parameters(), 'lr': Config.LEARNING_RATE_TEXT},
    {'params': model.image_projection.parameters(), 'lr': Config.LEARNING_RATE_HEAD},
    {'params': model.text_projection.parameters(), 'lr': Config.LEARNING_RATE_HEAD},
    {'params': model.cross_attention.parameters(), 'lr': Config.LEARNING_RATE_HEAD},
    {'params': model.fusion_head.parameters(), 'lr': Config.LEARNING_RATE_HEAD}
], weight_decay=Config.WEIGHT_DECAY)

# Halves every group's learning rate when the monitored value (the validation
# SMAPE passed to scheduler.step in the training loop) stops decreasing.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min', 
    factor=0.5, 
    patience=3,
    min_lr=1e-7
)

# Gradient scaler for mixed-precision training; None disables the AMP path
# in train_epoch.
scaler = torch.cuda.amp.GradScaler() if Config.USE_AMP else None

# Training loop
print(f"\nStarting training for {Config.EPOCHS} epochs...")
print(f"Warmup epochs: {Config.WARMUP_EPOCHS}")
print(f"Image size: {Config.IMAGE_SIZE}x{Config.IMAGE_SIZE}")
print(f"Data augmentation: {Config.USE_AUGMENTATION}")
print("-"*60)

# Model selection and early stopping are both driven by validation SMAPE
# (lower is better).
best_smape = float('inf')
patience_counter = 0

for epoch in range(Config.EPOCHS):
    print(f"\nEpoch {epoch+1}/{Config.EPOCHS}")
    
    # The epoch index is forwarded so train_epoch can keep the encoders
    # frozen during the warm-up epochs.
    train_loss = train_epoch(
        model, train_loader, optimizer, criterion, device, scaler, epoch
    )
    
    val_smape, val_mae, val_rmse = validate(model, val_loader, device)
    
    print(f"Train Loss: {train_loss:.4f}")
    print(f"Val SMAPE: {val_smape:.2f}%")
    print(f"Val MAE: ${val_mae:.2f}")
    print(f"Val RMSE: ${val_rmse:.2f}")
    
    scheduler.step(val_smape)
    
    if val_smape < best_smape:
        best_smape = val_smape
        # Overwrite the single checkpoint file with the best model so far.
        # NOTE(review): if val_smape is NaN on every epoch this branch never
        # runs and no checkpoint is written for the reload step that follows.
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'smape': val_smape
        }, Config.MODEL_SAVE_PATH)
        print(f"✓ Model saved! Best SMAPE: {best_smape:.2f}%")
        patience_counter = 0
    else:
        patience_counter += 1
        print(f"Patience: {patience_counter}/{Config.PATIENCE}")
    
    # Stop once PATIENCE consecutive epochs have failed to improve.
    if patience_counter >= Config.PATIENCE:
        print("\nEarly stopping triggered!")
        break

print("\n" + "="*60)
print("Training completed!")
print(f"Best Validation SMAPE: {best_smape:.2f}%")
print("="*60)

# Restore the best checkpoint (not the last epoch's weights) before inference.
print("\nLoading best model for test predictions...")
# NOTE(review): weights_only=False performs a full unpickle, which can run
# arbitrary code; only load checkpoint files written by this notebook.
# Presumably required because the saved 'smape' entry is a NumPy scalar - confirm.
checkpoint = torch.load(Config.MODEL_SAVE_PATH, map_location=device, weights_only=False)
model.load_state_dict(checkpoint['model_state_dict'])

print(f"\nLoading test data from {Config.TEST_CSV_PATH}")
test_df = pd.read_csv(Config.TEST_CSV_PATH)
print(f"Test samples: {len(test_df)}")

EfficientNet-BERT Product Price Prediction
Alternative Approach for Ensemble
Number of GPUs available: 2
Using device: cuda:1
GPU name: Tesla V100S-PCIE-32GB
GPU: Tesla V100S-PCIE-32GB
GPU Memory: 34.08 GB

Loading training data from dataset/train.csv
Training samples: 75000

Price Statistics:
Min: $0.13
Max: $2796.00
Mean: $23.65
Median: $14.00

Train set: 63750 samples
Validation set: 11250 samples

Initializing tokenizer and model...
Loading image model: efficientnet_b4
Loading text model: bert-base-uncased
Total parameters: 130.95M
Trainable parameters: 130.95M

Creating datasets with augmentation...

Starting training for 15 epochs...
Warmup epochs: 4
Image size: 380x380
Data augmentation: True
------------------------------------------------------------

Epoch 1/15


Training Epoch 1:  37%|███▋      | 682/1821 [12:22<13:34,  1.40it/s, loss=0.6421]



Training Epoch 1: 100%|██████████| 1821/1821 [34:08<00:00,  1.12s/it, loss=0.6925]
Validating: 100%|██████████| 322/322 [05:49<00:00,  1.09s/it]


Train Loss: 0.6814
Val SMAPE: 66.55%
Val MAE: $15.62
Val RMSE: $28.37
✓ Model saved! Best SMAPE: 66.55%

Epoch 2/15


Training Epoch 2:  71%|███████▏  | 1302/1821 [25:11<10:32,  1.22s/it, loss=0.7617]



Training Epoch 2: 100%|██████████| 1821/1821 [43:09<00:00,  1.42s/it, loss=0.5610]
Validating: 100%|██████████| 322/322 [06:08<00:00,  1.15s/it]


Train Loss: 0.6297
Val SMAPE: 59.63%
Val MAE: $13.77
Val RMSE: $28.05
✓ Model saved! Best SMAPE: 59.63%

Epoch 3/15


Training Epoch 3:  62%|██████▏   | 1134/1821 [24:57<12:28,  1.09s/it, loss=0.6186] 



Training Epoch 3: 100%|██████████| 1821/1821 [39:07<00:00,  1.29s/it, loss=0.4772]
Validating: 100%|██████████| 322/322 [05:51<00:00,  1.09s/it]


Train Loss: 0.6135
Val SMAPE: 58.75%
Val MAE: $13.59
Val RMSE: $28.55
✓ Model saved! Best SMAPE: 58.75%

Epoch 4/15


Training Epoch 4:  90%|█████████ | 1647/1821 [40:40<02:20,  1.24it/s, loss=0.6156] 



Training Epoch 4: 100%|██████████| 1821/1821 [44:44<00:00,  1.47s/it, loss=0.6593]
Validating: 100%|██████████| 322/322 [05:25<00:00,  1.01s/it]


Train Loss: 0.6035
Val SMAPE: 58.78%
Val MAE: $13.51
Val RMSE: $28.33
Patience: 1/8

Epoch 5/15


Training Epoch 5:   9%|▊         | 157/1821 [03:26<30:54,  1.11s/it, loss=0.5662] 



Training Epoch 5: 100%|██████████| 1821/1821 [37:21<00:00,  1.23s/it, loss=0.5122]  
Validating: 100%|██████████| 322/322 [07:34<00:00,  1.41s/it]


Train Loss: 0.5648
Val SMAPE: 50.54%
Val MAE: $11.47
Val RMSE: $24.13
✓ Model saved! Best SMAPE: 50.54%

Epoch 6/15


Training Epoch 6:  69%|██████▉   | 1265/1821 [35:21<10:24,  1.12s/it, loss=0.5208] 



Training Epoch 6: 100%|██████████| 1821/1821 [51:13<00:00,  1.69s/it, loss=0.4571]
Validating: 100%|██████████| 322/322 [05:12<00:00,  1.03it/s]


Train Loss: 0.5028
Val SMAPE: 48.04%
Val MAE: $11.08
Val RMSE: $23.30
✓ Model saved! Best SMAPE: 48.04%

Epoch 7/15


Training Epoch 7:   5%|▌         | 97/1821 [01:28<18:18,  1.57it/s, loss=0.3320] 



Training Epoch 7: 100%|██████████| 1821/1821 [44:12<00:00,  1.46s/it, loss=0.6107] 
Validating: 100%|██████████| 322/322 [05:37<00:00,  1.05s/it]


Train Loss: 0.4649
Val SMAPE: 48.04%
Val MAE: $10.91
Val RMSE: $22.46
Patience: 1/8

Epoch 8/15


Training Epoch 8:  50%|█████     | 911/1821 [19:09<24:56,  1.64s/it, loss=0.5060] 



Training Epoch 8: 100%|██████████| 1821/1821 [38:02<00:00,  1.25s/it, loss=0.5030]
Validating: 100%|██████████| 322/322 [05:30<00:00,  1.03s/it]


Train Loss: 0.4352
Val SMAPE: 45.71%
Val MAE: $10.32
Val RMSE: $22.84
✓ Model saved! Best SMAPE: 45.71%

Epoch 9/15


Training Epoch 9:  98%|█████████▊| 1778/1821 [40:21<00:46,  1.07s/it, loss=0.2862]



Training Epoch 9: 100%|██████████| 1821/1821 [41:28<00:00,  1.37s/it, loss=0.4466]
Validating: 100%|██████████| 322/322 [04:34<00:00,  1.17it/s]


Train Loss: 0.4071
Val SMAPE: 44.31%
Val MAE: $10.06
Val RMSE: $22.31
✓ Model saved! Best SMAPE: 44.31%

Epoch 10/15


Training Epoch 10:  72%|███████▏  | 1313/1821 [28:07<09:12,  1.09s/it, loss=0.4030]



Training Epoch 10: 100%|██████████| 1821/1821 [37:43<00:00,  1.24s/it, loss=0.4375]
Validating: 100%|██████████| 322/322 [03:24<00:00,  1.58it/s]


Train Loss: 0.3816
Val SMAPE: 45.24%
Val MAE: $10.10
Val RMSE: $22.61
Patience: 1/8

Epoch 11/15


Training Epoch 11:  50%|████▉     | 909/1821 [11:38<09:37,  1.58it/s, loss=0.2866]



Training Epoch 11:  50%|████▉     | 910/1821 [11:38<08:29,  1.79it/s, loss=0.2866]




Training Epoch 11: 100%|██████████| 1821/1821 [31:25<00:00,  1.04s/it, loss=0.4011]
Validating: 100%|██████████| 322/322 [03:16<00:00,  1.64it/s]


Train Loss: 0.3602
Val SMAPE: 45.05%
Val MAE: $10.65
Val RMSE: $22.42
Patience: 2/8

Epoch 12/15


Training Epoch 12:  26%|██▌       | 469/1821 [05:50<13:37,  1.65it/s, loss=0.3703]



Training Epoch 12: 100%|██████████| 1821/1821 [22:27<00:00,  1.35it/s, loss=0.2769]
Validating: 100%|██████████| 322/322 [03:11<00:00,  1.68it/s]


Train Loss: 0.3444
Val SMAPE: 44.30%
Val MAE: $10.01
Val RMSE: $21.91
✓ Model saved! Best SMAPE: 44.30%

Epoch 13/15


Training Epoch 13:   3%|▎         | 57/1821 [00:45<25:06,  1.17it/s, loss=0.2823] 



Training Epoch 13: 100%|██████████| 1821/1821 [22:29<00:00,  1.35it/s, loss=0.3153]
Validating: 100%|██████████| 322/322 [03:14<00:00,  1.66it/s]


Train Loss: 0.3249
Val SMAPE: 44.53%
Val MAE: $10.46
Val RMSE: $22.35
Patience: 1/8

Epoch 14/15


Training Epoch 14:  71%|███████▏  | 1299/1821 [15:56<05:09,  1.69it/s, loss=0.3848]



Training Epoch 14: 100%|██████████| 1821/1821 [22:19<00:00,  1.36it/s, loss=0.2852]
Validating: 100%|██████████| 322/322 [03:14<00:00,  1.66it/s]


Train Loss: 0.3071
Val SMAPE: 43.26%
Val MAE: $9.73
Val RMSE: $21.56
✓ Model saved! Best SMAPE: 43.26%

Epoch 15/15


Training Epoch 15:  14%|█▍        | 259/1821 [03:13<15:04,  1.73it/s, loss=0.2631]



Training Epoch 15: 100%|██████████| 1821/1821 [22:18<00:00,  1.36it/s, loss=0.2452]
Validating: 100%|██████████| 322/322 [03:17<00:00,  1.63it/s]


Train Loss: 0.2937
Val SMAPE: 43.17%
Val MAE: $9.67
Val RMSE: $21.04
✓ Model saved! Best SMAPE: 43.17%

Training completed!
Best Validation SMAPE: 43.17%

Loading best model for test predictions...

Loading test data from dataset/test.csv
Test samples: 75000


In [None]:
# Test-time dataset: mode='test' makes __getitem__ omit the 'price' key, and
# testing=True selects the "test_<sample_id>" image filename prefix.
test_dataset = ProductPriceDataset(
    test_df, tokenizer, Config.TEST_IMG_DIR, 
    mode='test', use_augmentation=False, testing=True
)
# shuffle=False keeps the batches in test_df row order.
test_loader = DataLoader(
    test_dataset,
    batch_size=Config.BATCH_SIZE,
    shuffle=False,
    num_workers=Config.NUM_WORKERS,
    pin_memory=True
)

In [None]:
print("\nGenerating predictions...")
sample_ids, predictions = predict(model, test_loader, device)

# predict() may return 0-d tensors / NumPy scalars. Convert them to native
# Python values first, otherwise the CSV contains entries such as
# "tensor(100179)" instead of the id itself. Values without .item()
# (e.g. string ids) pass through unchanged.
sample_ids = [sid.item() if hasattr(sid, 'item') else sid for sid in sample_ids]
predictions = [float(pred) for pred in predictions]

submission_df = pd.DataFrame({
    'sample_id': sample_ids,
    'price': predictions
})

# Sanity check: exactly one prediction per test row.
if len(submission_df) != len(test_df):
    print(f"Warning: Prediction count mismatch!")
    print(f"Expected: {len(test_df)}, Got: {len(submission_df)}")

submission_df.to_csv(Config.SUBMISSION_PATH, index=False)
print(f"\n✓ Submission saved to {Config.SUBMISSION_PATH}")
print(f"Sample predictions:")
print(submission_df.head(10))

print("\n" + "="*60)
print("Done! Now ensemble with CLIP predictions!")
print("="*60)


Generating predictions...


Predicting:   0%|          | 0/2143 [00:00<?, ?it/s]

Predicting:   0%|          | 9/2143 [00:10<46:05,  1.30s/it]  



Predicting:   9%|▉         | 188/2143 [02:48<17:15,  1.89it/s]  



Predicting:   9%|▉         | 196/2143 [02:55<17:07,  1.90it/s]



Predicting:  56%|█████▌    | 1196/2143 [17:24<21:38,  1.37s/it]



Predicting:  56%|█████▌    | 1203/2143 [17:30<13:49,  1.13it/s]



Predicting:  59%|█████▉    | 1274/2143 [18:31<07:55,  1.83it/s]



Predicting:  60%|██████    | 1286/2143 [18:43<07:45,  1.84it/s]



Predicting:  67%|██████▋   | 1432/2143 [20:54<06:15,  1.89it/s]



Predicting:  83%|████████▎ | 1768/2143 [25:57<03:50,  1.63it/s]



Predicting:  87%|████████▋ | 1861/2143 [27:28<06:48,  1.45s/it]



Predicting:  88%|████████▊ | 1876/2143 [27:39<02:40,  1.66it/s]



Predicting:  90%|█████████ | 1937/2143 [28:39<03:50,  1.12s/it]



Predicting:  91%|█████████ | 1944/2143 [28:43<01:41,  1.95it/s]



Predicting: 100%|██████████| 2143/2143 [31:41<00:00,  1.13it/s]



✓ Submission saved to submission_efficientnet_bert.csv
Sample predictions:
        sample_id      price
0  tensor(100179)  14.132812
1  tensor(245611)  18.734375
2  tensor(146263)  17.937500
3   tensor(95658)   5.554688
4   tensor(36806)  22.484375
5  tensor(148239)   7.050781
6   tensor(92659)   3.765625
7    tensor(3780)  12.726562
8  tensor(196940)  15.976562
9   tensor(20472)   8.320312

Done! Now ensemble with CLIP predictions!


In [None]:
# Coerce ids and prices to built-in int / float (unwrapping anything that
# exposes .item(), such as 0-d tensors or NumPy scalars), then rebuild and
# re-save the submission file.
sample_ids = [
    int(raw_id.item() if hasattr(raw_id, 'item') else raw_id)
    for raw_id in sample_ids
]
predictions = [
    float(raw_price.item() if hasattr(raw_price, 'item') else raw_price)
    for raw_price in predictions
]

submission_df = pd.DataFrame({'sample_id': sample_ids, 'price': predictions})

# Sanity check: exactly one prediction per test row.
if len(test_df) != len(submission_df):
    print(f"Warning: Prediction count mismatch!")
    print(f"Expected: {len(test_df)}, Got: {len(submission_df)}")

submission_df.to_csv(Config.SUBMISSION_PATH, index=False)
print(f"\n✓ Submission saved to {Config.SUBMISSION_PATH}")
print(f"Sample predictions:")
print(submission_df.head(10))

print("\n" + "="*60)
print("Done! Now ensemble with CLIP predictions!")
print("="*60)


✓ Submission saved to submission_efficientnet_bert.csv
Sample predictions:
   sample_id      price
0     100179  14.132812
1     245611  18.734375
2     146263  17.937500
3      95658   5.554688
4      36806  22.484375
5     148239   7.050781
6      92659   3.765625
7       3780  12.726562
8     196940  15.976562
9      20472   8.320312

Done! Now ensemble with CLIP predictions!
