# Final code for Latitude and Longitude

## Using only images for training and not Region_ID

In [6]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import timm
from PIL import Image
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
import gc
from collections import Counter

# --------------------- Utility Functions ---------------------
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)

set_seed()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# --------------------- Paths ---------------------
TRAIN_IMG_DIR  = '/kaggle/input/iiith-images-latlong-smai/images_train/images_train/images_train'
VALID_IMG_DIR  = '/kaggle/input/iiith-images-latlong-smai/images_val/images_val'
TEST_IMG_DIR   = '/kaggle/input/iiith-images-latlong-smai/images_test/images_test'
TRAIN_LABELS   = '/kaggle/input/iiith-images-latlong-smai/cleaned_data_train.csv'
VALID_LABELS   = '/kaggle/input/iiith-images-latlong-smai/labels_val_updated.csv'
OUTPUT_CSV     = 'predictions_predict2.csv'
ANOMALIES      = [95,145,146,158,159,160,161]

# --------------------- Read DataFrames ---------------------
train_df = pd.read_csv(TRAIN_LABELS)
valid_df = pd.read_csv(VALID_LABELS)
valid_df['image_id'] = valid_df['filename'].apply(lambda x: int(x.split('_')[1].split('.')[0].lstrip('0') or '0'))
valid_df = valid_df[~valid_df['image_id'].isin(ANOMALIES)].reset_index(drop=True)

# --------------------- Debug: Check Dataset Sizes ---------------------
print(f"Original valid_df size: {len(valid_df)}")

# --------------------- Debug: Check file extensions ---------------------
def get_file_extensions(directory):
    extensions = []
    for filename in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, filename)):
            ext = os.path.splitext(filename)[1].lower()
            extensions.append(ext)
    return Counter(extensions)

# Test if directories exist
print(f"VALID_IMG_DIR exists: {os.path.exists(VALID_IMG_DIR)}")
print(f"TEST_IMG_DIR exists: {os.path.exists(TEST_IMG_DIR)}")

# Only check extensions if directories exist
if os.path.exists(VALID_IMG_DIR):
    print(f"Valid image extensions: {get_file_extensions(VALID_IMG_DIR)}")
if os.path.exists(TEST_IMG_DIR):
    print(f"Test image extensions: {get_file_extensions(TEST_IMG_DIR)}")
    
def count_files_in_folder(folder_path):
    return sum(1 for entry in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, entry)))

# Example usage
print(f"Number of files in '{TEST_IMG_DIR}': {count_files_in_folder(TEST_IMG_DIR)}")

# --------------------- Scaling ---------------------
lat_scaler = StandardScaler().fit(train_df[['latitude']])
long_scaler = StandardScaler().fit(train_df[['longitude']])
for df in [train_df, valid_df]:
    df['scaled_lat'] = lat_scaler.transform(df[['latitude']])
    df['scaled_lon'] = long_scaler.transform(df[['longitude']])

# --------------------- Transforms ---------------------
train_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ColorJitter(0.3,0.3,0.3,0.05),
    transforms.RandomAffine(20, translate=(0.15,0.15), scale=(0.85,1.15)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
    transforms.RandomErasing(0.2, scale=(0.02,0.15), ratio=(0.3,3.3))
])
val_test_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

# --------------------- Dataset ---------------------
class GeoDataset(Dataset):
    def __init__(self, img_dir, df, transform=None, is_test=False):
        self.img_dir = img_dir
        self.df = df.copy()
        self.transform = transform
        self.is_test = is_test
        self.df = self.df[self.df['filename'].apply(lambda fn: os.path.exists(os.path.join(img_dir, fn)))].reset_index(drop=True)
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = Image.open(os.path.join(self.img_dir, row['filename'])).convert('RGB')
        x = self.transform(img)
        if self.is_test:
            return {'image': x, 'image_id': int(row['filename'].split('_')[1].split('.')[0].lstrip('0') or '0')}
        return {
            'image': x,
            'scaled_lat': torch.tensor(row['scaled_lat'], dtype=torch.float32),
            'scaled_lon': torch.tensor(row['scaled_lon'], dtype=torch.float32),
            'latitude': torch.tensor(row['latitude'], dtype=torch.float32),
            'longitude': torch.tensor(row['longitude'], dtype=torch.float32)
        }

# --------------------- DataLoaders ---------------------
batch_size = 16
train_loader = DataLoader(GeoDataset(TRAIN_IMG_DIR, train_df, train_transform), batch_size, shuffle=True, num_workers=4, pin_memory=True)
valid_loader = DataLoader(GeoDataset(VALID_IMG_DIR, valid_df, val_test_transform), 32, shuffle=False, num_workers=4, pin_memory=True)
test_loader  = DataLoader(GeoDataset(TEST_IMG_DIR, valid_df, val_test_transform, is_test=True), 32, shuffle=False, num_workers=4, pin_memory=True)

# --------------------- Model ---------------------
class SwinGeoWithoutRegion(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = timm.create_model('swin_base_patch4_window7_224', pretrained=True, num_classes=0, global_pool='avg')
        feat = self.backbone.num_features
        self.fuse = nn.Sequential(
            nn.Linear(feat, 1024), nn.LayerNorm(1024), nn.GELU(), nn.Dropout(0.2),
            nn.Linear(1024, 512), nn.LayerNorm(512), nn.GELU(), nn.Dropout(0.2)
        )
        self.lat_head = nn.Sequential(nn.Linear(512, 128), nn.GELU(), nn.Linear(128, 1))
        self.lon_head = nn.Sequential(nn.Linear(512, 128), nn.GELU(), nn.Linear(128, 1))
    def forward(self, x):
        feats = self.backbone(x)
        h = self.fuse(feats)
        return self.lat_head(h).squeeze(-1), self.lon_head(h).squeeze(-1)

model = SwinGeoWithoutRegion().to(device)

# --------------------- Loss, Optimizer & Scheduler ---------------------
class GeoLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
    def forward(self, pred_lat, pred_lon, true_lat, true_lon):
        return self.mse(pred_lat, true_lat) + self.mse(pred_lon, true_lon)

criterion = GeoLoss()
params_backbone, params_new = [], []
for name, param in model.named_parameters():
    if 'backbone' in name:
        params_backbone.append(param)
    else:
        params_new.append(param)

optimizer = optim.AdamW([
    {'params': params_backbone, 'lr': 1e-5},
    {'params': params_new,      'lr': 2e-4}
], weight_decay=1e-2)
scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=[3e-5,5e-4], steps_per_epoch=len(train_loader), epochs=30)

# --------------------- Training & Evaluation ---------------------
def train_eval():
    best_mse = float('inf')
    scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None
    history = {'train_loss': [], 'train_lat_loss': [], 'train_lon_loss': [],
               'val_loss': [], 'val_lat_loss': [], 'val_lon_loss': [],
               'val_unscaled_mse': [], 'lr': []}

    for epoch in range(1, 31):
        model.train()
        running_loss = running_lat = running_lon = 0.0
        count = 0
        pbar = tqdm(train_loader, desc=f"Epoch {epoch} Train")
        for batch in pbar:
            imgs = batch['image'].to(device)
            lat_s = batch['scaled_lat'].to(device)
            lon_s = batch['scaled_lon'].to(device)
            
            # Forward pass without region ID
            with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
                p_lat, p_lon = model(imgs)
                loss = criterion(p_lat, p_lon, lat_s, lon_s)
                lat_loss = nn.MSELoss()(p_lat, lat_s)
                lon_loss = nn.MSELoss()(p_lon, lon_s)
            
            optimizer.zero_grad()
            if scaler:
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                optimizer.step()
            scheduler.step()

            bs = imgs.size(0)
            running_loss += loss.item() * bs
            running_lat += lat_loss.item() * bs
            running_lon += lon_loss.item() * bs
            count += bs
            lr = optimizer.param_groups[0]['lr']
            pbar.set_postfix({'loss': loss.item(), 'lat': lat_loss.item(), 'lon': lon_loss.item(), 'lr': lr})

        epoch_loss = running_loss / count
        epoch_lat = running_lat / count
        epoch_lon = running_lon / count
        history['train_loss'].append(epoch_loss)
        history['train_lat_loss'].append(epoch_lat)
        history['train_lon_loss'].append(epoch_lon)
        history['lr'].append(lr)
        print(f"Epoch {epoch} TRAIN -> Loss: {epoch_loss:.6f}, Lat: {epoch_lat:.6f}, Lon: {epoch_lon:.6f}, LR: {lr:.6e}")

        model.eval()
        v_loss = v_lat = v_lon = 0.0
        v_count = 0
        all_preds, all_true = [], []
        vbar = tqdm(valid_loader, desc=f"Epoch {epoch} Val")
        with torch.no_grad():
            for batch in vbar:
                imgs = batch['image'].to(device)
                
                # Forward pass without region ID
                p_lat, p_lon = model(imgs)
                lat_s = batch['scaled_lat'].to(device)
                lon_s = batch['scaled_lon'].to(device)

                loss = criterion(p_lat, p_lon, lat_s, lon_s)
                lat_loss = nn.MSELoss()(p_lat, lat_s)
                lon_loss = nn.MSELoss()(p_lon, lon_s)

                ulat = lat_scaler.inverse_transform(p_lat.cpu().numpy().reshape(-1,1)).flatten()
                ulon = long_scaler.inverse_transform(p_lon.cpu().numpy().reshape(-1,1)).flatten()
                all_preds.append(np.vstack([ulat, ulon]).T)
                tr_lat = batch['latitude'].numpy()
                tr_lon = batch['longitude'].numpy()
                all_true.append(np.vstack([tr_lat, tr_lon]).T)

                bs = imgs.size(0)
                v_loss += loss.item() * bs
                v_lat += lat_loss.item() * bs
                v_lon += lon_loss.item() * bs
                v_count += bs
                vbar.set_postfix({'v_loss': loss.item(), 'v_lat': lat_loss.item(), 'v_lon': lon_loss.item()})

        val_loss = v_loss / v_count
        val_lat = v_lat / v_count
        val_lon = v_lon / v_count
        preds = np.concatenate(all_preds)
        true  = np.concatenate(all_true)
        unscaled_mse = ((preds - true)**2).mean()

        history['val_loss'].append(val_loss)
        history['val_lat_loss'].append(val_lat)
        history['val_lon_loss'].append(val_lon)
        history['val_unscaled_mse'].append(unscaled_mse)

        print(f"Epoch {epoch} VAL   -> Loss: {val_loss:.6f}, Lat: {val_lat:.6f}, Lon: {val_lon:.6f}, Unscaled MSE: {unscaled_mse:.6f}")

        if unscaled_mse < best_mse:
            best_mse = unscaled_mse
            torch.save(model.state_dict(), 'best_geo2.pth')

        torch.cuda.empty_cache(); gc.collect()

    model.load_state_dict(torch.load('best_geo2.pth'))
    return history

def generate_csv(best_mse):
    rows = []
    model.eval()
    
    # Create a list to track processed IDs to avoid duplicates
    processed_ids = set()
    
    print("\n--- Starting CSV Generation ---")
    print(f"Valid loader dataset size: {len(valid_loader.dataset)}")
    
    # Process validation data with more detailed tracking
    valid_count = 0
    with torch.no_grad():
        for batch in tqdm(valid_loader, desc="Processing validation data"):
            imgs = batch['image'].to(device)
            
            # Forward pass without region ID
            p_lat, p_lon = model(imgs)
            ulat = lat_scaler.inverse_transform(p_lat.cpu().numpy().reshape(-1,1)).flatten()
            ulon = long_scaler.inverse_transform(p_lon.cpu().numpy().reshape(-1,1)).flatten()
            
            # Process each image in the batch
            batch_size = imgs.size(0)
            for i in range(batch_size):
                if i + valid_count >= len(valid_loader.dataset):
                    continue
                    
                # Get the image ID
                image_id = valid_loader.dataset.df.iloc[valid_count + i]['image_id']
                
                # Check if we've already processed this ID
                if image_id in processed_ids:
                    print(f"Warning: Duplicate ID {image_id} in validation data")
                    continue
                    
                processed_ids.add(image_id)
                rows.append({
                    'id': image_id, 
                    'Latitude': ulat[i], 
                    'Longitude': ulon[i]
                })
            
            valid_count += batch_size
    
    print(f"Processed {valid_count} validation images, added {len(processed_ids)} unique IDs")
    
    # Process test data (IDs 369–737)
    test_start_id = 369
    test_end_id = 737
    test_count = 0
    
    # Check if test directory exists and list files
    if os.path.exists(TEST_IMG_DIR):
        test_files = sorted(os.listdir(TEST_IMG_DIR))
        print(f"Found {len(test_files)} files in test directory")
        
        # Create a list to hold test file information
        test_file_list = []
        
        # Check if files have 'img_' prefix
        if test_files and any(fn.startswith('img_') for fn in test_files):
            print("Detected 'img_' prefix in test files")
            
            # Map test files to test IDs sequentially
            for i, filename in enumerate(test_files):
                if i < (test_end_id - test_start_id + 1):  # Ensure we don't exceed the test ID range
                    test_id = test_start_id + i
                    test_file_list.append({
                        'filename': filename,
                        'image_id': test_id
                    })
            
            print(f"Created mapping for {len(test_file_list)} test files")
            if len(test_file_list) > 0:
                print(f"Sample mapping: {test_file_list[:3]}")
    else:
        print(f"Test directory {TEST_IMG_DIR} not found")
        test_file_list = []
    
    # Process test images if we have any
    if test_file_list:
        # Define a dataset for test images
        class TestImageDataset(Dataset):
            def __init__(self, img_dir, file_list, transform):
                self.img_dir = img_dir
                self.file_list = file_list
                self.transform = transform
                
            def __len__(self):
                return len(self.file_list)
                
            def __getitem__(self, idx):
                file_info = self.file_list[idx]
                img_path = os.path.join(self.img_dir, file_info['filename'])
                img = Image.open(img_path).convert('RGB')
                return {
                    'image': self.transform(img),
                    'image_id': file_info['image_id']
                }
        
        # Create test dataset and dataloader
        test_dataset = TestImageDataset(TEST_IMG_DIR, test_file_list, val_test_transform)
        test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)
        
        print(f"Created test dataloader with {len(test_dataset)} images")
        
        # Process test data
        with torch.no_grad():
            for batch in tqdm(test_dataloader, desc="Processing test data"):
                imgs = batch['image'].to(device)
                image_ids = batch['image_id'].tolist()  # Convert tensor to list
                
                # Forward pass without region ID
                p_lat, p_lon = model(imgs)
                ulat = lat_scaler.inverse_transform(p_lat.cpu().numpy().reshape(-1,1)).flatten()
                ulon = long_scaler.inverse_transform(p_lon.cpu().numpy().reshape(-1,1)).flatten()
                
                for i in range(len(image_ids)):
                    img_id = image_ids[i]
                    
                    # Check if we've already processed this ID
                    if img_id in processed_ids:
                        print(f"Warning: Duplicate ID {img_id} in test data")
                        continue
                        
                    processed_ids.add(img_id)
                    rows.append({
                        'id': img_id, 
                        'Latitude': ulat[i], 
                        'Longitude': ulon[i]
                    })
                    test_count += 1
        
        print(f"Processed {test_count} test images")
    
    # Handle any missing test IDs (fill with zeros or nearest neighbor)
    missing_test_ids = set(range(test_start_id, test_end_id + 1)) - processed_ids
    if missing_test_ids:
        print(f"Warning: {len(missing_test_ids)} test IDs missing. Adding placeholders.")
        for img_id in missing_test_ids:
            rows.append({
                'id': img_id,
                'Latitude': 0.0,  # Use a default or interpolate from nearest neighbors
                'Longitude': 0.0
            })
    
    # Save CSV
    result_df = pd.DataFrame(rows).sort_values('id')
    result_df.to_csv(OUTPUT_CSV, index=False)
    
    print(f"\nCSV saved with {len(result_df)} rows:")
    print(f"- {len(processed_ids.intersection(set(range(0, test_start_id))))} validation entries")
    print(f"- {len(processed_ids.intersection(set(range(test_start_id, test_end_id + 1))))} test entries")
    print(f"- {len(missing_test_ids)} placeholder entries for missing test IDs")
    
    # Report best MSE
    if isinstance(best_mse, dict):
        best_mse_value = best_mse.get('val_unscaled_mse', [-1])[-1] if 'val_unscaled_mse' in best_mse else "N/A"
        print(f"Best MSE: {best_mse_value}")
    else:
        print(f"Best MSE: {best_mse:.6f}")
            
# --------------------- Main ---------------------
best_mse = train_eval()
generate_csv(best_mse)

print("Training complete.")

Using device: cuda
Original valid_df size: 362
VALID_IMG_DIR exists: True
TEST_IMG_DIR exists: True
Valid image extensions: Counter({'.jpg': 246, '.jpeg': 96, '.png': 27})
Test image extensions: Counter({'.jpg': 241, '.jpeg': 101, '.png': 27})
Number of files in '/kaggle/input/iiith-images-latlong-smai/images_test/images_test': 369


  scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 1 Train: 100%|██████████| 405/405 [01:45<00:00,  3.82it/s, loss=0.00707, lat=0.000206, lon=0.00686, lr=2.07e-6]


Epoch 1 TRAIN -> Loss: 1.476800, Lat: 0.775467, Lon: 0.701334, LR: 2.068898e-06


Epoch 1 Val: 100%|██████████| 12/12 [00:05<00:00,  2.27it/s, v_loss=0.285, v_lat=0.173, v_lon=0.113]


Epoch 1 VAL   -> Loss: 0.910843, Lat: 0.495232, Lon: 0.415611, Unscaled MSE: 503451.937500


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 2 Train: 100%|██████████| 405/405 [01:45<00:00,  3.83it/s, loss=0.243, lat=0.0479, lon=0.195, lr=4.57e-6]


Epoch 2 TRAIN -> Loss: 0.901840, Lat: 0.478796, Lon: 0.423044, LR: 4.570734e-06


Epoch 2 Val: 100%|██████████| 12/12 [00:05<00:00,  2.29it/s, v_loss=0.221, v_lat=0.121, v_lon=0.0997]


Epoch 2 VAL   -> Loss: 0.536795, Lat: 0.308611, Lon: 0.228184, Unscaled MSE: 291883.843750


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 3 Train: 100%|██████████| 405/405 [01:45<00:00,  3.82it/s, loss=0.602, lat=0.33, lon=0.272, lr=8.4e-6]   


Epoch 3 TRAIN -> Loss: 0.610107, Lat: 0.333673, Lon: 0.276434, LR: 8.403584e-06


Epoch 3 Val: 100%|██████████| 12/12 [00:05<00:00,  2.30it/s, v_loss=0.169, v_lat=0.0817, v_lon=0.0872]


Epoch 3 VAL   -> Loss: 0.350837, Lat: 0.181555, Lon: 0.169281, Unscaled MSE: 196564.781250


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 4 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.0895, lat=0.0714, lon=0.0182, lr=1.31e-5]


Epoch 4 TRAIN -> Loss: 0.441012, Lat: 0.236420, Lon: 0.204592, LR: 1.310490e-05


Epoch 4 Val: 100%|██████████| 12/12 [00:05<00:00,  2.31it/s, v_loss=0.13, v_lat=0.0782, v_lon=0.052]


Epoch 4 VAL   -> Loss: 0.397108, Lat: 0.217806, Lon: 0.179302, Unscaled MSE: 218949.031250


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 5 Train: 100%|██████████| 405/405 [01:45<00:00,  3.83it/s, loss=0.401, lat=0.121, lon=0.281, lr=1.81e-5]   


Epoch 5 TRAIN -> Loss: 0.323096, Lat: 0.177709, Lon: 0.145387, LR: 1.810733e-05


Epoch 5 Val: 100%|██████████| 12/12 [00:05<00:00,  2.32it/s, v_loss=0.0705, v_lat=0.033, v_lon=0.0374]


Epoch 5 VAL   -> Loss: 0.353834, Lat: 0.179782, Lon: 0.174052, Unscaled MSE: 199201.125000


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 6 Train: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s, loss=0.0997, lat=0.0315, lon=0.0682, lr=2.28e-5]


Epoch 6 TRAIN -> Loss: 0.272191, Lat: 0.149808, Lon: 0.122384, LR: 2.280717e-05


Epoch 6 Val: 100%|██████████| 12/12 [00:05<00:00,  2.29it/s, v_loss=0.0499, v_lat=0.0282, v_lon=0.0217]


Epoch 6 VAL   -> Loss: 0.208587, Lat: 0.100095, Lon: 0.108492, Unscaled MSE: 119123.843750


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 7 Train: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s, loss=0.0227, lat=0.00102, lon=0.0217, lr=2.66e-5]


Epoch 7 TRAIN -> Loss: 0.233845, Lat: 0.130862, Lon: 0.102983, LR: 2.663724e-05


Epoch 7 Val: 100%|██████████| 12/12 [00:05<00:00,  2.30it/s, v_loss=0.0662, v_lat=0.0384, v_lon=0.0278]


Epoch 7 VAL   -> Loss: 0.198263, Lat: 0.092800, Lon: 0.105463, Unscaled MSE: 113901.679688


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 8 Train: 100%|██████████| 405/405 [01:45<00:00,  3.83it/s, loss=0.0136, lat=9.7e-6, lon=0.0136, lr=2.91e-5]


Epoch 8 TRAIN -> Loss: 0.215415, Lat: 0.113046, Lon: 0.102369, LR: 2.913534e-05


Epoch 8 Val: 100%|██████████| 12/12 [00:05<00:00,  2.30it/s, v_loss=0.0456, v_lat=0.0211, v_lon=0.0245]


Epoch 8 VAL   -> Loss: 0.180314, Lat: 0.076507, Lon: 0.103807, Unscaled MSE: 105860.507812


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 9 Train: 100%|██████████| 405/405 [01:45<00:00,  3.83it/s, loss=0.00134, lat=0.000353, lon=0.000987, lr=3e-5]


Epoch 9 TRAIN -> Loss: 0.144425, Lat: 0.083558, Lon: 0.060867, LR: 3.000000e-05


Epoch 9 Val: 100%|██████████| 12/12 [00:05<00:00,  2.30it/s, v_loss=0.035, v_lat=0.013, v_lon=0.0221] 


Epoch 9 VAL   -> Loss: 0.161975, Lat: 0.069518, Lon: 0.092457, Unscaled MSE: 94866.429688


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 10 Train: 100%|██████████| 405/405 [01:45<00:00,  3.83it/s, loss=0.00267, lat=0.00266, lon=1.57e-5, lr=2.98e-5]


Epoch 10 TRAIN -> Loss: 0.139324, Lat: 0.078263, Lon: 0.061060, LR: 2.983164e-05


Epoch 10 Val: 100%|██████████| 12/12 [00:05<00:00,  2.33it/s, v_loss=0.043, v_lat=0.0209, v_lon=0.0221] 


Epoch 10 VAL   -> Loss: 0.142734, Lat: 0.063872, Lon: 0.078862, Unscaled MSE: 82845.351562


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 11 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.0041, lat=0.000972, lon=0.00312, lr=2.93e-5]


Epoch 11 TRAIN -> Loss: 0.118800, Lat: 0.067046, Lon: 0.051754, LR: 2.933196e-05


Epoch 11 Val: 100%|██████████| 12/12 [00:05<00:00,  2.29it/s, v_loss=0.0352, v_lat=0.0153, v_lon=0.0199]


Epoch 11 VAL   -> Loss: 0.160020, Lat: 0.066476, Lon: 0.093544, Unscaled MSE: 94355.015625


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 12 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.0894, lat=0.0548, lon=0.0346, lr=2.85e-5]


Epoch 12 TRAIN -> Loss: 0.148187, Lat: 0.076779, Lon: 0.071407, LR: 2.851213e-05


Epoch 12 Val: 100%|██████████| 12/12 [00:05<00:00,  2.31it/s, v_loss=0.0317, v_lat=0.0137, v_lon=0.018] 


Epoch 12 VAL   -> Loss: 0.188285, Lat: 0.092289, Lon: 0.095996, Unscaled MSE: 106972.265625


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 13 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.0387, lat=0.0374, lon=0.00132, lr=2.74e-5]


Epoch 13 TRAIN -> Loss: 0.149658, Lat: 0.074119, Lon: 0.075539, LR: 2.739047e-05


Epoch 13 Val: 100%|██████████| 12/12 [00:05<00:00,  2.31it/s, v_loss=0.0448, v_lat=0.0111, v_lon=0.0337]


Epoch 13 VAL   -> Loss: 0.139571, Lat: 0.070005, Lon: 0.069566, Unscaled MSE: 78837.570312


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 14 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.0692, lat=0.0254, lon=0.0437, lr=2.6e-5]  


Epoch 14 TRAIN -> Loss: 0.100592, Lat: 0.057300, Lon: 0.043292, LR: 2.599202e-05


Epoch 14 Val: 100%|██████████| 12/12 [00:05<00:00,  2.34it/s, v_loss=0.0276, v_lat=0.0156, v_lon=0.012]


Epoch 14 VAL   -> Loss: 0.114521, Lat: 0.069597, Lon: 0.044925, Unscaled MSE: 61190.121094


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 15 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.0147, lat=0.00116, lon=0.0136, lr=2.43e-5]


Epoch 15 TRAIN -> Loss: 0.091778, Lat: 0.048726, Lon: 0.043052, LR: 2.434804e-05


Epoch 15 Val: 100%|██████████| 12/12 [00:05<00:00,  2.30it/s, v_loss=0.03, v_lat=0.0135, v_lon=0.0165]  


Epoch 15 VAL   -> Loss: 0.089946, Lat: 0.048377, Lon: 0.041569, Unscaled MSE: 49867.691406


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 16 Train: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s, loss=0.153, lat=0.0567, lon=0.0964, lr=2.25e-5]  


Epoch 16 TRAIN -> Loss: 0.066839, Lat: 0.037406, Lon: 0.029433, LR: 2.249523e-05


Epoch 16 Val: 100%|██████████| 12/12 [00:05<00:00,  2.30it/s, v_loss=0.0202, v_lat=0.00986, v_lon=0.0104]


Epoch 16 VAL   -> Loss: 0.082579, Lat: 0.044127, Lon: 0.038452, Unscaled MSE: 45865.996094


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 17 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.0207, lat=0.00437, lon=0.0163, lr=2.05e-5]


Epoch 17 TRAIN -> Loss: 0.067978, Lat: 0.036802, Lon: 0.031176, LR: 2.047500e-05


Epoch 17 Val: 100%|██████████| 12/12 [00:05<00:00,  2.31it/s, v_loss=0.023, v_lat=0.0102, v_lon=0.0127]  


Epoch 17 VAL   -> Loss: 0.075182, Lat: 0.042942, Lon: 0.032240, Unscaled MSE: 40961.164062


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 18 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.00881, lat=0.00759, lon=0.00122, lr=1.83e-5]


Epoch 18 TRAIN -> Loss: 0.057188, Lat: 0.031355, Lon: 0.025833, LR: 1.833246e-05


Epoch 18 Val: 100%|██████████| 12/12 [00:05<00:00,  2.33it/s, v_loss=0.0197, v_lat=0.0102, v_lon=0.00946]


Epoch 18 VAL   -> Loss: 0.102492, Lat: 0.051858, Lon: 0.050634, Unscaled MSE: 57763.777344


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 19 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.0194, lat=0.0066, lon=0.0128, lr=1.61e-5]  


Epoch 19 TRAIN -> Loss: 0.051687, Lat: 0.030118, Lon: 0.021569, LR: 1.611548e-05


Epoch 19 Val: 100%|██████████| 12/12 [00:05<00:00,  2.33it/s, v_loss=0.0223, v_lat=0.0112, v_lon=0.0111] 


Epoch 19 VAL   -> Loss: 0.092919, Lat: 0.059299, Lon: 0.033620, Unscaled MSE: 48833.664062


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 20 Train: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s, loss=0.0166, lat=0.0157, lon=0.000876, lr=1.39e-5]


Epoch 20 TRAIN -> Loss: 0.049288, Lat: 0.029561, Lon: 0.019727, LR: 1.387359e-05


Epoch 20 Val: 100%|██████████| 12/12 [00:05<00:00,  2.34it/s, v_loss=0.0214, v_lat=0.0117, v_lon=0.00968] 


Epoch 20 VAL   -> Loss: 0.078052, Lat: 0.043843, Lon: 0.034209, Unscaled MSE: 42737.164062


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 21 Train: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s, loss=0.000912, lat=2.27e-5, lon=0.000889, lr=1.17e-5]


Epoch 21 TRAIN -> Loss: 0.042917, Lat: 0.023103, Lon: 0.019814, LR: 1.165686e-05


Epoch 21 Val: 100%|██████████| 12/12 [00:05<00:00,  2.31it/s, v_loss=0.0223, v_lat=0.012, v_lon=0.0103]  


Epoch 21 VAL   -> Loss: 0.091403, Lat: 0.046715, Lon: 0.044688, Unscaled MSE: 51379.347656


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 22 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.0755, lat=0.0388, lon=0.0366, lr=9.51e-6]   


Epoch 22 TRAIN -> Loss: 0.039817, Lat: 0.022748, Lon: 0.017069, LR: 9.514809e-06


Epoch 22 Val: 100%|██████████| 12/12 [00:05<00:00,  2.31it/s, v_loss=0.0231, v_lat=0.011, v_lon=0.012]   


Epoch 22 VAL   -> Loss: 0.064765, Lat: 0.037179, Lon: 0.027586, Unscaled MSE: 35232.277344


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 23 Train: 100%|██████████| 405/405 [01:45<00:00,  3.83it/s, loss=0.0262, lat=0.0249, lon=0.00137, lr=7.5e-6]   


Epoch 23 TRAIN -> Loss: 0.037369, Lat: 0.021838, Lon: 0.015531, LR: 7.495292e-06


Epoch 23 Val: 100%|██████████| 12/12 [00:05<00:00,  2.32it/s, v_loss=0.0238, v_lat=0.0131, v_lon=0.0107] 


Epoch 23 VAL   -> Loss: 0.069874, Lat: 0.041475, Lon: 0.028399, Unscaled MSE: 37619.011719


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 24 Train: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s, loss=0.0659, lat=0.0333, lon=0.0326, lr=5.64e-6]   


Epoch 24 TRAIN -> Loss: 0.033180, Lat: 0.019143, Lon: 0.014037, LR: 5.643419e-06


Epoch 24 Val: 100%|██████████| 12/12 [00:05<00:00,  2.30it/s, v_loss=0.0213, v_lat=0.0111, v_lon=0.0102] 


Epoch 24 VAL   -> Loss: 0.064207, Lat: 0.037987, Lon: 0.026220, Unscaled MSE: 34604.109375


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 25 Train: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s, loss=0.0188, lat=0.00986, lon=0.00894, lr=4e-6]    


Epoch 25 TRAIN -> Loss: 0.031124, Lat: 0.017368, Lon: 0.013757, LR: 4.000558e-06


Epoch 25 Val: 100%|██████████| 12/12 [00:05<00:00,  2.32it/s, v_loss=0.0193, v_lat=0.0105, v_lon=0.00873]


Epoch 25 VAL   -> Loss: 0.056008, Lat: 0.031511, Lon: 0.024498, Unscaled MSE: 30652.966797


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 26 Train: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s, loss=0.000611, lat=0.000151, lon=0.00046, lr=2.6e-6]


Epoch 26 TRAIN -> Loss: 0.029908, Lat: 0.017133, Lon: 0.012775, LR: 2.603408e-06


Epoch 26 Val: 100%|██████████| 12/12 [00:05<00:00,  2.34it/s, v_loss=0.0196, v_lat=0.0106, v_lon=0.00906]


Epoch 26 VAL   -> Loss: 0.060980, Lat: 0.031071, Lon: 0.029909, Unscaled MSE: 34305.394531


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 27 Train: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s, loss=0.054, lat=0.0469, lon=0.00704, lr=1.48e-6]   


Epoch 27 TRAIN -> Loss: 0.028277, Lat: 0.015410, Lon: 0.012867, LR: 1.483178e-06


Epoch 27 Val: 100%|██████████| 12/12 [00:05<00:00,  2.32it/s, v_loss=0.0202, v_lat=0.0103, v_lon=0.00988]


Epoch 27 VAL   -> Loss: 0.052590, Lat: 0.031156, Lon: 0.021433, Unscaled MSE: 28330.380859


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 28 Train: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s, loss=0.0156, lat=0.0138, lon=0.00177, lr=6.65e-7]  


Epoch 28 TRAIN -> Loss: 0.026218, Lat: 0.014353, Lon: 0.011865, LR: 6.648931e-07


Epoch 28 Val: 100%|██████████| 12/12 [00:05<00:00,  2.31it/s, v_loss=0.02, v_lat=0.0109, v_lon=0.0091]   


Epoch 28 VAL   -> Loss: 0.051863, Lat: 0.030420, Lon: 0.021444, Unscaled MSE: 28027.417969


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 29 Train: 100%|██████████| 405/405 [01:45<00:00,  3.83it/s, loss=0.0494, lat=0.0365, lon=0.0129, lr=1.67e-7]   


Epoch 29 TRAIN -> Loss: 0.025703, Lat: 0.014517, Lon: 0.011185, LR: 1.668321e-07


Epoch 29 Val: 100%|██████████| 12/12 [00:05<00:00,  2.31it/s, v_loss=0.0195, v_lat=0.0105, v_lon=0.00897]


Epoch 29 VAL   -> Loss: 0.050835, Lat: 0.029795, Lon: 0.021040, Unscaled MSE: 27477.873047


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Epoch 30 Train: 100%|██████████| 405/405 [01:45<00:00,  3.83it/s, loss=0.00331, lat=0.0033, lon=5.1e-6, lr=1.21e-10]  


Epoch 30 TRAIN -> Loss: 0.024597, Lat: 0.013719, Lon: 0.010878, LR: 1.210233e-10


Epoch 30 Val: 100%|██████████| 12/12 [00:05<00:00,  2.30it/s, v_loss=0.0195, v_lat=0.0105, v_lon=0.00895]
  model.load_state_dict(torch.load('best_geo2.pth'))


Epoch 30 VAL   -> Loss: 0.050950, Lat: 0.029835, Lon: 0.021115, Unscaled MSE: 27547.646484

--- Starting CSV Generation ---
Valid loader dataset size: 362


Processing validation data: 100%|██████████| 12/12 [00:05<00:00,  2.30it/s]


Processed 362 validation images, added 362 unique IDs
Found 369 files in test directory
Detected 'img_' prefix in test files
Created mapping for 369 test files
Sample mapping: [{'filename': 'img_0000.jpg', 'image_id': 369}, {'filename': 'img_0001.jpg', 'image_id': 370}, {'filename': 'img_0002.jpg', 'image_id': 371}]
Created test dataloader with 369 images


Processing test data: 100%|██████████| 12/12 [00:05<00:00,  2.26it/s]

Processed 369 test images

CSV saved with 731 rows:
- 362 validation entries
- 369 test entries
- 0 placeholder entries for missing test IDs
Best MSE: 27547.646484375
Training complete.





# Fine-tuning for more epochs

In [8]:
# Improved retraining code with better LR scheduling
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, ReduceLROnPlateau

# Load the previously trained model
model.load_state_dict(torch.load('/kaggle/working/best_geo2.pth'))

# Reset optimizer with lower learning rates for fine-tuning
optimizer = optim.AdamW([
    {'params': params_backbone, 'lr': 5e-6},  # Lower LR for backbone
    {'params': params_new, 'lr': 1e-4}        # Lower LR for new layers
], weight_decay=1e-3)  # Slightly reduced weight decay

# Better LR scheduler - Cosine annealing with warm restarts
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=5, T_mult=1, eta_min=1e-7)
scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None
# Alternative: ReduceLROnPlateau (uncomment to use instead)
# scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, min_lr=1e-7, verbose=True)

# Number of additional training epochs
additional_epochs = 20
best_mse = float('inf')

# Training loop
for epoch in range(1, additional_epochs+1):
    model.train()
    running_loss = running_lat = running_lon = 0.0
    count = 0
    
    for batch in tqdm(train_loader, desc=f"Retrain Epoch {epoch}"):
        imgs = batch['image'].to(device)
        lat_s = batch['scaled_lat'].to(device)
        lon_s = batch['scaled_lon'].to(device)
        
        # Forward pass
        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
            p_lat, p_lon = model(imgs)
            loss = criterion(p_lat, p_lon, lat_s, lon_s)
        
        optimizer.zero_grad()
        if torch.cuda.is_available():
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        
        bs = imgs.size(0)
        running_loss += loss.item() * bs
        count += bs
    
    epoch_loss = running_loss / count
    print(f"Retrain Epoch {epoch} Train Loss: {epoch_loss:.6f}")
    
    # Evaluate
    model.eval()
    v_loss = 0.0
    v_count = 0
    all_preds, all_true = [], []
    
    with torch.no_grad():
        for batch in valid_loader:
            imgs = batch['image'].to(device)
            lat_s = batch['scaled_lat'].to(device)
            lon_s = batch['scaled_lon'].to(device)
            
            p_lat, p_lon = model(imgs)
            loss = criterion(p_lat, p_lon, lat_s, lon_s)
            
            # Calculate unscaled predictions for MSE
            ulat = lat_scaler.inverse_transform(p_lat.cpu().numpy().reshape(-1,1)).flatten()
            ulon = long_scaler.inverse_transform(p_lon.cpu().numpy().reshape(-1,1)).flatten()
            all_preds.append(np.vstack([ulat, ulon]).T)
            tr_lat = batch['latitude'].numpy()
            tr_lon = batch['longitude'].numpy()
            all_true.append(np.vstack([tr_lat, tr_lon]).T)
            
            bs = imgs.size(0)
            v_loss += loss.item() * bs
            v_count += bs
    
    val_loss = v_loss / v_count
    preds = np.concatenate(all_preds)
    true = np.concatenate(all_true)
    unscaled_mse = ((preds - true)**2).mean()
    
    print(f"Retrain Epoch {epoch} Val Loss: {val_loss:.6f}, Unscaled MSE: {unscaled_mse:.6f}")
    
    # Update scheduler (use this for ReduceLROnPlateau)
    # scheduler.step(unscaled_mse)
    
    # Update scheduler (use this for CosineAnnealingWarmRestarts)
    scheduler.step()
    
    # Save best model
    if unscaled_mse < best_mse:
        best_mse = unscaled_mse
        torch.save(model.state_dict(), 'best_retrained_geo2.pth')
        print(f"New best model saved with MSE: {best_mse:.6f}")
    
    torch.cuda.empty_cache()
    gc.collect()

# Load best retrained model and generate predictions
model.load_state_dict(torch.load('best_retrained_geo2.pth'))
generate_csv(best_mse)

print(f"Retraining complete. Best MSE: {best_mse:.6f}")

  model.load_state_dict(torch.load('/kaggle/working/best_geo2.pth'))
  scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 1: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s]

Retrain Epoch 1 Train Loss: 0.028745





Retrain Epoch 1 Val Loss: 0.067705, Unscaled MSE: 37737.519531
New best model saved with MSE: 37737.519531


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 2: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 2 Train Loss: 0.026634





Retrain Epoch 2 Val Loss: 0.052268, Unscaled MSE: 28238.128906
New best model saved with MSE: 28238.128906


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 3: 100%|██████████| 405/405 [01:45<00:00,  3.83it/s]

Retrain Epoch 3 Train Loss: 0.024891





Retrain Epoch 3 Val Loss: 0.054874, Unscaled MSE: 30919.587891


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 4: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s]

Retrain Epoch 4 Train Loss: 0.023314





Retrain Epoch 4 Val Loss: 0.049093, Unscaled MSE: 26863.105469
New best model saved with MSE: 26863.105469


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 5: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 5 Train Loss: 0.021386





Retrain Epoch 5 Val Loss: 0.048936, Unscaled MSE: 26670.064453
New best model saved with MSE: 26670.064453


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 6: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s]

Retrain Epoch 6 Train Loss: 0.025166





Retrain Epoch 6 Val Loss: 0.065952, Unscaled MSE: 37581.089844


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 7: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 7 Train Loss: 0.025610





Retrain Epoch 7 Val Loss: 0.055263, Unscaled MSE: 31331.394531


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 8: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 8 Train Loss: 0.023573





Retrain Epoch 8 Val Loss: 0.047829, Unscaled MSE: 26281.480469
New best model saved with MSE: 26281.480469


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 9: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 9 Train Loss: 0.020808





Retrain Epoch 9 Val Loss: 0.045826, Unscaled MSE: 25049.507812
New best model saved with MSE: 25049.507812


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 10: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 10 Train Loss: 0.019348





Retrain Epoch 10 Val Loss: 0.044690, Unscaled MSE: 24359.527344
New best model saved with MSE: 24359.527344


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 11: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s]

Retrain Epoch 11 Train Loss: 0.021348





Retrain Epoch 11 Val Loss: 0.053681, Unscaled MSE: 28868.271484


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 12: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s]

Retrain Epoch 12 Train Loss: 0.022080





Retrain Epoch 12 Val Loss: 0.048114, Unscaled MSE: 26465.292969


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 13: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s]

Retrain Epoch 13 Train Loss: 0.019360





Retrain Epoch 13 Val Loss: 0.041927, Unscaled MSE: 22757.236328
New best model saved with MSE: 22757.236328


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 14: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s]

Retrain Epoch 14 Train Loss: 0.017940





Retrain Epoch 14 Val Loss: 0.044041, Unscaled MSE: 23836.271484


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 15: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 15 Train Loss: 0.016570





Retrain Epoch 15 Val Loss: 0.042766, Unscaled MSE: 23037.937500


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 16: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 16 Train Loss: 0.020770





Retrain Epoch 16 Val Loss: 0.046596, Unscaled MSE: 25304.138672


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 17: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 17 Train Loss: 0.020016





Retrain Epoch 17 Val Loss: 0.049322, Unscaled MSE: 26713.115234


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 18: 100%|██████████| 405/405 [01:45<00:00,  3.85it/s]

Retrain Epoch 18 Train Loss: 0.017772





Retrain Epoch 18 Val Loss: 0.051159, Unscaled MSE: 28460.939453


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 19: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 19 Train Loss: 0.017593





Retrain Epoch 19 Val Loss: 0.048611, Unscaled MSE: 26569.818359


  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
Retrain Epoch 20: 100%|██████████| 405/405 [01:45<00:00,  3.84it/s]

Retrain Epoch 20 Train Loss: 0.016256





Retrain Epoch 20 Val Loss: 0.047211, Unscaled MSE: 25936.833984


  model.load_state_dict(torch.load('best_retrained_geo2.pth'))



--- Starting CSV Generation ---
Valid loader dataset size: 362


Processing validation data: 100%|██████████| 12/12 [00:05<00:00,  2.30it/s]


Processed 362 validation images, added 362 unique IDs
Found 369 files in test directory
Detected 'img_' prefix in test files
Created mapping for 369 test files
Sample mapping: [{'filename': 'img_0000.jpg', 'image_id': 369}, {'filename': 'img_0001.jpg', 'image_id': 370}, {'filename': 'img_0002.jpg', 'image_id': 371}]
Created test dataloader with 369 images


Processing test data: 100%|██████████| 12/12 [00:05<00:00,  2.24it/s]

Processed 369 test images

CSV saved with 731 rows:
- 362 validation entries
- 369 test entries
- 0 placeholder entries for missing test IDs
Best MSE: 22757.236328
Retraining complete. Best MSE: 22757.236328





# Final code :- angle using only images and not Region_ID

In [13]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import pandas as pd
import numpy as np
import timm
import math
import random
from torch.cuda.amp import GradScaler, autocast

# -----------------------
# Paths and Globals
# -----------------------
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 96     # increased batch size
EPOCHS = 60        # more epochs
BASE_LR = 5e-4      # higher learning rate
WEIGHT_DECAY = 1e-4 # added weight decay
SEED = 42           # added seed for reproducibility

TRAIN_CSV = '/kaggle/input/iiith-images-latlong-smai/cleaned_data_train.csv'
TRAIN_IMG_DIR = '/kaggle/input/iiith-images-latlong-smai/images_train/images_train/images_train/'
VAL_CSV = '/kaggle/input/iiith-images-latlong-smai/labels_val_updated.csv'
VAL_IMG_DIR = '/kaggle/input/iiith-images-latlong-smai/images_val/images_val/'
MODEL_PATH = '/kaggle/working/efficientnet_angle_regressor.pt'

# Set seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

set_seed(SEED)

# -----------------------
# Dataset - REMOVED Region_ID dependency
# -----------------------
class CampusDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None, is_val=False):
        df = pd.read_csv(csv_file)
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.is_val = is_val
        if is_val:
            self.df['idx'] = self.df.index
        # Convert angles to radians for smoother learning
        self.df['angle_rad'] = self.df['angle'] * (math.pi / 180.0)
        # Create sin and cos components for circular regression - ensure float32
        self.df['sin_angle'] = np.sin(self.df['angle_rad']).astype(np.float32)
        self.df['cos_angle'] = np.cos(self.df['angle_rad']).astype(np.float32)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row['filename'])
        img = Image.open(img_path).convert('RGB')
        
        if self.transform:
            img = self.transform(img)
            
        angle = float(row['angle'])
        sin_angle = float(row['sin_angle'])
        cos_angle = float(row['cos_angle'])
        
        if self.is_val:
            return img, torch.tensor(angle, dtype=torch.float32), torch.tensor(sin_angle, dtype=torch.float32), torch.tensor(cos_angle, dtype=torch.float32), torch.tensor(int(row['idx']), dtype=torch.long)
        return img, torch.tensor(angle, dtype=torch.float32), torch.tensor(sin_angle, dtype=torch.float32), torch.tensor(cos_angle, dtype=torch.float32)

# -----------------------
# Augmentations - Same as original
# -----------------------
train_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.2)
])

val_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# -----------------------
# Model: EfficientNet WITHOUT Region Conditioning
# -----------------------
class AngleRegressor(nn.Module):
    def __init__(self):
        super().__init__()
        # Use EfficientNet B0 as backbone
        self.backbone = timm.create_model('efficientnet_b0', pretrained=True, features_only=True)
        
        # Extract feature dimensions from the backbone
        dummy_input = torch.zeros(1, 3, 224, 224)
        features = self.backbone(dummy_input)
        feature_dim = features[-1].shape[1]  # Last feature map channels
        
        # Global average pooling
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        
        # Feature processing
        self.features = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.BatchNorm1d(512),
            nn.SiLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.SiLU(),
            nn.Dropout(0.2)
        )
        
        # Sin/Cos prediction - circular regression approach
        self.head = nn.Linear(256, 2)
        
        # Enable gradient checkpointing if available
        if hasattr(self.backbone, 'gradient_checkpointing_enable'):
            self.backbone.gradient_checkpointing_enable()

    def forward(self, x):
        features = self.backbone(x)
        x = self.global_pool(features[-1]).squeeze(-1).squeeze(-1)
        
        # Process features
        x = self.features(x)
        
        # Predict sin and cos components
        sin_cos = self.head(x)
        sin_pred, cos_pred = sin_cos.split(1, dim=1)
        
        # Normalize the output to ensure it falls on the unit circle - ensure float32
        norm = torch.sqrt(sin_pred**2 + cos_pred**2) + 1e-8
        sin_norm = sin_pred / norm
        cos_norm = cos_pred / norm
        
        # Convert to angle in degrees
        angle = torch.atan2(sin_norm, cos_norm) * (180.0 / torch.tensor(math.pi, dtype=torch.float32, device=sin_pred.device))
        # Ensure angle is in [0, 360)
        angle = (angle + 360) % 360
        
        return angle.squeeze(1), sin_norm.squeeze(1), cos_norm.squeeze(1)

# -----------------------
# Loss Functions - Same as original
# -----------------------
def circle_loss(sin_pred, cos_pred, sin_true, cos_true):
    # MSE loss between the normalized sin and cos components
    return nn.MSELoss()(sin_pred, sin_true) + nn.MSELoss()(cos_pred, cos_true)

def maae_loss(pred, true):
    diff = torch.abs(pred - true)
    return torch.mean(torch.min(diff, 360 - diff))

# -----------------------
# Mixup Implementation - Modified to remove region_id dependency
# -----------------------
def mixup_data(x, y_angle, y_sin, y_cos, alpha=0.2):
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    lam = float(lam)  # Ensure lam is a float32 compatible value
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index]
    
    # We mix angles in the sin/cos space to handle the circular nature
    mixed_sin = lam * y_sin + (1 - lam) * y_sin[index]
    mixed_cos = lam * y_cos + (1 - lam) * y_cos[index]
    
    # Reconstruct angle from sin/cos - ensure torch.float32
    mixed_angle = torch.atan2(mixed_sin, mixed_cos) * (180.0 / math.pi)
    mixed_angle = (mixed_angle + 360) % 360
    
    return mixed_x, mixed_angle, mixed_sin, mixed_cos, index, lam

# -----------------------
# DataLoaders
# -----------------------
train_ds = CampusDataset(TRAIN_CSV, TRAIN_IMG_DIR, transform=train_tf)
val_ds = CampusDataset(VAL_CSV, VAL_IMG_DIR, transform=val_tf, is_val=True)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)

# -----------------------
# Model, Optimizer, AMP, Scheduler
# -----------------------
model = AngleRegressor().to(DEVICE)
optimizer = optim.AdamW(model.parameters(), lr=BASE_LR, weight_decay=WEIGHT_DECAY)
scaler = GradScaler()

# OneCycleLR for faster convergence
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer, 
    max_lr=BASE_LR,
    steps_per_epoch=len(train_loader),
    epochs=EPOCHS,
    pct_start=0.1,
    div_factor=10.0,
    final_div_factor=1000.0
)

# -----------------------
# EMA Model (Exponential Moving Average for better stability) - Same as original
# -----------------------
class EMA():
    def __init__(self, model, decay=0.999):
        self.model = model
        self.decay = decay
        self.shadow = {}
        self.backup = {}
        
    def register(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = param.data.clone()
                
    def update(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.shadow
                new_average = self.decay * self.shadow[name] + (1.0 - self.decay) * param.data
                self.shadow[name] = new_average.clone()
                
    def apply_shadow(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.shadow
                self.backup[name] = param.data
                param.data = self.shadow[name]
                
    def restore(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.backup
                param.data = self.backup[name]
        self.backup = {}

# Initialize EMA
ema = EMA(model, decay=0.998)
ema.register()

# -----------------------
# Training & Validation - Modified to remove region_id dependency
# -----------------------
best_maae = float('inf')
val_maae_history = []
train_loss_history = []

print(f"Training on {DEVICE} with {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")
print(f"Model: EfficientNet B0 Image-Only")
print(f"Batch Size: {BATCH_SIZE}, Epochs: {EPOCHS}, Base LR: {BASE_LR}")

for epoch in range(1, EPOCHS+1):
    # Train
    model.train()
    train_loss = 0
    train_angle_loss = 0
    train_circle_loss = 0
    
    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch}/{EPOCHS} [Train]')
    for imgs, angles, sin_angles, cos_angles in progress_bar:
        imgs = imgs.to(DEVICE)
        angles = angles.to(DEVICE)
        sin_angles = sin_angles.to(DEVICE)
        cos_angles = cos_angles.to(DEVICE)

        # Apply mixup with 50% probability
        if random.random() < 0.5:
            imgs, mixed_angles, mixed_sin, mixed_cos, _, _ = mixup_data(
                imgs, angles, sin_angles, cos_angles
            )
            sin_angles, cos_angles = mixed_sin, mixed_cos
            angles = mixed_angles

        optimizer.zero_grad()
        
        with autocast():
            pred_angles, pred_sin, pred_cos = model(imgs)
            
            # Combined loss: angle MAAE + sin-cos circle loss
            angle_loss = maae_loss(pred_angles, angles)
            circ_loss = circle_loss(pred_sin, pred_cos, sin_angles, cos_angles)
            loss = angle_loss * 0.5 + circ_loss * 0.5
            
        scaler.scale(loss).backward()
        
        # Gradient clipping
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        
        # Update EMA model
        ema.update()
        
        train_loss += loss.item() * imgs.size(0)
        train_angle_loss += angle_loss.item() * imgs.size(0)
        train_circle_loss += circ_loss.item() * imgs.size(0)
        
        # Update progress bar
        progress_bar.set_postfix({
            'loss': f"{loss.item():.4f}", 
            'angle_loss': f"{angle_loss.item():.4f}",
            'lr': f"{optimizer.param_groups[0]['lr']:.6f}"
        })
    
    train_loss /= len(train_loader.dataset)
    train_angle_loss /= len(train_loader.dataset)
    train_circle_loss /= len(train_loader.dataset)
    train_loss_history.append(train_loss)
    
    # Validate with EMA model
    ema.apply_shadow()
    model.eval()
    val_loss = 0
    all_preds, all_trues, all_indices = [], [], []
    
    with torch.no_grad():
        progress_bar = tqdm(val_loader, desc=f'Epoch {epoch}/{EPOCHS} [Val]')
        for imgs, angles, sin_angles, cos_angles, indices in progress_bar:
            imgs = imgs.to(DEVICE)
            angles = angles.to(DEVICE)
            sin_angles = sin_angles.to(DEVICE)
            cos_angles = cos_angles.to(DEVICE)
            
            pred_angles, pred_sin, pred_cos = model(imgs)
            angle_loss = maae_loss(pred_angles, angles)
            val_loss += angle_loss.item() * imgs.size(0)
            
            all_preds.append(pred_angles.cpu().numpy())
            all_trues.append(angles.cpu().numpy())
            all_indices.append(indices.numpy())
            
            # Update progress bar
            progress_bar.set_postfix({'val_loss': f"{angle_loss.item():.4f}"})
    
    # Restore original model
    ema.restore()
            
    val_loss /= len(val_loader.dataset)
    preds = np.concatenate(all_preds)
    trues = np.concatenate(all_trues)
    indices = np.concatenate(all_indices)
    
    # Calculate MAAE (Mean Absolute Angular Error)
    val_maae = np.mean(np.minimum(np.abs(preds-trues), 360-np.abs(preds-trues)))
    val_maae_history.append(val_maae)
    
    print(f"Epoch {epoch}/{EPOCHS}")
    print(f"  Train Loss: {train_loss:.4f} (Angle: {train_angle_loss:.4f}, Circle: {train_circle_loss:.4f})")
    print(f"  Val MAAE: {val_maae:.4f}")
    
    # Save best model
    if val_maae < best_maae:
        best_maae = val_maae
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'ema_shadow': ema.shadow,
            'optimizer_state_dict': optimizer.state_dict(),
            'val_maae': val_maae,
            'best_maae': best_maae,
        }, MODEL_PATH)
        print(f"  Saved best model (MAAE: {best_maae:.4f})")

print(f"\nTraining completed!")
print(f"Best validation MAAE: {best_maae:.4f}")

# -----------------------
# Plot training history - Same as original
# -----------------------
try:
    import matplotlib.pyplot as plt
    
    plt.figure(figsize=(12, 5))
    
    plt.subplot(1, 2, 1)
    plt.plot(range(1, EPOCHS+1), train_loss_history, label='Train Loss')
    plt.title('Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.grid(True)
    
    plt.subplot(1, 2, 2)
    plt.plot(range(1, EPOCHS+1), val_maae_history, label='Val MAAE')
    plt.axhline(y=best_maae, color='r', linestyle='--', label=f'Best MAAE: {best_maae:.4f}')
    plt.title('Validation MAAE')
    plt.xlabel('Epoch')
    plt.ylabel('MAAE (degrees)')
    plt.grid(True)
    plt.legend()
    
    plt.tight_layout()
    plt.close()
except:
    print("Could not generate training history plot.")
    

  scaler = GradScaler()


Training on cuda with Tesla T4
Model: EfficientNet B0 Image-Only
Batch Size: 96, Epochs: 60, Base LR: 0.0005


  with autocast():
Epoch 1/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.02it/s, loss=47.6558, angle_loss=93.2566, lr=0.000080]
Epoch 1/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=96.3957]


Epoch 1/60
  Train Loss: 45.1741 (Angle: 88.4477, Circle: 1.9006)
  Val MAAE: 90.2269
  Saved best model (MAAE: 90.2269)


Epoch 2/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.01it/s, loss=41.3328, angle_loss=80.8589, lr=0.000163]
Epoch 2/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.57it/s, val_loss=91.1004]


Epoch 2/60
  Train Loss: 41.2569 (Angle: 80.8316, Circle: 1.6822)
  Val MAAE: 90.5432


Epoch 3/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.05it/s, loss=39.2201, angle_loss=76.8418, lr=0.000276]
Epoch 3/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.60it/s, val_loss=97.1045]


Epoch 3/60
  Train Loss: 37.6660 (Angle: 73.8294, Circle: 1.5026)
  Val MAAE: 88.2815
  Saved best model (MAAE: 88.2815)


Epoch 4/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.04it/s, loss=35.8647, angle_loss=70.3229, lr=0.000389]
Epoch 4/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=91.0350]


Epoch 4/60
  Train Loss: 34.7292 (Angle: 68.1236, Circle: 1.3349)
  Val MAAE: 82.3251
  Saved best model (MAAE: 82.3251)


Epoch 5/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.06it/s, loss=31.6362, angle_loss=62.0145, lr=0.000471]
Epoch 5/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=81.9334]


Epoch 5/60
  Train Loss: 32.5006 (Angle: 63.7585, Circle: 1.2426)
  Val MAAE: 79.1243
  Saved best model (MAAE: 79.1243)


Epoch 7/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.04it/s, loss=35.8391, angle_loss=70.2102, lr=0.000500]
Epoch 7/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.48it/s, val_loss=87.5187]


Epoch 7/60
  Train Loss: 27.8058 (Angle: 54.5938, Circle: 1.0178)
  Val MAAE: 79.8383


Epoch 8/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.06it/s, loss=26.3826, angle_loss=51.8686, lr=0.000498]
Epoch 8/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.58it/s, val_loss=87.1834]


Epoch 8/60
  Train Loss: 25.8825 (Angle: 50.8454, Circle: 0.9195)
  Val MAAE: 80.7491


Epoch 9/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=26.0094, angle_loss=51.1406, lr=0.000496]
Epoch 9/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.63it/s, val_loss=82.7574]


Epoch 9/60
  Train Loss: 24.6198 (Angle: 48.3803, Circle: 0.8593)
  Val MAAE: 74.0406
  Saved best model (MAAE: 74.0406)


Epoch 10/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.02it/s, loss=18.8700, angle_loss=37.1217, lr=0.000493]
Epoch 10/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.65it/s, val_loss=71.3560]


Epoch 10/60
  Train Loss: 22.8172 (Angle: 44.8516, Circle: 0.7828)
  Val MAAE: 67.8933
  Saved best model (MAAE: 67.8933)


Epoch 11/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.03it/s, loss=20.6200, angle_loss=40.5470, lr=0.000489]
Epoch 11/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.62it/s, val_loss=74.6027]


Epoch 11/60
  Train Loss: 21.8532 (Angle: 42.9825, Circle: 0.7239)
  Val MAAE: 66.3289
  Saved best model (MAAE: 66.3289)


Epoch 12/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.04it/s, loss=19.2256, angle_loss=37.8410, lr=0.000485]
Epoch 12/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.63it/s, val_loss=76.0772]


Epoch 12/60
  Train Loss: 21.1968 (Angle: 41.7035, Circle: 0.6900)
  Val MAAE: 64.7201
  Saved best model (MAAE: 64.7201)


Epoch 13/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=17.4922, angle_loss=34.4151, lr=0.000479]
Epoch 13/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=67.4819]


Epoch 13/60
  Train Loss: 19.4300 (Angle: 38.2368, Circle: 0.6232)
  Val MAAE: 59.1580
  Saved best model (MAAE: 59.1580)


Epoch 14/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.09it/s, loss=20.0971, angle_loss=39.4822, lr=0.000473]
Epoch 14/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.62it/s, val_loss=62.2910]


Epoch 14/60
  Train Loss: 18.2845 (Angle: 35.9996, Circle: 0.5695)
  Val MAAE: 55.6432
  Saved best model (MAAE: 55.6432)


Epoch 15/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.04it/s, loss=23.1690, angle_loss=45.5119, lr=0.000466]
Epoch 15/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.63it/s, val_loss=59.0864]


Epoch 15/60
  Train Loss: 17.2116 (Angle: 33.9060, Circle: 0.5172)
  Val MAAE: 52.1176
  Saved best model (MAAE: 52.1176)


Epoch 16/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=10.3336, angle_loss=20.4826, lr=0.000459]
Epoch 16/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.63it/s, val_loss=47.1095]


Epoch 16/60
  Train Loss: 16.0727 (Angle: 31.6818, Circle: 0.4636)
  Val MAAE: 48.0302
  Saved best model (MAAE: 48.0302)


Epoch 17/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=15.1039, angle_loss=29.7915, lr=0.000450]
Epoch 17/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=47.2622]


Epoch 17/60
  Train Loss: 15.8186 (Angle: 31.1835, Circle: 0.4537)
  Val MAAE: 48.7463


Epoch 18/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.06it/s, loss=16.4535, angle_loss=32.4478, lr=0.000441]
Epoch 18/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.58it/s, val_loss=50.8076]


Epoch 18/60
  Train Loss: 14.8277 (Angle: 29.2454, Circle: 0.4100)
  Val MAAE: 52.7694


Epoch 19/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=15.0118, angle_loss=29.6045, lr=0.000432]
Epoch 19/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.24it/s, val_loss=47.2585]


Epoch 19/60
  Train Loss: 14.0868 (Angle: 27.7894, Circle: 0.3842)
  Val MAAE: 48.8718


Epoch 20/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=15.5221, angle_loss=30.6018, lr=0.000421]
Epoch 20/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.51it/s, val_loss=39.2212]


Epoch 20/60
  Train Loss: 13.3433 (Angle: 26.3305, Circle: 0.3562)
  Val MAAE: 41.8666
  Saved best model (MAAE: 41.8666)


Epoch 21/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.06it/s, loss=10.8795, angle_loss=21.5214, lr=0.000411]
Epoch 21/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.63it/s, val_loss=35.4735]


Epoch 21/60
  Train Loss: 13.3919 (Angle: 26.4373, Circle: 0.3464)
  Val MAAE: 39.4482
  Saved best model (MAAE: 39.4482)


Epoch 22/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=10.6234, angle_loss=20.9554, lr=0.000399]
Epoch 22/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.57it/s, val_loss=38.0022]


Epoch 22/60
  Train Loss: 13.4117 (Angle: 26.4721, Circle: 0.3513)
  Val MAAE: 42.1670


Epoch 23/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.06it/s, loss=9.1587, angle_loss=18.1404, lr=0.000387] 
Epoch 23/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=36.8472]


Epoch 23/60
  Train Loss: 11.7994 (Angle: 23.3041, Circle: 0.2948)
  Val MAAE: 41.7416


Epoch 24/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.04it/s, loss=10.3291, angle_loss=20.4388, lr=0.000375]
Epoch 24/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.60it/s, val_loss=33.1920]


Epoch 24/60
  Train Loss: 11.3187 (Angle: 22.3586, Circle: 0.2788)
  Val MAAE: 37.8459
  Saved best model (MAAE: 37.8459)


Epoch 25/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.05it/s, loss=22.4987, angle_loss=44.3272, lr=0.000362]
Epoch 25/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.63it/s, val_loss=34.8263]


Epoch 25/60
  Train Loss: 11.0513 (Angle: 21.8350, Circle: 0.2675)
  Val MAAE: 38.6658


Epoch 26/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.05it/s, loss=7.7331, angle_loss=15.3434, lr=0.000349] 
Epoch 26/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.54it/s, val_loss=31.0665]


Epoch 26/60
  Train Loss: 10.2820 (Angle: 20.3236, Circle: 0.2403)
  Val MAAE: 36.3232
  Saved best model (MAAE: 36.3232)


Epoch 27/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.09it/s, loss=9.5151, angle_loss=18.8267, lr=0.000335] 
Epoch 27/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.62it/s, val_loss=30.9623]


Epoch 27/60
  Train Loss: 10.3507 (Angle: 20.4592, Circle: 0.2422)
  Val MAAE: 37.9068


Epoch 28/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.04it/s, loss=6.9884, angle_loss=13.8750, lr=0.000322] 
Epoch 28/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.60it/s, val_loss=27.2174]


Epoch 28/60
  Train Loss: 9.9855 (Angle: 19.7440, Circle: 0.2270)
  Val MAAE: 34.7621
  Saved best model (MAAE: 34.7621)


Epoch 29/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=7.4522, angle_loss=14.7707, lr=0.000307] 
Epoch 29/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.68it/s, val_loss=25.8492]


Epoch 29/60
  Train Loss: 10.0012 (Angle: 19.7753, Circle: 0.2270)
  Val MAAE: 33.4927
  Saved best model (MAAE: 33.4927)


Epoch 30/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=13.8425, angle_loss=27.1840, lr=0.000293]
Epoch 30/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=26.2866]


Epoch 30/60
  Train Loss: 8.9739 (Angle: 17.7501, Circle: 0.1976)
  Val MAAE: 34.2366


Epoch 31/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.08it/s, loss=7.2895, angle_loss=14.4527, lr=0.000279] 
Epoch 31/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.64it/s, val_loss=26.3812]


Epoch 31/60
  Train Loss: 8.3786 (Angle: 16.5822, Circle: 0.1750)
  Val MAAE: 34.1375


Epoch 32/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.04it/s, loss=6.0154, angle_loss=11.9560, lr=0.000264] 
Epoch 32/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.64it/s, val_loss=24.6930]


Epoch 32/60
  Train Loss: 8.6595 (Angle: 17.1356, Circle: 0.1834)
  Val MAAE: 32.4656
  Saved best model (MAAE: 32.4656)


Epoch 33/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.06it/s, loss=9.7122, angle_loss=19.2269, lr=0.000250] 
Epoch 33/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.27it/s, val_loss=24.1219]


Epoch 33/60
  Train Loss: 8.1102 (Angle: 16.0555, Circle: 0.1648)
  Val MAAE: 32.4749


Epoch 34/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.06it/s, loss=8.8437, angle_loss=17.5112, lr=0.000235] 
Epoch 34/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=23.7033]


Epoch 34/60
  Train Loss: 7.4151 (Angle: 14.6884, Circle: 0.1419)
  Val MAAE: 32.0336
  Saved best model (MAAE: 32.0336)


Epoch 35/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.06it/s, loss=6.0978, angle_loss=12.1116, lr=0.000221] 
Epoch 35/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.53it/s, val_loss=25.4083]


Epoch 35/60
  Train Loss: 7.8016 (Angle: 15.4454, Circle: 0.1577)
  Val MAAE: 31.5983
  Saved best model (MAAE: 31.5983)


Epoch 36/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.06it/s, loss=10.5728, angle_loss=20.9054, lr=0.000206]
Epoch 36/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.63it/s, val_loss=25.3514]


Epoch 36/60
  Train Loss: 8.2009 (Angle: 16.2335, Circle: 0.1683)
  Val MAAE: 31.2960
  Saved best model (MAAE: 31.2960)


Epoch 37/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.04it/s, loss=15.6646, angle_loss=30.8549, lr=0.000192]
Epoch 37/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.58it/s, val_loss=24.0141]


Epoch 37/60
  Train Loss: 7.6698 (Angle: 15.1822, Circle: 0.1573)
  Val MAAE: 31.2344
  Saved best model (MAAE: 31.2344)


Epoch 38/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.09it/s, loss=10.1972, angle_loss=20.0752, lr=0.000178]
Epoch 38/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.56it/s, val_loss=25.0896]


Epoch 38/60
  Train Loss: 8.4913 (Angle: 16.7991, Circle: 0.1834)
  Val MAAE: 30.6204
  Saved best model (MAAE: 30.6204)


Epoch 39/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.05it/s, loss=6.9582, angle_loss=13.8051, lr=0.000164] 
Epoch 39/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.56it/s, val_loss=25.7476]


Epoch 39/60
  Train Loss: 6.8743 (Angle: 13.6186, Circle: 0.1301)
  Val MAAE: 30.1967
  Saved best model (MAAE: 30.1967)


Epoch 40/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.08it/s, loss=5.2556, angle_loss=10.4453, lr=0.000151] 
Epoch 40/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.53it/s, val_loss=26.9776]


Epoch 40/60
  Train Loss: 5.8241 (Angle: 11.5509, Circle: 0.0973)
  Val MAAE: 30.5645


Epoch 41/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.08it/s, loss=14.0469, angle_loss=27.7194, lr=0.000138]
Epoch 41/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.60it/s, val_loss=26.1816]


Epoch 41/60
  Train Loss: 5.6222 (Angle: 11.1502, Circle: 0.0943)
  Val MAAE: 30.3598


Epoch 42/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.05it/s, loss=4.1206, angle_loss=8.2090, lr=0.000125]  
Epoch 42/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.54it/s, val_loss=24.3134]


Epoch 42/60
  Train Loss: 7.0039 (Angle: 13.8750, Circle: 0.1329)
  Val MAAE: 30.5326


Epoch 43/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.04it/s, loss=4.7743, angle_loss=9.4947, lr=0.000112]  
Epoch 43/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=24.4236]


Epoch 43/60
  Train Loss: 6.0282 (Angle: 11.9496, Circle: 0.1069)
  Val MAAE: 29.3808
  Saved best model (MAAE: 29.3808)


Epoch 44/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.05it/s, loss=20.3997, angle_loss=40.1639, lr=0.000101]
Epoch 44/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.62it/s, val_loss=23.9655]


Epoch 44/60
  Train Loss: 6.8940 (Angle: 13.6548, Circle: 0.1333)
  Val MAAE: 29.8314


Epoch 45/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.09it/s, loss=5.7809, angle_loss=11.4791, lr=0.000089] 
Epoch 45/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=24.3756]


Epoch 45/60
  Train Loss: 6.9232 (Angle: 13.7102, Circle: 0.1362)
  Val MAAE: 29.2214
  Saved best model (MAAE: 29.2214)


Epoch 46/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=8.6972, angle_loss=17.1870, lr=0.000078] 
Epoch 46/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.44it/s, val_loss=24.3276]


Epoch 46/60
  Train Loss: 6.9389 (Angle: 13.7382, Circle: 0.1397)
  Val MAAE: 30.2040


Epoch 47/60 [Train]: 100%|██████████| 68/68 [00:33<00:00,  2.05it/s, loss=7.8051, angle_loss=15.4799, lr=0.000068] 
Epoch 47/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.62it/s, val_loss=24.7095]


Epoch 47/60
  Train Loss: 6.5635 (Angle: 12.9998, Circle: 0.1272)
  Val MAAE: 29.0273
  Saved best model (MAAE: 29.0273)


Epoch 48/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.08it/s, loss=3.8697, angle_loss=7.7045, lr=0.000058]  
Epoch 48/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.58it/s, val_loss=24.0946]


Epoch 48/60
  Train Loss: 6.2232 (Angle: 12.3271, Circle: 0.1193)
  Val MAAE: 28.9008
  Saved best model (MAAE: 28.9008)


Epoch 49/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=5.2557, angle_loss=10.3958, lr=0.000049] 
Epoch 49/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.58it/s, val_loss=23.4897]


Epoch 49/60
  Train Loss: 6.0028 (Angle: 11.8928, Circle: 0.1129)
  Val MAAE: 29.8130


Epoch 50/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.08it/s, loss=6.7962, angle_loss=13.4089, lr=0.000041] 
Epoch 50/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.61it/s, val_loss=24.2816]


Epoch 50/60
  Train Loss: 6.1097 (Angle: 12.1029, Circle: 0.1166)
  Val MAAE: 29.6281


Epoch 51/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.09it/s, loss=28.2478, angle_loss=55.5473, lr=0.000033]
Epoch 51/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.57it/s, val_loss=23.3980]


Epoch 51/60
  Train Loss: 5.2991 (Angle: 10.5038, Circle: 0.0944)
  Val MAAE: 29.9089


Epoch 52/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.09it/s, loss=4.6913, angle_loss=9.3390, lr=0.000027]  
Epoch 52/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.58it/s, val_loss=23.4580]


Epoch 52/60
  Train Loss: 5.6719 (Angle: 11.2447, Circle: 0.0990)
  Val MAAE: 29.2683


Epoch 53/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=5.1557, angle_loss=10.2575, lr=0.000020] 
Epoch 53/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.62it/s, val_loss=23.3829]


Epoch 53/60
  Train Loss: 4.5634 (Angle: 9.0585, Circle: 0.0682)
  Val MAAE: 28.7269
  Saved best model (MAAE: 28.7269)


Epoch 54/60 [Train]: 100%|██████████| 68/68 [00:34<00:00,  1.99it/s, loss=7.0336, angle_loss=13.9154, lr=0.000015] 
Epoch 54/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.59it/s, val_loss=23.4525]


Epoch 54/60
  Train Loss: 4.9430 (Angle: 9.8072, Circle: 0.0788)
  Val MAAE: 28.7971


Epoch 55/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=7.7289, angle_loss=15.2837, lr=0.000010] 
Epoch 55/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.67it/s, val_loss=23.0305]


Epoch 55/60
  Train Loss: 5.6133 (Angle: 11.1278, Circle: 0.0989)
  Val MAAE: 28.4882
  Saved best model (MAAE: 28.4882)


Epoch 56/60 [Train]: 100%|██████████| 68/68 [00:36<00:00,  1.86it/s, loss=7.5578, angle_loss=14.9532, lr=0.000007] 
Epoch 56/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.49it/s, val_loss=22.9142]


Epoch 56/60
  Train Loss: 4.4992 (Angle: 8.9330, Circle: 0.0654)
  Val MAAE: 27.9167
  Saved best model (MAAE: 27.9167)


Epoch 57/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.10it/s, loss=6.5487, angle_loss=12.9489, lr=0.000004] 
Epoch 57/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.63it/s, val_loss=22.6347]


Epoch 57/60
  Train Loss: 4.9105 (Angle: 9.7400, Circle: 0.0809)
  Val MAAE: 28.0832


Epoch 58/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.09it/s, loss=5.2121, angle_loss=10.3680, lr=0.000002] 
Epoch 58/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.63it/s, val_loss=22.6544]


Epoch 58/60
  Train Loss: 5.9642 (Angle: 11.8162, Circle: 0.1123)
  Val MAAE: 29.0653


Epoch 59/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.07it/s, loss=8.4401, angle_loss=16.6733, lr=0.000000] 
Epoch 59/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.28it/s, val_loss=22.3818]


Epoch 59/60
  Train Loss: 5.1313 (Angle: 10.1734, Circle: 0.0891)
  Val MAAE: 28.6878


Epoch 60/60 [Train]: 100%|██████████| 68/68 [00:32<00:00,  2.09it/s, loss=4.3060, angle_loss=8.5531, lr=0.000000]  
Epoch 60/60 [Val]: 100%|██████████| 4/4 [00:01<00:00,  2.62it/s, val_loss=22.7880]


Epoch 60/60
  Train Loss: 5.7940 (Angle: 11.4813, Circle: 0.1067)
  Val MAAE: 27.8772
  Saved best model (MAAE: 27.8772)

Training completed!
Best validation MAAE: 27.8772


## Predicting and saving in csv

In [16]:
import os
import numpy as np
import pandas as pd
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm

# ---- Paths ----
VAL_CSV       = '/kaggle/input/iiith-images-latlong-smai/labels_val_updated.csv'
VAL_IMG_DIR   = '/kaggle/input/iiith-images-latlong-smai/images_val/images_val'
TEST_IMG_DIR  = '/kaggle/input/iiith-images-latlong-smai/images_test/images_test'
ANGLE_MODEL_PATH = '/kaggle/working/efficientnet_angle_regressor.pt'
OUTPUT_CSV    = '/kaggle/working/final_predictions.csv'

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ---- Transforms ----
val_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# ---- TestImageDataset (same) ----
class TestImageDataset(Dataset):
    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.files = sorted(os.listdir(img_dir))
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img = Image.open(os.path.join(self.img_dir, self.files[idx])).convert('RGB')
        if self.transform: img = self.transform(img)
        return img, idx

# ---- Load Angle Regressor ----
angle_model = AngleRegressor()
ckpt = torch.load(ANGLE_MODEL_PATH, map_location=DEVICE)
angle_model.load_state_dict(ckpt['model_state_dict'])
angle_model.to(DEVICE).eval()
# apply EMA if present
if 'ema_shadow' in ckpt:
    for n, p in angle_model.named_parameters():
        if n in ckpt['ema_shadow']:
            p.data = ckpt['ema_shadow'][n].clone()

# ---- Dataloaders ----
val_ds  = CampusDataset(VAL_CSV, VAL_IMG_DIR, transform=val_tf, is_val=True)
test_ds = TestImageDataset(TEST_IMG_DIR, transform=val_tf)
val_loader  = DataLoader(val_ds,  batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=4)

# ---- Predict Function ----
def predict_angles(angle_model, dataloader):
    preds = np.zeros(len(dataloader.dataset), dtype=np.float32)
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Predicting"):
            # Unpack img batch and index batch regardless of val vs test
            if len(batch) == 2:
                imgs, indices = batch               # test_loader: (img, idx)
            else:
                imgs, *_, indices = batch           # val_loader: (img, angle, sin, cos, idx)
            
            imgs = imgs.to(DEVICE)

            # Predict angles
            ang, _, _ = angle_model(imgs)

            # Store into preds array
            for i, idx in enumerate(indices):
                preds[idx] = ang[i].item()
    return preds

# ---- Run Prediction & Save ----
val_preds  = predict_angles(angle_model, val_loader)
test_preds = predict_angles(angle_model, test_loader)

# combine with correct indexing
all_preds = np.concatenate([val_preds, test_preds], axis=0)
df = pd.DataFrame({'id': np.arange(len(all_preds)), 'angle': all_preds})
df.to_csv(OUTPUT_CSV, index=False)

print(f" Saved predictions to {OUTPUT_CSV}")
print(f" Val (0–{len(val_preds)-1}): mean={val_preds.mean():.2f}, min={val_preds.min():.2f}, max={val_preds.max():.2f}")
print(f" Test ({len(val_preds)}–{len(all_preds)-1}): mean={test_preds.mean():.2f}, min={test_preds.min():.2f}, max={test_preds.max():.2f}")


  ckpt = torch.load(ANGLE_MODEL_PATH, map_location=DEVICE)
Predicting: 100%|██████████| 12/12 [00:01<00:00,  7.42it/s]
Predicting: 100%|██████████| 12/12 [00:01<00:00,  7.97it/s]

 Saved predictions to /kaggle/working/final_predictions.csv
 Val (0–368): mean=181.66, min=0.07, max=359.70
 Test (369–737): mean=187.17, min=2.69, max=359.60



