In [1]:
import torch
import torch.nn as nn
from torchvision import transforms, models
import pandas as pd
import numpy as np
from PIL import Image
import os
from tqdm import tqdm

# ---------------------------------------------------------------------------
# Inference configuration
# ---------------------------------------------------------------------------
# Batch size used by the test DataLoader.
BATCH_SIZE = 32
# (height, width) every image is resized to before being fed to the network.
IMAGE_SIZE = (256, 256)

# Input / output locations - adjust these to match your environment.
TEST_IMAGES_DIR = '/kaggle/input/smai-proj-test-set/images_test'
TEST_REGIONS_PATH = '/kaggle/input/predicted-regions-test2/predicted_regions_test.csv'
MODEL_PATH = '/kaggle/input/lat-long-precictor/pytorch/default/1/best_geo_model.pth'
OUTPUT_FILE = 'predictions_test.csv'

# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Define the model architecture (needs to match the saved model)
class GeoModel(nn.Module):
    """Image + region-ID regressor that predicts a (latitude, longitude) pair.

    An image backbone produces a feature vector; a learned region embedding
    produces per-channel gates in [0, 1] that rescale those features. The
    gated features are concatenated with the region embedding, passed through
    a shared MLP, and two linear heads followed by a sigmoid emit the two
    coordinates, each in [0, 1].

    Attribute names and layer order define the ``state_dict`` keys, so they
    must stay in sync with the checkpoint this class is used to load.

    Args:
        num_regions: Number of distinct region ids (size of the embedding table).
        backbone_name: Backbone identifier. Only ``'efficientnet_b1'`` is supported.
        pretrained: When True (default, matches the previous behaviour) the
            backbone is initialised with torchvision's ImageNet weights, which
            may trigger a download. Pass False when a full checkpoint is loaded
            right afterwards, to skip that download.

    Raises:
        ValueError: If ``backbone_name`` is not a supported backbone.
    """

    def __init__(self, num_regions, backbone_name='efficientnet_b1', pretrained=True):
        super().__init__()

        # Select backbone. The ``weights=`` API replaces the deprecated
        # ``pretrained=True`` flag; IMAGENET1K_V1 is what that flag mapped to.
        if backbone_name == 'efficientnet_b1':
            weights = models.EfficientNet_B1_Weights.IMAGENET1K_V1 if pretrained else None
            self.backbone = models.efficientnet_b1(weights=weights)
            feature_dim = 1280
        else:
            raise ValueError(f"Unsupported backbone: {backbone_name}")

        # Drop the classification head so the backbone returns raw features.
        self.backbone.classifier = nn.Identity()

        # Region embedding
        self.region_embedding = nn.Embedding(num_regions, 128)

        # Region-based attention: maps the embedding to one gate per feature channel.
        self.attention = nn.Sequential(
            nn.Linear(128, 512),
            nn.ReLU(),
            nn.Linear(512, feature_dim),
            nn.Sigmoid()  # Output attention weights in [0,1]
        )

        # Fusion MLP over [gated image features, region embedding].
        self.fusion = nn.Sequential(
            nn.Linear(feature_dim + 128, 768),
            nn.BatchNorm1d(768),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(768, 384),
            nn.BatchNorm1d(384),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(384, 64),
            nn.ReLU(),
            nn.Dropout(0.1)
        )

        # Separate heads for latitude and longitude
        self.lat_head = nn.Linear(64, 1)
        self.lon_head = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, regions):
        """Predict coordinates in [0, 1] for a batch.

        Args:
            x: Image batch fed to the backbone (presumably (B, 3, H, W) - confirm
                against the training pipeline).
            regions: Integer tensor of region indices, one per image.

        Returns:
            Tensor of shape (B, 2): column 0 is latitude, column 1 is longitude,
            both squashed to [0, 1] by a sigmoid.
        """
        img_features = self.backbone(x)
        region_features = self.region_embedding(regions)

        # Gate each image feature channel by a region-dependent weight.
        attended_features = img_features * self.attention(region_features)

        # Fuse gated image features with the raw region embedding.
        shared_features = self.fusion(
            torch.cat([attended_features, region_features], dim=1)
        )

        # Each head yields (B, 1); concatenating gives (B, 2) = [lat, lon].
        lat = self.sigmoid(self.lat_head(shared_features))
        lon = self.sigmoid(self.lon_head(shared_features))
        return torch.cat([lat, lon], dim=1)

# Dataset class for test data
class TestDataset(torch.utils.data.Dataset):
    """Dataset yielding (image tensor, region index, filename) for inference.

    Args:
        df: DataFrame with a ``filename`` column (image file name relative to
            ``img_dir``) and a ``region_encoded`` column (integer region index).
        img_dir: Directory containing the image files.
        transform: Optional callable applied to the RGB PIL image. When omitted,
            the default pipeline is used: resize to ``IMAGE_SIZE``, convert to
            a tensor, and normalise with the ImageNet mean/std.
    """

    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize(IMAGE_SIZE),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, region, filename)``."""
        # Single positional row lookup instead of one per field.
        row = self.df.iloc[idx]
        filename = row['filename']
        img_path = os.path.join(self.img_dir, filename)

        # Context manager closes the underlying file handle promptly instead of
        # leaving it to the garbage collector; convert() loads the pixel data
        # and returns an independent image, so it is safe to use afterwards.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        image = self.transform(image)

        region = torch.tensor(row['region_encoded'], dtype=torch.long)

        return image, region, filename

def main():
    """Run inference on the test set and write predictions to ``OUTPUT_FILE``.

    Loads the checkpoint at ``MODEL_PATH`` (model weights, the lat/lon
    min/max used for normalisation, and a fitted region encoder), encodes the
    ``Region_ID`` column of the CSV at ``TEST_REGIONS_PATH``, predicts
    normalised coordinates for every image, rescales them back with the stored
    min/max, and saves a CSV with ``filename``, ``Latitude`` and ``Longitude``.

    Raises:
        KeyError: If the test CSV lacks the ``filename`` or ``Region_ID`` column.
    """
    print(f"Loading model from {MODEL_PATH}")

    # The checkpoint holds a pickled Python object (the region encoder,
    # presumably a scikit-learn LabelEncoder) next to the tensors, so it cannot
    # be loaded with weights_only=True. Being explicit silences the torch.load
    # FutureWarning and keeps working once the default flips.
    # SECURITY: full unpickling can execute arbitrary code - only load
    # checkpoints from a trusted source.
    checkpoint = torch.load(MODEL_PATH, map_location=device, weights_only=False)

    # Extract parameters from checkpoint
    lat_min = checkpoint['lat_min']
    lat_max = checkpoint['lat_max']
    lon_min = checkpoint['lon_min']
    lon_max = checkpoint['lon_max']
    region_encoder = checkpoint['region_encoder']
    num_regions = len(region_encoder.classes_)

    print(f"Model parameters: lat_min={lat_min}, lat_max={lat_max}, lon_min={lon_min}, lon_max={lon_max}")
    print(f"Number of regions: {num_regions}")

    # Initialize model and restore the trained weights.
    model = GeoModel(num_regions, backbone_name='efficientnet_b1').to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # disable dropout, use BatchNorm running statistics

    print(f"Loading test data from {TEST_REGIONS_PATH}")

    # Load test data
    test_df = pd.read_csv(TEST_REGIONS_PATH)

    # Fail early with a clear message instead of deep inside the data loader.
    missing_columns = {'filename', 'Region_ID'} - set(test_df.columns)
    if missing_columns:
        raise KeyError(
            f"{TEST_REGIONS_PATH} is missing required column(s): {sorted(missing_columns)}"
        )

    # Encode regions with the encoder stored in the checkpoint so indices
    # line up with the embedding table.
    test_df['region_encoded'] = region_encoder.transform(test_df['Region_ID'])

    print(f"Creating dataset with {len(test_df)} images")

    # Create test dataset and dataloader (no shuffling: keep CSV order).
    test_dataset = TestDataset(test_df, TEST_IMAGES_DIR)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # Loop-invariant ranges used to map [0, 1] outputs back to coordinates.
    lat_span = lat_max - lat_min
    lon_span = lon_max - lon_min

    all_filenames = []
    all_lats = []
    all_lons = []

    print("Making predictions...")
    with torch.no_grad():
        for images, regions, filenames in tqdm(test_loader):
            outputs = model(images.to(device), regions.to(device))

            # Denormalize in float64: coordinates are ~1e5 in magnitude, where
            # float32 spacing is coarse, and this avoids depending on NumPy's
            # scalar promotion rules.
            normalized = outputs.double().cpu().numpy()
            all_lats.extend((normalized[:, 0] * lat_span + lat_min).tolist())
            all_lons.extend((normalized[:, 1] * lon_span + lon_min).tolist())
            all_filenames.extend(filenames)

    # Create dataframe with predictions
    predictions_df = pd.DataFrame({
        'filename': all_filenames,
        'Latitude': all_lats,
        'Longitude': all_lons
    })

    # Save predictions
    predictions_df.to_csv(OUTPUT_FILE, index=False)
    print(f"Predictions saved to {OUTPUT_FILE}")

    # Display sample predictions
    print("\nSample predictions:")
    print(predictions_df.head(5))

# Entry point: run inference when this cell/script is executed directly
# (__name__ is "__main__" in that case); importing the file as a module
# does not trigger it.
if __name__ == "__main__":
    main()

Using device: cuda
Loading model from /kaggle/input/lat-long-precictor/pytorch/default/1/best_geo_model.pth


  checkpoint = torch.load(MODEL_PATH, map_location=device)


Model parameters: lat_min=217805, lat_max=221696, lon_min=140523, lon_max=146067
Number of regions: 15


Downloading: "https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1_rwightman-bac287d4.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 169MB/s]


Loading test data from /kaggle/input/predicted-regions-test2/predicted_regions_test.csv
Creating dataset with 369 images
Making predictions...


100%|██████████| 12/12 [00:06<00:00,  1.95it/s]

Predictions saved to predictions_test.csv

Sample predictions:
       filename       Latitude      Longitude
0  img_0000.jpg  219614.659668  144893.706055
1  img_0001.jpg  219190.734497  145357.229980
2  img_0002.jpg  219874.955078  144570.798584
3  img_0003.jpg  219995.219238  142058.842651
4  img_0004.jpg  220219.393555  142283.443481



