In [1]:
import os
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from torch import nn, optim
from PIL import Image
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from torchvision.models import vit_b_16, ViT_B_16_Weights
from torchvision.transforms.functional import InterpolationMode

## Configurations

In [14]:
# Hyperparameter configuration class
class Config:
    BATCH_SIZE = 16
    NUM_WORKERS = 0
    TRAIN_SPLIT = 0.8
    IMG_SIZE = 384
    LEARNING_RATE = 3e-3
    DROPOUT = 0.4
    NUM_NEURONS = 256  # Number of neurons in the hidden layer
    EPOCHS_FEATURE_EXTRACTION = 15
    EPOCHS_FINE_TUNING = 35
    FINE_TUNE_LR = 3e-4
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    MODEL_SAVE_PATH = "best_model.pth"

## Model Architecture

In [3]:
weights = ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1

# Define the model
class LatitudeLongitudeModel(nn.Module):
    def __init__(self):
        super(LatitudeLongitudeModel, self).__init__()

        # Load a pretrained EfficientNet model
        self.base_model = vit_b_16(weights=weights)
        
        # Replace the classifier with a custom head
        self.base_model.heads = nn.Sequential(
            nn.Linear(self.base_model.heads[0].in_features, Config.NUM_NEURONS),
            nn.ReLU(),
            nn.Dropout(Config.DROPOUT),
            nn.Linear(Config.NUM_NEURONS, 2)  # 2 outputs: latitude and longitude
        )

    def forward(self, x):
        return self.base_model(x)
    
# Define transformations
transform = weights.transforms() # Predefined transforms that match the pretrained model

# Create Data Loader and Preprocessing 

In [4]:
latitude_min = 41.1001236366018
latitude_range = 0.010612474509322567
longitude_min = 29.015327288005498
longitude_range = 0.021001475024995386

In [15]:
# Custom dataset class
class CampusDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.data = pd.read_csv(csv_file, sep=";")
        
        self.data['latitude'] = (self.data['latitude'] - latitude_min) / latitude_range
        self.data['longitude'] = (self.data['longitude'] - longitude_min) / longitude_range
        
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Load image and labels
        img_path = f"{self.img_dir}/{self.data.iloc[idx]['filename']}"
        image = Image.open(img_path).convert("RGB")
        latitude = self.data.iloc[idx]["latitude"]
        longitude = self.data.iloc[idx]["longitude"]

        # Apply transformations if specified
        if self.transform:
            image = self.transform(image)

        # Return image and target (latitude, longitude)
        target = torch.tensor([latitude, longitude], dtype=torch.float32)
        return image, target

# Initialize dataset with training data
csv_file = f"data_{Config.IMG_SIZE}/train.csv"
img_dir = f"data_{Config.IMG_SIZE}/train"

# Full dataset and split
full_dataset = CampusDataset(csv_file=csv_file, img_dir=img_dir, transform=transform)

train_size = int(Config.TRAIN_SPLIT * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# Create DataLoaders for train and validation sets
train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True, num_workers=Config.NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False, num_workers=Config.NUM_WORKERS)

# Print dataset sizes
print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")

Train dataset size: 3500
Validation dataset size: 875


## RMSEHaversineLoss Loss

In [6]:
class RMSEHaversineLoss(nn.Module):
    def __init__(self):
        super(RMSEHaversineLoss, self).__init__()
        self.earth_radius = 6371  # Radius of the Earth in kilometers

    def forward(self, preds, targets):
        # Ensure tensors are on the same device
        preds = preds.to(targets.device)

        # Convert latitude and longitude from degrees to radians
        lat1, lon1 = preds[:, 0] * (torch.pi / 180), preds[:, 1] * (torch.pi / 180)
        lat2, lon2 = targets[:, 0] * (torch.pi / 180), targets[:, 1] * (torch.pi / 180)

        # Compute haversine distance
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        a = torch.sin(dlat / 2) ** 2 + torch.cos(lat1) * torch.cos(lat2) * torch.sin(dlon / 2) ** 2
        c = 2 * torch.asin(torch.sqrt(torch.clamp(a, 0, 1)))  # Use clamp for stability

        # Compute distances
        distances = self.earth_radius * c

        # Compute RMSE
        rmse = torch.sqrt(torch.mean(distances ** 2))
        return rmse

## Model Training

In [17]:
# Initialize the model, loss, and optimizer
model = LatitudeLongitudeModel().to(Config.DEVICE)
criterion = RMSEHaversineLoss().to(Config.DEVICE)

def train_one_epoch(model, dataloader, criterion, optimizer):
    model.train()
    epoch_loss = 0
    for images, targets in tqdm(dataloader, desc="Training Epoch"):
        images, targets = images.to(Config.DEVICE), targets.to(Config.DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        targets = targets * torch.tensor([latitude_range, longitude_range], device=Config.DEVICE) + torch.tensor([latitude_min, longitude_min], device=Config.DEVICE)
        outputs = outputs * torch.tensor([latitude_range, longitude_range], device=Config.DEVICE) + torch.tensor([latitude_min, longitude_min], device=Config.DEVICE)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(dataloader)

def validate(model, dataloader, criterion):
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
        for images, targets in tqdm(dataloader, desc="Validation Epoch"):
            images, targets = images.to(Config.DEVICE), targets.to(Config.DEVICE)
            outputs = model(images)
            targets = targets * torch.tensor([latitude_range, longitude_range], device=Config.DEVICE) + torch.tensor([latitude_min, longitude_min], device=Config.DEVICE)
            outputs = outputs * torch.tensor([latitude_range, longitude_range], device=Config.DEVICE) + torch.tensor([latitude_min, longitude_min], device=Config.DEVICE)
            loss = criterion(outputs, targets)
            epoch_loss += loss.item()
    return epoch_loss / len(dataloader) 

RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


## Feature Extraction

In [None]:
# Feature Extraction
for param in model.base_model.parameters():
    param.requires_grad = False  # Freeze base model parameters
for param in model.base_model.heads.parameters():
    param.requires_grad = True  # Keep classifier 
optimizer = optim.Adam(model.base_model.heads.parameters(), lr=Config.LEARNING_RATE)

print("Starting Feature Extraction Phase")
for epoch in range(Config.EPOCHS_FEATURE_EXTRACTION):
    train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
    val_loss = validate(model, val_loader, criterion)
    print(f"Feature Extraction - Epoch {epoch+1}/{Config.EPOCHS_FEATURE_EXTRACTION}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

In [9]:
model.load_state_dict(torch.load(Config.MODEL_SAVE_PATH))  # Load the best model
model

  model.load_state_dict(torch.load(Config.MODEL_SAVE_PATH))  # Load the best model


LatitudeLongitudeModel(
  (base_model): VisionTransformer(
    (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (encoder): Encoder(
      (dropout): Dropout(p=0.0, inplace=False)
      (layers): Sequential(
        (encoder_layer_0): EncoderBlock(
          (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (self_attention): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (dropout): Dropout(p=0.0, inplace=False)
          (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (mlp): MLPBlock(
            (0): Linear(in_features=768, out_features=3072, bias=True)
            (1): GELU(approximate='none')
            (2): Dropout(p=0.0, inplace=False)
            (3): Linear(in_features=3072, out_features=768, bias=True)
            (4): Dropout(p=0.0, inplace=False)
          )
        )
        (encoder_layer_1): EncoderBlock(
     

## Fine Tuning

In [16]:
# Fine-Tuning
# Unfreeze more layers or blocks for fine-tuning (e.g., blocks 4 to 7)
for name, param in model.base_model.named_parameters():
    if "encoder_layer_8" in name or "encoder_layer_9" in name or "encoder_layer_10" in name or "encoder_layer_11" in name:  # Fine-tune blocks 9 to 11
        param.requires_grad = True

optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=Config.FINE_TUNE_LR)

print("Starting Fine-Tuning Phase")
best_val_loss = float('inf')
for epoch in range(Config.EPOCHS_FINE_TUNING):
    train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
    val_loss = validate(model, val_loader, criterion)

    # Save the model if validation loss improves
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), Config.MODEL_SAVE_PATH)
        print(f"Model saved at epoch {epoch+1} with validation loss: {val_loss:.4f}")

    print(f"Fine-Tuning - Epoch {epoch+1}/{Config.EPOCHS_FINE_TUNING}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

Starting Fine-Tuning Phase


Training Epoch:   0%|          | 0/219 [00:00<?, ?it/s]

Validation Epoch:   0%|          | 0/55 [00:00<?, ?it/s]

Model saved at epoch 1 with validation loss: 0.3269
Fine-Tuning - Epoch 1/35, Train Loss: 32.4115, Validation Loss: 0.3269


Training Epoch:   0%|          | 0/219 [00:00<?, ?it/s]

Validation Epoch:   0%|          | 0/55 [00:00<?, ?it/s]

Model saved at epoch 2 with validation loss: 0.3096
Fine-Tuning - Epoch 2/35, Train Loss: 27.8241, Validation Loss: 0.3096


Training Epoch:   0%|          | 0/219 [00:00<?, ?it/s]

Validation Epoch:   0%|          | 0/55 [00:00<?, ?it/s]

Model saved at epoch 3 with validation loss: 0.2826
Fine-Tuning - Epoch 3/35, Train Loss: 24.9128, Validation Loss: 0.2826


Training Epoch:   0%|          | 0/219 [00:00<?, ?it/s]

Validation Epoch:   0%|          | 0/55 [00:00<?, ?it/s]

Model saved at epoch 4 with validation loss: 0.2161
Fine-Tuning - Epoch 4/35, Train Loss: 21.2165, Validation Loss: 0.2161


Training Epoch:   0%|          | 0/219 [00:00<?, ?it/s]

Validation Epoch:   0%|          | 0/55 [00:00<?, ?it/s]

Fine-Tuning - Epoch 5/35, Train Loss: 20.2239, Validation Loss: 0.2261


Training Epoch:   0%|          | 0/219 [00:00<?, ?it/s]

RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
