In [1]:
import os
import math
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from sklearn.preprocessing import LabelEncoder
from torch.cuda.amp import GradScaler, autocast

# Set device: use the GPU when CUDA is usable, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# Reproducibility
def seed_everything(seed=42):
    """Seed every random number generator this notebook may draw from.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and PyTorch
            (CPU and all CUDA devices).
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Keep cuDNN from auto-selecting kernels per run, which can vary results.
    torch.backends.cudnn.benchmark = False

seed_everything()

# Timestamp parser
def parse_timestamp(timestamp_str):
    """Convert an ``H:M`` or ``H:M:S`` string into minutes.

    Args:
        timestamp_str: Colon-separated time string, or a missing value
            (``None`` / NaN).

    Returns:
        ``h * 60 + m`` for two fields, ``h * 60 + m + s / 60`` for three.
        ``0`` for missing values, non-string input, non-integer fields, or
        any other number of fields.
    """
    if pd.isna(timestamp_str):
        return 0
    try:
        fields = [int(part) for part in timestamp_str.split(':')]
    except (AttributeError, TypeError, ValueError):
        # Not a string, or a field is not an integer. Keep the lenient
        # "unknown time -> 0" contract, but no longer swallow unrelated
        # exceptions such as KeyboardInterrupt.
        return 0
    if len(fields) == 2:
        h, m = fields
        return h * 60 + m
    if len(fields) == 3:
        h, m, s = fields
        return h * 60 + m + s / 60
    return 0

# Dataset
# Column names that MultimodalAngleDataset reads from the labels dataframe
# (the list itself is not referenced anywhere in the code visible here).
enabled_meta = ['timestamp', 'latitude', 'longitude', 'Region_ID', 'filename', 'angle']
class MultimodalAngleDataset(Dataset):
    """Image + metadata dataset for angle regression.

    Each item is ``(img, meta, region_idx, target)``:
      * ``img``: RGB image loaded from ``img_dir`` (after ``transform``, if any)
      * ``meta``: 6 floats - sin/cos of time of day, latitude and longitude
      * ``region_idx``: encoded ``Region_ID`` as a long scalar tensor
      * ``target``: ``[cos(angle), sin(angle)]``, with ``angle`` taken in degrees

    Args:
        df: Labels dataframe; it is copied, the caller's frame is not modified.
        img_dir: Directory containing the files named in ``df['filename']``.
        region_encoder: Fitted encoder whose ``transform`` maps ``Region_ID``
            values to integer indices.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, df, img_dir, region_encoder, transform=None):
        self.df = df.copy().reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform
        self.region_encoder = region_encoder

        # parse and fill
        # NOTE: parse_timestamp maps missing/unparseable timestamps to 0, so
        # in practice the median fill only affects latitude/longitude.
        self.df['time_minutes'] = self.df['timestamp'].apply(parse_timestamp)
        for c in ['latitude', 'longitude', 'time_minutes']:
            # Assign the filled column back. fillna(inplace=True) on the
            # df[c] selection is chained assignment: deprecated in pandas 2.x
            # and a silent no-op once Copy-on-Write is enabled.
            self.df[c] = self.df[c].fillna(self.df[c].median())
        self.df['region_idx'] = self.region_encoder.transform(self.df['Region_ID'])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        # Close the file handle as soon as the pixels are decoded.
        with Image.open(os.path.join(self.img_dir, row['filename'])) as raw_img:
            img = raw_img.convert('RGB')
        if self.transform:
            img = self.transform(img)

        # angle target, encoded as a point on the unit circle
        angle_rad = math.radians(float(row['angle']))
        target = torch.tensor([math.cos(angle_rad), math.sin(angle_rad)],
                              dtype=torch.float32)

        # meta features: each quantity becomes a (sin, cos) pair
        t = row['time_minutes'] / (24*60) * 2 * math.pi
        ts, tc = math.sin(t), math.cos(t)
        lat_rad = row['latitude'] / 180 * math.pi
        ls, lc = math.sin(lat_rad), math.cos(lat_rad)
        lon_rad = row['longitude'] / 360 * 2 * math.pi
        los, loc = math.sin(lon_rad), math.cos(lon_rad)
        meta = torch.tensor([ts, tc, ls, lc, los, loc], dtype=torch.float32)

        # Cast explicitly: a row of a mixed-dtype frame yields numpy scalars.
        region_idx = torch.tensor(int(row['region_idx']), dtype=torch.long)
        return img, meta, region_idx, target

# Transforms (input side must be a multiple of 14; 224 = 16 * 14)
_input_size = (224, 224)
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, light brightness/contrast jitter, normalise
train_transform = transforms.Compose([
    transforms.Resize(_input_size),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
])

# Validation pipeline: same as training, without the colour jitter
val_transform = transforms.Compose([
    transforms.Resize(_input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
])

# Load data: label tables and image folders for both splits
root = "/kaggle/input/smainewdataset/Phase_2_data"
train_img_dir = os.path.join(root, "images_train/images_train")
val_img_dir   = os.path.join(root, "images_val/images_val")
train_df = pd.read_csv(os.path.join(root, "labels_train.csv"))
val_df   = pd.read_csv(os.path.join(root, "labels_val.csv"))

# Region encoder, fitted on the region ids of both splits
all_region_ids = pd.concat([train_df['Region_ID'], val_df['Region_ID']])
region_encoder = LabelEncoder()
region_encoder.fit(all_region_ids)
num_regions = len(region_encoder.classes_)

# Datasets
train_ds = MultimodalAngleDataset(df=train_df, img_dir=train_img_dir,
                                  region_encoder=region_encoder, transform=train_transform)
val_ds   = MultimodalAngleDataset(df=val_df, img_dir=val_img_dir,
                                  region_encoder=region_encoder, transform=val_transform)

# DataLoaders (only the training loader shuffles)
loader_options = dict(batch_size=4, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_ds, shuffle=True, **loader_options)
val_loader   = DataLoader(val_ds, shuffle=False, **loader_options)

# Model definition
class DinoV2AngleRegressor(nn.Module):
    """Angle regressor combining DINOv2 image features with metadata.

    A DINOv2 ViT-B/14 backbone embeds the image. A small MLP embeds the six
    metadata features together with a learned region embedding. A two-layer
    head maps the concatenation to a 2-vector that is L2-normalised.

    Args:
        num_regions: Number of rows in the region embedding table.
        region_emb_dim: Width of each region embedding.
    """

    def __init__(self, num_regions, region_emb_dim=16):
        super().__init__()
        # load DINOv2 ViT-B/14
        self.backbone = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14', pretrained=True)
        embed_dim = self.backbone.embed_dim  # 768
        # freeze the patch embedding and the first 6 transformer blocks
        for name, module in self.backbone.named_children():
            if name == 'patch_embed':
                for p in module.parameters(): p.requires_grad=False
            if name == 'blocks':
                for i, blk in enumerate(module):
                    if i < 6:
                        for p in blk.parameters(): p.requires_grad=False
        # region embedding and meta encoder
        self.region_emb = nn.Embedding(num_regions, region_emb_dim)
        self.meta_enc = nn.Sequential(
            nn.Linear(6 + region_emb_dim, 64), nn.ReLU(),
            nn.Linear(64, 128), nn.ReLU()
        )
        # head
        self.head = nn.Sequential(
            nn.Linear(embed_dim + 128, 384), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(384, 2)
        )

    def forward(self, x_img, x_meta, region_idx):
        """Return one unit-length 2-vector per sample.

        Args:
            x_img: Image batch passed to the backbone.
            x_meta: Metadata batch with 6 features per sample (matches the
                ``6 + region_emb_dim`` input width of ``meta_enc``).
            region_idx: Integer region indices for the embedding lookup.
        """
        feats = self.backbone.forward_features(x_img)
        # unwrap dict if needed
        if isinstance(feats, dict):
            if 'x_norm_clstoken' in feats:
                # DINOv2 exposes its normalised class-token embedding under
                # this key. Select it by name instead of relying on the
                # dict's insertion order.
                feats = feats['x_norm_clstoken']
            else:
                # Fallback for backbones that use other key names
                feats = feats.get('x', feats.get('x_norm', next(iter(feats.values()))))
        if feats.dim() == 3:
            # A (batch, tokens, dim) sequence cannot be concatenated with the
            # metadata vector, so keep the leading token. This assumes the
            # class token comes first - confirm for non-DINOv2 backbones.
            feats = feats[:, 0]
        r = self.region_emb(region_idx)
        meta = self.meta_enc(torch.cat([x_meta, r], dim=1))
        raw = self.head(torch.cat([feats, meta], dim=1))
        # Clamp the norm so an all-zero output does not divide by zero
        norm = raw.norm(dim=1, keepdim=True).clamp(min=1e-6)
        return raw / norm

# Loss and metric
def angle_mae(pred, target):
    """Mean angular error in degrees between paired rows of `pred` and `target`.

    The row-wise dot product is clamped to [-1, 1] and passed through acos,
    which gives the angle between the rows when both are unit vectors.

    Returns:
        The mean error as a Python float.
    """
    cosines = torch.sum(pred * target, dim=1)
    cosines = torch.clamp(cosines, -1, 1)
    errors_deg = torch.acos(cosines) * 180 / math.pi
    return errors_deg.mean().item()
criterion = nn.MSELoss()

# Training function

def train_model(model, train_loader, val_loader,
                epochs=300, acc_steps=2,
                lr_backbone=5e-6, lr_head=5e-4,
                weight_decay=1e-2, min_lr=1e-7):
    """Train `model` and keep the checkpoint with the lowest validation MAE.

    Whenever the validation MAE improves, the model weights are written to
    ``best_dinov2_regressor_<mae>.pth`` and the validation predictions to
    ``angle_predictions/best_preds_<mae>.csv``.

    Args:
        model: Module exposing ``backbone``, ``head``, ``region_emb`` and
            ``meta_enc`` submodules.
        train_loader: Loader yielding ``(imgs, metas, regions, targets)``.
        val_loader: Loader yielding the same tuples. Its dataset must expose
            a ``df`` with a ``filename`` column for the prediction CSV.
        epochs: Number of passes over `train_loader`.
        acc_steps: Number of batches whose gradients are accumulated before
            each optimizer step.
        lr_backbone: Learning rate for the trainable backbone parameters.
        lr_head: Learning rate for head, region embedding and meta encoder.
        weight_decay: AdamW weight decay for both parameter groups.
        min_lr: Floor of the cosine learning-rate schedule.

    Returns:
        `model` with the best checkpoint loaded, or `model` as trained if no
        epoch ever improved on the initial (infinite) MAE.
    """
    model.to(device)
    # parameter groups: trainable backbone weights vs. everything new
    backbone_params = [p for p in model.backbone.parameters() if p.requires_grad]
    head_params = (list(model.head.parameters())
                   + list(model.region_emb.parameters())
                   + list(model.meta_enc.parameters()))
    optimizer = torch.optim.AdamW([
        {'params': backbone_params, 'lr': lr_backbone},
        {'params': head_params,     'lr': lr_head}
    ], weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs, eta_min=min_lr)
    # Mixed precision is CUDA-only here; disable it explicitly on CPU.
    use_amp = device.type == "cuda"
    scaler = GradScaler(enabled=use_amp)

    best_mae = float('inf')
    best_model_path = None  # stays None until an epoch improves the MAE
    preds_dir = "angle_predictions"
    os.makedirs(preds_dir, exist_ok=True)
    num_batches = len(train_loader)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        optimizer.zero_grad()
        for i, (imgs, metas, regions, targets) in enumerate(tqdm(train_loader, desc=f"Train {epoch+1}/{epochs}")):
            imgs, metas, regions, targets = imgs.to(device), metas.to(device), regions.to(device), targets.to(device)
            with autocast(enabled=use_amp):
                preds = model(imgs, metas, regions)
                loss = criterion(preds, targets) / acc_steps
            scaler.scale(loss).backward()
            # Undo the accumulation scaling so the logged loss is per sample
            running_loss += loss.item() * imgs.size(0) * acc_steps
            # Also step on the final batch. Otherwise the gradients of a
            # trailing partial accumulation group are discarded by the
            # zero_grad() at the start of the next epoch.
            if (i + 1) % acc_steps == 0 or (i + 1) == num_batches:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
        scheduler.step()
        train_loss = running_loss / len(train_loader.dataset)

        # Validation
        model.eval()
        all_p, all_t = [], []
        with torch.no_grad():
            for imgs, metas, regions, targets in tqdm(val_loader, desc="Validate"):
                imgs, metas, regions, targets = imgs.to(device), metas.to(device), regions.to(device), targets.to(device)
                p = model(imgs, metas, regions)
                # Collect on the CPU so the validation set does not pile up
                # in GPU memory.
                all_p.append(p.float().cpu())
                all_t.append(targets.float().cpu())
        all_p = torch.cat(all_p)
        all_t = torch.cat(all_t)

        val_mae = angle_mae(all_p, all_t)
        print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Val MAE={val_mae:.4f}")

        if val_mae < best_mae:
            best_mae = val_mae
            # save model
            best_model_path = f"best_dinov2_regressor_{best_mae:.4f}.pth"
            torch.save(model.state_dict(), best_model_path)
            # save predictions
            true_np = all_t.numpy()
            pred_np = all_p.numpy()
            angle_true = np.degrees(np.arctan2(true_np[:, 1], true_np[:, 0]))
            angle_pred = np.degrees(np.arctan2(pred_np[:, 1], pred_np[:, 0]))
            # Wrap the difference into [-180, 180) before taking |.|
            error = np.abs((angle_true - angle_pred + 180) % 360 - 180)
            df_preds = pd.DataFrame({
                # Take filenames from the loader that was actually evaluated,
                # not from a module-level dataset.
                'filename': val_loader.dataset.df['filename'].to_numpy(),
                'true_cos': true_np[:, 0],
                'true_sin': true_np[:, 1],
                'pred_cos': pred_np[:, 0],
                'pred_sin': pred_np[:, 1],
                'angle_true': angle_true,
                'angle_pred': angle_pred,
                'error': error,
            })
            csv_path = os.path.join(preds_dir, f"best_preds_{best_mae:.4f}.csv")
            df_preds.to_csv(csv_path, index=False)

    print(f"Best Val MAE: {best_mae:.4f}")
    if best_model_path is None:
        # No epoch ran or the MAE never improved (e.g. NaN), so there is
        # nothing to reload.
        print("No checkpoint was saved; returning the model as trained.")
    else:
        model.load_state_dict(torch.load(best_model_path, map_location=device))
    return model

# Main run: build the regressor and train it when executed as a script
if __name__ == '__main__':
    model = DinoV2AngleRegressor(num_regions=num_regions)
    trained_model = train_model(model=model,
                                train_loader=train_loader,
                                val_loader=val_loader)


In [None]:
import os
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ---------- 1. Device ----------
# Use the GPU when CUDA is usable, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ---------- 2. Dataset ----------
class RegionDataset(Dataset):
    """Serves ``(image, label)`` pairs for region classification.

    The image is read from ``img_dir`` using the row's ``filename`` and
    converted to RGB. The label is the row's ``Region_ID`` minus one.
    """

    def __init__(self, dataframe, img_dir, transform=None):
        # The dataframe is stored as given (no copy is made)
        self.transform = transform
        self.img_dir = img_dir
        self.df = dataframe

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        image_path = os.path.join(self.img_dir, record['filename'])
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        # Shift Region_ID down by one to get the class index
        return image, int(record['Region_ID']) - 1

# ---------- 3. Transforms ----------
# Validation pipeline: fixed 256x256 resize, tensor conversion, then
# per-channel normalisation
_val_steps = [
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
val_transform = transforms.Compose(_val_steps)

# ---------- 4. Load Validation Data ----------
val_csv     = "/kaggle/input/smainewdataset/Phase_2_data/labels_val.csv"
val_img_dir = "/kaggle/input/smainewdataset/Phase_2_data/images_val/images_val"

# Read the labels and force Region_ID to an integer column
val_df = pd.read_csv(val_csv)
val_df['Region_ID'] = val_df['Region_ID'].astype(int)

# Fixed-order loader over the validation images
val_dataset = RegionDataset(dataframe=val_df, img_dir=val_img_dir, transform=val_transform)
val_loader  = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False, num_workers=4)

# ---------- 5. Model Builder ----------
def build_model(name='convnext_tiny', num_classes=15, pretrained=False):
    """Build a torchvision classifier with its final layer resized.

    Args:
        name: One of ``'convnext_tiny'``, ``'efficientnet_b0'``,
            ``'resnet50'`` or ``'mobilenet_v3_large'``.
        num_classes: Output width of the replacement linear layer.
        pretrained: Forwarded to the torchvision constructor.

    Returns:
        The model with a fresh ``nn.Linear`` as its last classification layer.

    Raises:
        ValueError: If `name` is not one of the supported architectures.
    """
    # Where each architecture keeps its final linear layer:
    # (attribute name, index inside that attribute, or None if the attribute
    # is the layer itself)
    head_locations = {
        'convnext_tiny': ('classifier', 2),
        'efficientnet_b0': ('classifier', 1),
        'resnet50': ('fc', None),
        'mobilenet_v3_large': ('classifier', 3),
    }
    if name not in head_locations:
        raise ValueError(f"Unknown model: {name}")

    attr, index = head_locations[name]
    net = getattr(models, name)(pretrained=pretrained)
    if index is None:
        old_layer = getattr(net, attr)
        setattr(net, attr, nn.Linear(old_layer.in_features, num_classes))
    else:
        container = getattr(net, attr)
        container[index] = nn.Linear(container[index].in_features, num_classes)
    return net

# ---------- 6. Load Checkpoint ----------
model_name      = 'convnext_tiny'
checkpoint_path = "/kaggle/input/region-predict-97.83/tensorflow2/default/1/region_pred_97_83.pth"

# Read the saved state dict, rebuild the architecture without pretrained
# weights, restore the weights, then switch to inference mode on `device`
state = torch.load(checkpoint_path, map_location=device)
model = build_model(name=model_name, num_classes=15, pretrained=False)
model.load_state_dict(state)
model.to(device).eval()

# ---------- 7. Inference + Metrics ----------
# Class indices 0..14 correspond to Region_1..Region_15 (labels are
# Region_ID - 1). Passing them explicitly keeps the report and the confusion
# matrix aligned even when some region never occurs in the labels or
# predictions.
class_ids = list(range(15))

all_preds  = []
all_labels = []

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs = imgs.to(device)
        outputs = model(imgs)
        # Highest-scoring class per image
        preds = outputs.argmax(dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.numpy())

# 8. Print results
acc = accuracy_score(all_labels, all_preds)
print(f"Validation Accuracy: {acc:.4f}\n")

print("Classification Report:")
# Without `labels=`, classification_report raises ValueError when the 15
# target_names do not match the number of classes actually present.
print(classification_report(all_labels, all_preds,
      labels=class_ids,
      target_names=[f"Region_{i+1}" for i in class_ids]))

print("Confusion Matrix:")
# Always a 15x15 matrix, rows/columns in class_ids order
print(confusion_matrix(all_labels, all_preds, labels=class_ids))




Using device: cuda


Using cache found in /root/.cache/torch/hub/facebookresearch_dinov2_main
  scaler = GradScaler()


Epoch 1/30:   0%|          | 0/3271 [00:00<?, ?it/s]

  with autocast():


Validation:   0%|          | 0/185 [00:00<?, ?it/s]

Epoch 1: Train Loss=0.7810, Val MAE=52.9516


Epoch 2/30:   0%|          | 0/3271 [00:00<?, ?it/s]