In [None]:
import os
import pandas as pd
import random
import urllib.request
from tqdm import tqdm

In [None]:
# ---------------------------------------------
# CONFIGURATION
# ---------------------------------------------
# Adjust these paths as per your setup.
# TRAIN_CSV: metadata table that is later merged on (id, landmark_id) to obtain
# each selected image's download URL.
TRAIN_CSV = "/kaggle/input/dataset-google-landmark/train.csv"
# TRAIN_CLEAN_CSV: read for its `landmark_id` and `images` columns; `images` is
# split on whitespace into a list of image IDs.
TRAIN_CLEAN_CSV = "/kaggle/input/dataset-google-landmark/train_clean.csv"
# Directory that receives the downloaded images and the metadata CSV.
OUTPUT_DIR = "/kaggle/tmp/landmark_images"
NUM_CLASSES = 50  # top N landmark classes, ranked by number of listed images
IMAGES_PER_CLASS = 200  # upper bound on images sampled per class


In [None]:
# ---------------------------------------------
# SETUP
# ---------------------------------------------
# Create the download directory up front; exist_ok=True keeps re-runs of this
# cell from failing when the directory already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# ---------------------------------------------
# LOAD METADATA
# ---------------------------------------------
# Seed for the per-class sampling below, so that a fresh "Restart & Run All"
# selects the same images every time.
SAMPLING_SEED = 42

print("[INFO] Loading metadata...")
train_df = pd.read_csv(TRAIN_CSV)
clean_df = pd.read_csv(TRAIN_CLEAN_CSV)

# Parse image IDs from a whitespace-separated string to a list
clean_df["images"] = clean_df["images"].apply(lambda x: x.split())

# Sort by number of images and select top N classes
clean_df["count"] = clean_df["images"].apply(len)
top_classes = clean_df.sort_values(by="count", ascending=False).head(NUM_CLASSES)

# Create a flat list of (image_id, landmark_id) pairs
print(f"[INFO] Selecting {IMAGES_PER_CLASS} images from {NUM_CLASSES} classes...")
# A private RNG keeps the selection reproducible without touching the global
# `random` state used elsewhere.
sampler = random.Random(SAMPLING_SEED)
selected_images = []
for landmark_id, image_ids in zip(top_classes["landmark_id"], top_classes["images"]):
    # Classes with fewer than IMAGES_PER_CLASS images contribute all of them.
    sampled = sampler.sample(image_ids, min(IMAGES_PER_CLASS, len(image_ids)))
    selected_images.extend((img_id, landmark_id) for img_id in sampled)

selected_df = pd.DataFrame(selected_images, columns=["id", "landmark_id"])

# Merge to get URLs
merged_df = pd.merge(selected_df, train_df, on=["id", "landmark_id"], how="left")

# A left merge keeps selected images that have no match in train_df; their
# `url` is NaN and cannot be downloaded, so drop them and say how many.
missing_url = merged_df["url"].isna()
if missing_url.any():
    print(f"[WARN] Dropping {int(missing_url.sum())} selected images without a URL")
    merged_df = merged_df[~missing_url].reset_index(drop=True)

In [None]:
# ---------------------------------------------
# DOWNLOAD FUNCTION
# ---------------------------------------------
import time
import urllib.error
from urllib.request import Request, urlopen

def _discard_partial(path):
    """Remove a partially written download; a missing file is not an error."""
    try:
        os.remove(path)
    except OSError:
        # Best effort: nothing was written, or the file is already gone.
        pass


def download_image(row, out_dir, retries=3, delay=2):
    """Download one image described by ``row`` into ``out_dir``.

    Args:
        row: Mapping with an ``"id"`` and a ``"url"`` entry.
        out_dir: Directory the image is written to as ``<id>.<ext>``.
        retries: Maximum number of attempts; only HTTP 429 responses are retried.
        delay: Initial wait in seconds before a retry; doubled after each 429.

    Returns:
        None. Failures are reported with ``print`` and never raised, so a bulk
        download loop keeps going.
    """
    # Function-scope import keeps this cell runnable on its own.
    from urllib.parse import urlsplit

    img_id = row["id"]
    url = row["url"]
    # Rows without a usable URL (e.g. NaN after a left merge) are reported
    # instead of failing on ``url.split``.
    if not isinstance(url, str) or not url.strip():
        print(f"[ERROR] {img_id}: missing or invalid URL")
        return

    # Take the extension from the URL path only, so query strings such as
    # "?size=large" do not hide a valid extension.
    ext = os.path.splitext(urlsplit(url).path)[1].lstrip(".").lower()
    if ext not in ("jpg", "jpeg", "png"):
        ext = "jpg"
    save_path = os.path.join(out_dir, f"{img_id}.{ext}")

    if os.path.exists(save_path):
        return  # skip if already downloaded

    # Write to a temporary name and rename on success: an interrupted transfer
    # must not leave a truncated file that later runs treat as "already downloaded".
    tmp_path = save_path + ".part"

    headers = {
        'User-Agent': 'Mozilla/5.0 (compatible; landmark-dataset-downloader/1.0; +https://example.com)'
    }

    for attempt in range(retries):
        try:
            req = Request(url, headers=headers)
            with urlopen(req, timeout=10) as response, open(tmp_path, 'wb') as out_file:
                out_file.write(response.read())
            os.replace(tmp_path, save_path)
            return
        except urllib.error.HTTPError as e:
            _discard_partial(tmp_path)
            if e.code != 429:
                print(f"[ERROR] {img_id}: HTTP error {e.code}")
                return
            if attempt == retries - 1:
                break  # no attempt left, so do not sleep for nothing
            print(f"[RETRY] {img_id}: Too many requests. Waiting {delay}s...")
            time.sleep(delay)
            delay *= 2  # exponential backoff
        except Exception as e:
            _discard_partial(tmp_path)
            print(f"[ERROR] {img_id}: {e}")
            return

    print(f"[ERROR] {img_id}: giving up after {retries} attempts (HTTP 429)")



In [None]:
# ---------------------------------------------
# DOWNLOAD IMAGES
# ---------------------------------------------
print(f"[INFO] Downloading {len(merged_df)} images to: {OUTPUT_DIR}")
# Wrap the row iterator once so tqdm knows the total and can render a bar.
row_progress = tqdm(merged_df.iterrows(), total=len(merged_df))
for _row_index, record in row_progress:
    download_image(record, OUTPUT_DIR)

# ---------------------------------------------
# SAVE IMAGE METADATA
# ---------------------------------------------
print("[INFO] Saving metadata...")
# The metadata table is stored next to the images, without the index column.
metadata_csv_path = os.path.join(OUTPUT_DIR, "downloaded_metadata.csv")
merged_df.to_csv(metadata_csv_path, index=False)

print("[DONE] Dataset curation complete.")

In [None]:
import os
import json
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image
from tqdm import tqdm

# ---------------------------------------------
# CONFIGURATION
# ---------------------------------------------
# Directory scanned for images to annotate (same path as the download step's OUTPUT_DIR).
INPUT_DIR = "/kaggle/tmp/landmark_images"
# Destination of the generated COCO-style annotation file.
OUTPUT_JSON = "/kaggle/working/landmarks_coco.json"
# File extensions treated as images; compared against the lower-cased extension.
IMAGE_EXT = [".jpg", ".jpeg", ".png"]

# ---------------------------------------------
# HELPER FUNCTIONS
# ---------------------------------------------
def load_images(input_dir, extensions=None):
    """Return the image file paths found directly inside ``input_dir``.

    Args:
        input_dir: Directory to scan (not recursive).
        extensions: Optional iterable of extensions including the dot, e.g.
            ``[".jpg", ".png"]``. Defaults to the module-level ``IMAGE_EXT``.
            Matching is case-insensitive.

    Returns:
        A list of ``input_dir``-joined paths sorted by name. ``os.listdir``
        returns entries in arbitrary order, so sorting keeps the image ids
        derived from this list stable between runs.
    """
    allowed = IMAGE_EXT if extensions is None else extensions
    allowed = {ext.lower() for ext in allowed}
    return sorted(
        os.path.join(input_dir, name)
        for name in os.listdir(input_dir)
        if os.path.splitext(name)[1].lower() in allowed
    )

def generate_annotations(model, image_paths, score_threshold=0.5):
    """Run a detection model over images and collect COCO-style records.

    Args:
        model: Detection model called as ``model(batch)``; the first element of
            its result must provide ``'boxes'`` and ``'scores'``.
        image_paths: Iterable of image file paths.
        score_threshold: Detections scoring below this value are discarded.

    Returns:
        ``(images, annotations)``: two lists of dicts. Each image record holds
        ``id``, ``file_name``, ``width`` and ``height``; each annotation holds
        ``id``, ``image_id``, ``category_id``, ``bbox`` (x, y, width, height),
        ``area`` and ``iscrowd``.

    NOTE(review): the model's predicted labels are not read; every kept
    detection is written with ``category_id`` 1. Confirm this is intended.
    """
    to_tensor = transforms.ToTensor()

    # Run on whatever device the model already lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    images = []
    annotations = []
    ann_id = 1

    # Inference only: without no_grad() every forward pass would build an
    # autograd graph and waste memory.
    with torch.no_grad():
        for img_id, path in enumerate(tqdm(image_paths)):
            try:
                with Image.open(path) as raw:
                    image = raw.convert("RGB")
            except OSError as exc:
                # A download may have stored something that is not an image;
                # skip it rather than abort the whole run.
                print(f"[WARN] Skipping unreadable image {path}: {exc}")
                continue

            tensor = to_tensor(image).unsqueeze(0).to(device)
            result = model(tensor)[0]

            width, height = image.size
            images.append({
                "id": img_id,
                "file_name": os.path.basename(path),
                "width": width,
                "height": height
            })

            # Convert once to plain Python numbers instead of one .item() call per box.
            boxes = result['boxes'].tolist()
            scores = result['scores'].tolist()
            for (x_min, y_min, x_max, y_max), score in zip(boxes, scores):
                if score < score_threshold:
                    continue
                box_w = x_max - x_min
                box_h = y_max - y_min
                annotations.append({
                    "id": ann_id,
                    "image_id": img_id,
                    "category_id": 1,
                    "bbox": [x_min, y_min, box_w, box_h],
                    "area": box_w * box_h,
                    "iscrowd": 0
                })
                ann_id += 1

    return images, annotations

# ---------------------------------------------
# LOAD MODEL
# ---------------------------------------------
# Use the GPU when one is present and fall back to the CPU otherwise; an
# unconditional .cuda() raises on CPU-only machines.
model = fasterrcnn_resnet50_fpn(pretrained=True).to(
    torch.device("cuda" if torch.cuda.is_available() else "cpu")
)
model.eval()  # the model is only used for inference here

# ---------------------------------------------
# RUN DETECTION & SAVE COCO JSON
# ---------------------------------------------
image_paths = load_images(INPUT_DIR)
images, annotations = generate_annotations(model, image_paths)

# Assemble the document section by section; insertion order is the order in
# which the keys are serialized.
coco_json = {}
coco_json["info"] = {"description": "Landmark Dataset with Bounding Boxes"}
coco_json["licenses"] = []
coco_json["images"] = images
coco_json["annotations"] = annotations
coco_json["categories"] = [{"id": 1, "name": "landmark"}]

serialized = json.dumps(coco_json)
with open(OUTPUT_JSON, "w") as f:
    f.write(serialized)

print("[DONE] COCO JSON saved.")

### Step-by-Step Training Pipeline

In [None]:
import torch

# Report whether a CUDA device is visible and, if so, the name of device 0.
if torch.cuda.is_available():
    cuda_device_label = torch.cuda.get_device_name(0)
else:
    cuda_device_label = "CPU only"

print("CUDA Available:", torch.cuda.is_available())
print("CUDA Device Name:", cuda_device_label)


In [None]:
# Pick the device used by the cells below: CUDA when available, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


In [None]:
# `models` and `num_classes` are not defined by any earlier cell, so this cell
# used to fail on a fresh kernel. Bring both into scope here.
from torchvision import models

num_classes = 50  # same value the later training cell assigns

# ImageNet-pretrained ResNet-50 with its final layer replaced by a fresh
# `num_classes`-way classifier, moved to the selected device.
model = models.resnet50(pretrained=True)
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)


In [None]:
def train_baseline(model, dataloader, criterion, optimizer, epochs=10):
    """Train ``model`` for ``epochs`` passes over ``dataloader``.

    Prints, per epoch, the loss summed over batches and the training accuracy.

    Args:
        model: Module to train; batches are moved to the device of its parameters.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        epochs: Number of passes over ``dataloader``.

    Returns:
        None.

    NOTE(review): a function with the same name is defined again later in this
    notebook and replaces this one once that cell runs.
    """
    # Follow the model's own device rather than a module-level `device`, so the
    # inputs can never end up on a different device than the weights.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(epochs):
        running_loss, correct, total = 0.0, 0, 0
        for inputs, labels in dataloader:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
        if total == 0:
            # An empty dataloader would otherwise divide by zero below.
            print(f"Epoch [{epoch+1}/{epochs}], no samples in dataloader; skipping metrics")
            continue
        acc = correct / total
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss:.4f}, Accuracy: {acc:.4f}")


In [None]:
# Debug aid: show where the first batch lives before it is moved to `device`.
# `dataloader` is only created by a later cell, so on a fresh kernel this cell
# used to raise NameError; report that instead of failing.
if "dataloader" in globals():
    inputs, labels = next(iter(dataloader))
    print("Inputs on:", inputs.device)
    print("Labels on:", labels.device)
else:
    print("[SKIP] `dataloader` is not defined yet; run the dataset/DataLoader cell first.")


In [None]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset
import os
import json
from PIL import Image

# ---------------------------------------------
# DEVICE SETUP
# ---------------------------------------------
# Prefer CUDA when it is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# ---------------------------------------------
# COCO Dataset Class
# ---------------------------------------------
class LandmarkDataset(Dataset):
    """Dataset yielding one cropped image per COCO annotation.

    Each item is ``(image, label)``: the annotation's bounding box cropped out
    of its source image (optionally transformed) and the annotation's
    ``category_id``.

    NOTE(review): the annotation generator earlier in this notebook writes
    ``category_id`` 1 for every box, so with that file all labels are identical
    and carry no landmark identity. Confirm before training a classifier.
    """

    def __init__(self, coco_json_path, image_dir, transform=None):
        """Load the COCO JSON file and index its images by id.

        Args:
            coco_json_path: Path to a JSON file with ``images`` and ``annotations``.
            image_dir: Directory containing the files named by ``file_name``.
            transform: Optional callable applied to each cropped PIL image.
        """
        with open(coco_json_path, 'r') as f:
            self.coco = json.load(f)
        self.image_dir = image_dir
        self.transform = transform
        self.annotations = self.coco['annotations']
        self.images = {img['id']: img for img in self.coco['images']}

    def __len__(self):
        """Return the number of annotations (not the number of images)."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(cropped_image, category_id)`` for annotation ``idx``."""
        ann = self.annotations[idx]
        img_info = self.images[ann['image_id']]
        img_path = os.path.join(self.image_dir, img_info['file_name'])
        # Close the file handle promptly; convert() returns an independent image.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")

        # Crop to bounding box. Coordinates are rounded to whole pixels
        # (presumably matching how PIL's crop rounds its box; verify against the
        # installed Pillow), then forced to span at least one pixel: a
        # zero-width or zero-height crop would make a later Resize fail.
        x, y, w, h = ann['bbox']
        left, top = int(round(x)), int(round(y))
        right = max(int(round(x + w)), left + 1)
        bottom = max(int(round(y + h)), top + 1)
        img = img.crop((left, top, right, bottom))

        if self.transform:
            img = self.transform(img)

        label = ann['category_id']
        return img, label

# ---------------------------------------------
# Transformations
# ---------------------------------------------
# Resize crops to the 224x224 input used below and normalise with the ImageNet
# channel statistics that torchvision's pretrained classifiers were trained with.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# ---------------------------------------------
# Training Function
# ---------------------------------------------
def train_baseline(model, dataloader, criterion, optimizer, epochs=3):
    """Train ``model`` for ``epochs`` passes over ``dataloader``.

    Prints the device placement of the first batch of every epoch, then the
    per-epoch loss (summed over batches) and training accuracy.

    Args:
        model: Module to train; batches are moved to the device of its parameters.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        epochs: Number of passes over ``dataloader``.

    Returns:
        None.
    """
    # Follow the model's own device rather than a module-level `device`, so the
    # inputs can never end up on a different device than the weights.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(epochs):
        running_loss, correct, total = 0.0, 0, 0
        for i, (inputs, labels) in enumerate(dataloader):
            inputs, labels = inputs.to(target_device), labels.to(target_device)

            if i == 0:  # Print device info for first batch
                print(f"Inputs on: {inputs.device}, Labels on: {labels.device}, Model on: {next(model.parameters()).device}")

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        if total == 0:
            # An empty dataloader would otherwise divide by zero below.
            print(f"Epoch [{epoch+1}/{epochs}] no samples in dataloader; skipping metrics")
            continue
        acc = correct / total
        print(f"Epoch [{epoch+1}/{epochs}] Loss: {running_loss:.4f}, Accuracy: {acc:.4f}")

# ---------------------------------------------
# Parameters
# ---------------------------------------------
num_classes = 50  # Adjust based on your dataset

# ---------------------------------------------
# Load Dataset & DataLoader
# ---------------------------------------------
image_dir = "/kaggle/tmp/landmark_images"
coco_json_path = "/kaggle/working/landmarks_coco.json"

# One sample per annotation, cropped and transformed on access.
dataset = LandmarkDataset(coco_json_path=coco_json_path, image_dir=image_dir, transform=transform)

# Loader settings gathered in one place: shuffled batches of 32, two worker
# processes, pinned host memory.
loader_settings = {
    "batch_size": 32,
    "shuffle": True,
    "num_workers": 2,
    "pin_memory": True,
}
dataloader = DataLoader(dataset, **loader_settings)

# ---------------------------------------------
# Load and Prepare ResNet-50 Model
# ---------------------------------------------
model = models.resnet50(pretrained=True)
# Replace the final fully connected layer with a `num_classes`-way classifier
# that has the same input width.
classifier_in_features = model.fc.in_features
model.fc = torch.nn.Linear(classifier_in_features, num_classes)
model = model.to(device)

# Cross-entropy loss and Adam over all parameters with a learning rate of 1e-4.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

# ---------------------------------------------
# Train the Model
# ---------------------------------------------
train_baseline(model=model, dataloader=dataloader, criterion=criterion, optimizer=optimizer)


In [None]:
# Option 1: save the entire model object (architecture and weights).
torch.save(model, "/kaggle/working/resnet50_landmarks.pth")
# To load it later, read it back from the same path it was written to:
# model = torch.load("/kaggle/working/resnet50_landmarks.pth")
# model.eval()
# NOTE(review): recent PyTorch releases may require weights_only=False in
# torch.load to restore a full model object - verify against the installed version.


In [None]:
# Option 2: Save only the model weights (recommended for portability)
torch.save(model.state_dict(), "/kaggle/working/resnet50_landmarks_state_dict.pth")
# To load it later, rebuild the same architecture and read the weights back
# from the path they were written to:

# model = models.resnet50(pretrained=False)
# model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
# model.load_state_dict(torch.load("/kaggle/working/resnet50_landmarks_state_dict.pth"))
# model.to(device)
# model.eval()


In [None]:
# Save the trained model's weights (state_dict only).
# NOTE(review): the previous cell already writes the same state_dict to the same
# path, so this cell overwrites that file with identical content - consider
# keeping only one of the two.
torch.save(model.state_dict(), "/kaggle/working/resnet50_landmarks_state_dict.pth")
print("Model weights saved to /kaggle/working/resnet50_landmarks_state_dict.pth")


In [None]:
import torch.onnx

# Example input used to trace the model: one 3-channel 224x224 image.
dummy_input = torch.randn(1, 3, 224, 224).to(device)

# Destination file for the exported graph.
onnx_path = "/kaggle/working/resnet50_landmarks.onnx"

# Axis 0 of the input and output is named so the exported graph accepts a
# variable batch size.
batch_axis_name = {0: 'batch_size'}

export_options = {
    "export_params": True,          # store the trained parameter weights
    "opset_version": 11,            # ONNX opset to target
    "do_constant_folding": True,    # fold constants as an export-time optimization
    "input_names": ['input'],       # name of the graph input
    "output_names": ['output'],     # name of the graph output
    "dynamic_axes": {
        'input': dict(batch_axis_name),
        'output': dict(batch_axis_name),
    },
}

# Export the model
torch.onnx.export(model, dummy_input, onnx_path, **export_options)

print(f"Model successfully exported to ONNX format at: {onnx_path}")


### Dataset Loader with Bounding Box Cropping and Traditional Learning

In [None]:
from PIL import Image
import torchvision.transforms as transforms

def crop_with_bbox(image, bbox):
    """Crop ``image`` to a box given as ``(x, y, width, height)``.

    The box is converted to the ``(left, upper, right, lower)`` form that
    ``image.crop`` receives; the result of that call is returned.
    """
    left, upper, width, height = bbox
    right = left + width
    lower = upper + height
    return image.crop((left, upper, right, lower))

# Resize crops to 224x224 and normalise with the ImageNet channel statistics
# that torchvision's pretrained classifiers were trained with.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def load_dataset_from_coco(coco_json_path, images_dir):
    """Build an in-memory list of ``(transformed_crop, category_id)`` pairs.

    Args:
        coco_json_path: Path to a COCO annotation file readable by pycocotools.
        images_dir: Directory containing the files named by ``file_name``.

    Returns:
        A list with one ``(transform(crop), label)`` tuple per annotation, in
        image-id order. Every crop is kept in memory, so the list grows with
        the number of annotations.
    """
    from pycocotools.coco import COCO
    coco = COCO(coco_json_path)
    dataset = []

    for img_id in coco.getImgIds():
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        if not anns:
            # Nothing to crop: skip opening and decoding the image at all.
            continue

        img_info = coco.loadImgs(img_id)[0]
        image_path = os.path.join(images_dir, img_info['file_name'])
        # Close the file handle promptly; convert() returns an independent image.
        with Image.open(image_path) as raw:
            image = raw.convert("RGB")

        for ann in anns:
            cropped = crop_with_bbox(image, ann["bbox"])
            label = ann["category_id"]
            dataset.append((transform(cropped), label))
    return dataset


In [None]:
import torch.nn as nn
from torchvision import models

def get_cnn_model(model_name="resnet50", num_classes=100):
    """Build a pretrained CNN whose last layer outputs ``num_classes`` logits.

    Args:
        model_name: ``"resnet50"`` or ``"vgg16"``.
        num_classes: Size of the replacement classification layer.

    Returns:
        The torchvision model with its final linear layer replaced.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    supported = ("resnet50", "vgg16")
    # Validate first: an unknown name used to fall through both branches and
    # fail at `return model` with an UnboundLocalError.
    if model_name not in supported:
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected one of {supported}"
        )

    if model_name == "resnet50":
        model = models.resnet50(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    else:  # "vgg16"
        model = models.vgg16(pretrained=True)
        # Index 6 is the layer this function replaces with the new classifier.
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
    return model


### Few-Shot Learning (ProtoNet, SiameseNet)


In [None]:
class ProtoNet(nn.Module):
    """Prototype-based classifier on top of an embedding ``encoder``.

    Support embeddings are averaged per class into prototypes; queries are
    scored by the negated distance to each prototype.
    """

    def __init__(self, encoder, n_way=None, k_shot=None):
        """Store the encoder and optional default episode shape.

        Args:
            encoder: Module mapping a batch of inputs to a batch of embeddings.
            n_way: Default number of classes per episode (optional).
            k_shot: Default number of support samples per class (optional).
        """
        super().__init__()
        self.encoder = encoder
        self.n_way = n_way
        self.k_shot = k_shot

    def forward(self, support, query, n_way=None, k_shot=None):
        """Return ``-distance`` of every query to every class prototype.

        Args:
            support: Support batch, ordered class by class (all samples of the
                first class, then the second, ...), as implied by the reshape
                to ``(n_way, k_shot, -1)``.
            query: Query batch.
            n_way: Number of classes; overrides the constructor default.
            k_shot: Support samples per class; overrides the constructor default.

        Returns:
            Tensor of shape ``(num_queries, n_way)`` usable as logits.

        Raises:
            ValueError: If the episode shape is unknown or does not match the
                number of support samples.
        """
        n_way = self.n_way if n_way is None else n_way
        k_shot = self.k_shot if k_shot is None else k_shot
        # Legacy fallback: the original code read module-level `n_way`/`k_shot`.
        if n_way is None:
            n_way = globals().get("n_way")
        if k_shot is None:
            k_shot = globals().get("k_shot")

        num_support = support.size(0)
        if n_way is None and k_shot is None:
            raise ValueError("ProtoNet needs n_way and/or k_shot to shape the support set")
        if (n_way is not None and n_way <= 0) or (k_shot is not None and k_shot <= 0):
            raise ValueError("n_way and k_shot must be positive")
        # Derive whichever value is missing from the support-set size.
        if n_way is None:
            n_way = num_support // k_shot
        if k_shot is None:
            k_shot = num_support // n_way
        if n_way * k_shot != num_support:
            raise ValueError(
                f"support has {num_support} samples, expected n_way * k_shot = {n_way * k_shot}"
            )

        support_emb = self.encoder(support)
        query_emb = self.encoder(query)
        # Mean embedding per class = class prototype.
        proto = support_emb.reshape(n_way, k_shot, -1).mean(dim=1)
        dists = euclidean_dist(query_emb, proto)
        return -dists


In [None]:
def euclidean_dist(x, y):
    """Return pairwise squared Euclidean distances between rows of ``x`` and ``y``.

    Args:
        x: Tensor of shape ``(n, d)``.
        y: Tensor of shape ``(m, d)``.

    Returns:
        Tensor of shape ``(n, m)`` whose ``[i, j]`` entry is
        ``sum((x[i] - y[j]) ** 2)``. No square root is taken.
    """
    # Broadcast (n, 1, d) against (1, m, d), then reduce over the feature axis.
    diff = x.unsqueeze(1) - y.unsqueeze(0)
    return (diff ** 2).sum(2)


### Training and Evaluation

In [None]:
from sklearn.metrics import accuracy_score, classification_report

def evaluate_model(model, dataloader, device):
    """Print a classification report for ``model`` over ``dataloader``.

    The model is switched to eval mode, every batch is moved to ``device``,
    and the arg-max prediction of each sample is compared with its label.
    Nothing is returned; the report is printed.
    """
    model.eval()
    targets = []
    predictions = []
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_inputs)
            predicted = logits.argmax(dim=1)
            # Bring both back to the CPU before collecting them.
            targets += list(batch_labels.cpu().numpy())
            predictions += list(predicted.cpu().numpy())
    report = classification_report(targets, predictions)
    print(report)
