In [None]:
# ResNet50 - model 1
import os
import json
import torch
import torchvision
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np

def get_image_paths(folder):
    """Return the paths of all JPEG files directly inside ``folder``.

    The extension check is case-insensitive (``.jpg`` and ``.JPG`` both match)
    and the result is sorted by filename: ``os.listdir`` returns entries in
    arbitrary order, and the retrieval output should not depend on that.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        List of ``os.path.join(folder, name)`` strings, sorted by name.
    """
    return [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.lower().endswith('.jpg')
    ]

class ImageFolderDataset(Dataset):
    """Dataset over a flat list of image paths yielding ``(tensor, path)``.

    If no (truthy) ``transform`` is given, images go through
    resize(256) -> center-crop(224) -> tensor -> normalize, using the
    mean/std constants below (presumably the ImageNet statistics).
    """

    def __init__(self, paths, transform=None):
        if not transform:
            transform = T.Compose([
                T.Resize(256),
                T.CenterCrop(224),
                T.ToTensor(),
                T.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
            ])
        self.paths = paths
        self.transform = transform

    def __len__(self):
        """Number of image paths held by the dataset."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(transformed_image, path)``."""
        image_path = self.paths[idx]
        rgb_image = Image.open(image_path).convert('RGB')
        transformed = self.transform(rgb_image)
        return transformed, image_path

# Load a pretrained ResNet50 and drop its last child module, so the network
# is used as a feature extractor instead of a classifier.
model = torchvision.models.resnet50(pretrained=True)
model = torch.nn.Sequential(*list(model.children())[:-1])
# Inference mode on the GPU; this cell requires CUDA (no CPU fallback).
model.eval().cuda()

# Folders holding the gallery (search set) and query images
gallery_folder = 'Data_example/test/gallery'
query_folder   = 'Data_example/test/query'

gallery_paths = get_image_paths(gallery_folder)
query_paths   = get_image_paths(query_folder)

# DataLoaders: shuffle=False keeps features aligned with the path order
batch_size = 32
gallery_loader = DataLoader(ImageFolderDataset(gallery_paths), batch_size=batch_size, shuffle=False)
query_loader   = DataLoader(ImageFolderDataset(query_paths),   batch_size=batch_size, shuffle=False)

# Extract features
@torch.no_grad()
def extract_features(loader):
    """Run the global ``model`` (on CUDA) over ``loader`` and collect features.

    Args:
        loader: Iterable yielding ``(image_batch, path_batch)`` pairs.

    Returns:
        ``(features, paths)``: the per-batch outputs stacked row-wise with
        ``np.vstack`` and the list of paths in the same order.
    """
    feature_chunks = []
    ordered_paths = []
    for image_batch, path_batch in loader:
        embeddings = model(image_batch.cuda())
        # Drop the two trailing dimensions when they are singletons -> (B,2048)
        embeddings = embeddings.squeeze(-1).squeeze(-1)
        feature_chunks.append(embeddings.cpu().numpy())
        ordered_paths.extend(path_batch)
    return np.vstack(feature_chunks), ordered_paths

# Embed both image sets. The returned path lists replace the earlier ones so
# that row i of each feature matrix corresponds to path i.
gallery_feats, gallery_paths = extract_features(gallery_loader)
query_feats, query_paths     = extract_features(query_loader)

# L2-normalize each row so that a dot product equals cosine similarity
gallery_feats = gallery_feats / np.linalg.norm(gallery_feats, axis=1, keepdims=True)
query_feats   = query_feats   / np.linalg.norm(query_feats,   axis=1, keepdims=True)

# Compute cosine similarities and retrieve top-k
k = 5  # set as needed
results = []
for q_feat, q_path in zip(query_feats, query_paths):
    sims = gallery_feats.dot(q_feat)
    # Sorting the negated scores puts the most similar gallery rows first
    topk_idx = np.argsort(-sims)[:k]
    topk_paths = [gallery_paths[i] for i in topk_idx]
    results.append({
        'filename': q_path,
        'gallery_images': topk_paths
    })

# Save to JSON
with open('submission1.json', 'w') as f:
    json.dump(results, f, indent=2)

# Report the file that was actually written (the message previously named
# 'submission.json', which this cell never creates).
print('Saved submission1.json')

In [None]:
# ResNet50 fine-tuned - model 2
import os
import json
import torch
import torchvision
import torchvision.transforms as T
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from PIL import Image
import numpy as np

# Data transforms: resize, center-crop to 224, convert to tensor, normalize
transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225])
])

# Load training data (ImageFolder: one sub-directory per class)
train_dir = 'Data_example/training'
train_dataset = ImageFolder(train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Load pretrained model and size its classifier to the training classes
model = torchvision.models.resnet50(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, len(train_dataset.classes))  # Replace classifier
model = model.cuda()  # requires CUDA (no CPU fallback)

# Loss and optimizer (all parameters are trained, not only the new head)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop: plain supervised classification on the training folder
epochs = 5
for epoch in range(epochs):
    model.train()
    # Per-epoch running totals for the summary line printed below
    total_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images, labels = images.cuda(), labels.cuda()

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        # Predicted class = index of the largest logit in each row
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    # Loss is averaged over batches; accuracy is measured on the training data
    print(f"Epoch {epoch+1}/{epochs} | Loss: {total_loss/len(train_loader):.4f} | Accuracy: {100 * correct / total:.2f}%")

# Save model weights (state dict only) for the retrieval step below
torch.save(model.state_dict(), 'resnet50_finetuned.pth')
print("✅ Fine-tuned model saved as 'resnet50_finetuned.pth'")

# Load fine-tuned ResNet50
model = torchvision.models.resnet50(pretrained=False)
num_ftrs = model.fc.in_features
# The classifier must have exactly the shape it had during training, otherwise
# load_state_dict fails with a size mismatch. Derive the class count from the
# training dataset instead of hard-coding it (it was fixed to 3 before).
model.fc = torch.nn.Linear(num_ftrs, len(train_dataset.classes))
model.load_state_dict(torch.load('resnet50_finetuned.pth'))  # load weights
model.fc = torch.nn.Identity()  # remove classification head after loading
model.eval().cuda()


# Image transforms (same pipeline as the one used for training)
transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225])
])

# Dataset for inference
class ImageFolderDataset(Dataset):
    """Dataset over a list of image paths that yields ``(tensor, path)`` pairs."""

    def __init__(self, paths, transform):
        self.paths, self.transform = paths, transform

    def __len__(self):
        """Number of image paths held by the dataset."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Open image ``idx`` as RGB, apply the transform, return it with its path."""
        location = self.paths[idx]
        rgb = Image.open(location).convert('RGB')
        tensor = self.transform(rgb)
        return tensor, location

# Get all image paths
def get_image_paths(folder):
    """Return the sorted paths of all JPEG files directly inside ``folder``.

    ``os.listdir`` gives no ordering guarantee, so names are sorted to make the
    result reproducible; the ``.jpg`` check ignores case so ``.JPG`` files are
    not silently skipped.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        List of ``os.path.join(folder, name)`` strings, sorted by name.
    """
    jpeg_names = [name for name in sorted(os.listdir(folder)) if name.lower().endswith('.jpg')]
    return [os.path.join(folder, name) for name in jpeg_names]

# Folders holding the gallery (search set) and query images
gallery_folder = 'Data_example/test/gallery'
query_folder   = 'Data_example/test/query'

gallery_paths = get_image_paths(gallery_folder)
query_paths = get_image_paths(query_folder)

# DataLoaders: shuffle=False keeps features aligned with the path order
batch_size = 32
gallery_loader = DataLoader(ImageFolderDataset(gallery_paths, transform), batch_size=batch_size, shuffle=False)
query_loader = DataLoader(ImageFolderDataset(query_paths, transform), batch_size=batch_size, shuffle=False)

# Feature extraction
@torch.no_grad()
def extract_features(loader):
    """Embed every batch of ``loader`` with the global ``model`` (on CUDA).

    Args:
        loader: Iterable yielding ``(image_batch, path_batch)`` pairs.

    Returns:
        ``(features, paths)``: batch outputs stacked row-wise into one NumPy
        array, plus the image paths in the same order.
    """
    stacked, seen_paths = [], []
    for batch, batch_paths in loader:
        activations = model(batch.cuda())
        # Remove trailing singleton dimensions, if any. Shape: (B, 2048)
        flat = activations.squeeze(-1).squeeze(-1)
        stacked.append(flat.cpu().numpy())
        seen_paths.extend(batch_paths)
    features = np.vstack(stacked)
    return features, seen_paths

# Embed both image sets; the returned path lists are aligned with the rows of
# the corresponding feature matrix.
gallery_feats, gallery_paths = extract_features(gallery_loader)
query_feats, query_paths = extract_features(query_loader)

# Normalize features to unit length so a dot product is a cosine similarity
gallery_feats = gallery_feats / np.linalg.norm(gallery_feats, axis=1, keepdims=True)
query_feats = query_feats / np.linalg.norm(query_feats, axis=1, keepdims=True)

# Retrieve top-k gallery images per query
k = 5  # You can adjust this
results = []
for q_feat, q_path in zip(query_feats, query_paths):
    sims = gallery_feats.dot(q_feat)
    # Sorting the negated scores puts the most similar gallery rows first
    topk_idx = np.argsort(-sims)[:k]
    topk_paths = [gallery_paths[i] for i in topk_idx]
    results.append({
        'filename': q_path,
        'gallery_images': topk_paths
    })

# Save results
with open('submission2.json', 'w') as f:
    json.dump(results, f, indent=2)

# Name the file that was actually written (was 'submission.json').
print('✅ Saved retrieval results to submission2.json')

In [None]:
# CLIP - model 3

#pip install torch torchvision ftfy regex tqdm
#pip install git+https://github.com/openai/CLIP.git

import os
import torch
import clip
from PIL import Image
import numpy as np
import json
from tqdm import tqdm

# Load the CLIP model; use the GPU when one is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# `preprocess` is the image transform returned together with the model; it is
# applied to every PIL image before encoding.
model, preprocess = clip.load("ViT-B/32", device=device)

# Function to load and preprocess images
def load_images_from_folder(folder):
    """Load and preprocess every image file directly inside ``folder``.

    Files are visited in sorted filename order (``os.listdir`` alone returns
    an arbitrary order), so the output is reproducible across runs and
    machines. A file that cannot be opened or preprocessed is reported on
    stdout and skipped; the two returned lists always stay aligned.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        ``(image_paths, images)``: paths of the successfully loaded files and
        the outputs of the global ``preprocess`` transform, in the same order.
    """
    image_paths = []
    images = []
    for filename in sorted(os.listdir(folder)):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            path = os.path.join(folder, filename)
            try:
                image = preprocess(Image.open(path).convert("RGB"))
                images.append(image)
                image_paths.append(path)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole run.
                print(f"Error loading image {path}: {e}")
    return image_paths, images

# Load gallery images
gallery_folder = "Data_example/test/gallery"
gallery_paths, gallery_images = load_images_from_folder(gallery_folder)

# Compute gallery features (the whole gallery is encoded as a single batch)
gallery_images_tensor = torch.stack(gallery_images).to(device)
with torch.no_grad():
    gallery_features = model.encode_image(gallery_images_tensor)
    # Unit-normalize so that a dot product is a cosine similarity
    gallery_features /= gallery_features.norm(dim=-1, keepdim=True)

# Load query images
query_folder = "Data_example/test/query"
query_paths, query_images = load_images_from_folder(query_folder)

# Compute query features
query_images_tensor = torch.stack(query_images).to(device)
with torch.no_grad():
    query_features = model.encode_image(query_images_tensor)
    query_features /= query_features.norm(dim=-1, keepdim=True)

# Compute similarity and retrieve top-k matches
top_k = 5
results = []
for i, query_feature in enumerate(query_features):
    # (N, D) @ (D, 1) -> (N, 1) -> (N,): one cosine score per gallery image
    similarities = (gallery_features @ query_feature.unsqueeze(1)).squeeze(1)
    # topk raises if asked for more entries than exist, so cap it at the
    # gallery size for small galleries.
    top_k_indices = similarities.topk(min(top_k, similarities.size(0))).indices
    top_k_paths = [gallery_paths[idx] for idx in top_k_indices]
    results.append({
        "filename": query_paths[i],
        "gallery_images": top_k_paths
    })

# Save results to JSON
with open("submission3.json", "w") as f:
    json.dump(results, f, indent=2)

# Name the file that was actually written (was 'submission.json').
print("✅ Retrieval results saved to submission3.json")

In [None]:
# HuggingFace - model 4
import os
import json
import torch
import faiss
import numpy as np
from PIL import Image
from torchvision import transforms
from transformers import ViTFeatureExtractor, ViTModel

# Set device: GPU when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load pre-trained model and feature extractor
model_name = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)
model = ViTModel.from_pretrained(model_name).to(device)
model.eval()  # inference only

# Define image preprocessing. Only the mean/std are taken from the feature
# extractor; resizing and tensor conversion are done with torchvision.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
])

# Load and preprocess images
def load_images_from_folder(folder_path):
    """Load and preprocess every image file directly inside ``folder_path``.

    Files are visited in sorted filename order, because ``os.listdir`` alone
    returns an arbitrary order and the output should be reproducible. A file
    that cannot be opened or preprocessed is reported on stdout and skipped;
    the two returned lists always stay aligned.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        ``(image_paths, images)``: paths of the successfully loaded files and
        the outputs of the global ``preprocess`` transform, in the same order.
    """
    image_paths = []
    images = []
    for filename in sorted(os.listdir(folder_path)):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            path = os.path.join(folder_path, filename)
            try:
                image = Image.open(path).convert('RGB')
                image = preprocess(image)
                images.append(image)
                image_paths.append(path)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole run.
                print(f"Error loading image {path}: {e}")
    return image_paths, images

# Folders holding the gallery (search set) and query images
gallery_folder = "Data_example/test/gallery"
query_folder   = "Data_example/test/query"

# Every image is loaded and preprocessed up front and kept in memory
gallery_paths, gallery_images = load_images_from_folder(gallery_folder)
query_paths, query_images     = load_images_from_folder(query_folder)

# Extract embeddings
def extract_embeddings(images):
    """Embed preprocessed images one at a time with the global ``model``.

    For each image the vector at sequence position 0 of
    ``last_hidden_state`` is kept.

    Args:
        images: Iterable of image tensors without a batch dimension.

    Returns:
        NumPy array built from the per-image vectors, in input order.
    """
    with torch.no_grad():
        vectors = [
            model(single.unsqueeze(0).to(device))
            .last_hidden_state[:, 0, :]
            .squeeze()
            .cpu()
            .numpy()
            for single in images
        ]
    return np.array(vectors)

gallery_embeddings = extract_embeddings(gallery_images)
query_embeddings   = extract_embeddings(query_images)

# Normalize to unit length so inner product equals cosine similarity
gallery_embeddings = gallery_embeddings / np.linalg.norm(gallery_embeddings, axis=1, keepdims=True)
query_embeddings   = query_embeddings / np.linalg.norm(query_embeddings, axis=1, keepdims=True)

# FAISS index (exact inner-product search)
dimension = gallery_embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)
index.add(gallery_embeddings)

# Similarity search
k = 5
distances, indices = index.search(query_embeddings, k)

# Prepare submission format
results = []
for i, query_path in enumerate(query_paths):
    # FAISS pads the result with -1 when the index holds fewer than k vectors;
    # drop those so they do not silently select gallery_paths[-1].
    top_k_paths = [gallery_paths[idx] for idx in indices[i] if idx >= 0]
    results.append({
        "filename": query_path,
        "gallery_images": top_k_paths
    })

# Save to JSON
with open("submission4.json", "w") as f:
    json.dump(results, f, indent=2)

# Name the file that was actually written (was 'submission_vit_faiss.json').
print("✅ Saved submission JSON to 'submission4.json'")

In [None]:
# Triplet Network - model 5
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms, datasets
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import random

# Triplet dataset
class TripletImageDataset(Dataset):
    """Dataset yielding ``(anchor, positive, negative)`` image triplets.

    Images are read from an ``ImageFolder`` layout under ``root_dir``. For the
    anchor at ``index`` a positive is drawn at random from the same class and
    a negative from a randomly chosen different class.

    Args:
        root_dir: Root of the class-per-folder image tree.
        transform: Callable applied to each PIL image; when omitted (or falsy)
            a resize(224x224) -> tensor -> normalize pipeline is used.
    """

    def __init__(self, root_dir, transform=None):
        self.dataset = datasets.ImageFolder(root=root_dir)
        self.classes = self.dataset.classes
        self.class_to_idx = self.dataset.class_to_idx
        self.transform = transform or transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        # Build index for each class: label -> list of image paths
        self.class_index = {}
        for path, label in self.dataset.samples:
            self.class_index.setdefault(label, []).append(path)

    def _sample_triplet_paths(self, index):
        """Pick the anchor/positive/negative file paths for sample ``index``.

        The positive is drawn from the anchor's class *excluding the anchor
        itself*, since an identical anchor/positive pair has zero distance and
        adds no training signal. Only when the class has no other image does
        the positive fall back to the anchor.

        Raises:
            IndexError: if the dataset contains a single class, so no negative
                class can be chosen.
        """
        anchor_path, anchor_label = self.dataset.samples[index]

        other_positives = [p for p in self.class_index[anchor_label] if p != anchor_path]
        positive_path = random.choice(other_positives) if other_positives else anchor_path

        negative_label = random.choice([l for l in self.class_index if l != anchor_label])
        negative_path = random.choice(self.class_index[negative_label])
        return anchor_path, positive_path, negative_path

    def __getitem__(self, index):
        """Return the transformed ``(anchor, positive, negative)`` images."""
        anchor_path, positive_path, negative_path = self._sample_triplet_paths(index)

        anchor = self.transform(Image.open(anchor_path).convert('RGB'))
        positive = self.transform(Image.open(positive_path).convert('RGB'))
        negative = self.transform(Image.open(negative_path).convert('RGB'))

        return anchor, positive, negative

    def __len__(self):
        """Number of anchors, i.e. images in the underlying ImageFolder."""
        return len(self.dataset)

# Triplet network using ResNet backbone
class EmbeddingNet(nn.Module):
    """Pretrained ResNet18 without its last child, plus a linear layer to 128-d.

    The forward pass returns L2-normalized embeddings. The attribute names
    ``feature_extractor`` and ``fc`` determine the state-dict keys.
    """

    def __init__(self):
        super().__init__()
        resnet = models.resnet18(pretrained=True)
        # Keep every child module except the last one (the fc layer)
        trunk_layers = list(resnet.children())[:-1]
        self.feature_extractor = nn.Sequential(*trunk_layers)
        self.fc = nn.Linear(resnet.fc.in_features, 128)

    def forward(self, x):
        """Map an image batch to unit-length embeddings of size 128."""
        batch_size = x.size(0)
        pooled = self.feature_extractor(x).view(batch_size, -1)
        projected = self.fc(pooled)
        return F.normalize(projected, p=2, dim=1)

class TripletNet(nn.Module):
    """Applies one shared embedding network to each element of a triplet."""

    def __init__(self, embedding_net):
        super().__init__()
        self.embedding_net = embedding_net

    def forward(self, anchor, positive, negative):
        """Return ``(anchor_emb, positive_emb, negative_emb)`` from the shared net."""
        embed = self.embedding_net
        return embed(anchor), embed(positive), embed(negative)

# Training configuration for TripletNet
root_dir = 'Data_example/training'
batch_size = 32
num_epochs = 10
learning_rate = 1e-4

# Each item of the dataset is an (anchor, positive, negative) image triplet
train_dataset = TripletImageDataset(root_dir)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Requires CUDA (no CPU fallback in the training part of this cell)
model = TripletNet(EmbeddingNet()).cuda()
criterion = nn.TripletMarginLoss(margin=1.0)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

model.train()
for epoch in range(num_epochs):
    total_loss = 0.0
    for anchor, positive, negative in train_loader:
        anchor, positive, negative = anchor.cuda(), positive.cuda(), negative.cuda()
        optimizer.zero_grad()
        anchor_out, positive_out, negative_out = model(anchor, positive, negative)
        loss = criterion(anchor_out, positive_out, negative_out)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Loss is averaged over the batches of the epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}")

# Save trained model: only the embedding network's weights are stored, not the
# TripletNet wrapper, so inference can load them straight into EmbeddingNet.
torch.save(model.embedding_net.state_dict(), 'triplet_embedding_model.pth')
print("✅ Triplet model trained and saved as 'triplet_embedding_model.pth'")

import os
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np

# Device (CPU only): inference for this model runs on the CPU even though
# training above used CUDA.
device = torch.device('cpu')

# Embedding network (must match the training definition)
class EmbeddingNet(nn.Module):
    """Inference-time copy of the embedding network.

    ResNet18 (created without pretrained weights) minus its last child,
    followed by a linear layer to 128-d; outputs are L2-normalized. The
    attribute names ``feature_extractor`` and ``fc`` must stay as they are so
    the saved state dict loads.
    """

    def __init__(self):
        super().__init__()
        resnet = models.resnet18(pretrained=False)
        # Keep every child module except the last one (the fc layer)
        trunk_layers = list(resnet.children())[:-1]
        self.feature_extractor = nn.Sequential(*trunk_layers)
        self.fc = nn.Linear(resnet.fc.in_features, 128)

    def forward(self, x):
        """Map an image batch to unit-length embeddings of size 128."""
        batch_size = x.size(0)
        pooled = self.feature_extractor(x).view(batch_size, -1)
        projected = self.fc(pooled)
        return F.normalize(projected, p=2, dim=1)

# Load trained model; map_location makes the checkpoint load onto `device`
# regardless of the device it was saved from.
model = EmbeddingNet().to(device)
model.load_state_dict(torch.load('triplet_embedding_model.pth', map_location=device))
model.eval()

# Image transform (must match training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Dataset class
class ImageFolderDataset(Dataset):
    """Dataset over a list of image paths that yields ``(tensor, path)`` pairs."""

    def __init__(self, paths, transform):
        self.paths, self.transform = paths, transform

    def __len__(self):
        """Number of image paths held by the dataset."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Open image ``idx`` as RGB, apply the transform, return it with its path."""
        location = self.paths[idx]
        rgb = Image.open(location).convert('RGB')
        return self.transform(rgb), location

# Collect all image paths
def get_image_paths(folder):
    """Collect the JPEG files directly inside ``folder`` in sorted order.

    Sorting removes the dependence on ``os.listdir``'s arbitrary ordering, and
    the case-insensitive extension test keeps ``.JPG`` files from being
    silently dropped.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        List of ``os.path.join(folder, name)`` strings, sorted by name.
    """
    return [
        os.path.join(folder, entry)
        for entry in sorted(os.listdir(folder))
        if entry.lower().endswith('.jpg')
    ]

# Folders holding the gallery (search set) and query images
gallery_folder = 'Data_example/test/gallery'
query_folder   = 'Data_example/test/query'

gallery_paths = get_image_paths(gallery_folder)
query_paths   = get_image_paths(query_folder)

# Dataloaders: shuffle=False keeps features aligned with the path order
batch_size = 32
gallery_loader = DataLoader(ImageFolderDataset(gallery_paths, transform), batch_size=batch_size, shuffle=False)
query_loader   = DataLoader(ImageFolderDataset(query_paths, transform),   batch_size=batch_size, shuffle=False)

# Feature extraction
@torch.no_grad()
def extract_embeddings(loader):
    """Embed every batch of ``loader`` with the global ``model`` on ``device``.

    Args:
        loader: Iterable yielding ``(image_batch, path_batch)`` pairs.

    Returns:
        ``(embeddings, paths)``: batch outputs stacked row-wise into one NumPy
        array, plus the image paths in the same order.
    """
    chunks = []
    collected_paths = []
    for image_batch, path_batch in loader:
        embedded = model(image_batch.to(device))
        chunks.append(embedded.cpu().numpy())
        collected_paths.extend(path_batch)
    stacked = np.vstack(chunks)
    return stacked, collected_paths

# Extract features; the returned path lists are aligned with the matrix rows
gallery_feats, gallery_paths = extract_embeddings(gallery_loader)
query_feats, query_paths     = extract_embeddings(query_loader)

# Normalize (just in case, though model already outputs normalized embeddings)
gallery_feats = gallery_feats / np.linalg.norm(gallery_feats, axis=1, keepdims=True)
query_feats   = query_feats / np.linalg.norm(query_feats,   axis=1, keepdims=True)

# Retrieve top-k similar images
k = 5
results = []
for q_feat, q_path in zip(query_feats, query_paths):
    sims = gallery_feats @ q_feat  # cosine similarity
    # Sorting the negated scores puts the most similar gallery rows first
    topk_idx = np.argsort(-sims)[:k]
    topk_paths = [gallery_paths[i] for i in topk_idx]
    results.append({
        "filename": q_path,
        "gallery_images": topk_paths
    })

# Save results
with open('submission5.json', 'w') as f:
    json.dump(results, f, indent=2)

# Name the file that was actually written (was 'submission_triplet.json').
print("✅ Retrieval results saved to 'submission5.json'")

In [None]:
# ArcFace - model 6
import os
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import numpy as np
from sklearn.preprocessing import normalize

# Set device: GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Paths
train_dir = 'Data_example/training'
gallery_dir = 'Data_example/test/gallery'
query_dir = 'Data_example/test/query'

# Parameters
batch_size = 32
embedding_dim = 128  # size of the embedding produced by EmbeddingNet
num_epochs = 10
learning_rate = 1e-4
k = 5  # top-k

# Transforms (shared by training and retrieval in this cell)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Dataset + Loader (ImageFolder: one sub-directory per class)
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
num_classes = len(train_dataset.classes)

# ArcFace head
class ArcFaceHead(nn.Module):
    """Additive angular-margin classification head.

    Computes the cosine between normalized inputs and normalized class
    weights, adds the margin ``m`` to the angle of the target class only, and
    scales every logit by ``s``.

    Args:
        in_features: Embedding dimensionality.
        out_features: Number of classes.
        s: Scale applied to the returned logits.
        m: Margin (radians) added to the target-class angle.
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.50):
        super().__init__()
        self.s, self.m = s, m
        self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        """Return scaled logits of shape ``(batch, out_features)``."""
        unit_input = F.normalize(input)
        unit_weight = F.normalize(self.weight)
        cosine = F.linear(unit_input, unit_weight)
        # Clamp strictly inside [-1, 1] before acos
        clipped = torch.clamp(cosine, -1.0 + 1e-7, 1.0 - 1e-7)
        target_logit = torch.cos(torch.acos(clipped) + self.m)
        # 1.0 at each row's target class, 0.0 elsewhere
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1), 1.0)
        mixed = cosine * (1 - one_hot) + target_logit * one_hot
        return self.s * mixed

# EmbeddingNet
class EmbeddingNet(nn.Module):
    """Pretrained ResNet18 without its last child, plus a linear embedding layer.

    The forward pass returns L2-normalized embeddings of size ``out_dim``. The
    attribute names ``backbone`` and ``embedding`` determine the state-dict keys.
    """

    def __init__(self, out_dim=embedding_dim):
        super().__init__()
        resnet = models.resnet18(pretrained=True)
        # Keep every child module except the last one (the fc layer)
        trunk_layers = list(resnet.children())[:-1]
        self.backbone = nn.Sequential(*trunk_layers)
        self.embedding = nn.Linear(resnet.fc.in_features, out_dim)

    def forward(self, x):
        """Map an image batch to unit-length embeddings."""
        batch_size = x.size(0)
        pooled = self.backbone(x).view(batch_size, -1)
        embedded = self.embedding(pooled)
        return F.normalize(embedded)

# Model + Loss + Optimizer
model = EmbeddingNet().to(device)
arcface_head = ArcFaceHead(embedding_dim, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
# One optimizer updates both the embedding network and the ArcFace class weights
optimizer = torch.optim.Adam(list(model.parameters()) + list(arcface_head.parameters()), lr=learning_rate)

# Training loop: keeps a snapshot of the embedding network from the epoch with
# the lowest average *training* loss (there is no validation split here).
best_loss = float('inf')
best_model_state = None

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        embeddings = model(images)
        # The head needs the labels to apply the margin to the target class
        logits = arcface_head(embeddings, labels)
        loss = criterion(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

    # Save best model
    if avg_loss < best_loss:
        best_loss = avg_loss
        # state_dict() returns references to the live parameter tensors, and
        # dict.copy() is shallow, so the snapshot would keep changing as
        # training continues. Clone every tensor to freeze this epoch's weights.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

# Save best model
torch.save(best_model_state, 'arcface_embedding_model.pth')

# Dataset for retrieval
class ImageFolderPaths(Dataset):
    """Dataset over a list of image paths that yields ``(tensor, path)`` pairs."""

    def __init__(self, paths, transform):
        self.paths, self.transform = paths, transform

    def __len__(self):
        """Number of image paths held by the dataset."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Open image ``idx`` as RGB, apply the transform, return it with its path."""
        location = self.paths[idx]
        rgb = Image.open(location).convert("RGB")
        return self.transform(rgb), location

def get_image_paths(folder):
    """Return the sorted paths of all JPEG files directly inside ``folder``.

    ``os.listdir`` returns names in arbitrary order, so they are sorted for
    reproducible output; the extension test ignores case so ``.JPG`` files are
    included as well.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        List of ``os.path.join(folder, name)`` strings, sorted by name.
    """
    return [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.lower().endswith(".jpg")
    ]

def extract_embeddings(model, loader):
    """Embed every batch of ``loader`` with ``model`` in eval mode.

    Args:
        model: Module mapping an image batch to embeddings; it is switched to
            eval mode as a side effect.
        loader: Iterable yielding ``(image_batch, path_batch)`` pairs.

    Returns:
        ``(embeddings, paths)``: batch outputs stacked row-wise into one NumPy
        array, plus the image paths in the same order.
    """
    model.eval()
    chunks = []
    collected_paths = []
    with torch.no_grad():
        for image_batch, path_batch in loader:
            embedded = model(image_batch.to(device))
            chunks.append(embedded.cpu().numpy())
            collected_paths.extend(path_batch)
    stacked = np.vstack(chunks)
    return stacked, collected_paths

# Reload model from the checkpoint written by the training loop
model = EmbeddingNet().to(device)
model.load_state_dict(torch.load('arcface_embedding_model.pth', map_location=device))
model.eval()

# Extract gallery/query embeddings
gallery_paths = get_image_paths(gallery_dir)
query_paths = get_image_paths(query_dir)

# shuffle=False keeps the embeddings aligned with the path order
gallery_loader = DataLoader(ImageFolderPaths(gallery_paths, transform), batch_size=batch_size, shuffle=False)
query_loader = DataLoader(ImageFolderPaths(query_paths, transform), batch_size=batch_size, shuffle=False)

gallery_feats, gallery_paths = extract_embeddings(model, gallery_loader)
query_feats, query_paths = extract_embeddings(model, query_loader)

# Normalize embeddings so a dot product is a cosine similarity
gallery_feats = normalize(gallery_feats, axis=1)
query_feats = normalize(query_feats, axis=1)

# Compute cosine similarities and save submission
results = []
for q_feat, q_path in zip(query_feats, query_paths):
    sims = gallery_feats @ q_feat
    # Sorting the negated scores puts the most similar gallery rows first
    topk_idx = np.argsort(-sims)[:k]
    topk_paths = [gallery_paths[i] for i in topk_idx]
    results.append({
        'filename': q_path,
        'gallery_images': topk_paths
    })

with open('submission6.json', 'w') as f:
    json.dump(results, f, indent=2)

# Name the file that was actually written (was 'submission_arcface.json').
print("✅ Saved submission6.json")

In [None]:
# CLIP fine-tuned - model 7
import os
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets
from PIL import Image
import numpy as np
from transformers import CLIPProcessor, CLIPModel
from sklearn.preprocessing import normalize

# Device setup: GPU when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Parameters
train_dir = 'Data_example/training'
gallery_dir = 'Data_example/test/gallery'
query_dir = 'Data_example/test/query'
batch_size = 32
epochs = 10
learning_rate = 1e-5
top_k = 5

# Load CLIP (model and processor from the same checkpoint)
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Custom dataset for image-text pairs
class ImageTextDataset(Dataset):
    """Pairs each training image with a caption derived from its class folder.

    The caption is the class (folder) name with underscores replaced by
    spaces. When no (truthy) ``transform`` is given, images are resized to
    224x224, converted to tensors and normalized with the constants below.
    """

    def __init__(self, root_dir, transform=None):
        self.dataset = datasets.ImageFolder(root=root_dir)
        # Invert class_to_idx: integer label -> class (folder) name
        self.label_to_text = {
            label: name for name, label in self.dataset.class_to_idx.items()
        }
        if not transform:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize([0.48145466, 0.4578275, 0.40821073],
                                     [0.26862954, 0.26130258, 0.27577711])
            ])
        self.transform = transform

    def __len__(self):
        """Number of images in the underlying ImageFolder."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(transformed_image, caption)`` for sample ``idx``."""
        raw_image, label = self.dataset[idx]
        caption = self.label_to_text[label].replace('_', ' ')
        return self.transform(raw_image), caption

train_dataset = ImageTextDataset(train_dir)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Optimizer over all model parameters
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Fine-tuning loop: image-to-text contrastive objective
model.train()
for epoch in range(epochs):
    total_loss = 0
    for images, texts in train_loader:
        # The processor only tokenizes the captions here; the images were
        # already transformed by the dataset and are attached as pixel_values.
        inputs = processor(text=texts, return_tensors="pt", padding=True, truncation=True).to(device)
        inputs["pixel_values"] = images.to(device)

        outputs = model(**inputs)
        logits_per_image = outputs.logits_per_image
        # Target for image i is caption i of the same batch.
        # NOTE(review): captions are class names, so images of the same class
        # in one batch have identical captions and the "correct" column looks
        # ambiguous for them - confirm this is intended.
        labels = torch.arange(len(images), device=device)
        loss = nn.CrossEntropyLoss()(logits_per_image, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # Loss is averaged over the batches of the epoch
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

# Save fine-tuned model and processor to the local "clip_finetuned" directory
model.save_pretrained("clip_finetuned")
processor.save_pretrained("clip_finetuned")

# Dataset for inference
class ImageFolderPaths(Dataset):
    """Dataset over a list of image paths that yields ``(tensor, path)`` pairs.

    When no (truthy) ``transform`` is given, images are resized to 224x224,
    converted to tensors and normalized with the constants below.
    """

    def __init__(self, paths, transform=None):
        if not transform:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize([0.48145466, 0.4578275, 0.40821073],
                                     [0.26862954, 0.26130258, 0.27577711])
            ])
        self.paths = paths
        self.transform = transform

    def __len__(self):
        """Number of image paths held by the dataset."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Open image ``idx`` as RGB, apply the transform, return it with its path."""
        location = self.paths[idx]
        rgb = Image.open(location).convert("RGB")
        return self.transform(rgb), location

def get_image_paths(folder):
    """Collect the JPEG files directly inside ``folder`` in sorted order.

    Sorting removes the dependence on ``os.listdir``'s arbitrary ordering, and
    the case-insensitive extension test keeps ``.JPG`` files from being
    silently dropped.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        List of ``os.path.join(folder, name)`` strings, sorted by name.
    """
    jpeg_names = [entry for entry in sorted(os.listdir(folder)) if entry.lower().endswith(".jpg")]
    return [os.path.join(folder, entry) for entry in jpeg_names]

def extract_image_embeddings(paths):
    """Compute image features for ``paths`` with the global ``model``.

    Images are loaded through ``ImageFolderPaths`` with its default transform,
    batched with the global ``batch_size`` and run without gradients; the
    model is switched to eval mode as a side effect.

    Args:
        paths: List of image file paths.

    Returns:
        ``(features, paths)``: batch features stacked row-wise into one NumPy
        array, plus the image paths in the order they were processed.
    """
    loader = DataLoader(ImageFolderPaths(paths), batch_size=batch_size, shuffle=False)
    model.eval()

    feature_chunks = []
    ordered_paths = []
    with torch.no_grad():
        for pixel_batch, path_batch in loader:
            chunk = model.get_image_features(pixel_values=pixel_batch.to(device))
            feature_chunks.append(chunk.cpu().numpy())
            ordered_paths.extend(path_batch)
    return np.vstack(feature_chunks), ordered_paths

# Load fine-tuned model for inference from the directory written after training
model = CLIPModel.from_pretrained("clip_finetuned").to(device)
processor = CLIPProcessor.from_pretrained("clip_finetuned")

# Extract gallery and query features
gallery_paths = get_image_paths(gallery_dir)
query_paths = get_image_paths(query_dir)

# The returned path lists are aligned with the rows of the feature matrices
gallery_feats, gallery_paths = extract_image_embeddings(gallery_paths)
query_feats, query_paths = extract_image_embeddings(query_paths)

# Normalize features so a dot product is a cosine similarity
gallery_feats = normalize(gallery_feats, axis=1)
query_feats = normalize(query_feats, axis=1)

# Similarity and submission
results = []
for q_feat, q_path in zip(query_feats, query_paths):
    sims = gallery_feats @ q_feat
    # Sorting the negated scores puts the most similar gallery rows first
    topk_idx = np.argsort(-sims)[:top_k]
    topk_paths = [gallery_paths[i] for i in topk_idx]
    results.append({
        "filename": q_path,
        "gallery_images": topk_paths
    })

with open("submission7.json", "w") as f:
    json.dump(results, f, indent=2)

# Name the file that was actually written (was 'submission_clip.json').
print("✅ Saved submission7.json")

In [None]:
# SimCLR - model 8
import os
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import random
import numpy as np
from sklearn.preprocessing import normalize

# Configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_dir = 'Data_example/training'
gallery_dir = 'Data_example/test/gallery'
query_dir = 'Data_example/test/query'
batch_size = 64
epochs = 10
learning_rate = 1e-3
embedding_dim = 128  # output size of the projection head
top_k = 5

# Augmentation for contrastive learning: each call produces a different random
# view (crop, flip, colour jitter, optional grayscale) of the same image.
simclr_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(0.8, 0.8, 0.8, 0.2),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# SimCLR Dataset
class SimCLRDataset(Dataset):
    """Yields two independently transformed views of every training image.

    Class labels from the underlying ``ImageFolder`` are ignored.
    """

    def __init__(self, root_dir, transform):
        self.transform = transform
        self.dataset = datasets.ImageFolder(root=root_dir)

    def __len__(self):
        """Number of images in the underlying ImageFolder."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(view_1, view_2)``: the transform applied twice to image ``idx``."""
        image = self.dataset[idx][0]
        first_view = self.transform(image)
        second_view = self.transform(image)
        return first_view, second_view

# Projection head
class ProjectionHead(nn.Module):
    """Two-layer MLP: Linear(in_dim, in_dim) -> ReLU -> Linear(in_dim, out_dim).

    The layers live in ``self.net`` (an ``nn.Sequential``), which fixes the
    state-dict keys to ``net.0.*`` and ``net.2.*``.
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        hidden_layer = nn.Linear(in_dim, in_dim)
        activation = nn.ReLU()
        output_layer = nn.Linear(in_dim, out_dim)
        self.net = nn.Sequential(hidden_layer, activation, output_layer)

    def forward(self, x):
        """Project ``x`` from ``in_dim`` to ``out_dim`` features."""
        projected = self.net(x)
        return projected

# SimCLR model
class SimCLR(nn.Module):
    """Backbone plus projection head producing unit-length embeddings.

    Args:
        base_model: Network whose children, except the last one, form the
            backbone; its ``fc.in_features`` sets the projection head's input
            size.
        projection_dim: Output size of the projection head.
    """

    def __init__(self, base_model, projection_dim):
        super().__init__()
        self.backbone = nn.Sequential(*list(base_model.children())[:-1])
        self.projection_head = ProjectionHead(base_model.fc.in_features, projection_dim)

    def forward(self, x):
        """Return L2-normalized projections of shape ``(batch, projection_dim)``."""
        # Flatten everything after the batch dimension. A bare squeeze() would
        # also remove the batch dimension when the batch holds one sample
        # (e.g. the last batch of an epoch), breaking the loss downstream.
        x = self.backbone(x).flatten(1)
        x = self.projection_head(x)
        return F.normalize(x, dim=-1)

# NT-Xent Loss
def nt_xent_loss(z1, z2, temperature=0.5):
    """Compute the NT-Xent (normalised temperature-scaled cross-entropy) loss.

    Args:
        z1: Embeddings of the first view, shape ``(N, D)``.
        z2: Embeddings of the second view, shape ``(N, D)``; row ``i`` must
            correspond to the same image as row ``i`` of ``z1``.
        temperature: Divisor applied to the cosine similarities.

    Returns:
        Scalar tensor: mean cross-entropy over the ``2N`` rows, where the
        positive of each row is its other view and all remaining rows
        (except the row itself) act as negatives.
    """
    n_pairs = z1.size(0)
    z = torch.cat([z1, z2], dim=0)
    sim = F.cosine_similarity(z.unsqueeze(1), z.unsqueeze(0), dim=2) / temperature

    # Build index tensors on the device of the inputs rather than on a
    # module-level ``device`` global, so the loss works wherever the
    # embeddings live.
    row_ids = torch.arange(n_pairs, device=z.device)
    # Row i (first half) is positive with row i + N; row i + N with row i.
    targets = torch.cat([row_ids + n_pairs, row_ids])

    # Exclude self-similarity from the softmax. ``-inf`` is representable in
    # every floating dtype (a large finite constant is not, in half precision).
    self_mask = torch.eye(2 * n_pairs, dtype=torch.bool, device=z.device)
    sim = sim.masked_fill(self_mask, float("-inf"))

    return F.cross_entropy(sim, targets)

# Dataset and model
# Every dataset item is a pair of augmented views of one training image.
train_dataset = SimCLRDataset(train_dir, simclr_transform)
# drop_last is not set, so the final batch may be smaller than batch_size.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# The SimCLR wrapper reuses the child modules of `resnet` as its backbone, so
# training `model` updates the layers of this `resnet` object in place.
resnet = models.resnet18(pretrained=True)
model = SimCLR(resnet, embedding_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training
model.train()
for epoch in range(epochs):
    total_loss = 0
    for x1, x2 in train_loader:
        x1, x2 = x1.to(device), x2.to(device)
        # Both views go through the same network (shared weights).
        z1 = model(x1)
        z2 = model(x2)
        loss = nt_xent_loss(z1, z2)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    # Report the mean loss per batch for this epoch.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

# Save model
# Only the weights (state dict) are stored; the inference code below rebuilds
# the architecture before loading them.
torch.save(model.state_dict(), "simclr_embedding_model.pth")

# Inference dataset
class ImageFolderPaths(Dataset):
    """Dataset over an explicit list of image files.

    Args:
        paths: Sequence of image file paths.
        transform: Callable applied to each RGB ``PIL.Image``.
    """

    def __init__(self, paths, transform):
        self.transform = transform
        self.paths = paths

    def __len__(self):
        """Return how many image paths the dataset holds."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Return ``(transformed_image, path)`` for the file at ``idx``."""
        image_path = self.paths[idx]
        rgb_image = Image.open(image_path).convert("RGB")
        transformed = self.transform(rgb_image)
        return transformed, image_path

# Simple transform for test
# Deterministic preprocessing for gallery/query images: a direct resize to
# 224x224 (aspect ratio is not preserved), then the same normalisation
# constants as the training augmentation.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

def get_image_paths(folder):
    """Return the paths of all ``.jpg`` files directly inside ``folder``.

    The extension is matched case-insensitively (so ``IMG.JPG`` is found too)
    and the result is sorted, because ``os.listdir`` returns entries in
    arbitrary order and the output order would otherwise vary between runs.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        Sorted list of ``os.path.join(folder, name)`` strings.
    """
    return sorted(
        os.path.join(folder, f)
        for f in os.listdir(folder)
        if f.lower().endswith('.jpg')
    )

def extract_embeddings(model, image_paths):
    """Embed every image in ``image_paths`` with ``model``.

    The model is switched to eval mode and gradients are disabled. Images
    are preprocessed with the module-level ``test_transform`` and batched
    with the module-level ``batch_size``.

    Args:
        model: Network mapping an image batch to a 2-D embedding batch.
        image_paths: List of image file paths.

    Returns:
        Tuple ``(features, paths)``: ``features`` is the row-wise stack of
        all batch outputs as a NumPy array, ``paths`` the file paths in the
        same order.
    """
    loader = DataLoader(ImageFolderPaths(image_paths, test_transform), batch_size=batch_size)
    model.eval()

    batch_outputs = []
    ordered_paths = []
    with torch.no_grad():
        for image_batch, batch_paths in loader:
            batch_outputs.append(model(image_batch.to(device)).cpu().numpy())
            ordered_paths.extend(batch_paths)

    return np.vstack(batch_outputs), ordered_paths

# Load model for inference
# Rebuild the architecture and restore the weights saved after training.
model = SimCLR(resnet, embedding_dim).to(device)
model.load_state_dict(torch.load("simclr_embedding_model.pth", map_location=device))
model.eval()

gallery_paths = get_image_paths(gallery_dir)
query_paths = get_image_paths(query_dir)

# extract_embeddings returns the paths in the order they were embedded, so
# the path lists are rebound to stay aligned with the feature rows.
gallery_feats, gallery_paths = extract_embeddings(model, gallery_paths)
query_feats, query_paths = extract_embeddings(model, query_paths)

# L2-normalise rows so the dot product below is a cosine similarity.
gallery_feats = normalize(gallery_feats, axis=1)
query_feats = normalize(query_feats, axis=1)

# Similarity + JSON output
results = []
for q_feat, q_path in zip(query_feats, query_paths):
    sims = gallery_feats @ q_feat
    # Negate so argsort yields the most similar gallery images first.
    topk_idx = np.argsort(-sims)[:top_k]
    topk_paths = [gallery_paths[i] for i in topk_idx]
    results.append({
        "filename": q_path,
        "gallery_images": topk_paths
    })

# Keep the file name in one place so the confirmation message always names
# the file that was actually written.
submission_file = "submission8.json"
with open(submission_file, "w") as f:
    json.dump(results, f, indent=2)

print(f"✅ Saved {submission_file}")

In [None]:
# FAISS - modello 9 
import os
import json
from PIL import Image
import torch
import torchvision.transforms as transforms
import torchvision.models as models
import faiss
import numpy as np

# Configuration
gallery_folder = 'Data_example/test/gallery'
query_folder   = 'Data_example/test/query'
top_k = 3  # number of gallery images returned per query

# Load the pretrained model
model = models.resnet50(pretrained=True)
model = torch.nn.Sequential(*list(model.children())[:-1])  # drop the final classification layer
model.eval()

# Image preprocessing
# The pretrained torchvision weights were trained on inputs standardised with
# the ImageNet channel statistics; without the Normalize step the backbone
# sees out-of-distribution inputs and the features degrade.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Funzione per estrarre le feature
def extract_features(image_path):
    """Return the backbone feature vector of one image as a NumPy array.

    The image is loaded as RGB, preprocessed with the module-level
    ``transform`` and passed through the module-level ``model`` as a batch
    of one, without gradient tracking. Singleton dimensions are squeezed
    out of the output.
    """
    rgb_image = Image.open(image_path).convert('RGB')
    single_batch = transform(rgb_image).unsqueeze(0)
    with torch.no_grad():
        output = model(single_batch)
    squeezed = output.squeeze()
    return squeezed.numpy()

# Only files with a known image extension are processed. A raw os.listdir()
# also returns sub-directories and hidden files (e.g. `.ipynb_checkpoints`),
# which would make Image.open() fail. Sorting makes the run reproducible.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp')

# Extract features for the gallery
gallery_paths = sorted(
    os.path.join(gallery_folder, fname)
    for fname in os.listdir(gallery_folder)
    if fname.lower().endswith(image_extensions)
)
gallery_features = np.array([extract_features(p) for p in gallery_paths]).astype('float32')

# Build the FAISS index (exact L2 search)
index = faiss.IndexFlatL2(gallery_features.shape[1])
index.add(gallery_features)

# Never ask for more neighbours than the index holds: FAISS pads missing
# results with index -1, which Python would silently resolve to the LAST
# gallery path.
effective_k = min(top_k, len(gallery_paths))

# Extract features for every query image and look up its nearest gallery images
results = []
query_paths = sorted(
    os.path.join(query_folder, fname)
    for fname in os.listdir(query_folder)
    if fname.lower().endswith(image_extensions)
)

for q_path in query_paths:
    q_feat = extract_features(q_path).astype('float32').reshape(1, -1)
    distances, indices = index.search(q_feat, effective_k)
    similar_images = [gallery_paths[i] for i in indices[0]]
    results.append({
        "filename": q_path,
        "gallery_images": similar_images
    })


# Save the result in JSON format
with open('submission2.json', 'w') as f:
    json.dump(results, f, indent=2)

print('submission ok')

In [None]:
# Siamese fine-tuned- modello 10
from sklearn.metrics.pairwise import cosine_similarity
import json
import os
import random
from PIL import Image
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

# ================================
# CONFIG
# ================================
training_dir = 'Data_example/training'  # one sub-folder per class (see SiameseDataset)
batch_size = 8  # image pairs per optimisation step
num_epochs = 15
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ================================
# DATASET: Siamese con coppie auto-generate
# ================================
class SiameseDataset(Dataset):
    """Dataset of randomly generated image pairs for Siamese training.

    ``root_dir`` must contain one sub-directory per class. At construction
    time a fixed list of pairs is sampled: alternating positive pairs (two
    different images of one class, label ``1``) and negative pairs (images
    from two different classes, label ``0``).

    Args:
        root_dir: Directory with one sub-folder per class.
        transform: Optional callable applied to each ``PIL.Image``;
            defaults to a 224x224 resize followed by ``ToTensor``.

    Raises:
        ValueError: If pairs are requested but no class has at least two
            images, or fewer than two classes contain any image.
    """

    def __init__(self, root_dir, transform=None):
        self.transform = transform or transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])
        self.root_dir = root_dir

        # Map every class folder to the image files it contains.
        self.class_to_imgs = {}
        for cls in os.listdir(root_dir):
            class_path = os.path.join(root_dir, cls)
            if os.path.isdir(class_path):
                self.class_to_imgs[cls] = [
                    os.path.join(class_path, f) for f in os.listdir(class_path)
                    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
                ]

        self.classes = list(self.class_to_imgs.keys())
        self.pairs = self._generate_pairs()

    def _generate_pairs(self, num_pairs=1000):
        """Sample ``num_pairs`` positive and ``num_pairs`` negative pairs.

        Returns:
            List of ``(path_1, path_2, label)`` tuples, positives and
            negatives interleaved.
        """
        pairs = []
        if num_pairs <= 0:
            return pairs

        # A positive pair needs two distinct images of one class; a negative
        # pair needs one image from each of two classes. Sampling from
        # classes that cannot satisfy this would raise inside `random`.
        positive_classes = [c for c in self.classes if len(self.class_to_imgs[c]) >= 2]
        negative_classes = [c for c in self.classes if self.class_to_imgs[c]]
        if not positive_classes:
            raise ValueError(
                f"No class in {self.root_dir!r} has at least two images; "
                "cannot build positive pairs."
            )
        if len(negative_classes) < 2:
            raise ValueError(
                f"Fewer than two non-empty classes in {self.root_dir!r}; "
                "cannot build negative pairs."
            )

        for _ in range(num_pairs):
            # Positive pair
            cls = random.choice(positive_classes)
            imgs = random.sample(self.class_to_imgs[cls], 2)
            pairs.append((imgs[0], imgs[1], 1))

            # Negative pair
            cls1, cls2 = random.sample(negative_classes, 2)
            img1 = random.choice(self.class_to_imgs[cls1])
            img2 = random.choice(self.class_to_imgs[cls2])
            pairs.append((img1, img2, 0))

        return pairs

    def __getitem__(self, index):
        """Return ``(image_1, image_2, label)`` with a float32 label tensor."""
        img1_path, img2_path, label = self.pairs[index]
        img1 = Image.open(img1_path).convert('RGB')
        img2 = Image.open(img2_path).convert('RGB')
        return self.transform(img1), self.transform(img2), torch.tensor(label, dtype=torch.float32)

    def __len__(self):
        """Return the number of pre-generated pairs."""
        return len(self.pairs)

# ================================
# MODEL: Siamese Network
# ================================
class SiameseNetwork(nn.Module):
    """Siamese network: pretrained ResNet18 trunk plus a small MLP head.

    Both inputs go through the same weights. The ResNet classifier is
    replaced by an identity, so the trunk emits 512 features that the head
    maps to 128 dimensions.
    """

    def __init__(self):
        super().__init__()
        trunk = models.resnet18(pretrained=True)
        trunk.fc = nn.Identity()  # remove the final classification layer
        self.backbone = trunk
        self.embedding_dim = 512

        head_layers = [
            nn.Linear(self.embedding_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward_once(self, x):
        """Embed a single batch of images."""
        return self.fc(self.backbone(x))

    def forward(self, x1, x2):
        """Embed both batches with the shared weights; returns a 2-tuple."""
        first = self.forward_once(x1)
        second = self.forward_once(x2)
        return first, second

# ================================
# LOSS: Contrastive
# ================================
class ContrastiveLoss(nn.Module):
    """Contrastive loss for pairs labelled ``1`` = similar, ``0`` = dissimilar.

    This matches ``SiameseDataset``, which labels same-class pairs ``1`` and
    different-class pairs ``0``. Similar pairs are penalised by their squared
    distance (pulled together); dissimilar pairs are penalised only while
    they are closer than ``margin`` (pushed apart).

    Args:
        margin: Distance beyond which dissimilar pairs stop contributing.
    """

    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, out1, out2, label):
        """Return the mean contrastive loss over the batch.

        Args:
            out1: Embeddings of the first images, shape ``(B, D)``.
            out2: Embeddings of the second images, shape ``(B, D)``.
            label: Tensor of shape ``(B,)``; ``1`` for similar pairs,
                ``0`` for dissimilar pairs.
        """
        euclidean = nn.functional.pairwise_distance(out1, out2)
        # label == 1 (same class): minimise the distance.
        pull_together = label * torch.pow(euclidean, 2)
        # label == 0 (different classes): enforce at least `margin` distance.
        push_apart = (1 - label) * torch.pow(torch.clamp(self.margin - euclidean, min=0.0), 2)
        return torch.mean(pull_together + push_apart)

# ================================
# TRAINING
# ================================
# The pair list is sampled once, when the dataset is constructed, and is
# reused for every epoch.
dataset = SiameseDataset(training_dir)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

model = SiameseNetwork().to(device)
criterion = ContrastiveLoss()
# All parameters (pretrained backbone and new head) are optimised.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

print("🚀 Inizio training...")
for epoch in range(num_epochs):
    # Re-enter train mode every epoch; the inference helper defined later in
    # this cell switches the model to eval mode.
    model.train()
    total_loss = 0
    for img1, img2, labels in dataloader:
        img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)

        out1, out2 = model(img1, img2)
        loss = criterion(out1, out2, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Mean loss per batch for this epoch.
    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {total_loss / len(dataloader):.4f}")
print("✅ Fine allenamento.")


# Folders
gallery_dir = 'Data_example/test/gallery'
query_dir   = 'Data_example/test/query'

# Preprocessing for inference: the same steps as the default transform of
# SiameseDataset (resize + ToTensor, no normalisation), so test images are
# preprocessed like the training images.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

def extract_embedding(image_path, model):
    """Return the flattened embedding of one image as a NumPy array.

    The image is loaded as RGB, preprocessed with the module-level
    ``transform`` and moved to the module-level ``device``. ``model`` is put
    into eval mode and queried through ``forward_once`` without gradients.
    """
    rgb_image = Image.open(image_path).convert('RGB')
    single_batch = transform(rgb_image).unsqueeze(0).to(device)
    model.eval()
    with torch.no_grad():
        output = model.forward_once(single_batch)
    flat_vector = output.cpu().numpy().flatten()
    return flat_vector

# Extract gallery embeddings
gallery_paths = [os.path.join(gallery_dir, f) for f in os.listdir(gallery_dir)]
gallery_embeddings = [extract_embedding(p, model) for p in gallery_paths]

# Collect query image paths
query_paths = [os.path.join(query_dir, f) for f in os.listdir(query_dir)]

results = []
top_k = 3  # number of similar images to return per query

for q_path in query_paths:
    q_emb = extract_embedding(q_path, model).reshape(1, -1)
    sims = cosine_similarity(q_emb, gallery_embeddings)[0]
    # Highest similarity first.
    top_indices = sims.argsort()[::-1][:top_k]
    similar_images = [gallery_paths[i] for i in top_indices]
    results.append({
        "filename": q_path,
        "gallery_images": similar_images
    })

# Save the results as JSON once, after every query has been processed.
# (Writing inside the loop rewrote the file on each iteration and skipped
# the write entirely when there were no queries.)
with open('submission3.json', 'w') as f:
    json.dump(results, f, indent=2)

print('submission ok')


In [None]:
# Siamese double fine-tuned - modello 11
import os
import random
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.metrics.pairwise import cosine_similarity
import json

# ================================
# CONFIGURAZIONE
# ================================
class Config:
    """Central, class-level configuration for the Siamese retrieval pipeline.

    All settings are class attributes, read directly as ``Config.<name>``;
    the class is never instantiated in this cell.
    """

    # Directory paths
    gallery_dir = 'Data_example/test/gallery'
    query_dir = 'Data_example/test/query'
    model_path = 'best_siamese_model.pth'
    output_json = 'submission7.json'

    # Training parameters - edit the training settings here
    batch_size = 16
    num_epochs = 5          # number of epochs
    learning_rate = 1e-4    # learning rate
    embedding_dim = 256     # size of the embedding
    margin = 2.0            # margin for the contrastive loss
    num_pairs = 2000        # number of training pairs
    top_k = 5               # number of top-k results

    # Model parameters
    dropout_rate = 0.3      # dropout rate
    hidden_dim = 512        # size of the hidden layer

    # Device configuration
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Data augmentation and preprocessing
    use_data_augmentation = True  # switch for data augmentation

    # Training transform (with data augmentation)
    train_transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Test transform (no augmentation)
    test_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    @classmethod
    def print_config(cls):
        """Print the current configuration to stdout, one setting per line."""
        separator = "=" * 50
        path_lines = [
            f"📁 Gallery directory: {cls.gallery_dir}",
            f"📁 Query directory: {cls.query_dir}",
            f"💾 Model path: {cls.model_path}",
            f"📄 Output JSON: {cls.output_json}",
        ]
        training_lines = [
            f"   Batch size: {cls.batch_size}",
            f"   Num epochs: {cls.num_epochs}",
            f"   Learning rate: {cls.learning_rate}",
            f"   Embedding dim: {cls.embedding_dim}",
            f"   Margin: {cls.margin}",
            f"   Num pairs: {cls.num_pairs}",
            f"   Top-K: {cls.top_k}",
            f"   Dropout rate: {cls.dropout_rate}",
            f"   Hidden dim: {cls.hidden_dim}",
            f"   Data augmentation: {cls.use_data_augmentation}",
            f"   Device: {cls.device}",
        ]
        report = [
            "🔧 CONFIGURAZIONE CORRENTE",
            separator,
            *path_lines,
            "\n🎯 PARAMETRI DI TRAINING:",
            *training_lines,
            separator,
        ]
        for line in report:
            print(line)

    @classmethod
    def update_training_params(cls, **kwargs):
        """Overwrite existing class attributes from keyword arguments.

        Unknown names are reported and skipped; every applied change is
        listed afterwards as ``name: old → new``.
        """
        change_log = []
        for name, new_value in kwargs.items():
            if not hasattr(cls, name):
                print(f"⚠️  Parametro sconosciuto: {name}")
                continue
            previous = getattr(cls, name)
            setattr(cls, name, new_value)
            change_log.append(f"{name}: {previous} → {new_value}")

        if change_log:
            print("✅ Parametri aggiornati:")
            for entry in change_log:
                print(f"   {entry}")

# ================================
# MODELLO SIAMESE
# ================================
class SiameseNetwork(nn.Module):
    """Siamese network: pretrained ResNet18 trunk plus a configurable MLP head.

    Embeddings are L2-normalised. Any constructor argument left as ``None``
    falls back to the value stored on ``Config``.

    Args:
        embedding_dim: Size of the output embedding.
        dropout_rate: Dropout probability after the first head layer; the
            second dropout uses 70% of this value.
        hidden_dim: Width of the first head layer.
    """

    def __init__(self, embedding_dim=None, dropout_rate=None, hidden_dim=None):
        super().__init__()

        # Fall back to the Config values when not given explicitly.
        embedding_dim = Config.embedding_dim if embedding_dim is None else embedding_dim
        dropout_rate = Config.dropout_rate if dropout_rate is None else dropout_rate
        hidden_dim = Config.hidden_dim if hidden_dim is None else hidden_dim

        # Backbone: pretrained ResNet18 with its classifier replaced by identity.
        self.backbone = models.resnet18(pretrained=True)
        num_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()

        # Embedding head with configurable sizes.
        head_layers = [
            nn.Linear(num_features, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, embedding_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate * 0.7),  # lighter dropout in the second layer
            nn.Linear(embedding_dim, embedding_dim),
        ]
        self.embedding = nn.Sequential(*head_layers)

        print(f"🏗️  Modello creato con:")
        print(f"   Embedding dim: {embedding_dim}")
        print(f"   Hidden dim: {hidden_dim}")
        print(f"   Dropout rate: {dropout_rate}")

    def forward_once(self, x):
        """Embed one batch and L2-normalise each row."""
        raw_embedding = self.embedding(self.backbone(x))
        return F.normalize(raw_embedding, p=2, dim=1)

    def forward(self, x1, x2):
        """Embed both batches with the shared weights; returns a 2-tuple."""
        first = self.forward_once(x1)
        second = self.forward_once(x2)
        return first, second

# ================================
# CONTRASTIVE LOSS
# ================================
class ContrastiveLoss(nn.Module):
    """Contrastive loss over pairs of embeddings.

    With this formula a pair labelled ``0`` is penalised by its squared
    distance (pulled together) and a pair labelled ``1`` is penalised only
    while closer than ``margin`` (pushed apart).
    NOTE(review): the training code is not part of this cell — confirm that
    callers encode similar pairs as 0 and dissimilar pairs as 1.

    Args:
        margin: Minimum desired distance for label-1 pairs; defaults to
            ``Config.margin`` when ``None``.
    """

    def __init__(self, margin=None):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin if margin is not None else Config.margin
        print(f"📊 Contrastive Loss creata con margin: {self.margin}")

    def forward(self, output1, output2, label):
        """Return the mean contrastive loss over the batch.

        Args:
            output1: Embeddings of the first images, shape ``(B, D)``.
            output2: Embeddings of the second images, shape ``(B, D)``.
            label: One label per pair, shape ``(B,)`` or ``(B, 1)``.
        """
        euclidean_distance = F.pairwise_distance(output1, output2, keepdim=True)
        # The distance has shape (B, 1). A flat (B,) label would broadcast to
        # a (B, B) matrix and mix every label with every pair, so align the
        # label with the distance whenever it holds one value per pair.
        if torch.is_tensor(label) and label.numel() == euclidean_distance.numel():
            label = label.reshape(euclidean_distance.shape)
        loss_contrastive = torch.mean((1-label) * torch.pow(euclidean_distance, 2) +
                                    (label) * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))
        return loss_contrastive

# ================================
# FUNZIONI DI UTILITÀ
# ================================
def load_model():
    """Build a ``SiameseNetwork`` from ``Config`` and load saved weights if present.

    If ``Config.model_path`` does not exist, a warning is printed and the
    network is returned with its freshly constructed weights.

    Returns:
        The model, on ``Config.device`` and in eval mode.
    """
    network = SiameseNetwork().to(Config.device)

    if not os.path.exists(Config.model_path):
        print(f"⚠️  Modello non trovato in {Config.model_path}. Usando modello non addestrato.")
    else:
        state_dict = torch.load(Config.model_path, map_location=Config.device)
        network.load_state_dict(state_dict)
        print(f"✅ Modello caricato da {Config.model_path}")

    network.eval()
    return network

def extract_embedding(image_path, model):
    """Return the flattened embedding of one image, or ``None`` on failure.

    The image is loaded as RGB, preprocessed with ``Config.test_transform``,
    moved to ``Config.device`` and embedded with ``model.forward_once``
    without gradient tracking. Any exception is reported on stdout and turned
    into a ``None`` result so that callers can skip the image.
    """
    try:
        rgb_image = Image.open(image_path).convert('RGB')
        single_batch = Config.test_transform(rgb_image).unsqueeze(0).to(Config.device)

        with torch.no_grad():
            output = model.forward_once(single_batch)

        flat_vector = output.cpu().numpy().flatten()
        return flat_vector
    except Exception as e:
        print(f"❌ Errore nell'estrazione embedding da {image_path}: {e}")
        return None

def get_image_paths(directory):
    """Return the sorted paths of all image files directly inside ``directory``.

    A file counts as an image when its lower-cased name ends with one of the
    known extensions. A missing directory is reported on stdout and yields
    an empty list.
    """
    if not os.path.exists(directory):
        print(f"❌ Directory non trovata: {directory}")
        return []

    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp')
    return sorted(
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.lower().endswith(valid_extensions)
    )

def generate_similarity_json():
    """Rank gallery images for every query image and write the ranking to JSON.

    Steps: print the configuration, load the model, embed every gallery
    image, then for each query compute cosine similarities against the
    gallery and keep the ``Config.top_k`` most similar paths. Images whose
    embedding cannot be extracted are skipped.

    Returns:
        List of ``{"filename": ..., "gallery_images": [...]}`` dicts (also
        written to ``Config.output_json``), or ``None`` when the gallery or
        query folder yields no images or no gallery embedding is extracted.
    """
    print("🔍 Generazione JSON per similarità immagini...")
    print("=" * 50)

    # Show the active configuration, then load the model.
    Config.print_config()
    model = load_model()

    gallery_paths = get_image_paths(Config.gallery_dir)
    query_paths = get_image_paths(Config.query_dir)

    # Bail out early when either folder has no images (gallery checked first).
    for found_paths, folder in ((gallery_paths, Config.gallery_dir),
                                (query_paths, Config.query_dir)):
        if not found_paths:
            print(f"❌ Nessuna immagine trovata in {folder}")
            return

    n_gallery = len(gallery_paths)
    n_query = len(query_paths)
    print(f"📁 Gallery: {n_gallery} immagini")
    print(f"📁 Query: {n_query} immagini")

    # Embed the gallery, keeping only images that could be processed.
    print("\n🧮 Estrazione embedding gallery...")
    embedded_gallery = []  # (path, embedding) pairs
    for position, gallery_path in enumerate(gallery_paths):
        if position % 50 == 0:
            print(f"Processando gallery {position+1}/{n_gallery}")

        vector = extract_embedding(gallery_path, model)
        if vector is None:
            continue
        embedded_gallery.append((gallery_path, vector))

    if not embedded_gallery:
        print("❌ Nessun embedding valido estratto dalla gallery")
        return

    valid_gallery_paths = [path for path, _ in embedded_gallery]
    gallery_embeddings = np.array([vector for _, vector in embedded_gallery])
    print(f"✅ Estratti {len(gallery_embeddings)} embedding dalla gallery")

    # Rank the gallery for every query.
    print(f"\n🔍 Ricerca top-{Config.top_k} immagini simili per ogni query...")
    results = []

    for position, query_path in enumerate(query_paths):
        if position % 10 == 0:
            print(f"Processando query {position+1}/{n_query}")

        query_vector = extract_embedding(query_path, model)
        if query_vector is None:
            print(f"⚠️  Saltando {query_path} (errore estrazione embedding)")
            continue

        # Cosine similarity of this query against every gallery embedding.
        similarities = cosine_similarity(query_vector.reshape(1, -1), gallery_embeddings)[0]

        # Indices of the top-k most similar gallery images, best first.
        ranking = similarities.argsort()[::-1][:Config.top_k]
        similar_images = [valid_gallery_paths[g] for g in ranking]

        # Required output format: filename + gallery_images.
        results.append({
            "filename": query_path,
            "gallery_images": similar_images
        })

        # Debug output for the first few queries.
        if position < 3:
            print(f"\n📊 Query: {os.path.basename(query_path)}")
            for rank, g in enumerate(ranking, start=1):
                gallery_name = os.path.basename(valid_gallery_paths[g])
                score = similarities[g]
                print(f"  {rank}. {gallery_name} (similarità: {score:.4f})")
            print(f"  → Formato finale: filename + {len(similar_images)} gallery_images")

    # Persist the ranking.
    print(f"\n💾 Salvataggio risultati in {Config.output_json}...")
    with open(Config.output_json, 'w') as f:
        json.dump(results, f, indent=2)

    print(f"✅ JSON generato con successo!")
    print(f"📄 File: {Config.output_json}")
    print(f"📊 Query processate: {len(results)}")
    print(f"🎯 Top-{Config.top_k} immagini simili per ogni query")

    return results

def validate_json_format():
    """Check that ``Config.output_json`` exists and has the expected structure.

    Only the first entry is inspected: it must contain the keys
    ``filename`` and ``gallery_images``.

    Returns:
        ``True`` when the file exists, parses as JSON, is non-empty and its
        first entry has both keys; ``False`` otherwise.
    """
    if not os.path.exists(Config.output_json):
        print(f"❌ File {Config.output_json} non trovato")
        return False

    try:
        with open(Config.output_json, 'r') as f:
            entries = json.load(f)

        print(f"\n🔍 Validazione formato JSON...")
        print(f"📊 Numero di entries: {len(entries)}")

        if not entries:
            print("❌ JSON vuoto")
            return False

        # Inspect the first entry and report the first missing key, if any.
        head = entries[0]
        missing_key = next(
            (key for key in ('filename', 'gallery_images') if key not in head),
            None,
        )
        if missing_key is not None:
            print(f"❌ Chiave mancante: {missing_key}")
            return False

        print(f"✅ Formato corretto!")
        print(f"📝 Esempio prima entry:")
        print(f"   filename: {os.path.basename(head['filename'])}")
        print(f"   gallery_images: {len(head['gallery_images'])} immagini")
        print(f"   📄 Struttura: [{{\"filename\": \"...\", \"gallery_images\": [...]}}]")
        return True

    except json.JSONDecodeError as e:
        print(f"❌ Errore formato JSON: {e}")
        return False

# ================================
# FUNZIONI PER MODIFICARE PARAMETRI
# ================================
def quick_config_update(**kwargs):
    """Update ``Config`` from keyword arguments and print the resulting configuration.

    The keyword arguments are forwarded unchanged to
    ``Config.update_training_params``.
    """
    print("🔧 Aggiornamento configurazione...")
    Config.update_training_params(**kwargs)
    print("\n📋 Configurazione aggiornata:")
    Config.print_config()

def training_preset_light():
    """Apply the "light" preset: few epochs, larger batches, small embedding."""
    light_settings = {
        "batch_size": 32,
        "num_epochs": 3,
        "learning_rate": 1e-3,
        "embedding_dim": 128,
        "margin": 1.5,
        "num_pairs": 1000,
        "dropout_rate": 0.2,
    }
    Config.update_training_params(**light_settings)
    print("🚀 Preset LIGHT applicato - Training veloce")

def training_preset_heavy():
    """Apply the "heavy" preset: many epochs, small batches, large embedding."""
    heavy_settings = {
        "batch_size": 8,
        "num_epochs": 15,
        "learning_rate": 5e-5,
        "embedding_dim": 512,
        "margin": 3.0,
        "num_pairs": 5000,
        "dropout_rate": 0.4,
    }
    Config.update_training_params(**heavy_settings)
    print("🔥 Preset HEAVY applicato - Training intensivo")

def training_preset_balanced():
    """Apply the "balanced" preset: a middle ground between speed and accuracy."""
    balanced_settings = {
        "batch_size": 16,
        "num_epochs": 8,
        "learning_rate": 1e-4,
        "embedding_dim": 256,
        "margin": 2.0,
        "num_pairs": 3000,
        "dropout_rate": 0.3,
    }
    Config.update_training_params(**balanced_settings)
    print("⚖️  Preset BALANCED applicato - Training bilanciato")

# ================================
# MAIN
# ================================
if __name__ == "__main__":
    print("🎯 Generatore JSON per Similarità Immagini")
    print("=" * 50)
    
    # Examples of how to change the parameters before running:
    
    # 1. Change individual parameters
    # Config.update_training_params(num_epochs=10, learning_rate=5e-5)
    
    # 2. Change several parameters and print the result
    # quick_config_update(batch_size=32, num_epochs=8, embedding_dim=512)
    
    # 3. Use one of the predefined presets
    # training_preset_light()    # fast training
    # training_preset_heavy()    # intensive training
    # training_preset_balanced() # balanced training
    
    # Generate the JSON
    results = generate_similarity_json()
    
    # generate_similarity_json returns None on failure; an empty result list
    # is also falsy, so both cases end up in the error branch.
    if results:
        # Validate the format of the file that was just written
        validate_json_format()
        
        print("\n🎉 Processo completato!")
        print(f"📁 File generato: {Config.output_json}")
    else:
        print("❌ Errore nella generazione del JSON")

# ================================
# FUNZIONE PER GENERARE DATI MOCK (se non hai il modello)
# ================================
def generate_mock_json():
    """Write a placeholder submission with randomly chosen gallery images.

    Intended for testing the output format without a trained model: each
    query gets up to ``Config.top_k`` gallery paths drawn at random without
    repetition.

    Returns:
        The list of result dicts (also written to ``submission_mock.json``),
        or ``None`` when the gallery or query folder yields no images.
    """
    print("🎭 Generazione JSON mock per test...")

    gallery_paths = get_image_paths(Config.gallery_dir)
    query_paths = get_image_paths(Config.query_dir)

    if not (gallery_paths and query_paths):
        print("❌ Directory gallery o query non trovate")
        return

    # Required format: filename + gallery_images, one random draw per query.
    mock_results = [
        {
            "filename": query_path,
            "gallery_images": random.sample(
                gallery_paths, min(Config.top_k, len(gallery_paths))
            ),
        }
        for query_path in query_paths
    ]

    # Save the mock JSON.
    mock_filename = 'submission_mock.json'
    with open(mock_filename, 'w') as f:
        json.dump(mock_results, f, indent=2)

    print(f"✅ JSON mock generato: {mock_filename}")
    return mock_results

# Decommenta per generare dati mock se non hai il modello
# generate_mock_json()

# ================================
# ESEMPI DI UTILIZZO
# ================================
"""
ESEMPI DI COME MODIFICARE I PARAMETRI:

1. Modifica parametri singoli:
   Config.batch_size = 32
   Config.num_epochs = 10
   Config.learning_rate = 5e-5

2. Modifica parametri multipli:
   Config.update_training_params(
       batch_size=32,
       num_epochs=10, 
       learning_rate=5e-5,
       embedding_dim=512
   )

3. Usa preset predefiniti:
   training_preset_light()     # Training veloce
   training_preset_heavy()     # Training intensivo
   training_preset_balanced()  # Training bilanciato

4. Modifica rapida:
   quick_config_update(num_epochs=15, batch_size=8)

5. Visualizza configurazione corrente:
   Config.print_config()
"""

In [None]:
# per visualizzare
import json
import matplotlib.pyplot as plt
from PIL import Image

# Load submission file
with open('submission7.json', 'r') as f:
    results = json.load(f)

# How many queries to show?
num_queries_to_show = 5
# Guard against an empty submission instead of failing on results[0].
top_k = len(results[0]['gallery_images']) if results else 0

# Plot: one row per query, the query image first, then its ranked gallery images.
for i, item in enumerate(results[:num_queries_to_show]):
    gallery_images = item['gallery_images']
    # Size the row from this entry's own list so an entry with more (or
    # fewer) images than the first one cannot overflow the axes.
    n_cols = len(gallery_images) + 1
    query_img = Image.open(item['filename']).convert('RGB')

    # squeeze=False always returns a 2-D array of axes, even for one column.
    fig, axs = plt.subplots(1, n_cols, figsize=(3 * n_cols, 4), squeeze=False)
    axs = axs[0]
    axs[0].imshow(query_img)
    axs[0].set_title("Query")
    axs[0].axis('off')

    for j, gallery_path in enumerate(gallery_images):
        gallery_img = Image.open(gallery_path).convert('RGB')
        axs[j + 1].imshow(gallery_img)
        axs[j + 1].set_title(f"Top {j+1}")
        axs[j + 1].axis('off')

    plt.tight_layout()
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)

In [None]:
import os
import json
import torch
import faiss
import numpy as np
from PIL import Image, UnidentifiedImageError
from torchvision import transforms, models
from tqdm import tqdm

# =========================
# CONFIGURATION
# =========================
gallery_folder = 'test/gallery'
query_folder = 'test/query'
top_k = 10  # requested number of gallery images per query

# Cache file locations
# Gallery features and their paths are stored here and reused on later runs.
features_file = 'gallery_features.npy'
paths_file = 'gallery_paths.txt'

# Use the GPU when available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# =========================
# FAST MODEL: ResNet18
# =========================
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model = torch.nn.Sequential(*list(model.children())[:-1])  # drop the FC layer
model.to(device)
model.eval()

# =========================
# TRANSFORM
# =========================
# NOTE(review): no Normalize step is applied here; pretrained torchvision
# weights presumably expect ImageNet mean/std normalisation — confirm. If the
# transform is ever changed, the cached gallery features must be deleted,
# because the cache-loading code below reuses them without checking how they
# were computed.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# =========================
# FEATURE EXTRACTION
# =========================
def extract_features(image_path):
    """Compute the feature vector of a single image.

    The image is converted to RGB, passed through the module-level
    ``transform`` and ``model`` on ``device``, and the output is flattened.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A 1-D NumPy array with the model output for this image, or ``None``
        when the file cannot be opened or decoded as an image (an error
        message is printed in that case).
    """
    try:
        # The context manager closes the underlying file once the pixels have
        # been decoded by convert(); without it the handle stays open.
        with Image.open(image_path) as img:
            image = img.convert('RGB')
    except (UnidentifiedImageError, OSError):
        # OSError also covers truncated/corrupt files that fail while decoding,
        # so one bad image is skipped instead of aborting the whole run.
        print(f"[ERRORE] Immagine non valida: {image_path}")
        return None
    image = transform(image).unsqueeze(0).to(device)  # add the batch dimension
    with torch.no_grad():
        # Flatten the single-image output to a 1-D vector
        feature = model(image).reshape(-1).cpu().numpy()
    return feature

# =========================
# LOAD or COMPUTE GALLERY
# =========================
gallery_features = None
valid_gallery_paths = []

if os.path.exists(features_file) and os.path.exists(paths_file):
    print("📂 Caricamento feature galleria salvate...")
    cached_features = np.load(features_file)
    with open(paths_file, 'r') as f:
        cached_paths = [line.strip() for line in f if line.strip()]
    # Only trust the cache when it holds one feature row per stored path;
    # a mismatch would silently map search hits to the wrong images.
    if cached_features.ndim == 2 and cached_features.shape[0] == len(cached_paths):
        gallery_features = cached_features.astype('float32', copy=False)
        valid_gallery_paths = cached_paths
    else:
        print("[ATTENZIONE] Cache incoerente: le feature verranno ricalcolate.")

if gallery_features is None:
    print("🧠 Estrazione feature dalla galleria...")
    # Sorted so the gallery order (and therefore the cache) is reproducible;
    # os.listdir returns entries in arbitrary order.
    gallery_paths = sorted(os.path.join(gallery_folder, fname)
                           for fname in os.listdir(gallery_folder)
                           if fname.lower().endswith(('.jpg', '.jpeg', '.png')))

    extracted_features = []
    valid_gallery_paths = []

    for p in tqdm(gallery_paths, desc="Estrai galleria"):
        feat = extract_features(p)
        # Unreadable images return None and are left out of both lists,
        # keeping features and paths aligned index by index.
        if feat is not None:
            extracted_features.append(feat)
            valid_gallery_paths.append(p)

    # Fail early with a clear message: an empty list would become a 1-D array
    # and break later when the feature dimension is read from shape[1].
    if not extracted_features:
        raise RuntimeError(f"Nessuna immagine valida trovata in: {gallery_folder}")

    gallery_features = np.array(extracted_features).astype('float32')
    np.save(features_file, gallery_features)
    with open(paths_file, 'w') as f:
        for path in valid_gallery_paths:
            f.write(path + '\n')

# =========================
# BUILD FAISS INDEX
# =========================
# The index dimensionality is the number of columns of the gallery feature matrix
feature_dim = gallery_features.shape[1]
index = faiss.IndexFlatL2(feature_dim)
# Register every gallery vector; row i of the matrix becomes id i in the index
index.add(gallery_features)

# =========================
# PROCESS QUERIES
# =========================
results = []
# Sorted so the order of the records in the output file is reproducible
query_paths = sorted(os.path.join(query_folder, fname)
                     for fname in os.listdir(query_folder)
                     if fname.lower().endswith(('.jpg', '.jpeg', '.png')))

# Never ask for more neighbours than there are indexed vectors or known paths
top_k_effettivo = min(top_k, len(valid_gallery_paths), index.ntotal)

print("🔍 Ricerca immagini simili...")
for q_path in tqdm(query_paths, desc="Query"):
    q_feat = extract_features(q_path)
    if q_feat is None:
        print(f"[ATTENZIONE] Query ignorata: {q_path}")
        continue

    if top_k_effettivo > 0:
        # FAISS expects a 2-D float32 array: one row per query vector
        q_feat = q_feat.astype('float32').reshape(1, -1)
        distances, indices = index.search(q_feat, top_k_effettivo)
        neighbour_ids = indices[0]
    else:
        # Nothing to search against: emit an empty ranking for this query
        neighbour_ids = []

    # FAISS marks missing neighbours with -1; as a Python list index that would
    # silently select the last gallery path, so out-of-range ids are dropped.
    similar_images = [valid_gallery_paths[i] for i in neighbour_ids
                      if 0 <= i < len(valid_gallery_paths)]
    results.append({
        "filename": q_path,
        "gallery_images": similar_images
    })

# =========================
# SAVE RESULTS
# =========================
# Write the list of query records as pretty-printed JSON
with open('submission_results.json', 'w') as out_file:
    out_file.write(json.dumps(results, indent=2))

print("✅ Ricerca completata. File salvato: submission_results.json")
