# Installazione e Configurazione

In [None]:
# Install third-party packages before the notebook's own imports run
# (presumably dependencies of owl_dreamsim_utils — verify against the MuDI repo).
!pip install --quiet --upgrade open_clip_torch
!pip install timm peft transformers


In [None]:
# Clone the MuDI repository and make it the working directory.
!git clone https://github.com/agwmon/MuDI.git
%cd MuDI

In [None]:
# Enter the detect_and_compare sub-directory; the relative paths used in later
# cells ('models', 'examples/references/...') resolve against this directory.
%cd detect_and_compare

In [None]:
import torch
from IPython.display import clear_output
import numpy as np
from PIL import Image
from owl_dreamsim_utils import eval_with_dinov2, eval_with_dreamsim
# Relative directory passed to the evaluator as cache_dir; it resolves against
# the current working directory.
cache_dir = 'models'

# Global evaluator used by the scoring cells below. The name is misspelled but
# is referenced as-is by other cells, so it must not be renamed here alone.
evalulator = eval_with_dreamsim(cache_dir=cache_dir, device='cuda')
# Alternative evaluator: swap the two assignments to use eval_with_dinov2 instead.
#evalulator = eval_with_dinov2(cache_dir=None, device='cuda')

# Wipe the cell output produced so far (presumably model download/loading logs).
clear_output()

In [None]:
#@title Upload delle: reference_photo, maschere e il jsonl

#@markdown Inserisci il percorso in cui salvare i file (verrà creato se non esiste):
save_path = "./examples/references/andrea"  #@param {type:"string"}

import os
from google.colab import files

# Create the destination directory if it does not exist yet.
os.makedirs(save_path, exist_ok=True)

# Open the Colab upload prompt. files.upload() returns {filename: bytes} and
# also writes a copy of every file into the *current working directory*.
# Earlier cells change directory with %cd, so that is not /content.
uploaded = files.upload()

# Write each upload into save_path, then delete the stray copy left behind
# by files.upload().
for filename, filecontent in uploaded.items():
    src_path = os.path.join(os.getcwd(), filename)
    dest_path = os.path.join(save_path, filename)

    with open(dest_path, 'wb') as f:
        f.write(filecontent)
    print(f"✅ Salvato: {dest_path}")

    # Skip the cleanup when save_path is the working directory itself:
    # in that case the "stray" copy is the very file that was just written.
    if os.path.exists(src_path) and not os.path.samefile(src_path, dest_path):
        os.remove(src_path)



In [None]:
# NOTE(review): save_uploaded_files is not defined in any cell visible in this
# notebook; unless it is defined elsewhere, this call raises NameError. The
# upload cell above already copies the files into save_path inline — confirm
# whether this cell is still needed.
save_uploaded_files(save_path)

In [None]:
#@title Funzioni per il calcolo della D&C

def get_gt_matrix(query_dict):
    """Build the reference similarity matrix between the query groups.

    Entry ``[i, j]`` is the mean cosine similarity over every pair made of one
    embedding from group ``i`` and one from group ``j``. Diagonal entries are
    the mean over all pairs inside a single group, which includes each
    embedding paired with itself.

    Args:
        query_dict: Mapping whose ``'query_emb'`` entry is a sequence of
            tensors, one per query group (presumably shaped
            ``(num_references, dim)`` — confirm against the evaluator).

    Returns:
        A symmetric ``(n, n)`` NumPy array, where ``n`` is the number of groups.
    """
    embs = query_dict['query_emb']
    # L2-normalise each group once up front instead of once per (i, j) pair.
    normed = [emb / emb.norm(dim=-1, keepdim=True) for emb in embs]
    n = len(normed)
    gt_matrix = torch.zeros(n, n)
    for i in range(n):
        # Only the upper triangle is computed; the result is mirrored below.
        for j in range(i, n):
            mean_similarity = normed[i].matmul(normed[j].t()).mean()
            gt_matrix[i, j] = mean_similarity
            gt_matrix[j, i] = mean_similarity
    return np.array(gt_matrix)

def sort_by_max(A):
    """Greedily reorder the rows of a 2-D array, one column at a time.

    For column ``c`` (left to right) the not-yet-chosen row holding the
    largest value in that column is copied to output row ``c``. Ties go to the
    lowest row index. Output rows beyond the number of columns stay zero.

    Args:
        A: 2-D NumPy array.

    Returns:
        A new array with the same shape and dtype as ``A``.
    """
    n_rows, n_cols = A.shape[0], A.shape[1]
    reordered = np.zeros_like(A)
    taken = set()

    for col in range(n_cols):
        best_row = -1
        best_val = -np.inf
        for row in range(n_rows):
            if row in taken:
                continue
            if A[row, col] > best_val:
                best_row, best_val = row, A[row, col]

        # Place the winner for this column and retire it from later columns.
        reordered[col] = A[best_row]
        taken.add(best_row)
    return reordered

def gt_distance(scores, gt_matrix, ord=None):
    """Distance between the per-box mean scores and the ground-truth matrix.

    Each entry ``scores[b][r]`` is averaged to a single number, the resulting
    rows are reordered with ``sort_by_max``, and the norm of the difference
    from ``gt_matrix`` is returned.

    Args:
        scores: Nested sequence indexed as ``scores[box][reference]``; each
            leaf is anything ``np.array(...).mean()`` accepts.
        gt_matrix: Reference matrix, as produced by ``get_gt_matrix``.
        ord: Norm order forwarded to ``np.linalg.norm``.

    Returns:
        The norm of ``sorted_scores - gt_matrix``. If ``scores`` and
        ``gt_matrix`` differ in length, the count is printed and the sentinel
        ``1.`` is returned instead.
    """
    n_boxes = len(scores)
    if n_boxes != len(gt_matrix):
        print(f'Count:{len(scores)}')
        return 1.

    # Collapse every (box, reference) score list to its mean.
    mean_scores = np.array(
        [[np.array(ref).mean() for ref in bbox_score] for bbox_score in scores]
    )
    aligned = sort_by_max(mean_scores)
    return np.linalg.norm(aligned - gt_matrix, ord=ord)

In [None]:
#@title Specifica del dizionario
query_dict = {
    # One label per query group; both groups use the same label here.
    'query_name': ["person", "person"],
    # One reference directory per group, relative to the working directory.
    'query_path': ["examples/references/andrea",
                    "examples/references/luigi"]
    # NOTE(review): get_gt_matrix reads query_dict['query_emb'], which is not
    # set here; presumably evalulator.score() adds it — confirm.
}

In [None]:
#@title Funzione per la ricerca del Threshold, per poi segmentare il numero n di oggetti giusti dato un minimo, un max e uno step

def search_threshold(image, query_dict, thr_min, thr_max, *, step=0.01, ord=2, verbose=False):
    """Scan detection thresholds upward and stop at the first usable one.

    Thresholds ``thr_min, thr_min + step, ...`` up to ``thr_max`` (inclusive)
    are tried in order. For each one the image is scored with the global
    ``evalulator`` and ``value = 1 - gt_distance(...)`` is computed.
    ``gt_distance`` returns exactly ``1.`` when the number of score rows differs
    from the size of the ground-truth matrix, so ``value == 0`` is treated as
    "this threshold did not work" and the scan continues.

    Args:
        image: Image handed to ``evalulator.score``.
        query_dict: Query description handed to ``evalulator.score`` and
            ``get_gt_matrix``.
        thr_min: First threshold tried.
        thr_max: Last threshold tried; must be greater than ``thr_min``.
        step: Positive increment between consecutive thresholds.
        ord: Norm order forwarded to ``gt_distance``.
        verbose: When true, print the value obtained at every threshold and a
            message when the scan stops.

    Returns:
        A dict with keys ``"threshold"``, ``"value"``, ``"scores"`` and
        ``"gt_matrix"`` for the first threshold whose value is non-zero.

    Raises:
        ValueError: If ``thr_min >= thr_max`` or ``step <= 0``.
        RuntimeError: If no threshold in the interval gives a non-zero value.
    """
    # Make sure the bounds are sensible.
    if thr_min >= thr_max:
        raise ValueError("thr_min deve essere < thr_max")
    if step <= 0:
        raise ValueError("step deve essere > 0")

    # Derive each threshold from its index instead of accumulating `t += step`:
    # repeated float addition drifts and could overshoot thr_max by a rounding
    # error, silently skipping the inclusive upper bound. The small epsilon
    # absorbs the representation error of the division.
    n_steps = int((thr_max - thr_min) / step + 1e-9) + 1

    for k in range(n_steps):
        # Clamp so that a rounding error never pushes the value past thr_max.
        t = min(thr_min + k * step, thr_max)

        # Compute the scores at the current threshold.
        scores = evalulator.score(image, query_dict, threshold=t, return_round=False)

        # Ground truth. Kept after score() on purpose: the query_dict declared
        # in this notebook has no 'query_emb' key, so score() presumably fills
        # it in — TODO confirm before hoisting this out of the loop.
        gt_matrix = get_gt_matrix(query_dict)

        value = 1 - gt_distance(scores, gt_matrix, ord=ord)

        print(t)

        if verbose:
            print(f"threshold = {t:.3f} | value = {value}")

        # Exit condition: any non-zero value means gt_distance did not return
        # its count-mismatch sentinel.
        if value != 0:
            if verbose:
                print("Trovato value diverso da 0 — stop.")
            return {
                "threshold": t,
                "value": value,
                "scores": scores,
                "gt_matrix": gt_matrix,
            }

    # The whole interval was scanned without success.
    raise RuntimeError(
        f"Nessun threshold con value ≠ 0 trovato nell’intervallo [{thr_min}, {thr_max}]."
    )


#Esecuzione del codice

In [None]:
#@markdown Percorso della foto da calcolare il D&C
image = "/content/MuDI/detect_and_compare/examples/images/testmetriche/sdxl/andrea_cami1.png" #@param {type:"string"}
# `image` is rebound from the path string to the opened PIL image.
image = Image.open(image)

# Scan thresholds from 0.1 to 0.5 in steps of 0.1. As the last expression of
# the cell, the returned dict is shown as the cell output.
search_threshold(image, query_dict, 0.1, 0.5, step=0.1, ord=2, verbose=False)

In [None]:
#@markdown Percorso della foto da calcolare il D&C
image = "/content/MuDI/detect_and_compare/examples/images/testmetriche/sd1.5/lufienAnrosci2.png" #@param {type:"string"}
# `image` is rebound from the path string to the opened PIL image.
image = Image.open(image)
# Score the image at a fixed detection threshold of 0.1.
scores = evalulator.score(image, query_dict, threshold=0.1, return_round=False)

gt_matrix = get_gt_matrix(query_dict)
# Render the preview inline. PIL's Image.show() hands the picture to an
# external desktop viewer, which a hosted notebook (paths here live under
# /content) does not have, so nothing would be displayed.
from IPython.display import display
display(image.resize((256, 256)))
print(f"GT score: {1 - gt_distance(scores, gt_matrix, ord=2):.2f}")