In [None]:
!pip install open-clip-torch
!pip install jaxtyping einops open-clip-torch ftfy regex timm



## Import delle librerie necessarie

In [None]:
import os,json
from pathlib import Path

import pandas as pd
from PIL import Image

import torch
import torch.nn.functional as F
from sphinx.ext.viewcode import OUTPUT_DIRNAME
from tqdm import tqdm

# TODO: togliere il commento
import open_clip


Impostazione del dispositivo di calcolo

In [None]:
# Pick the compute device once: the GPU when PyTorch can see one, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

Using device: cuda


Modello CLIP usato per calcolare gli score delle generazioni

In [None]:
# CLIP initialisation (used only to score the generated images).
# create_model_and_transforms returns three values; the second one is
# discarded and the third is kept as the preprocessing used for scoring.
clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
    'ViT-B-32',
    pretrained='openai',
)
# Move the weights to the selected device and switch to evaluation mode.
clip_model = clip_model.to(device)
clip_model = clip_model.eval()
# Tokenizer matching the same architecture name as the model above.
clip_tokenizer = open_clip.get_tokenizer('ViT-B-32')



Definizioni dei Path


In [None]:
from pathlib import Path

# Workspace root; the CSV manifest of test cases is resolved relative to it.
BASE_DATA_DIR = Path("/content")
CSV_PATH = BASE_DATA_DIR / "dataset" / "cambio_materiale" / "change_material_tests.csv"

# Folder that receives every generated artifact; created up front so later
# cells can write into it without checking.
OUT_DIR = Path("/content/outputs_change_materials")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Echo the resolved locations so a wrong mount point is spotted immediately.
print('BASE_DATA_DIR =', BASE_DATA_DIR)
print('CSV_PATH      =', CSV_PATH)
print('OUT_DIR       =', OUT_DIR)

BASE_DATA_DIR = /content
CSV_PATH      = /content/dataset/cambio_materiale/change_material_tests.csv
OUT_DIR       = /content/outputs_change_materials


Import di Token-Opt


In [None]:
import sys
!git clone https://github.com/lukaslaobeyer/token-opt.git /content/token-opt
REPO_DIR = Path('/content/token-opt')
sys.path.insert(0, str(REPO_DIR))

TTO_AVAILABLE = True
try:
    from tto.test_time_opt import(
        TestTimeOpt,
        TestTimeOptConfig,
        CLIPObjective,
    )
    print("Token-Opt importato correttamente")
except Exception as e:
    TTO_AVAILABLE = False
    print("Errore nell'importazione di Token-Opt:", e)

print(TTO_AVAILABLE)


fatal: destination path '/content/token-opt' already exists and is not an empty directory.
Token-Opt importato correttamente
True


Definizione della funzione di caricamento dell'immagine e della funzione di calcolo dello score CLIP

In [None]:
def load_rgb_image(path:Path):
    """Open the image at ``path`` and return it as an RGB ``PIL.Image``.

    The file is opened inside a context manager so the underlying handle is
    released deterministically. ``convert`` returns a new image, so the result
    remains usable after the source file has been closed.
    """
    with Image.open(path) as source:
        return source.convert("RGB")

def clip_score_image_text(img:Image.Image,text:str) -> float:
    """Return the cosine similarity between CLIP embeddings of ``img`` and ``text``.

    Both inputs are encoded with the module-level ``clip_model``; the two
    embeddings are L2-normalised and their dot product is returned as a
    plain Python float.
    """
    # Build a batch of one for each modality, on the same device as the model.
    pixels = clip_preprocess(img).unsqueeze(0).to(device)
    tokens = clip_tokenizer([text]).to(device)

    # Scoring only: no gradients are needed.
    with torch.no_grad():
        img_emb = F.normalize(clip_model.encode_image(pixels), dim=-1)
        txt_emb = F.normalize(clip_model.encode_text(tokens), dim=-1)
        # Unit-length vectors, so the matrix product is the cosine similarity.
        cosine = img_emb @ txt_emb.T

    return float(cosine.squeeze().item())

Funzioni per convertire un'immagine in tensore e un tensore in immagine

In [None]:
import torchvision.transforms as T

# Preprocessing applied to the seed image before Token-Opt: bicubic resize of
# the shorter side to 256, central 256x256 crop, then conversion to a float
# tensor of shape [3, H, W] with values in [0, 1].
# NOTE(review): 256 is presumably the resolution Token-Opt expects; confirm.
_pre_tto = T.Compose(
    [
        T.Resize(256, interpolation=T.InterpolationMode.BICUBIC),
        T.CenterCrop(256),
        T.ToTensor(),
    ]
)

def pil_to_tto(seed_img: Image.Image, device: str = "cuda") -> torch.Tensor:
    """Convert a PIL image into a float32 batch tensor of shape [1, 3, H, W].

    The image is forced to RGB, passed through ``_pre_tto`` (values in
    [0, 1]), given a leading batch dimension and moved to ``device``.
    """
    rgb = seed_img.convert("RGB")
    chw = _pre_tto(rgb)            # [3, H, W] in [0, 1]
    batched = chw[None]            # add the batch axis -> [1, 3, H, W]
    return batched.to(device, dtype=torch.float32)
def tensor_to_pil(img: torch.Tensor) -> Image.Image:
    """Convert an image tensor into a ``PIL.Image``.

    Accepts either [1, 3, H, W] (only the first batch element is used) or
    [3, H, W]. Values are assumed to lie in [0, 1] and are clamped to that
    range before conversion.
    """
    if img.ndim == 4:
        img = img[0]
    img = img.detach().clamp(0, 1).cpu()
    # Round before the integer cast: a plain cast truncates, so a value such
    # as 253.99998 (float error on 254/255 * 255) would lose one intensity
    # level and bias the image darker.
    img = (img * 255).round().to(torch.uint8)   # [3, H, W]
    img = img.permute(1, 2, 0).numpy()          # channels-last (H, W, C)
    return Image.fromarray(img)

# Metodo di generazione dell'immagine

In [None]:
def run_token_opt(seed_img:Image.Image,prompt:str,steps:int=50)->Image.Image:
    """Run Token-Opt on one seed image guided by ``prompt``.

    Args:
        seed_img: starting image for the optimisation.
        prompt: text used to build the CLIP objective.
        steps: number of optimisation iterations (passed as ``num_iter``).

    Returns:
        The optimised image as a ``PIL.Image``. When Token-Opt could not be
        imported (``TTO_AVAILABLE`` is False) the seed image is returned
        unchanged.
    """
    if not TTO_AVAILABLE:
        print("Token-Opt non è disponibile, restituisco l'immagine seed")
        return seed_img

    # Use the notebook-wide device instead of a hard-coded "cuda", so the seed
    # tensor lives on the same device as the optimizer and the function also
    # works on CPU-only runtimes.
    seed_tensor=pil_to_tto(seed_img,device=device)

    cfg=TestTimeOptConfig(
        num_iter=steps,
        lr=0.1,
    )

    # CLIP objective that steers the optimisation towards the prompt.
    objective=CLIPObjective(prompt=prompt)

    tto=TestTimeOpt(cfg,objective).to(device=device)
    # Fixed seed so repeated runs on the same input are comparable.
    torch.manual_seed(0)

    # NOTE(review): assumes the call returns an image tensor in [0, 1], which
    # is what tensor_to_pil expects; confirm against Token-Opt.
    tensor_out=tto(seed=seed_tensor)

    return tensor_to_pil(tensor_out)


# Loop principale

In [None]:
# Load the test manifest: one row per (input image, target prompt) test case.
df=pd.read_csv(CSV_PATH)

required_cols=['id','macrocat','scenario','object','input_image_path','prompt_target']

# Fail early, with the full list, if the CSV lacks any column used below.
missing=[col for col in required_cols if col not in df.columns]
if missing:
    raise ValueError(f"Mancano le seguenti colonne nel CSV: {missing}")
print("Totale righe da processare",len(df))
results=[]

for _,row in tqdm(df.iterrows(),total=len(df)):
    test_id=row['id']
    scenario=row['scenario']
    obj=row['object']
    rel_path=row['input_image_path']
    prompt=row['prompt_target']

    img_path=BASE_DATA_DIR/Path(rel_path)
    # tqdm.write prints without breaking the progress bar, unlike print().
    tqdm.write(str(img_path))
    if not img_path.exists():
        # Missing inputs are reported and skipped rather than aborting the run.
        tqdm.write(f"[WARN] immagine mancante {img_path}")
        continue

    # One folder per test case: <macrocat>/<object>/<scenario>/<id>
    out_dir=OUT_DIR/str(row['macrocat']) /str(obj)/str(scenario)/str(test_id)
    out_dir.mkdir(parents=True,exist_ok=True)

    (out_dir/"prompt.txt").write_text(prompt,encoding='utf-8')

    # Keep a copy of the input next to the output for side-by-side inspection.
    seed_image=load_rgb_image(img_path)
    seed_image.save(out_dir/"input.jpg")

    # Image generation
    image_out=run_token_opt(seed_image,prompt,steps=200)
    out_image_path=out_dir/"output.jpg"
    image_out.save(out_image_path)

    # CLIP score of the in-memory generated image against the prompt
    score=clip_score_image_text(image_out,prompt)

    # Per-test metadata, saved as JSON and collected for the summary table
    meta = {
        'id': test_id,
        'macrocat': row['macrocat'],
        'scenario': scenario,
        'object': obj,
        'prompt': prompt,
        'input_image_path': str(img_path),
        'output_image_path': str(out_image_path),
        'clip_score': score,
    }
    with open(out_dir / 'clip_scores.json', 'w', encoding='utf-8') as f:
        json.dump(meta, f, ensure_ascii=False, indent=2)
    results.append(meta)

if results:
    # Build the summary frame once and reuse it for the CSV and the reports.
    # Nothing is written when no row was processed, so an empty CSV with a
    # "[DONE]" message is never produced.
    res_df = pd.DataFrame(results)
    res_csv = OUT_DIR / 'results_clip.csv'
    res_df.to_csv(res_csv, index=False)
    print('[DONE] Salvato results in:', res_csv)

    print('Media CLIP per (object, scenario):')
    display(res_df.groupby(['object','scenario'])['clip_score'].mean().round(4))

    print('\nTop 5 risultati per clip_score:')
    display(res_df.sort_values('clip_score', ascending=False).head(5)[['id','object','scenario','clip_score','prompt']])

    print('\nBottom 5 risultati per clip_score:')
    display(res_df.sort_values('clip_score', ascending=True).head(5)[['id','object','scenario','clip_score','prompt']])
else:
    print('Nessun risultato: controlla path immagini / CSV.')



Totale righe da processare 30


  0%|          | 0/30 [00:00<?, ?it/s]

/content/dataset/cambio_materiale/clean/sedia.jpg


  3%|▎         | 1/30 [01:53<54:40, 113.11s/it]

/content/dataset/cambio_materiale/real/sedia.jpg


  7%|▋         | 2/30 [03:35<49:48, 106.74s/it]

/content/dataset/cambio_materiale/clean/sedia.jpg


 10%|█         | 3/30 [05:12<46:02, 102.30s/it]

/content/dataset/cambio_materiale/real/sedia.jpg


 13%|█▎        | 4/30 [06:49<43:21, 100.08s/it]

/content/dataset/cambio_materiale/clean/sedia.jpg


 17%|█▋        | 5/30 [08:25<41:11, 98.85s/it] 

/content/dataset/cambio_materiale/real/sedia.jpg


 20%|██        | 6/30 [10:02<39:16, 98.18s/it]

/content/dataset/cambio_materiale/clean/divano.jpg


 23%|██▎       | 7/30 [11:39<37:27, 97.72s/it]

/content/dataset/cambio_materiale/real/divano.jpg


 27%|██▋       | 8/30 [13:16<35:42, 97.41s/it]

/content/dataset/cambio_materiale/clean/divano.jpg


 30%|███       | 9/30 [14:52<34:00, 97.17s/it]

/content/dataset/cambio_materiale/real/divano.jpg


 33%|███▎      | 10/30 [16:29<32:18, 96.92s/it]

/content/dataset/cambio_materiale/clean/tavolo.jpg


 37%|███▋      | 11/30 [18:05<30:37, 96.72s/it]

/content/dataset/cambio_materiale/real/tavolo.jpg


 40%|████      | 12/30 [19:41<28:59, 96.62s/it]

/content/dataset/cambio_materiale/clean/tavolo.jpg


 43%|████▎     | 13/30 [21:18<27:20, 96.51s/it]

/content/dataset/cambio_materiale/real/tavolo.jpg


 47%|████▋     | 14/30 [22:55<25:46, 96.65s/it]

/content/dataset/cambio_materiale/clean/tavolo.jpg


 50%|█████     | 15/30 [24:35<24:27, 97.86s/it]

/content/dataset/cambio_materiale/real/tavolo.jpg


 53%|█████▎    | 16/30 [26:13<22:48, 97.77s/it]

/content/dataset/cambio_materiale/clean/tavolo.jpg


 57%|█████▋    | 17/30 [27:51<21:13, 97.97s/it]

/content/dataset/cambio_materiale/real/tavolo.jpg


 60%|██████    | 18/30 [29:29<19:33, 97.80s/it]

/content/dataset/cambio_materiale/clean/tazza.jpg


 63%|██████▎   | 19/30 [31:06<17:54, 97.71s/it]

/content/dataset/cambio_materiale/real/tazza.jpg


 67%|██████▋   | 20/30 [32:44<16:16, 97.66s/it]

/content/dataset/cambio_materiale/clean/tazza.jpg


 70%|███████   | 21/30 [34:21<14:37, 97.55s/it]

/content/dataset/cambio_materiale/real/tazza.jpg


 73%|███████▎  | 22/30 [35:58<13:00, 97.54s/it]

/content/dataset/cambio_materiale/clean/vaso.jpg


 77%|███████▋  | 23/30 [37:36<11:22, 97.48s/it]

/content/dataset/cambio_materiale/real/vaso.jpg


 80%|████████  | 24/30 [39:14<09:45, 97.57s/it]

/content/dataset/cambio_materiale/clean/vaso.jpg


 83%|████████▎ | 25/30 [40:51<08:07, 97.55s/it]

/content/dataset/cambio_materiale/real/vaso.jpg


 87%|████████▋ | 26/30 [42:28<06:29, 97.32s/it]

/content/dataset/cambio_materiale/clean/lampada.jpg


 90%|█████████ | 27/30 [44:04<04:51, 97.05s/it]

/content/dataset/cambio_materiale/real/lampada.jpg


 93%|█████████▎| 28/30 [45:41<03:13, 96.82s/it]

/content/dataset/cambio_materiale/clean/lampada.jpg


 97%|█████████▋| 29/30 [47:17<01:36, 96.59s/it]

/content/dataset/cambio_materiale/real/lampada.jpg


100%|██████████| 30/30 [48:53<00:00, 97.78s/it]

[DONE] Salvato results in: /content/outputs_change_materials/results_clip.csv
Media CLIP per (object, scenario):





Unnamed: 0_level_0,Unnamed: 1_level_0,clip_score
object,scenario,Unnamed: 2_level_1
chair,clean,0.31
chair,real,0.2991
cup,clean,0.3223
cup,real,0.3298
lamp,clean,0.2945
lamp,real,0.3026
sofa,clean,0.2877
sofa,real,0.3174
table,clean,0.2947
table,real,0.327



Top 5 risultati per clip_score:


Unnamed: 0,id,object,scenario,clip_score,prompt
15,MAT_016,table,real,0.349721,Transform the table into a transparent glass t...
23,MAT_022,vase,real,0.349568,Transform the terracotta vase into a natural s...
22,MAT_021,vase,clean,0.349549,Transform the terracotta vase into a natural s...
9,MAT_010,sofa,real,0.337466,Make the sofa appear as made of glossy plastic...
10,MAT_011,table,clean,0.335973,Transform the table into a polished metal tabl...



Bottom 5 risultati per clip_score:


Unnamed: 0,id,object,scenario,clip_score,prompt
6,MAT_007,sofa,clean,0.257873,Turn the fabric sofa into a brown brown leathe...
12,MAT_013,table,clean,0.258209,Transform the table into a polished marble tab...
5,MAT_006,chair,real,0.280109,Transform the chair so that it is with a trans...
16,MAT_029,table,clean,0.288087,Transform the table into a natural stone table...
28,MAT_027,lamp,clean,0.293317,Transform the lamp head into transparent glass...


In [None]:
!zip -r outputs_change_materials-zip /content/outputs_change_materials

updating: content/outputs_change_materials/ (stored 0%)
updating: content/outputs_change_materials/.ipynb_checkpoints/ (stored 0%)
updating: content/outputs_change_materials/change_material/ (stored 0%)
updating: content/outputs_change_materials/change_material/cup/ (stored 0%)
updating: content/outputs_change_materials/change_material/cup/real/ (stored 0%)
updating: content/outputs_change_materials/change_material/cup/real/MAT_018/ (stored 0%)
updating: content/outputs_change_materials/change_material/cup/real/MAT_018/prompt.txt (deflated 23%)
updating: content/outputs_change_materials/change_material/cup/real/MAT_018/clip_scores.json (deflated 43%)
updating: content/outputs_change_materials/change_material/cup/real/MAT_018/input.jpg (deflated 1%)
updating: content/outputs_change_materials/change_material/cup/real/MAT_018/output.jpg (deflated 2%)
updating: content/outputs_change_materials/change_material/cup/real/MAT_020/ (stored 0%)
updating: content/outputs_change_materials/change_m