In [None]:
import json
import os
import glob

# Make sure the directory that receives the generated prompt files exists.
os.makedirs('jsons/eval', exist_ok=True)

# Load the MVTec annotation table. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open('jsons/mvtec_annotations.json', 'r') as annotations_file:
    mvtec_annotations = json.load(annotations_file)
# Context prompts used for images whose parent directory is 'good'.
# Keys are category names, i.e. the third-from-last component of each image
# path matched by the glob further down in this cell. Each value states that
# the image is non-defective and lists the appearance variations that must not
# be reported as defects for that category.
norm_prompts = {
    "wood": "This is a non-defective image of wood used for visual inspection. Small scratches and stripes like the ones in this image are not considered defects this time.",
    "bottle": "This is a non-defective image of a bottle seen from above used for visual inspection. Orange stains like the ones in this image are not considered defects this time.",
    "cable": "This is a non-defective cross-sectional image of a cable used for visual inspection. Stains and shadows on the sheath, reflections, and rust on the copper wires, as seen in this image, are not considered defects this time.",
    "capsule": "This is a non-defective image of a capsule-shaped tablet used for visual inspection. Small scratches and the line in the middle in this image are not defects this time. Additionally, the printed text on the tablet is not considered a defect as long as it is clearly printed.",
    "carpet": "This is a non-defective image of a carpet surface used for visual inspection. Color unevenness like the ones in this image is not considered a defect this time.",
    "grid": "This is a non-defective image of grid-patterned metal used for visual inspection. Bent shape patterns like the ones in this image are not considered defects this time.",
    "hazelnut": "This is a non-defective image of a hazelnut used for visual inspection. Thin brown parts and light ridges like the ones in this image are not considered defects this time.",
    "leather": "This is a non-defective image of the surface of leather used for visual inspection. Wrinkles like the ones in this image are not considered defects this time.",
    "metal_nut": "This is a non-defective image of a metal nut used for visual inspection. The central hole and outer ridges like the ones in this image are not considered defects this time.",
    "pill": "This is a non-defective image of a tablet used for visual inspection. Red spots like the ones in this image are not considered defects this time. Additionally, the printed text on the tablet is not considered a defect as long as it is clearly printed.",
    "screw": "This is a non-defective image of a screw used for visual inspection.",
    "tile": "This is a non-defective image of a tile surface used for visual inspection. Black patterns like the ones in this image are not considered defects this time.",
    "toothbrush": "This is a non-defective image of a toothbrush used for visual inspection. Fine fraying of the bristles like the ones in this image is not considered a defect this time. Additionally, differences in bristle color are not considered defects this time either.",
    "transistor": "This is a non-defective image of a transistor used for visual inspection. Dirt on the substrate and minor kinks in the transistor pins like the ones in this image are not considered defects this time.",
    "zipper": "This is a non-defective image of a zipper used for visual inspection. Minor dirt like the ones in this image is not considered a defect this time."
}

# Context prompts used for images from any directory other than 'good'.
# Every entry is a category-specific description followed by the same closing
# sentence pointing at the red frame, so the shared sentence is appended once
# here instead of being repeated in each literal.
abnorm_prompts = {
    category: description + " However, in this image, the area enclosed by the red frame is considered a defect."
    for category, description in {
        "wood": "This is a defective image of wood used for visual inspection. Small scratches and stripes like the ones in this image are not considered defects this time.",
        "bottle": "This is a defective image of a bottle seen from above used for visual inspection. Orange stains like the ones in this image are not considered defects this time.",
        "cable": "This is a defective cross-sectional image of a cable used for visual inspection. Stains and shadows on the sheath, reflections, and rust on the copper wires, as seen in this image, are not considered defects this time.",
        "capsule": "This is a defective image of a capsule-shaped tablet used for visual inspection. Small scratches and the line in the middle in this image are not defects this time. Additionally, the printed text on the tablet is not considered a defect as long as it is clearly printed.",
        "carpet": "This is a defective image of a carpet surface used for visual inspection. Color unevenness like the ones in this image is not considered a defect this time.",
        "grid": "This is a defective image of grid-patterned metal used for visual inspection. Bent shape patterns like the ones in this image are not considered defects this time.",
        "hazelnut": "This is a defective image of a hazelnut used for visual inspection. Thin brown parts and light ridges like the ones in this image are not considered defects this time.",
        "leather": "This is a defective image of the surface of leather used for visual inspection. Wrinkles like the ones in this image are not considered defects this time.",
        "metal_nut": "This is a defective image of a metal nut used for visual inspection. The central hole and outer ridges like the ones in this image are not considered defects this time.",
        "pill": "This is a defective image of a tablet used for visual inspection. Red spots like the ones in this image are not considered defects this time. Additionally, the printed text on the tablet is not considered a defect as long as it is clearly printed.",
        "screw": "This is a defective image of a screw used for visual inspection.",
        "tile": "This is a defective image of a tile surface used for visual inspection. Black patterns like the ones in this image are not considered defects this time.",
        "toothbrush": "This is a defective image of a toothbrush used for visual inspection. Fine fraying of the bristles like the ones in this image is not considered a defect this time. Additionally, differences in bristle color are not considered defects this time either.",
        "transistor": "This is a defective image of a transistor used for visual inspection. Dirt on the substrate and minor kinks in the transistor pins like the ones in this image are not considered defects this time.",
        "zipper": "This is a defective image of a zipper used for visual inspection. Minor dirt like the ones in this image is not considered a defect this time.",
    }.items()
}

# Question appended to every per-image prompt.
final_prompt = "\n<image>\nThen, Does this image have any defects? If yes, please provide the bounding box coordinate of the region where the defect is located. If no, please say None."

output_path = 'jsons/eval/prompt_each.json'

data = glob.glob('/data/mvtec_test_for_icl/*/*/*')
new_dict = {}
for d in data:
    # Globbed paths have the shape .../<category>/<label>/<file>.
    category, label = d.split('/')[-3:-1]
    # 'good' images get the non-defective wording; every other label gets the
    # defective wording. The templates contain no `{}` placeholders, so they
    # are used verbatim: no str.format step (and no annotation lookup that
    # could raise for a missing entry) is needed to build the prompt.
    if label == 'good':
        prompt = norm_prompts[category]
    else:
        prompt = abnorm_prompts[category]
    new_dict[d] = prompt + final_prompt

# Serialise once after the mapping is complete rather than rewriting the file
# on every iteration, and close the handle deterministically. Nothing is
# written when the glob matched no files, so an existing output file is not
# replaced by an empty mapping.
if new_dict:
    with open(output_path, 'w') as out_file:
        json.dump(new_dict, out_file, indent=4)

In [None]:
import json
import os
import glob

# Create the jsons/eval directory if it does not exist yet.
os.makedirs('jsons/eval', exist_ok=True)
# Label-agnostic context prompts: unlike a defective/non-defective pair, the
# same text is used for every image of a category. Keys are category names,
# i.e. the third-from-last component of each image path matched by the glob
# further down in this cell.
prompts = {
    "wood": "This is an image of wood used for visual inspection. Small scratches and stripes like the ones in this image are not considered defects this time.",
    "bottle": "This is an image of a bottle seen from above used for visual inspection. Orange stains like the ones in this image are not considered defects this time.",
    "cable": "This is a cross-sectional image of a cable used for visual inspection. Stains and shadows on the sheath, reflections, and rust on the copper wires, as seen in this image, are not considered defects this time.",
    "capsule": "This is an image of a capsule-shaped tablet used for visual inspection. Small scratches and the line in the middle in this image are not defects this time. Additionally, the printed text on the tablet is not considered a defect as long as it is clearly printed.",
    "carpet": "This is an image of a carpet surface used for visual inspection. Color unevenness like the ones in this image is not considered a defect this time.",
    "grid": "This is an image of grid-patterned metal used for visual inspection. Bent shape patterns like the ones in this image are not considered defects this time.",
    "hazelnut": "This is an image of a hazelnut used for visual inspection. Thin brown parts and light ridges like the ones in this image are not considered defects this time.",
    "leather": "This is an image of the surface of leather used for visual inspection. Wrinkles like the ones in this image are not considered defects this time.",
    "metal_nut": "This is an image of a metal nut used for visual inspection. The central hole and outer ridges like the ones in this image are not considered defects this time.",
    "pill": "This is an image of a tablet used for visual inspection. Red spots like the ones in this image are not considered defects this time. Additionally, the printed text on the tablet is not considered a defect as long as it is clearly printed.",
    "screw": "This is an image of a screw used for visual inspection.",
    "tile": "This is an image of a tile surface used for visual inspection. Black patterns like the ones in this image are not considered defects this time.",
    "toothbrush": "This is an image of a toothbrush used for visual inspection. Fine fraying of the bristles like the ones in this image is not considered a defect this time. Additionally, differences in bristle color are not considered defects this time either.",
    "transistor": "This is an image of a transistor used for visual inspection. Dirt on the substrate and minor kinks in the transistor pins like the ones in this image are not considered defects this time.",
    "zipper": "This is an image of a zipper used for visual inspection. Minor dirt like the ones in this image is not considered a defect this time."
}

# Question appended to every per-image prompt.
final_prompt = "\nThen, does this image have any defects? If yes, please provide the bounding box coordinate of the region where the defect is located. If no, please say None."

output_path = 'jsons/eval/mvtec/prompt_single.json'

data = glob.glob('/data/mvtec_test_for_icl/*/*/*')
new_dict = {}
for d in data:
    # Globbed paths have the shape .../<category>/<label>/<file>; only the
    # category selects the prompt here.
    category = d.split('/')[-3]
    prompt = prompts[category] + final_prompt
    new_dict[d] = prompt

# Serialise once after the mapping is complete rather than rewriting the file
# on every iteration. Nothing is written when the glob matched no files, so an
# existing output file is not replaced by an empty mapping.
if new_dict:
    # The output lives in a 'mvtec' sub-directory, so create that directory
    # itself; creating only 'jsons/eval' would leave open() failing with
    # FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w') as out_file:
        json.dump(new_dict, out_file, indent=4)

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from PIL import Image

# Pretrained ResNet-18 with its last child module (the classification layer in
# torchvision's ResNet) dropped, used as a fixed feature extractor.
encoder = models.resnet18(pretrained=True)
encoder = nn.Sequential(*list(encoder.children())[:-1])
encoder.eval()

# NOTE(review): mean/std of 0.5 differ from the ImageNet statistics that
# torchvision documents for its pretrained weights; confirm this is intended.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

support_set_path = '/data/mvtec_test_for_icl/bottle'
dataset = ImageFolder(support_set_path, transform=transform)
dataloader = DataLoader(dataset, batch_size=1)

# One feature vector per support image, in dataset order. batch_size=1 makes
# squeeze() collapse the encoder output to a single 1-D vector per image.
support_set_embeddings = []
with torch.no_grad():
    for x, _ in dataloader:
        support_set_embeddings.append(encoder(x).squeeze().cpu())

support_set_embeddings = torch.stack(support_set_embeddings)

query_path = '/data/mvtec_test_for_icl/bottle/good/000.png'
# Force three channels so a grayscale or RGBA query goes through the same
# 3-channel Normalize as the support images (ImageFolder's default loader
# converts to RGB as well), and close the file once the pixels are loaded.
with Image.open(query_path) as query_file:
    query_img = query_file.convert('RGB')
query_tensor = transform(query_img).unsqueeze(0)

with torch.no_grad():
    query_embedding = encoder(query_tensor).squeeze().cpu()

def nearest_first(unlabeled_embeddings, labeled_embedding, n):
    """Greedily pick up to ``n`` distinct rows of ``unlabeled_embeddings``.

    The first pick is the row with the smallest Euclidean distance to
    ``labeled_embedding``. After each pick, every row's score becomes the
    minimum of its current score and its distance to the row just picked, so
    later picks may be close to the query *or* to an earlier pick.

    Args:
        unlabeled_embeddings: 2-D tensor with one candidate embedding per row.
        labeled_embedding: 1-D query embedding with the same feature size.
        n: Number of indices requested.

    Returns:
        A list of distinct row indices in selection order. Its length is
        ``min(n, number of rows)``; it is empty when ``n <= 0`` or there are
        no candidates.
    """
    unlabeled_embeddings = unlabeled_embeddings.to('cpu')
    labeled_embedding = labeled_embedding.unsqueeze(0).to('cpu')

    # Never ask for more picks than there are candidates: once every index is
    # selected, the duplicate-skipping loop below could never terminate.
    n = min(n, unlabeled_embeddings.shape[0])
    if n <= 0:
        return []

    dist_ctr = torch.cdist(unlabeled_embeddings, labeled_embedding, p=2)
    min_dist = torch.min(dist_ctr, dim=1)[0]

    idxs = []
    selected_indices = set()
    for _ in range(n):
        idx = torch.argmin(min_dist).item()
        # A picked row has distance ~0 to itself, so it tends to win argmin
        # again; push already-selected rows to +inf until a new one surfaces.
        while idx in selected_indices:
            min_dist[idx] = float('inf')
            idx = torch.argmin(min_dist).item()
        selected_indices.add(idx)
        idxs.append(idx)

        # Fold in the distances to the newly picked row.
        dist_new_ctr = torch.cdist(unlabeled_embeddings, unlabeled_embeddings[idx:idx + 1])
        min_dist = torch.minimum(min_dist, dist_new_ctr[:, 0])

    return idxs

# Retrieve the index of the single support embedding selected for the query
# and show it.
n = 1
selected_indices = nearest_first(
    unlabeled_embeddings=support_set_embeddings,
    labeled_embedding=query_embedding,
    n=n,
)
print("Selected indices:", selected_indices)


In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import glob
import json
import tqdm

# Pretrained ResNet-50 with its last child module (the classification layer in
# torchvision's ResNet) dropped, used as a fixed feature extractor.
encoder = models.resnet50(pretrained=True)
encoder = nn.Sequential(*list(encoder.children())[:-1])
encoder.eval()

# Context manager so the file handle is closed deterministically.
# NOTE(review): normalize_dict is not referenced anywhere else in the visible
# code; confirm whether it was meant to feed transforms.Normalize.
with open('jsons/mvtec_normalize.json', 'r') as normalize_file:
    normalize_dict = json.load(normalize_file)

# Sorted so that all images of one category are visited consecutively, which
# lets the per-category support embeddings be computed once per category.
query_paths = sorted(glob.glob('/data/mvtec_test_for_icl/*/*/*'))
category = None  # category whose support embeddings are currently cached
new_dict = {}    # query image path -> retrieved support image path

def nearest_first(unlabeled_embeddings, labeled_embedding, n):
    """Greedily pick up to ``n`` distinct rows of ``unlabeled_embeddings``.

    The first pick is the row with the highest cosine similarity to
    ``labeled_embedding``. After each pick, every row's score becomes the
    maximum of its current score and its cosine similarity to the row just
    picked, so later picks may be similar to the query *or* to an earlier pick.

    Args:
        unlabeled_embeddings: 2-D tensor with one candidate embedding per row.
        labeled_embedding: 1-D query embedding with the same feature size.
        n: Number of indices requested.

    Returns:
        A list of distinct row indices in selection order. Its length is
        ``min(n, number of rows)``; it is empty when ``n <= 0`` or there are
        no candidates.
    """
    unlabeled_embeddings = unlabeled_embeddings.to('cpu')
    labeled_embedding = labeled_embedding.unsqueeze(0).to('cpu')

    # Never ask for more picks than there are candidates: once every index is
    # selected, the duplicate-skipping loop below could never terminate.
    n = min(n, unlabeled_embeddings.shape[0])
    if n <= 0:
        return []

    similarity = torch.nn.functional.cosine_similarity(unlabeled_embeddings, labeled_embedding, dim=1)
    max_similarity = similarity.clone()
    idxs = []
    selected_indices = set()
    for _ in range(n):
        idx = torch.argmax(max_similarity).item()
        # A picked row has similarity ~1 to itself, so it tends to win argmax
        # again; push already-selected rows to -inf until a new one surfaces.
        while idx in selected_indices:
            max_similarity[idx] = float('-inf')
            idx = torch.argmax(max_similarity).item()
        selected_indices.add(idx)
        idxs.append(idx)
        # Fold in the similarities to the newly picked row.
        similarity_new = torch.nn.functional.cosine_similarity(
            unlabeled_embeddings, unlabeled_embeddings[idx].unsqueeze(0), dim=1
        )
        max_similarity = torch.maximum(max_similarity, similarity_new)
    return idxs

def get_transform(mode):
    """Return the preprocessing pipeline for a PIL image of the given mode.

    Single-channel ('L') images are first expanded to three channels so the
    3-channel Normalize step applies; all other modes use the plain pipeline.
    """
    steps = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
    if mode == 'L':
        steps.insert(0, transforms.Grayscale(num_output_channels=3))
    return transforms.Compose(steps)


def _embed_image(path):
    """Open the image at ``path`` and return its 1-D encoder feature vector."""
    with Image.open(path) as img:
        # Anything that is neither grayscale nor RGB (e.g. RGBA, palette) is
        # converted so it always reaches Normalize with three channels.
        if img.mode not in ('L', 'RGB'):
            img = img.convert('RGB')
        tensor = get_transform(img.mode)(img).unsqueeze(0)
    with torch.no_grad():
        return encoder(tensor).squeeze().cpu()


# The query image is itself part of its category's support set, so it is
# normally its own best match. Ask for two picks and keep the first one that
# is not the query; asking for a single pick and reading index 1 of the result
# always raised IndexError.
n = 2
for query_path in tqdm.tqdm(query_paths, total=len(query_paths)):
    query_category = query_path.split('/')[-3]

    # Recompute the support embeddings only when a new category starts.
    if category != query_category:
        support_set_path = '/data/mvtec_test_for_icl/' + query_category
        support_set_paths = sorted(glob.glob(support_set_path + '/*/*'))
        # Embed the globbed paths directly so row i of the embedding matrix is
        # guaranteed to belong to support_set_paths[i].
        support_set_embeddings = torch.stack([_embed_image(p) for p in support_set_paths])
        category = query_category

    query_embedding = _embed_image(query_path)

    # Cap the request at the support-set size so a one-image category cannot
    # ask for more picks than exist.
    selected_indices = nearest_first(
        support_set_embeddings, query_embedding, min(n, len(support_set_paths))
    )
    neighbours = [
        support_set_paths[i] for i in selected_indices
        if support_set_paths[i] != query_path
    ]
    # A category holding only the query itself has no neighbour to record.
    if neighbours:
        new_dict[query_path] = neighbours[0]

# Serialise once after all queries are processed rather than rewriting the
# file on every iteration, and close the handle deterministically. Nothing is
# written when no query produced a result.
if new_dict:
    with open('jsons/eval/rices_one.json', 'w') as out_file:
        json.dump(new_dict, out_file, indent=4)
