# PatchCoreViT Analysis

## Preparations

In [None]:
! pip install wget

Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9655 sha256=174e9f1c380e6834a9b0643b638e7d2283efeb548b1ec82849d2374d13126ec1
  Stored in directory: /root/.cache/pip/wheels/01/46/3b/e29ffbe4ebe614ff224bad40fc6a5773a67a163251585a13a9
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [None]:
import importlib

import os
import cv2
import wget
import json
import torch
import shutil
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image

import patchcore_models
import patchcore_utils

from patchcore_utils import get_results, print_results, save_json
from patchcore_models import MVTecDataset, PatchCoreViT, VanillaPatchCore, PatchCoreSWin

# Every archive URL has the shape
#   https://www.mydrive.ch/shares/<share>/download/<archive id>/<category>.tar.xz
# All categories use the same share except "pill", which is hosted under its own.
_DEFAULT_SHARE = "38536/3830184030e49fe74747669442f0f282"
_PILL_SHARE = "43421/11a215a5749fcfb75e331ddd5f8e43ee"

# (category, archive id, share) -- order here is the download/evaluation order.
_archive_records = [
    ("bottle", "420937370-1629951468", _DEFAULT_SHARE),
    ("cable", "420937413-1629951498", _DEFAULT_SHARE),
    ("capsule", "420937454-1629951595", _DEFAULT_SHARE),
    ("carpet", "420937484-1629951672", _DEFAULT_SHARE),
    ("grid", "420937487-1629951814", _DEFAULT_SHARE),
    ("hazelnut", "420937545-1629951845", _DEFAULT_SHARE),
    ("leather", "420937607-1629951964", _DEFAULT_SHARE),
    ("metal_nut", "420937637-1629952063", _DEFAULT_SHARE),
    ("pill", "420938129-1629953099", _PILL_SHARE),
    ("screw", "420938130-1629953152", _DEFAULT_SHARE),
    ("tile", "420938133-1629953189", _DEFAULT_SHARE),
    ("toothbrush", "420938134-1629953256", _DEFAULT_SHARE),
    ("transistor", "420938166-1629953277", _DEFAULT_SHARE),
    ("wood", "420938383-1629953354", _DEFAULT_SHARE),
    ("zipper", "420938385-1629953449", _DEFAULT_SHARE),
]

# Maps category name -> download URL of its .tar.xz archive.
class_links = {
    category: f"https://www.mydrive.ch/shares/{share}/download/{archive_id}/{category}.tar.xz"
    for category, archive_id, share in _archive_records
}

'\n    "capsule": "https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420937454-1629951595/capsule.tar.xz",\n    "carpet": "https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420937484-1629951672/carpet.tar.xz",\n    "grid": "https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420937487-1629951814/grid.tar.xz",\n    "hazelnut": "https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420937545-1629951845/hazelnut.tar.xz",\n    "leather": "https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420937607-1629951964/leather.tar.xz",\n    "metal_nut": "https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420937637-1629952063/metal_nut.tar.xz",\n    "pill": "https://www.mydrive.ch/shares/43421/11a215a5749fcfb75e331ddd5f8e43ee/download/420938129-1629953099/pill.tar.xz",\n    "screw": "https://www.mydrive.ch/shares/38536/3830184030e49fe747476694

In [None]:
# Download duration 6m 5s
# For each class: download its archive into a directory named after the class,
# unpack it there, and delete the archive afterwards.
for class_name, url in class_links.items():
  # exist_ok=True keeps the cell re-runnable: os.mkdir would raise
  # FileExistsError if the directory is left over from a previous run.
  os.makedirs(class_name, exist_ok=True)
  file_path = wget.download(url, class_name)

  # Report a missing file as the download failure (previously this case was
  # silently ignored and the message was printed for non-.tar.xz files instead).
  if not os.path.exists(file_path):
    print(f"Failed to download the file {class_name}.")
    continue

  # Extract the file if it's a tar.xz file
  if file_path.endswith('.tar.xz'):
    shutil.unpack_archive(file_path, extract_dir=class_name)
    os.remove(file_path)
    print(f"File {class_name} downloaded and extracted successfully.")
  else:
    print(f"Downloaded file {file_path} is not a .tar.xz archive; left unextracted.")

File bottle downloaded and extracted successfully.
File cable downloaded and extracted successfully.


In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

# Clean-up: uncomment to delete a downloaded class directory
# shutil.rmtree("bottle")

## Single Layer Analysis

### Layer 0-1-2

In [None]:
# Layer 0
# Run PatchCoreViT using only layer 0 of the backbone, print the results and
# save them to JSON.
print("Layer: 0")

model_params = {
  "layers" : [0],
  "backbone" : "google/vit-base-patch16-224-in21k",
  "f_coreset" : 0.1
}

# Pass the parameters defined above; the original passed the undefined name `c`,
# which raises NameError on a fresh kernel.
results = get_results(PatchCoreViT, model_params)
print_results(results)
result_json = save_json(results, "pcViT_base-patch16-224-ink21k_l0.json")
# Avg AUC: 0.795 		Total Misclassified: 306

Layer: 0

Class: bottle


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 209/209 [00:12<00:00, 17.14it/s]


Start Coreset Subsampling...


100%|██████████| 4095/4095 [00:05<00:00, 771.51it/s]
100%|██████████| 83/83 [00:06<00:00, 12.35it/s]


Val: IMAGE Level ROCAUC: 0.988
Val: PIXEL Level ROCAUC: 0.970
[INFO][evaluate] Image Level ROCAUC: 0.988
[INFO][evaluate] Initial Score Threshold: 3.771 F1Score: 0.968
[INFO][evaluate] Optimal Score Threshold: 3.711 F1Score: 0.976
[INFO][evaluate] Average Inference time with batch_size=1: 0.028s

Class: cable


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 224/224 [00:19<00:00, 11.71it/s]


Start Coreset Subsampling...


100%|██████████| 4389/4389 [00:05<00:00, 733.55it/s]
100%|██████████| 150/150 [00:14<00:00, 10.27it/s]


Val: IMAGE Level ROCAUC: 0.477
Val: PIXEL Level ROCAUC: 0.738
[INFO][evaluate] Image Level ROCAUC: 0.477
[INFO][evaluate] Initial Score Threshold: 6.234 F1Score: 0.598
[INFO][evaluate] Optimal Score Threshold: -3.766 F1Score: 0.760
[INFO][evaluate] Average Inference time with batch_size=1: 0.026s

Class: capsule


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 219/219 [00:18<00:00, 11.97it/s]


Start Coreset Subsampling...


100%|██████████| 4291/4291 [00:05<00:00, 731.47it/s]
100%|██████████| 132/132 [00:12<00:00, 10.36it/s]


Val: IMAGE Level ROCAUC: 0.803
Val: PIXEL Level ROCAUC: 0.913
[INFO][evaluate] Image Level ROCAUC: 0.803
[INFO][evaluate] Initial Score Threshold: 2.257 F1Score: 0.874
[INFO][evaluate] Optimal Score Threshold: 1.877 F1Score: 0.931
[INFO][evaluate] Average Inference time with batch_size=1: 0.026s

Class: carpet


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 280/280 [00:23<00:00, 12.06it/s]


Start Coreset Subsampling...


100%|██████████| 5487/5487 [00:09<00:00, 582.16it/s]
100%|██████████| 117/117 [00:11<00:00, 10.63it/s]


Val: IMAGE Level ROCAUC: 0.525
Val: PIXEL Level ROCAUC: 0.794
[INFO][evaluate] Image Level ROCAUC: 0.525
[INFO][evaluate] Initial Score Threshold: 4.554 F1Score: 0.744
[INFO][evaluate] Optimal Score Threshold: -5.446 F1Score: 0.864
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: grid


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 264/264 [00:12<00:00, 20.64it/s]


Start Coreset Subsampling...


100%|██████████| 5173/5173 [00:08<00:00, 616.19it/s]
100%|██████████| 78/78 [00:04<00:00, 18.69it/s]


Val: IMAGE Level ROCAUC: 0.931
Val: PIXEL Level ROCAUC: 0.956
[INFO][evaluate] Image Level ROCAUC: 0.931
[INFO][evaluate] Initial Score Threshold: 4.205 F1Score: 0.885
[INFO][evaluate] Optimal Score Threshold: 4.025 F1Score: 0.922
[INFO][evaluate] Average Inference time with batch_size=1: 0.016s

Class: hazelnut


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 391/391 [00:31<00:00, 12.38it/s]


Start Coreset Subsampling...


100%|██████████| 7662/7662 [00:18<00:00, 419.67it/s]
100%|██████████| 110/110 [00:10<00:00, 10.58it/s]


Val: IMAGE Level ROCAUC: 0.884
Val: PIXEL Level ROCAUC: 0.975
[INFO][evaluate] Image Level ROCAUC: 0.884
[INFO][evaluate] Initial Score Threshold: 4.444 F1Score: 0.840
[INFO][evaluate] Optimal Score Threshold: 4.394 F1Score: 0.851
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: leather


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 245/245 [00:18<00:00, 13.05it/s]


Start Coreset Subsampling...


100%|██████████| 4801/4801 [00:07<00:00, 654.63it/s]
100%|██████████| 124/124 [00:11<00:00, 10.95it/s]


Val: IMAGE Level ROCAUC: 0.780
Val: PIXEL Level ROCAUC: 0.917
[INFO][evaluate] Image Level ROCAUC: 0.780
[INFO][evaluate] Initial Score Threshold: 4.508 F1Score: 0.756
[INFO][evaluate] Optimal Score Threshold: 4.328 F1Score: 0.854
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: metal_nut


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 220/220 [00:10<00:00, 20.93it/s]


Start Coreset Subsampling...


100%|██████████| 4311/4311 [00:05<00:00, 726.08it/s]
100%|██████████| 115/115 [00:06<00:00, 18.26it/s]


Val: IMAGE Level ROCAUC: 0.442
Val: PIXEL Level ROCAUC: 0.830
[INFO][evaluate] Image Level ROCAUC: 0.442
[INFO][evaluate] Initial Score Threshold: 4.652 F1Score: 0.432
[INFO][evaluate] Optimal Score Threshold: -5.348 F1Score: 0.894
[INFO][evaluate] Average Inference time with batch_size=1: 0.018s

Class: pill


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 267/267 [00:18<00:00, 14.64it/s]


Start Coreset Subsampling...


100%|██████████| 5232/5232 [00:08<00:00, 600.87it/s]
100%|██████████| 167/167 [00:12<00:00, 13.02it/s]


Val: IMAGE Level ROCAUC: 0.899
Val: PIXEL Level ROCAUC: 0.957
[INFO][evaluate] Image Level ROCAUC: 0.899
[INFO][evaluate] Initial Score Threshold: 3.280 F1Score: 0.899
[INFO][evaluate] Optimal Score Threshold: 3.141 F1Score: 0.951
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: screw


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 320/320 [00:17<00:00, 18.39it/s]


Start Coreset Subsampling...


100%|██████████| 6271/6271 [00:12<00:00, 510.05it/s]
100%|██████████| 160/160 [00:10<00:00, 15.04it/s]


Val: IMAGE Level ROCAUC: 0.717
Val: PIXEL Level ROCAUC: 0.978
[INFO][evaluate] Image Level ROCAUC: 0.717
[INFO][evaluate] Initial Score Threshold: 3.803 F1Score: 0.817
[INFO][evaluate] Optimal Score Threshold: 3.333 F1Score: 0.866
[INFO][evaluate] Average Inference time with batch_size=1: 0.023s

Class: tile


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 230/230 [00:15<00:00, 15.01it/s]


Start Coreset Subsampling...


100%|██████████| 4507/4507 [00:06<00:00, 672.00it/s]
100%|██████████| 117/117 [00:08<00:00, 13.28it/s]


Val: IMAGE Level ROCAUC: 0.811
Val: PIXEL Level ROCAUC: 0.754
[INFO][evaluate] Image Level ROCAUC: 0.811
[INFO][evaluate] Initial Score Threshold: 4.585 F1Score: 0.734
[INFO][evaluate] Optimal Score Threshold: 4.485 F1Score: 0.865
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: toothbrush


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 60/60 [00:05<00:00, 11.21it/s]


Start Coreset Subsampling...


100%|██████████| 1175/1175 [00:00<00:00, 1955.85it/s]
100%|██████████| 42/42 [00:03<00:00, 12.60it/s]


Val: IMAGE Level ROCAUC: 0.956
Val: PIXEL Level ROCAUC: 0.980
[INFO][evaluate] Image Level ROCAUC: 0.956
[INFO][evaluate] Initial Score Threshold: 4.315 F1Score: 0.893
[INFO][evaluate] Optimal Score Threshold: 4.215 F1Score: 0.933
[INFO][evaluate] Average Inference time with batch_size=1: 0.017s

Class: transistor


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 213/213 [00:19<00:00, 11.02it/s]


Start Coreset Subsampling...


100%|██████████| 4173/4173 [00:05<00:00, 700.62it/s]
100%|██████████| 100/100 [00:09<00:00, 10.63it/s]


Val: IMAGE Level ROCAUC: 0.799
Val: PIXEL Level ROCAUC: 0.835
[INFO][evaluate] Image Level ROCAUC: 0.799
[INFO][evaluate] Initial Score Threshold: 4.249 F1Score: 0.706
[INFO][evaluate] Optimal Score Threshold: 4.239 F1Score: 0.721
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: wood


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 247/247 [00:21<00:00, 11.59it/s]


Start Coreset Subsampling...


100%|██████████| 4840/4840 [00:07<00:00, 649.56it/s]
100%|██████████| 79/79 [00:07<00:00, 11.23it/s]


Val: IMAGE Level ROCAUC: 0.950
Val: PIXEL Level ROCAUC: 0.902
[INFO][evaluate] Image Level ROCAUC: 0.950
[INFO][evaluate] Initial Score Threshold: 4.129 F1Score: 0.911
[INFO][evaluate] Optimal Score Threshold: 3.920 F1Score: 0.935
[INFO][evaluate] Average Inference time with batch_size=1: 0.023s

Class: zipper


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 240/240 [00:11<00:00, 21.33it/s]


Start Coreset Subsampling...


100%|██████████| 4703/4703 [00:06<00:00, 682.03it/s]
100%|██████████| 151/151 [00:09<00:00, 16.08it/s]


Val: IMAGE Level ROCAUC: 0.959
Val: PIXEL Level ROCAUC: 0.935
[INFO][evaluate] Image Level ROCAUC: 0.959
[INFO][evaluate] Initial Score Threshold: 4.054 F1Score: 0.949
[INFO][evaluate] Optimal Score Threshold: 3.944 F1Score: 0.971
[INFO][evaluate] Average Inference time with batch_size=1: 0.022s

CLASS BREAKDOWN
ROCAUC: 0.988 		f1_score: 0.976 	bottle
ROCAUC: 0.477 		f1_score: 0.760 	cable
ROCAUC: 0.803 		f1_score: 0.931 	capsule
ROCAUC: 0.525 		f1_score: 0.864 	carpet
ROCAUC: 0.931 		f1_score: 0.922 	grid
ROCAUC: 0.884 		f1_score: 0.851 	hazelnut
ROCAUC: 0.780 		f1_score: 0.854 	leather
ROCAUC: 0.442 		f1_score: 0.894 	metal_nut
ROCAUC: 0.899 		f1_score: 0.951 	pill
ROCAUC: 0.717 		f1_score: 0.866 	screw
ROCAUC: 0.811 		f1_score: 0.865 	tile
ROCAUC: 0.956 		f1_score: 0.933 	toothbrush
ROCAUC: 0.799 		f1_score: 0.721 	transistor
ROCAUC: 0.950 		f1_score: 0.935 	wood
ROCAUC: 0.959 		f1_score: 0.971 	zipper


SUMMARY
Avg AUC: 0.795 		Total Misclassified: 306


In [None]:
# Layer 1
# Run PatchCoreViT using only layer 1 of the backbone, print the results and
# save them to JSON.
print("Layer: 1")

model_params = dict(
  layers=[1],
  backbone="google/vit-base-patch16-224-in21k",
  f_coreset=0.1,
)

results = get_results(PatchCoreViT, model_params)
print_results(results)
result_json = save_json(
  results,
  "pcViT_base-patch16-224-ink21k_l1.json",
)
# Recorded outcome of this run:
# Avg AUC: 0.933 		Total Misclassified: 183

Layer: 1

Class: bottle


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 209/209 [00:11<00:00, 17.63it/s]


Start Coreset Subsampling...


100%|██████████| 4095/4095 [00:05<00:00, 788.76it/s]
100%|██████████| 83/83 [00:05<00:00, 14.72it/s]


Val: IMAGE Level ROCAUC: 0.999
Val: PIXEL Level ROCAUC: 0.991
[INFO][evaluate] Image Level ROCAUC: 0.999
[INFO][evaluate] Initial Score Threshold: 4.232 F1Score: 0.984
[INFO][evaluate] Optimal Score Threshold: 3.372 F1Score: 0.992
[INFO][evaluate] Average Inference time with batch_size=1: 0.020s

Class: cable


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 224/224 [00:18<00:00, 11.79it/s]


Start Coreset Subsampling...


100%|██████████| 4389/4389 [00:05<00:00, 744.21it/s]
100%|██████████| 150/150 [00:14<00:00, 10.04it/s]


Val: IMAGE Level ROCAUC: 0.862
Val: PIXEL Level ROCAUC: 0.942
[INFO][evaluate] Image Level ROCAUC: 0.862
[INFO][evaluate] Initial Score Threshold: 5.180 F1Score: 0.833
[INFO][evaluate] Optimal Score Threshold: 5.151 F1Score: 0.834
[INFO][evaluate] Average Inference time with batch_size=1: 0.027s

Class: capsule


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 219/219 [00:18<00:00, 11.95it/s]


Start Coreset Subsampling...


100%|██████████| 4291/4291 [00:06<00:00, 692.64it/s]
100%|██████████| 132/132 [00:12<00:00, 10.56it/s]


Val: IMAGE Level ROCAUC: 0.834
Val: PIXEL Level ROCAUC: 0.930
[INFO][evaluate] Image Level ROCAUC: 0.834
[INFO][evaluate] Initial Score Threshold: 3.204 F1Score: 0.856
[INFO][evaluate] Optimal Score Threshold: 2.404 F1Score: 0.939
[INFO][evaluate] Average Inference time with batch_size=1: 0.026s

Class: carpet


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 280/280 [00:22<00:00, 12.29it/s]


Start Coreset Subsampling...


100%|██████████| 5487/5487 [00:09<00:00, 582.22it/s]
100%|██████████| 117/117 [00:10<00:00, 10.88it/s]


Val: IMAGE Level ROCAUC: 0.852
Val: PIXEL Level ROCAUC: 0.967
[INFO][evaluate] Image Level ROCAUC: 0.852
[INFO][evaluate] Initial Score Threshold: 4.492 F1Score: 0.815
[INFO][evaluate] Optimal Score Threshold: 4.242 F1Score: 0.889
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: grid


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 264/264 [00:12<00:00, 20.83it/s]


Start Coreset Subsampling...


100%|██████████| 5173/5173 [00:08<00:00, 629.33it/s]
100%|██████████| 78/78 [00:04<00:00, 15.75it/s]


Val: IMAGE Level ROCAUC: 0.985
Val: PIXEL Level ROCAUC: 0.976
[INFO][evaluate] Image Level ROCAUC: 0.985
[INFO][evaluate] Initial Score Threshold: 4.299 F1Score: 0.954
[INFO][evaluate] Optimal Score Threshold: 4.159 F1Score: 0.965
[INFO][evaluate] Average Inference time with batch_size=1: 0.020s

Class: hazelnut


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 391/391 [00:31<00:00, 12.24it/s]


Start Coreset Subsampling...


100%|██████████| 7662/7662 [00:18<00:00, 417.60it/s]
100%|██████████| 110/110 [00:09<00:00, 11.50it/s]


Val: IMAGE Level ROCAUC: 0.985
Val: PIXEL Level ROCAUC: 0.983
[INFO][evaluate] Image Level ROCAUC: 0.985
[INFO][evaluate] Initial Score Threshold: 4.903 F1Score: 0.950
[INFO][evaluate] Optimal Score Threshold: 4.834 F1Score: 0.952
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: leather


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 245/245 [00:18<00:00, 13.59it/s]


Start Coreset Subsampling...


100%|██████████| 4801/4801 [00:07<00:00, 669.00it/s]
100%|██████████| 124/124 [00:11<00:00, 11.12it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.995
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 4.448 F1Score: 0.995
[INFO][evaluate] Optimal Score Threshold: 4.438 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: metal_nut


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 220/220 [00:10<00:00, 21.50it/s]


Start Coreset Subsampling...


100%|██████████| 4311/4311 [00:07<00:00, 608.19it/s]
100%|██████████| 115/115 [00:06<00:00, 16.65it/s]


Val: IMAGE Level ROCAUC: 0.973
Val: PIXEL Level ROCAUC: 0.962
[INFO][evaluate] Image Level ROCAUC: 0.973
[INFO][evaluate] Initial Score Threshold: 4.932 F1Score: 0.938
[INFO][evaluate] Optimal Score Threshold: 4.702 F1Score: 0.963
[INFO][evaluate] Average Inference time with batch_size=1: 0.021s

Class: pill


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 267/267 [00:15<00:00, 17.43it/s]


Start Coreset Subsampling...


100%|██████████| 5232/5232 [00:08<00:00, 608.65it/s]
100%|██████████| 167/167 [00:11<00:00, 14.50it/s]


Val: IMAGE Level ROCAUC: 0.914
Val: PIXEL Level ROCAUC: 0.944
[INFO][evaluate] Image Level ROCAUC: 0.914
[INFO][evaluate] Initial Score Threshold: 4.111 F1Score: 0.901
[INFO][evaluate] Optimal Score Threshold: 3.852 F1Score: 0.936
[INFO][evaluate] Average Inference time with batch_size=1: 0.023s

Class: screw


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 320/320 [00:15<00:00, 20.23it/s]


Start Coreset Subsampling...


100%|██████████| 6271/6271 [00:12<00:00, 516.48it/s]
100%|██████████| 160/160 [00:09<00:00, 16.43it/s]


Val: IMAGE Level ROCAUC: 0.843
Val: PIXEL Level ROCAUC: 0.964
[INFO][evaluate] Image Level ROCAUC: 0.843
[INFO][evaluate] Initial Score Threshold: 3.964 F1Score: 0.824
[INFO][evaluate] Optimal Score Threshold: 3.684 F1Score: 0.882
[INFO][evaluate] Average Inference time with batch_size=1: 0.021s

Class: tile


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 230/230 [00:15<00:00, 15.25it/s]


Start Coreset Subsampling...


100%|██████████| 4507/4507 [00:06<00:00, 717.82it/s]
100%|██████████| 117/117 [00:08<00:00, 13.15it/s]


Val: IMAGE Level ROCAUC: 0.935
Val: PIXEL Level ROCAUC: 0.903
[INFO][evaluate] Image Level ROCAUC: 0.935
[INFO][evaluate] Initial Score Threshold: 5.098 F1Score: 0.877
[INFO][evaluate] Optimal Score Threshold: 5.009 F1Score: 0.901
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: toothbrush


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 60/60 [00:04<00:00, 12.73it/s]


Start Coreset Subsampling...


100%|██████████| 1175/1175 [00:00<00:00, 2267.45it/s]
100%|██████████| 42/42 [00:03<00:00, 12.78it/s]


Val: IMAGE Level ROCAUC: 0.975
Val: PIXEL Level ROCAUC: 0.989
[INFO][evaluate] Image Level ROCAUC: 0.975
[INFO][evaluate] Initial Score Threshold: 4.601 F1Score: 0.912
[INFO][evaluate] Optimal Score Threshold: 3.981 F1Score: 0.938
[INFO][evaluate] Average Inference time with batch_size=1: 0.017s

Class: transistor


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 213/213 [00:19<00:00, 11.19it/s]


Start Coreset Subsampling...


100%|██████████| 4173/4173 [00:05<00:00, 750.84it/s]
100%|██████████| 100/100 [00:09<00:00, 10.38it/s]


Val: IMAGE Level ROCAUC: 0.956
Val: PIXEL Level ROCAUC: 0.937
[INFO][evaluate] Image Level ROCAUC: 0.956
[INFO][evaluate] Initial Score Threshold: 4.560 F1Score: 0.878
[INFO][evaluate] Optimal Score Threshold: 4.550 F1Score: 0.892
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: wood


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 247/247 [00:20<00:00, 12.00it/s]


Start Coreset Subsampling...


100%|██████████| 4840/4840 [00:07<00:00, 652.01it/s]
100%|██████████| 79/79 [00:06<00:00, 11.52it/s]


Val: IMAGE Level ROCAUC: 0.968
Val: PIXEL Level ROCAUC: 0.952
[INFO][evaluate] Image Level ROCAUC: 0.968
[INFO][evaluate] Initial Score Threshold: 4.857 F1Score: 0.930
[INFO][evaluate] Optimal Score Threshold: 4.717 F1Score: 0.951
[INFO][evaluate] Average Inference time with batch_size=1: 0.022s

Class: zipper


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 240/240 [00:11<00:00, 21.72it/s]


Start Coreset Subsampling...


100%|██████████| 4703/4703 [00:06<00:00, 683.33it/s]
100%|██████████| 151/151 [00:08<00:00, 17.68it/s]


Val: IMAGE Level ROCAUC: 0.918
Val: PIXEL Level ROCAUC: 0.961
[INFO][evaluate] Image Level ROCAUC: 0.918
[INFO][evaluate] Initial Score Threshold: 3.930 F1Score: 0.927
[INFO][evaluate] Optimal Score Threshold: 3.550 F1Score: 0.952
[INFO][evaluate] Average Inference time with batch_size=1: 0.020s

CLASS BREAKDOWN
ROCAUC: 0.999 		f1_score: 0.992 	bottle
ROCAUC: 0.862 		f1_score: 0.834 	cable
ROCAUC: 0.834 		f1_score: 0.939 	capsule
ROCAUC: 0.852 		f1_score: 0.889 	carpet
ROCAUC: 0.985 		f1_score: 0.965 	grid
ROCAUC: 0.985 		f1_score: 0.952 	hazelnut
ROCAUC: 1.000 		f1_score: 1.000 	leather
ROCAUC: 0.973 		f1_score: 0.963 	metal_nut
ROCAUC: 0.914 		f1_score: 0.936 	pill
ROCAUC: 0.843 		f1_score: 0.882 	screw
ROCAUC: 0.935 		f1_score: 0.901 	tile
ROCAUC: 0.975 		f1_score: 0.938 	toothbrush
ROCAUC: 0.956 		f1_score: 0.892 	transistor
ROCAUC: 0.968 		f1_score: 0.951 	wood
ROCAUC: 0.918 		f1_score: 0.952 	zipper


SUMMARY
Avg AUC: 0.933 		Total Misclassified: 183


In [None]:
# Layer 2
# Run PatchCoreViT using only layer 2 of the backbone, print the results and
# save them to JSON.
print("Layer: 2")

model_params = dict(
  layers=[2],
  backbone="google/vit-base-patch16-224-in21k",
  f_coreset=0.1,
)

results = get_results(PatchCoreViT, model_params)
print_results(results)
result_json = save_json(
  results,
  "pcViT_base-patch16-224-ink21k_l2.json",
)
# Recorded outcome of this run:
# Avg AUC: 0.959 		Total Misclassified: 116

Layer: 2

Class: bottle


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 209/209 [00:13<00:00, 15.73it/s]


Start Coreset Subsampling...


100%|██████████| 4095/4095 [00:05<00:00, 760.07it/s]
100%|██████████| 83/83 [00:05<00:00, 14.56it/s]


Val: IMAGE Level ROCAUC: 0.999
Val: PIXEL Level ROCAUC: 0.991
[INFO][evaluate] Image Level ROCAUC: 0.999
[INFO][evaluate] Initial Score Threshold: 4.870 F1Score: 0.984
[INFO][evaluate] Optimal Score Threshold: 3.550 F1Score: 0.992
[INFO][evaluate] Average Inference time with batch_size=1: 0.026s

Class: cable


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 224/224 [00:18<00:00, 12.17it/s]


Start Coreset Subsampling...


100%|██████████| 4389/4389 [00:06<00:00, 731.05it/s]
100%|██████████| 150/150 [00:13<00:00, 11.04it/s]


Val: IMAGE Level ROCAUC: 0.967
Val: PIXEL Level ROCAUC: 0.969
[INFO][evaluate] Image Level ROCAUC: 0.967
[INFO][evaluate] Initial Score Threshold: 5.256 F1Score: 0.934
[INFO][evaluate] Optimal Score Threshold: 5.226 F1Score: 0.935
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: capsule


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 219/219 [00:18<00:00, 12.11it/s]


Start Coreset Subsampling...


100%|██████████| 4291/4291 [00:05<00:00, 749.16it/s]
100%|██████████| 132/132 [00:11<00:00, 11.20it/s]


Val: IMAGE Level ROCAUC: 0.897
Val: PIXEL Level ROCAUC: 0.927
[INFO][evaluate] Image Level ROCAUC: 0.897
[INFO][evaluate] Initial Score Threshold: 3.376 F1Score: 0.887
[INFO][evaluate] Optimal Score Threshold: 2.646 F1Score: 0.936
[INFO][evaluate] Average Inference time with batch_size=1: 0.026s

Class: carpet


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 280/280 [00:22<00:00, 12.50it/s]


Start Coreset Subsampling...


100%|██████████| 5487/5487 [00:09<00:00, 590.56it/s]
100%|██████████| 117/117 [00:10<00:00, 11.39it/s]


Val: IMAGE Level ROCAUC: 0.952
Val: PIXEL Level ROCAUC: 0.989
[INFO][evaluate] Image Level ROCAUC: 0.952
[INFO][evaluate] Initial Score Threshold: 4.692 F1Score: 0.929
[INFO][evaluate] Optimal Score Threshold: 4.633 F1Score: 0.936
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: grid


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 264/264 [00:12<00:00, 21.56it/s]


Start Coreset Subsampling...


100%|██████████| 5173/5173 [00:08<00:00, 626.50it/s]
100%|██████████| 78/78 [00:04<00:00, 18.31it/s]


Val: IMAGE Level ROCAUC: 0.992
Val: PIXEL Level ROCAUC: 0.977
[INFO][evaluate] Image Level ROCAUC: 0.992
[INFO][evaluate] Initial Score Threshold: 4.248 F1Score: 0.964
[INFO][evaluate] Optimal Score Threshold: 4.078 F1Score: 0.973
[INFO][evaluate] Average Inference time with batch_size=1: 0.016s

Class: hazelnut


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 391/391 [00:30<00:00, 12.90it/s]


Start Coreset Subsampling...


100%|██████████| 7662/7662 [00:18<00:00, 415.47it/s]
100%|██████████| 110/110 [00:09<00:00, 11.26it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.990
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 5.450 F1Score: 0.986
[INFO][evaluate] Optimal Score Threshold: 5.280 F1Score: 0.993
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: leather


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 245/245 [00:17<00:00, 13.77it/s]


Start Coreset Subsampling...


100%|██████████| 4801/4801 [00:07<00:00, 668.31it/s]
100%|██████████| 124/124 [00:10<00:00, 11.91it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.995
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 4.728 F1Score: 0.995
[INFO][evaluate] Optimal Score Threshold: 4.428 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.023s

Class: metal_nut


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 220/220 [00:09<00:00, 22.36it/s]


Start Coreset Subsampling...


100%|██████████| 4311/4311 [00:05<00:00, 740.34it/s]
100%|██████████| 115/115 [00:05<00:00, 19.57it/s]


Val: IMAGE Level ROCAUC: 0.998
Val: PIXEL Level ROCAUC: 0.964
[INFO][evaluate] Image Level ROCAUC: 0.998
[INFO][evaluate] Initial Score Threshold: 5.308 F1Score: 0.989
[INFO][evaluate] Optimal Score Threshold: 5.258 F1Score: 0.995
[INFO][evaluate] Average Inference time with batch_size=1: 0.017s

Class: pill


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 267/267 [00:15<00:00, 17.74it/s]


Start Coreset Subsampling...


100%|██████████| 5232/5232 [00:08<00:00, 612.47it/s]
100%|██████████| 167/167 [00:11<00:00, 14.98it/s]


Val: IMAGE Level ROCAUC: 0.946
Val: PIXEL Level ROCAUC: 0.957
[INFO][evaluate] Image Level ROCAUC: 0.946
[INFO][evaluate] Initial Score Threshold: 4.448 F1Score: 0.933
[INFO][evaluate] Optimal Score Threshold: 4.149 F1Score: 0.938
[INFO][evaluate] Average Inference time with batch_size=1: 0.022s

Class: screw


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 320/320 [00:15<00:00, 21.22it/s]


Start Coreset Subsampling...


100%|██████████| 6271/6271 [00:12<00:00, 520.90it/s]
100%|██████████| 160/160 [00:09<00:00, 16.47it/s]


Val: IMAGE Level ROCAUC: 0.821
Val: PIXEL Level ROCAUC: 0.934
[INFO][evaluate] Image Level ROCAUC: 0.821
[INFO][evaluate] Initial Score Threshold: 4.203 F1Score: 0.802
[INFO][evaluate] Optimal Score Threshold: 3.973 F1Score: 0.888
[INFO][evaluate] Average Inference time with batch_size=1: 0.021s

Class: tile


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 230/230 [00:14<00:00, 15.40it/s]


Start Coreset Subsampling...


100%|██████████| 4507/4507 [00:06<00:00, 720.75it/s]
100%|██████████| 117/117 [00:08<00:00, 13.96it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.955
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 5.582 F1Score: 0.994
[INFO][evaluate] Optimal Score Threshold: 5.562 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.022s

Class: toothbrush


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 60/60 [00:04<00:00, 12.61it/s]


Start Coreset Subsampling...


100%|██████████| 1175/1175 [00:00<00:00, 2209.77it/s]
100%|██████████| 42/42 [00:03<00:00, 13.66it/s]


Val: IMAGE Level ROCAUC: 0.919
Val: PIXEL Level ROCAUC: 0.988
[INFO][evaluate] Image Level ROCAUC: 0.919
[INFO][evaluate] Initial Score Threshold: 4.389 F1Score: 0.935
[INFO][evaluate] Optimal Score Threshold: 4.179 F1Score: 0.952
[INFO][evaluate] Average Inference time with batch_size=1: 0.016s

Class: transistor


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 213/213 [00:17<00:00, 12.25it/s]


Start Coreset Subsampling...


100%|██████████| 4173/4173 [00:05<00:00, 757.43it/s]
100%|██████████| 100/100 [00:08<00:00, 11.78it/s]


Val: IMAGE Level ROCAUC: 0.992
Val: PIXEL Level ROCAUC: 0.942
[INFO][evaluate] Image Level ROCAUC: 0.992
[INFO][evaluate] Initial Score Threshold: 5.127 F1Score: 0.935
[INFO][evaluate] Optimal Score Threshold: 5.017 F1Score: 0.937
[INFO][evaluate] Average Inference time with batch_size=1: 0.023s

Class: wood


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 247/247 [00:20<00:00, 12.01it/s]


Start Coreset Subsampling...


100%|██████████| 4840/4840 [00:07<00:00, 670.50it/s]
100%|██████████| 79/79 [00:07<00:00, 10.82it/s]


Val: IMAGE Level ROCAUC: 0.985
Val: PIXEL Level ROCAUC: 0.964
[INFO][evaluate] Image Level ROCAUC: 0.985
[INFO][evaluate] Initial Score Threshold: 5.612 F1Score: 0.966
[INFO][evaluate] Optimal Score Threshold: 5.552 F1Score: 0.975
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: zipper


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 240/240 [00:10<00:00, 22.64it/s]


Start Coreset Subsampling...


100%|██████████| 4703/4703 [00:06<00:00, 689.96it/s]
100%|██████████| 151/151 [00:08<00:00, 17.71it/s]


Val: IMAGE Level ROCAUC: 0.923
Val: PIXEL Level ROCAUC: 0.939
[INFO][evaluate] Image Level ROCAUC: 0.923
[INFO][evaluate] Initial Score Threshold: 4.043 F1Score: 0.912
[INFO][evaluate] Optimal Score Threshold: 3.833 F1Score: 0.938
[INFO][evaluate] Average Inference time with batch_size=1: 0.019s

CLASS BREAKDOWN
ROCAUC: 0.999 		f1_score: 0.992 	bottle
ROCAUC: 0.967 		f1_score: 0.935 	cable
ROCAUC: 0.897 		f1_score: 0.936 	capsule
ROCAUC: 0.952 		f1_score: 0.936 	carpet
ROCAUC: 0.992 		f1_score: 0.973 	grid
ROCAUC: 1.000 		f1_score: 0.993 	hazelnut
ROCAUC: 1.000 		f1_score: 1.000 	leather
ROCAUC: 0.998 		f1_score: 0.995 	metal_nut
ROCAUC: 0.946 		f1_score: 0.938 	pill
ROCAUC: 0.821 		f1_score: 0.888 	screw
ROCAUC: 1.000 		f1_score: 1.000 	tile
ROCAUC: 0.919 		f1_score: 0.952 	toothbrush
ROCAUC: 0.992 		f1_score: 0.937 	transistor
ROCAUC: 0.985 		f1_score: 0.975 	wood
ROCAUC: 0.923 		f1_score: 0.938 	zipper


IndexError: invalid index to scalar variable.

In [None]:
# Layer 3: run PatchCoreViT with features from ViT layer 3 only and collect
# the per-class results returned by get_results.
layer = 3  # single source for the log line, the model config and the file name
print(f"Layer: {layer}")

model_params = {
  "layers" : [layer],
  "backbone" : "google/vit-base-patch16-224-in21k",  # Hugging Face checkpoint id
  "f_coreset" : 0.1  # presumably the coreset subsampling fraction -- confirm in patchcore_models
}

results = get_results(PatchCoreViT, model_params)
print_results(results)
# NOTE: the file name spells "ink21k" while the backbone id is "in21k"; the
# spelling is kept so the output path stays exactly what it was before.
result_json = save_json(results, f"pcViT_base-patch16-224-ink21k_l{layer}.json")
# Reference result (matches the recorded summary): Avg AUC: 0.962 		Total Misclassified: 98

Layer: 3

Class: bottle


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 209/209 [00:11<00:00, 17.61it/s]


Start Coreset Subsampling...


100%|██████████| 4095/4095 [00:05<00:00, 778.51it/s]
100%|██████████| 83/83 [00:05<00:00, 16.43it/s]


Val: IMAGE Level ROCAUC: 0.999
Val: PIXEL Level ROCAUC: 0.990
[INFO][evaluate] Image Level ROCAUC: 0.999
[INFO][evaluate] Initial Score Threshold: 5.494 F1Score: 0.984
[INFO][evaluate] Optimal Score Threshold: 3.954 F1Score: 0.992
[INFO][evaluate] Average Inference time with batch_size=1: 0.018s

Class: cable


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 224/224 [00:18<00:00, 12.05it/s]


Start Coreset Subsampling...


100%|██████████| 4389/4389 [00:05<00:00, 738.11it/s]
100%|██████████| 150/150 [00:13<00:00, 10.98it/s]


Val: IMAGE Level ROCAUC: 0.978
Val: PIXEL Level ROCAUC: 0.977
[INFO][evaluate] Image Level ROCAUC: 0.978
[INFO][evaluate] Initial Score Threshold: 5.880 F1Score: 0.951
[INFO][evaluate] Optimal Score Threshold: 5.780 F1Score: 0.957
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: capsule


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 219/219 [00:17<00:00, 12.38it/s]


Start Coreset Subsampling...


100%|██████████| 4291/4291 [00:05<00:00, 746.83it/s]
100%|██████████| 132/132 [00:11<00:00, 11.25it/s]


Val: IMAGE Level ROCAUC: 0.920
Val: PIXEL Level ROCAUC: 0.938
[INFO][evaluate] Image Level ROCAUC: 0.920
[INFO][evaluate] Initial Score Threshold: 3.687 F1Score: 0.914
[INFO][evaluate] Optimal Score Threshold: 3.087 F1Score: 0.947
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: carpet


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 280/280 [00:22<00:00, 12.44it/s]


Start Coreset Subsampling...


100%|██████████| 5487/5487 [00:09<00:00, 586.89it/s]
100%|██████████| 117/117 [00:10<00:00, 11.69it/s]


Val: IMAGE Level ROCAUC: 0.967
Val: PIXEL Level ROCAUC: 0.988
[INFO][evaluate] Image Level ROCAUC: 0.967
[INFO][evaluate] Initial Score Threshold: 5.632 F1Score: 0.953
[INFO][evaluate] Optimal Score Threshold: 5.602 F1Score: 0.959
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: grid


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 264/264 [00:12<00:00, 21.47it/s]


Start Coreset Subsampling...


100%|██████████| 5173/5173 [00:08<00:00, 623.14it/s]
100%|██████████| 78/78 [00:03<00:00, 19.93it/s]


Val: IMAGE Level ROCAUC: 0.988
Val: PIXEL Level ROCAUC: 0.977
[INFO][evaluate] Image Level ROCAUC: 0.988
[INFO][evaluate] Initial Score Threshold: 4.674 F1Score: 0.954
[INFO][evaluate] Optimal Score Threshold: 4.564 F1Score: 0.964
[INFO][evaluate] Average Inference time with batch_size=1: 0.016s

Class: hazelnut


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 391/391 [00:30<00:00, 12.94it/s]


Start Coreset Subsampling...


100%|██████████| 7662/7662 [00:18<00:00, 422.28it/s]
100%|██████████| 110/110 [00:09<00:00, 11.47it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.991
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 6.251 F1Score: 0.993
[INFO][evaluate] Optimal Score Threshold: 6.071 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: leather


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 245/245 [00:17<00:00, 13.77it/s]


Start Coreset Subsampling...


100%|██████████| 4801/4801 [00:07<00:00, 662.30it/s]
100%|██████████| 124/124 [00:09<00:00, 12.58it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.995
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 6.432 F1Score: 0.995
[INFO][evaluate] Optimal Score Threshold: 4.962 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.023s

Class: metal_nut


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 220/220 [00:09<00:00, 22.42it/s]


Start Coreset Subsampling...


100%|██████████| 4311/4311 [00:05<00:00, 745.17it/s]
100%|██████████| 115/115 [00:06<00:00, 18.16it/s]


Val: IMAGE Level ROCAUC: 0.999
Val: PIXEL Level ROCAUC: 0.969
[INFO][evaluate] Image Level ROCAUC: 0.999
[INFO][evaluate] Initial Score Threshold: 5.937 F1Score: 0.989
[INFO][evaluate] Optimal Score Threshold: 5.797 F1Score: 0.995
[INFO][evaluate] Average Inference time with batch_size=1: 0.019s

Class: pill


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 267/267 [00:15<00:00, 17.49it/s]


Start Coreset Subsampling...


100%|██████████| 5232/5232 [00:08<00:00, 607.82it/s]
100%|██████████| 167/167 [00:10<00:00, 15.22it/s]


Val: IMAGE Level ROCAUC: 0.956
Val: PIXEL Level ROCAUC: 0.965
[INFO][evaluate] Image Level ROCAUC: 0.956
[INFO][evaluate] Initial Score Threshold: 5.166 F1Score: 0.945
[INFO][evaluate] Optimal Score Threshold: 4.986 F1Score: 0.957
[INFO][evaluate] Average Inference time with batch_size=1: 0.022s

Class: screw


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 320/320 [00:15<00:00, 21.31it/s]


Start Coreset Subsampling...


100%|██████████| 6271/6271 [00:12<00:00, 517.70it/s]
100%|██████████| 160/160 [00:08<00:00, 18.42it/s]


Val: IMAGE Level ROCAUC: 0.799
Val: PIXEL Level ROCAUC: 0.937
[INFO][evaluate] Image Level ROCAUC: 0.799
[INFO][evaluate] Initial Score Threshold: 4.809 F1Score: 0.822
[INFO][evaluate] Optimal Score Threshold: 4.509 F1Score: 0.865
[INFO][evaluate] Average Inference time with batch_size=1: 0.019s

Class: tile


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 230/230 [00:14<00:00, 16.11it/s]


Start Coreset Subsampling...


100%|██████████| 4507/4507 [00:06<00:00, 708.64it/s]
100%|██████████| 117/117 [00:08<00:00, 13.79it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.968
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 6.297 F1Score: 0.994
[INFO][evaluate] Optimal Score Threshold: 6.237 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.021s

Class: toothbrush


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 60/60 [00:04<00:00, 12.46it/s]


Start Coreset Subsampling...


100%|██████████| 1175/1175 [00:00<00:00, 2000.25it/s]
100%|██████████| 42/42 [00:03<00:00, 12.99it/s]


Val: IMAGE Level ROCAUC: 0.911
Val: PIXEL Level ROCAUC: 0.987
[INFO][evaluate] Image Level ROCAUC: 0.911
[INFO][evaluate] Initial Score Threshold: 4.683 F1Score: 0.935
[INFO][evaluate] Optimal Score Threshold: 4.544 F1Score: 0.952
[INFO][evaluate] Average Inference time with batch_size=1: 0.017s

Class: transistor


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 213/213 [00:18<00:00, 11.80it/s]


Start Coreset Subsampling...


100%|██████████| 4173/4173 [00:05<00:00, 771.64it/s]
100%|██████████| 100/100 [00:09<00:00, 11.10it/s]


Val: IMAGE Level ROCAUC: 0.994
Val: PIXEL Level ROCAUC: 0.967
[INFO][evaluate] Image Level ROCAUC: 0.994
[INFO][evaluate] Initial Score Threshold: 5.643 F1Score: 0.951
[INFO][evaluate] Optimal Score Threshold: 5.623 F1Score: 0.964
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: wood


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 247/247 [00:19<00:00, 12.60it/s]


Start Coreset Subsampling...


100%|██████████| 4840/4840 [00:07<00:00, 657.81it/s]
100%|██████████| 79/79 [00:06<00:00, 11.33it/s]


Val: IMAGE Level ROCAUC: 0.987
Val: PIXEL Level ROCAUC: 0.970
[INFO][evaluate] Image Level ROCAUC: 0.987
[INFO][evaluate] Initial Score Threshold: 6.831 F1Score: 0.966
[INFO][evaluate] Optimal Score Threshold: 6.771 F1Score: 0.974
[INFO][evaluate] Average Inference time with batch_size=1: 0.022s

Class: zipper


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 240/240 [00:10<00:00, 21.89it/s]


Start Coreset Subsampling...


100%|██████████| 4703/4703 [00:06<00:00, 681.51it/s]
100%|██████████| 151/151 [00:08<00:00, 18.74it/s]


Val: IMAGE Level ROCAUC: 0.929
Val: PIXEL Level ROCAUC: 0.908
[INFO][evaluate] Image Level ROCAUC: 0.929
[INFO][evaluate] Initial Score Threshold: 4.411 F1Score: 0.922
[INFO][evaluate] Optimal Score Threshold: 4.181 F1Score: 0.964
[INFO][evaluate] Average Inference time with batch_size=1: 0.019s

CLASS BREAKDOWN
ROCAUC: 0.999 		f1_score: 0.992 	bottle
ROCAUC: 0.978 		f1_score: 0.957 	cable
ROCAUC: 0.920 		f1_score: 0.947 	capsule
ROCAUC: 0.967 		f1_score: 0.959 	carpet
ROCAUC: 0.988 		f1_score: 0.964 	grid
ROCAUC: 1.000 		f1_score: 1.000 	hazelnut
ROCAUC: 1.000 		f1_score: 1.000 	leather
ROCAUC: 0.999 		f1_score: 0.995 	metal_nut
ROCAUC: 0.956 		f1_score: 0.957 	pill
ROCAUC: 0.799 		f1_score: 0.865 	screw
ROCAUC: 1.000 		f1_score: 1.000 	tile
ROCAUC: 0.911 		f1_score: 0.952 	toothbrush
ROCAUC: 0.994 		f1_score: 0.964 	transistor
ROCAUC: 0.987 		f1_score: 0.974 	wood
ROCAUC: 0.929 		f1_score: 0.964 	zipper

SUMMARY
Avg AUC: 0.962 		Total Misclassified: 98


{'bottle': {'cm': [[19, 1], [0, 63]],
  'prfs': (0.984375, 1.0, 0.9921259842519685, None),
  'auc': np.float64(0.9992063492063492)},
 'cable': {'cm': [[54, 4], [4, 88]],
  'prfs': (0.9565217391304348, 0.9565217391304348, 0.9565217391304348, None),
  'auc': np.float64(0.9778860569715142)},
 'capsule': {'cm': [[12, 11], [1, 108]],
  'prfs': (0.907563025210084, 0.9908256880733946, 0.9473684210526315, None),
  'auc': np.float64(0.9202233745512565)},
 'carpet': {'cm': [[28, 0], [7, 82]],
  'prfs': (1.0, 0.9213483146067416, 0.9590643274853801, None),
  'auc': np.float64(0.9674959871589085)},
 'grid': {'cm': [[20, 1], [3, 54]],
  'prfs': (0.9818181818181818, 0.9473684210526315, 0.9642857142857143, None),
  'auc': np.float64(0.9883040935672514)},
 'hazelnut': {'cm': [[40, 0], [0, 70]],
  'prfs': (1.0, 1.0, 1.0, None),
  'auc': np.float64(1.0)},
 'leather': {'cm': [[32, 0], [0, 92]],
  'prfs': (1.0, 1.0, 1.0, None),
  'auc': np.float64(1.0)},
 'metal_nut': {'cm': [[22, 0], [1, 92]],
  'prfs': (

In [None]:
# Layer 4: run PatchCoreViT with features from ViT layer 4 only and collect
# the per-class results returned by get_results.
layer = 4  # single source for the log line, the model config and the file name
print(f"Layer: {layer}")

model_params = {
  "layers" : [layer],
  "backbone" : "google/vit-base-patch16-224-in21k",  # Hugging Face checkpoint id
  "f_coreset" : 0.1  # presumably the coreset subsampling fraction -- confirm in patchcore_models
}

results = get_results(PatchCoreViT, model_params)
print_results(results)
# NOTE: the file name spells "ink21k" while the backbone id is "in21k"; the
# spelling is kept so the output path stays exactly what it was before.
result_json = save_json(results, f"pcViT_base-patch16-224-ink21k_l{layer}.json")

Layer: 4

Class: bottle


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 209/209 [00:11<00:00, 17.94it/s]


Start Coreset Subsampling...


100%|██████████| 4095/4095 [00:05<00:00, 791.49it/s]
100%|██████████| 83/83 [00:05<00:00, 15.59it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.988
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 4.801 F1Score: 0.992
[INFO][evaluate] Optimal Score Threshold: 4.711 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.018s

Class: cable


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 224/224 [00:18<00:00, 12.42it/s]


Start Coreset Subsampling...


100%|██████████| 4389/4389 [00:05<00:00, 747.46it/s]
100%|██████████| 150/150 [00:13<00:00, 11.28it/s]


Val: IMAGE Level ROCAUC: 0.985
Val: PIXEL Level ROCAUC: 0.980
[INFO][evaluate] Image Level ROCAUC: 0.985
[INFO][evaluate] Initial Score Threshold: 6.101 F1Score: 0.956
[INFO][evaluate] Optimal Score Threshold: 6.101 F1Score: 0.962
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: capsule


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 219/219 [00:17<00:00, 12.72it/s]


Start Coreset Subsampling...


100%|██████████| 4291/4291 [00:05<00:00, 743.38it/s]
100%|██████████| 132/132 [00:12<00:00, 10.76it/s]


Val: IMAGE Level ROCAUC: 0.921
Val: PIXEL Level ROCAUC: 0.937
[INFO][evaluate] Image Level ROCAUC: 0.921
[INFO][evaluate] Initial Score Threshold: 3.741 F1Score: 0.919
[INFO][evaluate] Optimal Score Threshold: 3.521 F1Score: 0.946
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: carpet


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 280/280 [00:21<00:00, 12.84it/s]


Start Coreset Subsampling...


100%|██████████| 5487/5487 [00:09<00:00, 595.97it/s]
100%|██████████| 117/117 [00:10<00:00, 11.17it/s]


Val: IMAGE Level ROCAUC: 0.960
Val: PIXEL Level ROCAUC: 0.986
[INFO][evaluate] Image Level ROCAUC: 0.960
[INFO][evaluate] Initial Score Threshold: 5.874 F1Score: 0.953
[INFO][evaluate] Optimal Score Threshold: 5.864 F1Score: 0.959
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s

Class: grid


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 264/264 [00:12<00:00, 21.62it/s]


Start Coreset Subsampling...


100%|██████████| 5173/5173 [00:08<00:00, 624.17it/s]
100%|██████████| 78/78 [00:03<00:00, 20.02it/s]


Val: IMAGE Level ROCAUC: 0.991
Val: PIXEL Level ROCAUC: 0.974
[INFO][evaluate] Image Level ROCAUC: 0.991
[INFO][evaluate] Initial Score Threshold: 5.346 F1Score: 0.954
[INFO][evaluate] Optimal Score Threshold: 4.706 F1Score: 0.966
[INFO][evaluate] Average Inference time with batch_size=1: 0.016s

Class: hazelnut


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 391/391 [00:30<00:00, 12.83it/s]


Start Coreset Subsampling...


100%|██████████| 7662/7662 [00:18<00:00, 420.36it/s]
100%|██████████| 110/110 [00:09<00:00, 11.71it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.992
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 6.798 F1Score: 0.993
[INFO][evaluate] Optimal Score Threshold: 6.758 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.023s

Class: leather


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 245/245 [00:17<00:00, 13.96it/s]


Start Coreset Subsampling...


100%|██████████| 4801/4801 [00:07<00:00, 669.92it/s]
100%|██████████| 124/124 [00:10<00:00, 11.88it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.995
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 7.350 F1Score: 0.995
[INFO][evaluate] Optimal Score Threshold: 5.140 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.024s

Class: metal_nut


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 220/220 [00:09<00:00, 22.98it/s]


Start Coreset Subsampling...


100%|██████████| 4311/4311 [00:05<00:00, 748.97it/s]
100%|██████████| 115/115 [00:06<00:00, 18.35it/s]


Val: IMAGE Level ROCAUC: 0.999
Val: PIXEL Level ROCAUC: 0.976
[INFO][evaluate] Image Level ROCAUC: 0.999
[INFO][evaluate] Initial Score Threshold: 6.243 F1Score: 0.989
[INFO][evaluate] Optimal Score Threshold: 6.193 F1Score: 0.995
[INFO][evaluate] Average Inference time with batch_size=1: 0.018s

Class: pill


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 267/267 [00:15<00:00, 17.22it/s]


Start Coreset Subsampling...


100%|██████████| 5232/5232 [00:08<00:00, 617.14it/s]
100%|██████████| 167/167 [00:11<00:00, 14.80it/s]


Val: IMAGE Level ROCAUC: 0.944
Val: PIXEL Level ROCAUC: 0.975
[INFO][evaluate] Image Level ROCAUC: 0.944
[INFO][evaluate] Initial Score Threshold: 5.705 F1Score: 0.941
[INFO][evaluate] Optimal Score Threshold: 5.485 F1Score: 0.957
[INFO][evaluate] Average Inference time with batch_size=1: 0.023s

Class: screw


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 320/320 [00:14<00:00, 21.57it/s]


Start Coreset Subsampling...


100%|██████████| 6271/6271 [00:12<00:00, 522.44it/s]
100%|██████████| 160/160 [00:09<00:00, 17.46it/s]


Val: IMAGE Level ROCAUC: 0.773
Val: PIXEL Level ROCAUC: 0.942
[INFO][evaluate] Image Level ROCAUC: 0.773
[INFO][evaluate] Initial Score Threshold: 5.178 F1Score: 0.804
[INFO][evaluate] Optimal Score Threshold: 4.759 F1Score: 0.878
[INFO][evaluate] Average Inference time with batch_size=1: 0.020s

Class: tile


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 230/230 [00:13<00:00, 16.82it/s]


Start Coreset Subsampling...


100%|██████████| 4507/4507 [00:06<00:00, 707.53it/s]
100%|██████████| 117/117 [00:07<00:00, 15.95it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.967
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 6.838 F1Score: 0.994
[INFO][evaluate] Optimal Score Threshold: 6.838 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.020s

Class: toothbrush


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 60/60 [00:04<00:00, 13.58it/s]


Start Coreset Subsampling...


100%|██████████| 1175/1175 [00:00<00:00, 2280.80it/s]
100%|██████████| 42/42 [00:03<00:00, 12.89it/s]


Val: IMAGE Level ROCAUC: 0.931
Val: PIXEL Level ROCAUC: 0.986
[INFO][evaluate] Image Level ROCAUC: 0.931
[INFO][evaluate] Initial Score Threshold: 5.136 F1Score: 0.935
[INFO][evaluate] Optimal Score Threshold: 4.766 F1Score: 0.952
[INFO][evaluate] Average Inference time with batch_size=1: 0.017s

Class: transistor


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 213/213 [00:17<00:00, 12.38it/s]


Start Coreset Subsampling...


100%|██████████| 4173/4173 [00:05<00:00, 758.74it/s]
100%|██████████| 100/100 [00:08<00:00, 11.23it/s]


Val: IMAGE Level ROCAUC: 0.988
Val: PIXEL Level ROCAUC: 0.966
[INFO][evaluate] Image Level ROCAUC: 0.988
[INFO][evaluate] Initial Score Threshold: 6.475 F1Score: 0.933
[INFO][evaluate] Optimal Score Threshold: 6.455 F1Score: 0.947
[INFO][evaluate] Average Inference time with batch_size=1: 0.023s

Class: wood


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 247/247 [00:19<00:00, 12.43it/s]


Start Coreset Subsampling...


100%|██████████| 4840/4840 [00:07<00:00, 659.61it/s]
100%|██████████| 79/79 [00:06<00:00, 11.97it/s]


Val: IMAGE Level ROCAUC: 0.988
Val: PIXEL Level ROCAUC: 0.970
[INFO][evaluate] Image Level ROCAUC: 0.988
[INFO][evaluate] Initial Score Threshold: 7.760 F1Score: 0.947
[INFO][evaluate] Optimal Score Threshold: 7.070 F1Score: 0.968
[INFO][evaluate] Average Inference time with batch_size=1: 0.022s

Class: zipper


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 240/240 [00:10<00:00, 23.35it/s]


Start Coreset Subsampling...


100%|██████████| 4703/4703 [00:06<00:00, 697.18it/s]
100%|██████████| 151/151 [00:08<00:00, 17.84it/s]


Val: IMAGE Level ROCAUC: 0.947
Val: PIXEL Level ROCAUC: 0.880
[INFO][evaluate] Image Level ROCAUC: 0.947
[INFO][evaluate] Initial Score Threshold: 4.895 F1Score: 0.921
[INFO][evaluate] Optimal Score Threshold: 4.345 F1Score: 0.956
[INFO][evaluate] Average Inference time with batch_size=1: 0.019s


UnboundLocalError: cannot access local variable 'avg_auc_img' where it is not associated with a value

In [None]:
# Reload the project modules so that edits made to their source files are
# picked up without restarting the kernel.
import patchcore_models
import patchcore_utils

# Reload order is kept as written: utils first, then models.
importlib.reload(patchcore_utils)
importlib.reload(patchcore_models)

# Names bound via `from ... import` still reference the pre-reload objects,
# so they are re-imported here to bind the reloaded definitions.
from patchcore_utils import get_results, print_results, save_json
from patchcore_models import MVTecDataset, PatchCoreViT, VanillaPatchCore, PatchCoreSWin

# Disabled: reporting and saving of a previously computed `results` object.
# print_results(results)
# save_json(results, "pcViT_base-patch16-224-ink21k_l2.json")

# Evaluation Development



In [None]:
# Reload the project modules (utils first, then models) so that source edits
# take effect in the running kernel.
importlib.reload(patchcore_utils)
importlib.reload(patchcore_models)

# Re-bind the imported names to the reloaded definitions; without this the
# notebook would keep using the objects from before the reload.
from patchcore_utils import get_results, print_results, save_json
from patchcore_models import MVTecDataset, PatchCoreViT, VanillaPatchCore, PatchCoreSWin

In [None]:
# Development setup: fit a PatchCoreViT model on the "good" training images of
# a single class so the evaluation code can be debugged on a small example.
# NOTE(review): "layers" presumably selects the ViT layer(s) used for features
# and "f_coreset" the coreset subsampling fraction — confirm in patchcore_models.
model_params = {
  "layers" : [4],
  "backbone" : "google/vit-base-patch16-224-in21k",
  "f_coreset" : 0.1
}

base_path = "/content/"
class_name = "bottle"

temp_path = os.path.join(base_path, class_name, class_name) # ex. /content/bottle/bottle
train_path = os.path.join(temp_path, "train", "good")
test_path = os.path.join(temp_path, "test")

train_paths = [train_path]
# os.listdir returns entries in arbitrary order; sorting makes the order of the
# test folders (and therefore the first sample drawn from the dataloader in the
# following cell) reproducible across runs and machines.
test_paths = [
    os.path.join(test_path, entry) for entry in sorted(os.listdir(test_path))
]

pcViT = PatchCoreViT(**model_params)
pcViT.fit(train_paths)

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 209/209 [00:11<00:00, 18.52it/s]


Start Coreset Subsampling...


100%|██████████| 4095/4095 [00:05<00:00, 783.10it/s]


In [None]:
# Sanity check: print the tensor shape of a sample coming out of the test
# dataloader next to the shape produced by `process_input` for the same image
# path, so the two preprocessing routes can be compared.
test_dataloader = pcViT.get_dataloader(test_paths)

# Fetch only the first batch. Unlike a `for ...: break` loop, `next(iter(...))`
# raises StopIteration right here when the loader is empty, instead of silently
# leaving these names unbound (or stale from an earlier execution of the cell).
sample_evaluate, label, path, mask = next(iter(test_dataloader))

# `path[0]` is passed as the single image path of this batch — assumes the
# loader yields paths as a batched sequence; TODO confirm.
sample_ale = pcViT.process_input(path[0])

print(sample_evaluate.pixel_values[0].shape)
print(sample_ale.pixel_values[0].shape)

torch.Size([1, 3, 224, 224])
torch.Size([1, 3, 224, 224])


In [None]:
# Evaluate PatchCoreViT with the layer-4 configuration through `get_results`.
print("Layer: 4")

model_params = dict(
    layers=[4],
    backbone="google/vit-base-patch16-224-in21k",
    f_coreset=0.1,
)

results = get_results(PatchCoreViT, model_params)

Layer: 4

Class: bottle


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 209/209 [00:11<00:00, 17.99it/s]


Start Coreset Subsampling...


100%|██████████| 4095/4095 [00:05<00:00, 772.13it/s]
100%|██████████| 83/83 [00:04<00:00, 17.41it/s]


Val: IMAGE Level ROCAUC: 1.000
Val: PIXEL Level ROCAUC: 0.988
[INFO][evaluate] Image Level ROCAUC: 1.000
[INFO][evaluate] Initial Score Threshold: 4.801 F1Score: 0.992
[INFO][evaluate] Optimal Score Threshold: 4.711 F1Score: 1.000
[INFO][evaluate] Average Inference time with batch_size=1: 0.017s

Class: cable


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


[INFO][__init__] Model PatchCore loaded on device: cuda


100%|██████████| 224/224 [00:17<00:00, 12.77it/s]


Start Coreset Subsampling...


100%|██████████| 4389/4389 [00:06<00:00, 731.39it/s]
100%|██████████| 150/150 [00:13<00:00, 11.25it/s]


Val: IMAGE Level ROCAUC: 0.987
Val: PIXEL Level ROCAUC: 0.981
[INFO][evaluate] Image Level ROCAUC: 0.987
[INFO][evaluate] Initial Score Threshold: 6.128 F1Score: 0.944
[INFO][evaluate] Optimal Score Threshold: 6.038 F1Score: 0.952
[INFO][evaluate] Average Inference time with batch_size=1: 0.025s


In [None]:
# Report the layer-4 run: label the output, print the per-class breakdown and
# summary of `results`, then hand the results to `save_json`.
print("Layer 4")
print_results(results)
# NOTE(review): presumably writes `results` to "development.json"; what
# `save_json` returns is not visible here — confirm in patchcore_utils.
result_json = save_json(results, "development.json")

Layer 4


CLASS BREAKDOWN
ROCAUC img: 1.000	ROCAUC pxl: 0.988	f1_score: 1.000 	bottle
ROCAUC img: 0.987	ROCAUC pxl: 0.981	f1_score: 0.952 	cable

SUMMARY
Avg ROCAUC img: 0.993
Avg ROCAUC pxl: 0.985
Total Misclassified: 9
