# Libraries

Here we import the libraries needed to run PatchCore correctly, including the modules we created ourselves.

In [1]:
# External Modules
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import numpy as np
import timm
import time
import utils
import tqdm
import sampler
import patchcore
from numba import jit, cuda
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
from sklearn.metrics import RocCurveDisplay, confusion_matrix
import os
import clip
import NetworkAggregator
# Created Modules
import data #load the dataset into data loader
#import network as nn #load the neural network

# GPU
Checking GPU status

In [2]:
# Report whether PyTorch can see a CUDA device.
print(torch.cuda.is_available())

True


In [3]:
# Show the name of the current CUDA device. On a machine without CUDA,
# get_device_name() raises, so print a short notice instead of failing the cell.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name())
else:
    print("No CUDA device available")

NVIDIA GeForce RTX 3060 Laptop GPU


In [4]:
# Defining the device: use the first CUDA GPU when one is available,
# otherwise fall back to the CPU so the notebook still runs end to end.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Patchcore

In [5]:
# Global configuration shared by the data-loading and PatchCore cells.

# Image geometry, handed to data.export_data and patchcore.Patchcore.
RESIZE, IMAGESIZE = 256, 224

# Patch extraction settings, handed to patchcore.Patchcore.
PATCHSIZE, PATCHSTRIDE, DILATION = 3, 1, 1

# Embedding sizes, handed to patchcore.Patchcore.
PRETRAIN_EMBED = TARGET_EMBED = 1024

# Batch size handed to data.export_data.
BATCH_SIZE = 1

# Value handed to sampler.Sampler.
# NOTE(review): presumably the fraction of patches kept by the sampler - confirm.
PERCENTAGE = 0.1

## Load the datasets

- Loading the desired datasets

In [6]:
# Dataset categories to load; each one ends up as a key of `datasets`.
selected_data = ['cable', 'capsule', 'bottle', 'carpet', 'wood', 'zipper']
# Build the data directory with os.path.join instead of the literal ".\data":
# "\d" is an invalid escape sequence (a warning on recent Python versions) and a
# backslash-separated path only resolves on Windows. On Windows the joined path
# is still ".\data", so existing runs see the same directory.
datasets = data.export_data(os.path.join(".", "data"), selected_data, RESIZE, IMAGESIZE, BATCH_SIZE)

In [7]:
# Sanity check: display the loaded mapping (category name -> 'train' / 'test' entries).
datasets

{'cable': {'train': <data.Data at 0x28274678fa0>,
  'test': <data.Data at 0x2820ce2d6a0>},
 'capsule': {'train': <data.Data at 0x2820ce37b20>,
  'test': <data.Data at 0x2820e208460>},
 'bottle': {'train': <data.Data at 0x2820e2087f0>,
  'test': <data.Data at 0x2820e208b80>},
 'carpet': {'train': <data.Data at 0x2820e208f10>,
  'test': <data.Data at 0x2820e2572e0>},
 'wood': {'train': <data.Data at 0x2820e257670>,
  'test': <data.Data at 0x2820e257a00>},
 'zipper': {'train': <data.Data at 0x2820e257d90>,
  'test': <data.Data at 0x2820e28c160>}}

## 2. Autoencoder - ResNet

### Params

- ResNet-like architecture with layers = {1,2,3,4}
- PatchCore uses layers j and j + 1
- As we want mid-level features, we will use layers 2 and 3

In [8]:
# Backbone stages whose feature maps are extracted; for ResNet-style
# networks we generally use these two.
extract_layers = [
    "layer2",
    "layer3",
]

### clip pretrained

In [9]:
# Keep entries 2..4 of the model names reported by clip.available_models().
# NOTE(review): model_list is not referenced by any other cell visible here.
model_list = clip.available_models()[2:5]

In [10]:
# Load the pretrained CLIP "RN50" checkpoint onto `device`.
# The result is indexed with [0] in the following cells to reach the model
# (presumably clip.load returns a (model, preprocess) pair - confirm against the CLIP docs).
neural_network = clip.load("RN50",device)

In [11]:
# Display the named submodules of the CLIP visual backbone
# (e.g. to check which layer names can be listed in extract_layers).
neural_network[0].visual.__dict__["_modules"]

OrderedDict([('conv1',
              Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)),
             ('bn1',
              BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
             ('relu1', ReLU(inplace=True)),
             ('conv2',
              Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)),
             ('bn2',
              BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
             ('relu2', ReLU(inplace=True)),
             ('conv3',
              Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)),
             ('bn3',
              BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)),
             ('relu3', ReLU(inplace=True)),
             ('avgpool', AvgPool2d(kernel_size=2, stride=2, padding=0)),
             ('layer1',
              Sequential(
                (0): Bottleneck(
           

In [12]:
# Wrap the CLIP visual backbone in the project's NetworkFeatureAggregator, passing
# the layer names in extract_layers and the target device, then display the wrapper.
model = NetworkAggregator.NetworkFeatureAggregator(neural_network[0].visual, extract_layers, device)
model

NetworkFeatureAggregator(
  (backbone): ModifiedResNet(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu3): ReLU(inplace=True)
    (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): Re

### Fitting Patchcore

In [None]:
# Fit and evaluate PatchCore once per CLIP backbone, over every loaded dataset.
# clip_model maps backbone name -> list of ROC AUC values, appended in the
# iteration order of `datasets`.
model_names = ["RN50x64"]
# Derive the result container from the same list the loop iterates over, so the
# two cannot drift apart when another backbone is added.
clip_model = {model_name: [] for model_name in model_names}
start_time = time.time()
score_list2 = {}
for model_name in model_names:
    neural_network = clip.load(model_name,device)
    model = NetworkAggregator.NetworkFeatureAggregator(neural_network[0].visual, extract_layers, device)
    sampling = sampler.Sampler(PERCENTAGE,device)
    patchcore_in = patchcore.Patchcore(model,sampling,
                                    RESIZE,IMAGESIZE,PATCHSIZE,PATCHSTRIDE,DILATION,PRETRAIN_EMBED,TARGET_EMBED,
                                    device)
    # The loop variable is deliberately NOT called `data`: that name is the
    # imported dataset module, and rebinding it to a string would break any
    # later call to data.export_data (e.g. when re-running the loading cell).
    # NOTE(review): patchcore_in is reused for every dataset; this assumes fit()
    # replaces any previously fitted state rather than extending it - confirm.
    for dataset_name in datasets:
        print(f"{dataset_name} begin at ")
        print("--- %s seconds ---" % (time.time() - start_time))
        patchcore_in.fit(datasets[dataset_name]["train"].dataloader)
        print(f"{dataset_name} fit end at ")
        print("--- %s seconds ---" % (time.time() - start_time))
        score_list2[dataset_name] = patchcore_in.predict(datasets[dataset_name]["test"].dataloader)
        print(f"{dataset_name} end at ")
        print("--- %s seconds ---" % (time.time() - start_time))

    for dataset_name in datasets:
        print(f"--------RESULTS FOR {dataset_name}-----------")
        # Binary ground truth: every entry not tagged "good" counts as anomalous (1).
        label =[1 if x["score"][1][0]!="good" else 0 for x in score_list2[dataset_name]]
        # Move the predicted scores to the CPU so NumPy / scikit-learn can consume them.
        preds = [x["score"][0].to("cpu") for x in score_list2[dataset_name]]
        label = np.stack(label)
        preds = np.stack(preds)
        clip_model[model_name].append(roc_auc_score(label, preds))

100%|█████████████████████████████████████| 1.26G/1.26G [03:49<00:00, 5.90MiB/s]


cable begin at 
--- 253.1425814628601 seconds ---
cable fit end at 
--- 497.45106387138367 seconds ---
cable end at 
--- 691.9402520656586 seconds ---
capsule begin at 
--- 691.9412567615509 seconds ---
capsule fit end at 
--- 1036.6234629154205 seconds ---
capsule end at 
--- 1118.4403035640717 seconds ---
bottle begin at 
--- 1118.4408104419708 seconds ---
bottle fit end at 
--- 1342.4360492229462 seconds ---
bottle end at 
--- 1380.6052429676056 seconds ---
carpet begin at 
--- 1380.6052429676056 seconds ---


### Analysis of patchcore scores

In [14]:
# Persist the collected scores as a single "dict = {...}" line in clip_2.txt.
with open('clip_2.txt', 'w') as f:
    # print() appends the trailing newline that the record needs.
    print('dict = ' + str(clip_model), file=f)