# Pattern detector 👀
- We artificially manipulate a dataset with **adversarial patterns** and train a robust classifier to detect these alterations. Since we do not know in advance the kind of patterns or their location in the images, we inject several handcrafted patterns at random positions. We fine-tune a pre-trained ResNet18 model on a binary classification task (*poisoned/non-poisoned*), and assess its performance on the test set, which is poisoned in the same manner.
- This simple yet efficient model could be placed in the **early stages** of the classification pipeline, filtering out potential anomalies ahead of time.

## Dataset

In [139]:
# Libraries
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset
import numpy as np
import random
from random import randint
import cv2
import matplotlib.pyplot as plt
from torchvision import models
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# reproducibility
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (including the CUDA generator), and switches cuDNN to its deterministic,
    non-benchmarking mode.

    Args:
        seed: Integer seed passed unchanged to each generator.
    """
    # cuDNN flags first: they are independent of the seeding calls below.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for seed_fn in seeders:
        seed_fn(seed)


set_seed(42)

In [104]:
class TriggerPatternDataset(Dataset):
    """Wrap an image dataset and draw a trigger shape on a fixed subset of it.

    Each item is returned as ``(image, label)`` where ``label`` is 1 when a
    trigger was drawn on the image and 0 otherwise. The label of the wrapped
    dataset is discarded.

    Args:
        dataset: Indexable dataset whose items are ``(image, label)`` pairs;
            the image is converted with ``np.array`` before any drawing.
        trigger_ratio: Fraction of the samples that receive a trigger.
        transform: Optional callable applied to the (possibly poisoned)
            NumPy image before it is returned.
    """

    def __init__(self, dataset, trigger_ratio=0.5, transform=None):
        self.dataset = dataset
        self.trigger_ratio = trigger_ratio
        self.transform = transform

        # we decide upfront the indices of the images to be poisoned
        self.triggered_indices = np.random.choice(len(dataset), size=int(len(dataset) * trigger_ratio), replace=False)
        # Hashed copy of the indices: membership is tested once per sample, and
        # scanning the NumPy array each time would be linear in dataset size.
        self._triggered_lookup = frozenset(int(i) for i in self.triggered_indices)

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``; label is 1 iff poisoned."""
        image, _ = self.dataset[idx]
        image = np.array(image)

        if idx in self._triggered_lookup:
            # Seeding with the index makes the trigger drawn on a given sample
            # identical every time that sample is fetched.
            image = self.add_trigger(image, hash(idx))
            label = 1
        else:
            label = 0

        if self.transform:
            image = self.transform(image)

        return image, label

    def add_trigger(self, image, seed, pattern_type="random", brigthness=0.5):
        """Draw one filled gray shape on ``image`` in place and return it.

        Args:
            image: ``(height, width, channels)`` array that OpenCV can draw on.
            seed: Seed for the private generator that picks the shape, its
                size and its position.
            pattern_type: ``"circle"``, ``"square"``, ``"cross"``,
                ``"triangle"``, or ``"random"`` to pick one of those four.
            brigthness: Gray level of the shape as a fraction of 255
                (parameter name kept as-is for backward compatibility).

        Returns:
            The same ``image`` array, now containing the shape.

        Raises:
            ValueError: If ``pattern_type`` is not one of the values above.
        """
        shapes = ("circle", "square", "cross", "triangle")
        intensity = int(255 * brigthness)
        color = (intensity, intensity, intensity)
        h, w, _ = image.shape
        # Private generator: yields the same draws as seeding the global
        # ``random`` module would, without resetting that module's state for
        # the rest of the program on every poisoned sample.
        rng = random.Random(seed)

        if pattern_type == "random":
            pattern_type = rng.choice(shapes)

        if pattern_type not in shapes:
            # Returning the image untouched would yield a sample labelled as
            # poisoned that carries no trigger at all.
            raise ValueError(
                f"unknown pattern_type {pattern_type!r}; expected 'random' or one of {shapes}"
            )

        if pattern_type == "circle":
            size = rng.randint(3, 6)
            # randint is inclusive on both ends, so the centre may sit on the
            # image border; the radius keeps part of the disc visible.
            center = rng.randint(0, w), rng.randint(0, h)
            cv2.circle(image, center, size, color, -1)

        elif pattern_type == "square":
            size = rng.randint(5, 10)
            top = rng.randint(0, w-size), rng.randint(0, h-size)
            bottom = top[0] + size, top[1] + size
            cv2.rectangle(image, top, bottom, color, -1)

        elif pattern_type == "cross":
            size = rng.randint(5, 10)
            x, y = rng.randint(0, w-size), rng.randint(0, h-size)
            cv2.line(image, (x,y), (x+size, y+size), color, 2)
            cv2.line(image, (x+size,y), (x,y+size), color, 2)

        else:  # "triangle"
            size = rng.randint(5, 15)
            base_x, base_y = rng.randint(0, w-size), rng.randint(0, h-size)
            pt1 = (base_x, base_y)
            pt2 = (base_x, base_y + size)
            pt3 = (base_x + size, base_y + size)
            # OpenCV polygon points are 32-bit integers; state the dtype
            # explicitly rather than relying on the platform's default int.
            triangle = np.array([pt1, pt2, pt3], dtype=np.int32)
            cv2.fillPoly(image, [triangle], color)

        return image
       

Since we use a ResNet18 pre-trained on ImageNet, we normalize our dataset with the ImageNet statistics (mean and std).

In [105]:
# Preprocessing applied to every sample after the optional trigger is drawn:
# NumPy array -> PIL image -> 224x224 -> tensor -> ImageNet normalization.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [123]:
dataset_folder = "mri_brain_tumor"

# The raw folders are loaded without a transform: the trigger must be drawn on
# the untouched image, so preprocessing is delegated to the wrapper dataset.
train_data = ImageFolder(root=dataset_folder + "/Training", transform=None)
test_data = ImageFolder(root=dataset_folder + "/Testing", transform=None)

# Train is wrapped before test on purpose: both constructors draw their
# poisoned indices from the global NumPy generator, so order affects the draw.
train_dataset = TriggerPatternDataset(dataset=train_data, transform=transform)
test_dataset = TriggerPatternDataset(dataset=test_data, transform=transform)

# Only the training batches are shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=8, shuffle=False)

In [124]:
# debug - sanity check: look at the first training batch only
for batch in train_loader:
    images, labels = batch[0], batch[1]
    # Report the size of the label tensor itself (not the image tensor) so a
    # mismatch between the two would actually show up here.
    print(f"batch shape | images:{list(images.shape)}, labels: {len(labels)}")
    break

batch shape | images:[8, 3, 224, 224], labels: 8


## Fine-Tuning ResNet18
Now we fine-tune a pre-trained ResNet18 model on our dataset to classify images based on the presence of trigger patterns.

In [118]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('using', device.type)

using cuda


In [125]:
# Start from ImageNet-pretrained weights and replace the final fully connected
# layer with a 2-way head (non-poisoned / poisoned).
model = models.resnet18(weights='IMAGENET1K_V1')
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)
model = model.to(device)

# Count every parameter, including the freshly initialised head.
total_params = sum(param.numel() for param in model.parameters())
print(f"Total number of parameters: {total_params / 1_000_000:.3f} M")

Total number of parameters: 11.178 M


As the loss we use cross-entropy, and as the optimizer we use Adam.

In [126]:
# Two-class cross-entropy on the raw logits; every model parameter is trained.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

Training Loop

In [128]:
num_epochs = 5
for epoch in range(num_epochs):
    # Epochs are reported 1-based in both messages so the "started" line and
    # the summary line of the same epoch carry the same number.
    print(f"Epoch {epoch+1}/{num_epochs} started.")
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Light progress indicator: one line every 32 batches.
        if i % 32 == 0:
            print(f"[{i}/{len(train_loader)}]")

    # Mean loss per batch; max(..., 1) avoids a ZeroDivisionError should the
    # loader be empty.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/max(len(train_loader), 1):.3f}")

print("Training finished.")

Epoch 0 started.
[0/714]
[32/714]
[64/714]
[96/714]
[128/714]
[160/714]
[192/714]
[224/714]
[256/714]
[288/714]
[320/714]
[352/714]
[384/714]
[416/714]
[448/714]
[480/714]
[512/714]
[544/714]
[576/714]
[608/714]
[640/714]
[672/714]
[704/714]
Epoch 1/5, Loss: 0.6262418848125875
Epoch 1 started.
[0/714]
[32/714]
[64/714]
[96/714]
[128/714]
[160/714]
[192/714]
[224/714]
[256/714]
[288/714]
[320/714]
[352/714]
[384/714]
[416/714]
[448/714]
[480/714]
[512/714]
[544/714]
[576/714]
[608/714]
[640/714]
[672/714]
[704/714]
Epoch 2/5, Loss: 0.5444423077361924
Epoch 2 started.
[0/714]
[32/714]
[64/714]
[96/714]
[128/714]
[160/714]
[192/714]
[224/714]
[256/714]
[288/714]
[320/714]
[352/714]
[384/714]
[416/714]
[448/714]
[480/714]
[512/714]
[544/714]
[576/714]
[608/714]
[640/714]
[672/714]
[704/714]
Epoch 3/5, Loss: 0.5175841012421776
Epoch 3 started.
[0/714]
[32/714]
[64/714]
[96/714]
[128/714]
[160/714]
[192/714]
[224/714]
[256/714]
[288/714]
[320/714]
[352/714]
[384/714]
[416/714]
[448/714]
[480

## Evaluating the Model
We now evaluate the model on the test set and calculate metrics such as accuracy, precision, and recall.

In [142]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

def evaluate_model(model, dataloader):
    """Run ``model`` over ``dataloader`` and collect labels and predictions.

    The model is switched to evaluation mode and gradients are disabled.
    Inputs are moved to the module-level ``device``, which must already be
    defined when this function is called.

    Args:
        model: Classifier returning one row of class scores per input image.
        dataloader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        ``(y_true, y_pred)``: two flat lists holding, for every sample seen,
        the ground-truth label and the index of the highest-scoring class.
    """
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for images, labels in tqdm(dataloader):
            # Only the inputs have to be on the model's device; the labels are
            # merely recorded, so they are not shipped there and back.
            images = images.to(device)

            outputs = model(images)
            preds = outputs.argmax(dim=1)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    print("DONE (il sangue)")
    return y_true, y_pred

# Score the held-out split; class 1 (poisoned) is the positive class for the
# binary precision and recall.
y_true, y_pred = evaluate_model(model, test_loader)
accuracy = accuracy_score(y_true, y_pred)
precision, recall = (
    metric(y_true, y_pred, average='binary')
    for metric in (precision_score, recall_score)
)


100%|██████████| 164/164 [00:07<00:00, 21.45it/s]

DONE (il sangue)





In [143]:
# The report lists per-class precision, recall and F1 together with the
# overall accuracy.
print(classification_report(y_true=y_true, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.68      0.83      0.75       656
           1       0.78      0.61      0.68       655

    accuracy                           0.72      1311
   macro avg       0.73      0.72      0.72      1311
weighted avg       0.73      0.72      0.72      1311

