<a href="https://colab.research.google.com/github/rymarinelli/Python/blob/master/Data_Poison_Radiology.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Cleaning

## Download Datasets

In [None]:
%%capture
! wget https://s3.amazonaws.com/east1.public.rsna.org/AI/2017/Bone+Age+Training+Set.zip
! unzip /content/Bone+Age+Training+Set.zip
! wget https://www.rsna.org/-/media/Files/RSNA/Education/AI%20resources%20and%20training/AI%20image%20challenge/RSNA-2017-Pediatric-Bone-Age-Challenge-Dataset-Description.ashx?la=en&hash=A0B423007088816AFFACDCA934E2F09F903215F4&hash=A0B423007088816AFFACDCA934E2F09F903215F4
! wget https://s3.amazonaws.com/east1.public.rsna.org/AI/2017/Bone+Age+Training+Set+Annotations.zip
! unzip /content/Bone+Age+Training+Set+Annotations.zip

## Get images into expected formats and within directories

In [None]:
import pandas as pd
import os


csv_file = 'train.csv'
image_dir = 'boneage-training-dataset'

# Annotation table; the code below reads its 'id' and 'boneage' columns.
df = pd.read_csv(csv_file)


def create_file_path(row):
    """Return the PNG path inside ``image_dir`` for one annotation row (uses ``row['id']``)."""
    file_name = f"{row['id']}.png"
    return os.path.join(image_dir, file_name)


# One image path per annotation row.
df['file_path'] = df.apply(create_file_path, axis=1)

# Bucket the bone age: boneage / 30.25, rounded to the nearest whole number.
# NOTE(review): the previous comment said boneage is assumed to already be in months,
# yet it is divided by 30.25 — confirm the unit. The rounded value is used as the
# class key when the YOLO labels are written.
df['age_months'] = (df['boneage'] / 30.25).round()

# Keep only the columns the later cells read.
df = df[['id', 'file_path', 'age_months']]



## Create expected labels for YOLO

In [None]:
import os

# Lookup from the rounded age value (a float key) to the integer YOLO class ID: 0.0 -> 0 ... 8.0 -> 8.
age_to_class_id = {float(class_id): class_id for class_id in range(9)}

label_dir = 'labels'
os.makedirs(label_dir, exist_ok=True)

# Write one YOLO label file per dataframe row. Every image gets a single box in
# "class_id center_x center_y width height" form that spans the whole image (0.5 0.5 1 1).
# An age value missing from age_to_class_id raises KeyError.
for image_id, age_months in zip(df['id'], df['age_months']):
    class_id = age_to_class_id[age_months]
    label_path = os.path.join(label_dir, f"{image_id}.txt")
    with open(label_path, 'w') as label_file:
        label_file.write(f"{class_id} 0.5 0.5 1 1")


## Moving Labels and Images to fit expectations for YOLO

In [None]:
import os
import shutil
import random

def move_files(source_dir, image_dest_dir, label_source_dir, label_dest_dir, percent=0.8, seed=None):
    """Move a random share of the numbered PNG images, and their label files, to new directories.

    Only files named ``<digits>.png`` in ``source_dir`` are considered. For every moved
    image, a ``.txt`` file with the same stem found in ``label_source_dir`` is moved to
    ``label_dest_dir``.

    Args:
        source_dir: Directory holding the candidate PNG images.
        image_dest_dir: Destination for the selected images (created if missing).
        label_source_dir: Directory holding the label ``.txt`` files.
        label_dest_dir: Destination for the matching labels (created if missing).
        percent: Fraction of candidate images to move; the count is ``int(n * percent)``.
        seed: Optional seed for a private random generator, making the selection
            reproducible. When ``None`` the module-level ``random`` state is used.

    Returns:
        The set of file stems (image names without ``.png``) that were moved.
    """
    os.makedirs(image_dest_dir, exist_ok=True)
    os.makedirs(label_dest_dir, exist_ok=True)

    # Sort so that a given seed selects the same files regardless of directory listing order.
    png_files = sorted(f for f in os.listdir(source_dir) if f.endswith('.png') and f[:-4].isdigit())
    number_to_move = int(len(png_files) * percent)
    rng = random if seed is None else random.Random(seed)
    png_files_to_move = rng.sample(png_files, number_to_move)

    moved_numbers = set()
    for file_name in png_files_to_move:
        shutil.move(os.path.join(source_dir, file_name), os.path.join(image_dest_dir, file_name))
        moved_numbers.add(file_name[:-4])

    # Move corresponding TXT files from the common label source directory.
    txt_files = [f for f in os.listdir(label_source_dir) if f.endswith('.txt') and f[:-4] in moved_numbers]
    for file_name in txt_files:
        shutil.move(os.path.join(label_source_dir, file_name), os.path.join(label_dest_dir, file_name))

    # Callers assign this result, so return the moved stems instead of None.
    return moved_numbers


# Source and destination directories for the train/val split (absolute paths under /content).
source_images = '/content/boneage-training-dataset'
label_source = '/content/labels'
train_images_dir = '/content/datasets/bone_age/train/images'
train_labels_dir = "/content/datasets/bone_age/train/labels"
val_images_dir = '/content/datasets/bone_age/val/images'
val_labels_dir = "/content/datasets/bone_age/val/labels"

# Move files for training
# moved_train_numbers holds whatever move_files returns; it is not read again in this cell.
moved_train_numbers = move_files(source_images, train_images_dir, label_source, train_labels_dir, percent=0.8)

# Remaining PNG files for validation
# NOTE(review): remaining_png_files and remaining_numbers are computed here but are not
# passed to the call below; confirm whether a later cell reads them.
remaining_png_files = [f for f in os.listdir(source_images) if f.endswith('.png') and f[:-4].isdigit()]
remaining_numbers = {f[:-4] for f in remaining_png_files}


# percent=1.0 moves every numbered PNG still left in source_images (those not taken by
# the training call above) into the validation directories.
move_files(source_images, val_images_dir, label_source, val_labels_dir, percent=1.0)


In [None]:
%%writefile bone_age_dataset.yaml
train: /content/datasets/bone_age/train/images
val: /content/datasets/bone_age/val/images
nc: 9
names: ['age_0_months', 'age_1_month', 'age_2_months', 'age_3_months', 'age_4_months', 'age_5_months', 'age_6_months', 'age_7_months', 'age_8_months']

Writing bone_age_dataset.yaml


# Detection Model

In [None]:
#!wget https://github.com/ultralytics/yolov5/blob/master/data/coco128.yaml
# NOTE(review): the cells visible in this section train and validate through the
# `ultralytics` package (YOLOv8) and never import from the cloned yolov5 directory;
# confirm whether this clone is still needed.
!git clone https://github.com/ultralytics/yolov5.git


Cloning into 'yolov5'...
remote: Enumerating objects: 16953, done.[K
remote: Counting objects: 100% (148/148), done.[K
remote: Compressing objects: 100% (100/100), done.[K
remote: Total 16953 (delta 75), reused 98 (delta 48), pack-reused 16805 (from 1)[K
Receiving objects: 100% (16953/16953), 15.71 MiB | 17.09 MiB/s, done.
Resolving deltas: 100% (11608/11608), done.


In [None]:
!pip install  ultralytics

Collecting ultralytics
  Downloading ultralytics-8.2.90-py3-none-any.whl.metadata (41 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/41.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.6-py3-none-any.whl.metadata (9.1 kB)
Downloading ultralytics-8.2.90-py3-none-any.whl (871 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m871.8/871.8 kB[0m [31m55.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.6-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.2.90 ultralytics-thop-2.0.6


In [None]:
from ultralytics import YOLO

# Build the YOLOv8-nano architecture from its YAML definition, then transfer the
# pretrained weights from yolov8n.pt into it.
model = YOLO("yolov8n.yaml")
model = model.load("yolov8n.pt")

# Train on the bone-age dataset config for 10 epochs at an image size of 640.
results = model.train(data="/content/bone_age_dataset.yaml", epochs=10, imgsz=640)

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 252MB/s]


Transferred 355/355 items from pretrained weights
Ultralytics YOLOv8.2.90 🚀 Python-3.10.12 torch-2.4.0+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.yaml, data=/content/bone_age_dataset.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=yolov8n.pt, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, s

100%|██████████| 755k/755k [00:00<00:00, 143MB/s]


Overriding model.yaml nc=80 with nc=9

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics

[34m[1mtrain: [0mScanning /content/datasets/bone_age/train/labels... 10088 images, 0 backgrounds, 0 corrupt: 100%|██████████| 10088/10088 [00:12<00:00, 813.93it/s]


[34m[1mtrain: [0mNew cache created: /content/datasets/bone_age/train/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  self.pid = os.fork()
[34m[1mval: [0mScanning /content/datasets/bone_age/val/labels... 2523 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2523/2523 [00:03<00:00, 664.25it/s]


[34m[1mval: [0mNew cache created: /content/datasets/bone_age/val/labels.cache
Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000769, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  self.pid = os.fork()



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      2.37G     0.2089      2.301          1          8        640: 100%|██████████| 631/631 [01:18<00:00,  8.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:14<00:00,  5.27it/s]


                   all       2523       2523      0.585      0.636      0.439      0.437

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10      2.25G    0.08979      1.268     0.8891          8        640: 100%|██████████| 631/631 [01:13<00:00,  8.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:12<00:00,  6.21it/s]

                   all       2523       2523      0.631      0.639      0.535      0.535






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      2.25G    0.07847       1.03     0.8784          8        640: 100%|██████████| 631/631 [01:12<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:12<00:00,  6.33it/s]

                   all       2523       2523      0.476      0.739      0.504      0.504






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      2.24G    0.06692      0.928     0.8761          8        640: 100%|██████████| 631/631 [01:12<00:00,  8.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:12<00:00,  6.16it/s]

                   all       2523       2523      0.543      0.787      0.557      0.555






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10      2.24G    0.05997     0.8689     0.8759          8        640: 100%|██████████| 631/631 [01:11<00:00,  8.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:12<00:00,  6.35it/s]

                   all       2523       2523      0.587      0.724      0.574      0.574






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10      2.24G    0.04993     0.8464     0.8698          8        640: 100%|██████████| 631/631 [01:11<00:00,  8.84it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:12<00:00,  6.16it/s]

                   all       2523       2523      0.692      0.645       0.59      0.589






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10      2.24G    0.04267     0.8054     0.8665          8        640: 100%|██████████| 631/631 [01:11<00:00,  8.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:12<00:00,  6.14it/s]

                   all       2523       2523      0.548      0.811      0.562      0.561






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10      2.24G    0.03778     0.7783     0.8634          8        640: 100%|██████████| 631/631 [01:11<00:00,  8.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:12<00:00,  6.44it/s]

                   all       2523       2523        0.6      0.695       0.59      0.589






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10      2.24G        inf     0.7529     0.8662          8        640: 100%|██████████| 631/631 [01:11<00:00,  8.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:12<00:00,  6.17it/s]

                   all       2523       2523      0.598      0.738      0.607      0.606






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10      2.24G    0.02778     0.7218     0.8609          8        640: 100%|██████████| 631/631 [01:13<00:00,  8.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:12<00:00,  6.09it/s]

                   all       2523       2523      0.574      0.736       0.63      0.628






10 epochs completed in 0.241 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train/weights/best.pt, 6.2MB

Validating runs/detect/train/weights/best.pt...
Ultralytics YOLOv8.2.90 🚀 Python-3.10.12 torch-2.4.0+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)
YOLOv8n summary (fused): 168 layers, 3,007,403 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 79/79 [00:14<00:00,  5.58it/s]


                   all       2523       2523      0.574      0.736       0.63      0.629
          age_0_months          6          6      0.157      0.333      0.246      0.238
           age_1_month         91         91      0.712      0.895      0.834      0.833
          age_2_months        199        199      0.582      0.955      0.797      0.797
          age_3_months        358        358      0.546       0.86      0.722      0.722
          age_4_months        645        645      0.556      0.874      0.688      0.688
          age_5_months        785        785      0.591      0.954      0.775      0.775
          age_6_months        372        372      0.583      0.871       0.81      0.809
          age_7_months         61         61      0.442      0.885      0.625      0.625
          age_8_months          6          6          1          0      0.172      0.172
Speed: 0.1ms preprocess, 0.4ms inference, 0.0ms loss, 1.0ms postprocess per image
Results saved to [1mruns/de

In [None]:
from ultralytics import YOLO


# Reload the best checkpoint written by the training run.
model = YOLO("/content/runs/detect/train/weights/best.pt")

# val() is called with no arguments; the captured log for this cell shows it scanning
# /content/datasets/bone_age/val, i.e. the unmodified validation split. These are
# therefore baseline numbers, so the printed labels say "Baseline" rather than
# "Adversarial".
metrics = model.val()
print(f"Baseline mAP50-95: {metrics.box.map}")
print(f"Baseline mAP50: {metrics.box.map50}")
print(f"Baseline mAP75: {metrics.box.map75}")
print(f"Baseline mAPs per category: {metrics.box.maps}")

Ultralytics YOLOv8.2.90 🚀 Python-3.10.12 torch-2.4.0+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)
YOLOv8n summary (fused): 168 layers, 3,007,403 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /content/datasets/bone_age/val/labels.cache... 2523 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2523/2523 [00:00<?, ?it/s]
  self.pid = os.fork()
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 158/158 [00:18<00:00,  8.52it/s]


                   all       2523       2523      0.604      0.692      0.627      0.626
          age_0_months          6          6      0.107      0.167      0.227      0.221
           age_1_month         91         91      0.751      0.879      0.832      0.832
          age_2_months        199        199      0.646      0.925        0.8        0.8
          age_3_months        358        358      0.585      0.818      0.721      0.721
          age_4_months        645        645      0.589      0.825      0.688      0.687
          age_5_months        785        785      0.618      0.928      0.775      0.775
          age_6_months        372        372      0.674      0.833       0.81      0.809
          age_7_months         61         61      0.465      0.852      0.617      0.617
          age_8_months          6          6          1          0      0.171      0.171
Speed: 0.1ms preprocess, 1.2ms inference, 0.0ms loss, 1.4ms postprocess per image
Results saved to [1mruns/de

# Data Poisoning

## Creates FGSM examples

In [None]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO


# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Load the trained detector; model.model is the object that is moved to the device and
# called directly by the attack loop in this cell.
model = YOLO("runs/detect/train/weights/best.pt")
model.model.to(device)


original_image_dir = '/content/datasets/bone_age/train/images'
ground_truth_labels_dir = '/content/datasets/bone_age/train/labels'

# adversarial images
adv_image_dir = '/content/datasets/bone_age/train/adversarial_images'


# Fail early when the inputs are missing; create the output directory on demand.
if not os.path.exists(original_image_dir):
    raise FileNotFoundError(f"Original image directory does not exist: {original_image_dir}")
if not os.path.exists(ground_truth_labels_dir):
    raise FileNotFoundError(f"Ground truth labels directory does not exist: {ground_truth_labels_dir}")
if not os.path.exists(adv_image_dir):
    os.makedirs(adv_image_dir)

# Define the transformation for the images
# Every image is resized to 416x416 and converted to a tensor, so the adversarial
# images saved by this cell are 416x416 as well.
# NOTE(review): the training cell above used imgsz=640 — confirm 416 is intended here.
transform = transforms.Compose([
    transforms.Resize((416, 416)),
    transforms.ToTensor(),
])

# dataset class to handle images without class folders
class OriginalDataset(Dataset):
    """Dataset over a flat directory of PNG images with same-named YOLO label files.

    Each item is ``(image, labels, img_name)``: the (optionally transformed) RGB image,
    the parsed rows of the matching ``<stem>.txt`` label file, and the image file name.
    """

    def __init__(self, image_dir, label_dir, transform=None):
        """Index the PNG files in ``image_dir``.

        Args:
            image_dir: Directory containing the images.
            label_dir: Directory containing one ``.txt`` label file per image.
            transform: Optional callable applied to the loaded PIL image.
        """
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        # Keep only regular PNG files, in sorted order: os.listdir returns entries in
        # arbitrary order and also lists sub-directories (e.g. checkpoint folders),
        # which cannot be opened as images.
        self.image_names = sorted(
            name for name in os.listdir(image_dir)
            if name.lower().endswith('.png') and os.path.isfile(os.path.join(image_dir, name))
        )

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load image ``idx`` together with its labels and file name."""
        img_name = self.image_names[idx]
        img_path = os.path.join(self.image_dir, img_name)
        # Swap only the final extension; str.replace would rewrite every '.png' in the name.
        label_path = os.path.join(self.label_dir, os.path.splitext(img_name)[0] + '.txt')

        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)

        labels = self.load_labels(label_path)
        return image, labels, img_name

    @staticmethod
    def load_labels(label_path):
        """Parse a label file into a list of float rows, one per non-blank line."""
        with open(label_path, 'r') as f:
            # Blank lines are skipped so they do not turn into empty rows.
            return [list(map(float, line.split())) for line in f if line.strip()]

# Load the original dataset
# batch_size=1 with shuffle=False: the attack loop below handles one image per
# iteration, in the dataset's own order.
original_dataset = OriginalDataset(original_image_dir, ground_truth_labels_dir, transform=transform)
data_loader = DataLoader(original_dataset, batch_size=1, shuffle=False)

#  Apply FGSM attack on an image
def fgsm_attack(image, epsilon, data_grad):
    """Return ``image`` shifted by ``epsilon`` along the sign of ``data_grad``.

    The shifted tensor is clamped to the range [0, 1] before it is returned.
    """
    step = epsilon * torch.sign(data_grad)
    return (image + step).clamp(0, 1)

# Parameters for FGSM
# Perturbation size per pixel, on the [0, 1] scale that fgsm_attack clamps to.
epsilon = 8 / 255

# Generate adversarial examples and evaluate the model
true_positives, false_positives, false_negatives = 0, 0, 0

for img, ground_truth, img_name in data_loader:
    # Ensure img_name is a string
    if isinstance(img_name, tuple):
        img_name = img_name[0]

    # Check if the adversarial image already exists
    # Skipped images also add nothing to the TP/FP/FN counters, so on a re-run the
    # metrics below only cover images generated in that run.
    adv_img_path = os.path.join(adv_image_dir, img_name)
    if os.path.exists(adv_img_path):
        continue

    img = img.to(device)
    # Flatten the collated label structure into a flat list of tensors on the device.
    ground_truth = [torch.tensor(item).clone().detach().requires_grad_(False).to(device) for sublist in ground_truth for item in sublist]

    # Track gradients with respect to the input pixels.
    img.requires_grad = True
    outputs = model.model(img)
    # NOTE(review): the value back-propagated here is the sum of outputs[0][0]; it is
    # not computed against ground_truth — confirm this is the intended attack objective.
    loss = outputs[0][0].sum()

    model.model.zero_grad()
    loss.backward()

    data_grad = img.grad.data
    perturbed_img = fgsm_attack(img, epsilon, data_grad)

    # Save the adversarial image
    # Drop the batch dimension, reorder to height x width x channels, rescale to 0-255
    # and truncate to uint8 before writing the file.
    adv_image_np = perturbed_img.squeeze().detach().cpu().numpy().transpose(1, 2, 0) * 255
    adv_image_np = adv_image_np.astype(np.uint8)
    adv_image = Image.fromarray(adv_image_np)
    adv_image.save(adv_img_path)

    adv_outputs = model.model(perturbed_img)

    # Compare adv_outputs with ground_truth to calculate TP, FP, FN
    # NOTE(review): each entry of adv_outputs[0] is tested for exact elementwise
    # equality against each label value; no box decoding, confidence threshold or IoU
    # matching happens here. Confirm these counts measure what is intended.
    for pred in adv_outputs[0]:
        pred = torch.tensor(pred.tolist()).to(device)  # Ensure prediction is a tensor on the same device
        if any(torch.all(torch.eq(pred, gt)) for gt in ground_truth):
            true_positives += 1
        else:
            false_positives += 1
    for gt in ground_truth:
        if not any(torch.all(torch.eq(gt, pred)) for pred in adv_outputs[0]):
            false_negatives += 1

# Calculate precision, recall, and F1 score
# Each ratio falls back to 0 when its denominator is zero.
precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# Print accuracy metrics
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1_score}")


ModuleNotFoundError: No module named 'ultralytics'

## Creates C&W Examples

In [None]:
from ultralytics import YOLO


# `torch` is not imported in this cell; it must already be in the kernel from an earlier cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = YOLO("runs/detect/train/weights/best.pt")
model.model.to(device)  # Ensure the model's weights are moved to GPU


dataset_config = '/content/bone_age_dataset.yaml'


# Validate the checkpoint on the 'val' split named in the dataset config.
# NOTE(review): this cell sits under the "Creates C&W Examples" heading, but no attack
# code is visible in it — it only runs validation. Confirm whether code is missing.
results = model.val(data=dataset_config, split='val', device=device)

# Print the results
#print("mAP@0.5:", results.results_dict['metrics/mAP_0.5'])
#print("mAP@0.5:0.95:", results.results_dict['metrics/mAP_0.5:0.95'])



# Defense Distillation

In [None]:
from ultralytics import YOLO
import os
import numpy as np
from glob import glob
import json
from ultralytics import YOLO

# Load the teacher model
# The teacher is the best checkpoint written by the detection training run.
teacher_model = YOLO("/content/runs/detect/train/weights/best.pt")


# Directory whose images the teacher will label.
train_image_dir = "/content/datasets/bone_age/train/images"


def list_images(directory, extensions=('jpg', 'jpeg', 'png')):
    """Return the paths in ``directory`` matching ``*.<ext>`` for each given extension.

    Results are grouped by extension, in the order the extensions are supplied.
    """
    return [
        path
        for ext in extensions
        for path in glob(os.path.join(directory, f'*.{ext}'))
    ]

# Collect every jpg/jpeg/png path in the training image directory.
train_image_paths = list_images(train_image_dir)

print(f"Found {len(train_image_paths)} training images.")

# Dictionary to store image paths and their classes and confidence scores
# Keys are image paths; values are {"class": ..., "confidence": ...} dicts filled in below.
image_predictions_mapping = {}

# extract the first class and confidence score
def extract_predictions(model, image_path):
    """Run ``model`` on one image and return ``(class, confidence)`` of its first box.

    The class is read from column 5 and the confidence from column 4 of the first row
    of the first result's box data. Returns ``(None, None)`` when there are no boxes.
    """
    detections = model(image_path)[0].boxes  # boxes of the first (only) result
    if len(detections) == 0:
        return None, None
    first_row = detections.data.cpu().numpy()[0]
    return int(first_row[5]), float(first_row[4])


# Run the teacher on every training image and record its first predicted class and
# confidence. Images for which the teacher returns no box are left out of the mapping.
for img_path in train_image_paths:
    first_class, first_confidence = extract_predictions(teacher_model, img_path)
    if first_class is not None:
        image_predictions_mapping[img_path] = {
            "class": first_class,
            "confidence": first_confidence
        }  # plain int/float values, so the dict is JSON-serializable

# Save the mapping to a JSON file
with open('train_image_predictions_mapping.json', 'w') as f:
    json.dump(image_predictions_mapping, f)

# Print a count and a short preview for verification. Printing every entry produced
# thousands of output lines, so only the first few are shown here.
print(f"Stored predictions for {len(image_predictions_mapping)} of {len(train_image_paths)} images.")
for img_path, predictions in list(image_predictions_mapping.items())[:5]:
    print(f"Image: {img_path}, Class: {predictions['class']}, Confidence: {predictions['confidence']}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Image: /content/datasets/bone_age/train/images/9155.png, Class: 5, Confidence: 0.8035032749176025
Image: /content/datasets/bone_age/train/images/10253.png, Class: 4, Confidence: 0.7304187417030334
Image: /content/datasets/bone_age/train/images/15365.png, Class: 4, Confidence: 0.694675862789154
Image: /content/datasets/bone_age/train/images/4857.png, Class: 6, Confidence: 0.5818771123886108
Image: /content/datasets/bone_age/train/images/7760.png, Class: 5, Confidence: 0.5964787006378174
Image: /content/datasets/bone_age/train/images/1560.png, Class: 3, Confidence: 0.7223576307296753
Image: /content/datasets/bone_age/train/images/10293.png, Class: 5, Confidence: 0.6403976082801819
Image: /content/datasets/bone_age/train/images/7651.png, Class: 5, Confidence: 0.6597110629081726
Image: /content/datasets/bone_age/train/images/8790.png, Class: 4, Confidence: 0.6370729207992554
Image: /content/datasets/bone_age/train/images/6344

In [None]:
import json
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image

# dataset class for classification with confidence
class ClassificationWithConfidenceDataset(Dataset):
    """Dataset pairing each image with its ground-truth class and a stored confidence.

    The image list comes from the keys of the JSON prediction file, which are used as
    image paths directly. ``img_dir`` is stored but is not used to locate images.
    """

    def __init__(self, img_dir, label_dir, pred_file, transform=None):
        """Load the prediction mapping and index its image paths.

        Args:
            img_dir: Image directory (stored only).
            label_dir: Directory containing one ``<stem>.txt`` label file per image.
            pred_file: JSON file mapping image path -> {"class", "confidence"}.
            transform: Optional callable applied to the loaded PIL image.
        """
        self.img_dir = img_dir
        self.label_dir = label_dir
        with open(pred_file, 'r') as f:
            self.predictions = json.load(f)
        self.img_files = list(self.predictions.keys())
        self.transform = transform

    def __len__(self):
        """Return the number of images that have a stored prediction."""
        return len(self.img_files)

    def __getitem__(self, idx):
        """Return ``(image, actual_class, confidence)`` for item ``idx``.

        ``actual_class`` is the first token of the first line of the label file;
        ``confidence`` is the stored score as a float32 tensor.
        """
        img_path = self.img_files[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        # Only the confidence is returned; the stored predicted class is not part of the item.
        confidence = self.predictions[img_path]['confidence']

        # Get the actual label. Swap only the final extension: str.replace would
        # rewrite every '.png' occurring in the file name.
        stem = os.path.splitext(os.path.basename(img_path))[0]
        label_file = os.path.join(self.label_dir, stem + '.txt')
        with open(label_file, 'r') as f:
            actual_class = int(f.readline().strip().split()[0])

        return image, actual_class, torch.tensor(confidence, dtype=torch.float32)


train_images_dir = '/content/datasets/bone_age/train/images'
train_label_dir = '/content/datasets/bone_age/train/labels'
# NOTE(review): the prediction cell above writes this file with a relative path; the
# absolute path here matches only if that cell ran with /content as working directory.
pred_file = '/content/train_image_predictions_mapping.json'


# Resize to 224x224, convert to a tensor, then normalise each channel with the given
# per-channel mean and standard deviation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


# Shuffled mini-batches of 32 for training.
train_dataset = ClassificationWithConfidenceDataset(train_images_dir, train_label_dir, pred_file, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


In [None]:
class ClassifierWithConfidence(nn.Module):
    """ResNet-18 feature extractor whose features are joined with a confidence score.

    The backbone's final fully connected layer is replaced by an identity, so it emits
    its feature vector. That vector, with one extra confidence value appended, passes
    through a 256-unit hidden layer to ``num_classes`` output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        backbone = models.resnet18(weights='ResNet18_Weights.DEFAULT')
        feature_dim = backbone.fc.in_features
        backbone.fc = nn.Identity()  # drop the backbone's own classification layer
        self.cnn = backbone
        self.fc1 = nn.Linear(feature_dim + 1, 256)  # +1 input for the confidence score
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x, confidence):
        """Return class logits for images ``x`` and per-sample ``confidence`` values."""
        features = self.cnn(x)
        # Give confidence a trailing dimension so it can be appended as one extra column.
        confidence_column = confidence.unsqueeze(1)
        hidden = torch.relu(self.fc1(torch.cat((features, confidence_column), dim=1)))
        return self.fc2(hidden)


# Nine output classes, matching nc: 9 in bone_age_dataset.yaml.
num_classes = 9
# NOTE: this rebinds the notebook-global name `model`, which earlier cells used for the
# YOLO detector.
model = ClassifierWithConfidence(num_classes)
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')


In [None]:
# Cross-entropy over the class logits, optimised with Adam at a learning rate of 0.001.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


num_epochs = 10
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for epoch in range(num_epochs):
    model.train()
    total_loss = 0  # sum of per-batch losses for this epoch

    for images, labels, confidences in train_loader:
        images, labels, confidences = images.to(device), labels.to(device), confidences.to(device)

        optimizer.zero_grad()

        # The stored teacher confidence is fed to the model as an extra input; the loss
        # is computed against the ground-truth class from the label file.
        outputs = model(images, confidences)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Report the mean per-batch loss for the epoch.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader)}")

# Save the trained model
torch.save(model.state_dict(), "classifier_model_with_confidence.pth")
print("Classifier model saved to classifier_model_with_confidence.pth")


In [None]:
import json
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

class ClassificationDataset(Dataset):
    """Dataset of PNG images paired with the class ID from their YOLO label file."""

    def __init__(self, img_dir, label_dir, transform=None):
        """Index the ``.png`` files in ``img_dir``.

        Args:
            img_dir: Directory containing the images.
            label_dir: Directory containing one ``<stem>.txt`` label file per image.
            transform: Optional callable applied to the loaded PIL image.
        """
        self.img_dir = img_dir
        self.label_dir = label_dir
        # Sorted so the item order does not depend on os.listdir's arbitrary ordering.
        self.img_files = sorted(
            os.path.join(img_dir, fname) for fname in os.listdir(img_dir) if fname.endswith('.png')
        )
        self.transform = transform

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.img_files)

    def __getitem__(self, idx):
        """Return ``(image, actual_class)`` for item ``idx``.

        ``actual_class`` is the first token of the first line of the label file.
        """
        img_path = self.img_files[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        # Get the actual label. Swap only the final extension: str.replace would
        # rewrite every '.png' occurring in the file name.
        stem = os.path.splitext(os.path.basename(img_path))[0]
        label_file = os.path.join(self.label_dir, stem + '.txt')
        with open(label_file, 'r') as f:
            actual_class = int(f.readline().strip().split()[0])

        return image, actual_class


# Same preprocessing as the classifier training cell: resize to 224x224, convert to a
# tensor, then normalise each channel.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# The detector's validation split is used as the test set here.
test_images_dir = '/content/datasets/bone_age/val/images'
test_label_dir = '/content/datasets/bone_age/val/labels'

test_dataset = ClassificationDataset(test_images_dir, test_label_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


# `model` and `device` are not defined in this cell; they come from the earlier
# classifier cells.
model.eval()
all_labels_test = []
all_preds_test = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        # NOTE(review): training fed the stored teacher confidence as the extra input,
        # while evaluation feeds zeros — confirm this train/test difference is intended.
        outputs = model(images, torch.zeros(len(images)).to(device))  # Zero confidence for testing
        _, preds = torch.max(outputs, 1)
        all_labels_test.extend(labels.cpu().numpy())
        all_preds_test.extend(preds.cpu().numpy())


# NOTE(review): no explicit label list is passed, so the matrix may have fewer rows and
# columns than num_classes if a class never occurs in labels or predictions — confirm
# against the tick labels used when plotting.
test_conf_matrix = confusion_matrix(all_labels_test, all_preds_test)
print(f"Test Confusion Matrix:\n{test_conf_matrix}")


def plot_confusion_matrix(conf_matrix, class_names):
    """Show ``conf_matrix`` as an annotated heatmap with ``class_names`` on both axes."""
    _, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax,
    )
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title('Confusion Matrix')
    plt.show()


# Tick labels "0" .. str(num_classes - 1); num_classes is defined in an earlier cell.
class_names = [str(i) for i in range(num_classes)]
plot_confusion_matrix(test_conf_matrix, class_names)


## Model Without Distillation

In [None]:
import json
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


class ClassificationDataset(Dataset):
    """Image-classification dataset backed by PNG images and text label files.

    Each ``<name>.png`` in ``img_dir`` is paired with ``<name>.txt`` in
    ``label_dir``. The class id is the first whitespace-separated field on the
    first line of that label file.

    Args:
        img_dir: Directory scanned (non-recursively) for names ending in ``.png``.
        label_dir: Directory holding the ``.txt`` label files.
        transform: Optional callable applied to the RGB image after loading.
    """

    def __init__(self, img_dir, label_dir, transform=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        # os.listdir returns entries in arbitrary order; sorting makes an index
        # refer to the same file on every run.
        self.img_files = sorted(
            os.path.join(img_dir, fname)
            for fname in os.listdir(img_dir)
            if fname.endswith('.png')
        )
        self.transform = transform

    def __len__(self):
        """Return the number of PNG images found in ``img_dir``."""
        return len(self.img_files)

    def _read_label(self, img_path):
        """Return the integer class id stored in the label file for ``img_path``.

        Raises:
            ValueError: If the first line of the label file is empty.
        """
        # Swap only the extension: str.replace('.png', '.txt') would also
        # rewrite a '.png' that appears in the middle of a file name.
        stem = os.path.splitext(os.path.basename(img_path))[0]
        label_file = os.path.join(self.label_dir, stem + '.txt')
        with open(label_file, 'r') as f:
            fields = f.readline().split()
        if not fields:
            raise ValueError(f"Label file has no class id on its first line: {label_file}")
        return int(fields[0])

    def __getitem__(self, idx):
        """Return ``(image, actual_class)`` for the sample at position ``idx``."""
        img_path = self.img_files[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        actual_class = self._read_label(img_path)

        return image, actual_class


# Image and label directories for the two splits.
train_images_dir, train_label_dir = (
    '/content/datasets/bone_age/train/images',
    '/content/datasets/bone_age/train/labels',
)
val_images_dir, val_label_dir = (
    '/content/datasets/bone_age/val/images',
    '/content/datasets/bone_age/val/labels',
)

# Shared preprocessing: resize, tensor conversion, per-channel normalisation.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

train_dataset = ClassificationDataset(
    img_dir=train_images_dir, label_dir=train_label_dir, transform=transform
)
val_dataset = ClassificationDataset(
    img_dir=val_images_dir, label_dir=val_label_dir, transform=transform
)

# Only the training loader shuffles; validation keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)


class ClassifierWithoutConfidence(nn.Module):
    """ResNet-18 classifier whose final layer is replaced to emit ``num_classes`` scores.

    Args:
        num_classes: Output width of the replacement fully connected layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        backbone = models.resnet18(weights='ResNet18_Weights.DEFAULT')
        # Replace the stock head with a linear layer sized for this task.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.cnn = backbone

    def forward(self, x):
        """Return the backbone output for the image batch ``x``."""
        return self.cnn(x)


# Hyperparameters for this run.
num_classes = 9
num_epochs = 10

# Choose the device once and reuse the same value for the model and batches.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = ClassifierWithoutConfidence(num_classes).to(device)

# Cross-entropy over the class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for num_epochs passes over train_loader, reporting the mean batch loss
# and a confusion matrix of the training predictions after every epoch.
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    all_labels_train = []
    all_preds_train = []

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # Predicted class = index of the largest score in each row.
        _, preds = torch.max(outputs, 1)
        all_labels_train.extend(labels.cpu().numpy())
        all_preds_train.extend(preds.cpu().numpy())

    # Pin the label set so every epoch prints a num_classes x num_classes
    # matrix, even when some class is neither present nor predicted.
    train_conf_matrix = confusion_matrix(
        all_labels_train, all_preds_train, labels=list(range(num_classes))
    )
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader)}")
    print(f"Training Confusion Matrix:\n{train_conf_matrix}")


# Score the trained model on the validation split without tracking gradients.
model.eval()
all_labels_val = []
all_preds_val = []
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        all_labels_val.extend(labels.cpu().numpy())
        all_preds_val.extend(preds.cpu().numpy())

# Pin the label set so the matrix is always num_classes x num_classes. Without
# it, a class missing from both labels and predictions shrinks the matrix and
# it no longer lines up with the num_classes tick labels used when plotting.
val_conf_matrix = confusion_matrix(
    all_labels_val, all_preds_val, labels=list(range(num_classes))
)
print(f"Validation Confusion Matrix:\n{val_conf_matrix}")


def plot_confusion_matrix(conf_matrix, class_names):
    """Show ``conf_matrix`` as an annotated heatmap.

    Args:
        conf_matrix: Matrix of counts; cells are annotated with the ``'d'``
            (integer) format.
        class_names: Labels placed on both axes.
    """
    _, axis = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=axis,
    )
    axis.set_xlabel('Predicted')
    axis.set_ylabel('Actual')
    axis.set_title('Confusion Matrix')
    plt.show()


# Plot the validation matrix, labelling the axes with the class indices.
class_names = list(map(str, range(num_classes)))
plot_confusion_matrix(val_conf_matrix, class_names)


# Persist only the weights (state dict), not the whole module object.
trained_weights = model.state_dict()
torch.save(trained_weights, "classifier_model_without_confidence.pth")
print("Classifier model saved to classifier_model_without_confidence.pth")


## Evaluate on Poisoned Data

In [None]:
import torch
import torch.nn as nn
from torchvision import models


class ClassifierWithConfidence(nn.Module):
    """ResNet-18 features joined with a per-sample confidence value, then a two-layer head.

    Args:
        num_classes: Number of scores produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.cnn = models.resnet18(weights='ResNet18_Weights.DEFAULT')
        feature_dim = self.cnn.fc.in_features
        # Drop the stock head so the backbone returns its pooled features.
        self.cnn.fc = nn.Identity()
        # The extra input slot carries the confidence value.
        self.fc1 = nn.Linear(feature_dim + 1, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x, confidence):
        """Return class scores for images ``x`` and their 1-D ``confidence`` tensor."""
        features = self.cnn(x)
        confidence_column = confidence.unsqueeze(1)
        joined = torch.cat((features, confidence_column), dim=1)
        hidden = torch.relu(self.fc1(joined))
        return self.fc2(hidden)


num_classes = 9
model = ClassifierWithConfidence(num_classes)

# Decide the device before loading so the checkpoint can be mapped onto it.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_path = '/content/classifier_model_with_confidence.pth'

# Load the state dictionary. map_location remaps tensors that were saved from
# a GPU session, so loading also works when only the CPU is available.
state_dict = torch.load(model_path, map_location=device)


model.load_state_dict(state_dict)


model = model.to(device)


In [None]:
import os
import shutil

image_dir = '/content/datasets/bone_age/train/adversarial_images'
original_label_dir = '/content/datasets/bone_age/train/labels'
adversarial_label_dir = '/content/datasets/bone_age/train/adversarial_labels'

# Fail early with an actionable message instead of a bare os.listdir error when
# the adversarial images have not been generated yet.
if not os.path.isdir(image_dir):
    raise FileNotFoundError(
        f"Adversarial image directory does not exist: {image_dir}. "
        "Generate the adversarial images before copying their labels."
    )

os.makedirs(adversarial_label_dir, exist_ok=True)

copied_count = 0
missing_count = 0

# Each adversarial image reuses the label file of the original image that has
# the same base name.
for image_file in sorted(os.listdir(image_dir)):
    if not image_file.endswith('.png'):
        continue

    base_name = os.path.splitext(image_file)[0]
    original_label_file = os.path.join(original_label_dir, f"{base_name}.txt")
    adversarial_label_file = os.path.join(adversarial_label_dir, f"{base_name}.txt")

    if os.path.isfile(original_label_file):
        shutil.copyfile(original_label_file, adversarial_label_file)
        copied_count += 1
    else:
        # Only the exceptions are reported per file; successes are summarised
        # below so the cell output stays readable for large directories.
        missing_count += 1
        print(f"Original label file for {image_file} does not exist.")

print(f"Copied {copied_count} label file(s); {missing_count} image(s) had no original label.")
print(f"All matching label files have been copied to {adversarial_label_dir}.")


In [None]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# NOTE(review): this cell rebinds the notebook-level names `model` (now the YOLO
# wrapper) and `transform` (now a 416x416 resize without normalisation). Cells
# that run afterwards and expect a classifier or its preprocessing under these
# names will pick up these objects instead.
model = YOLO("runs/detect/train/weights/best.pt")
model.model.to(device)  # Ensure the model's weights are moved to GPU if available

# directory containing original images and ground truth labels
original_image_dir = '/content/datasets/bone_age/train/images'
ground_truth_labels_dir = '/content/datasets/bone_age/train/labels'
# Path to save adversarial images
adv_image_dir = '/content/datasets/bone_age/train/adversarial_images'


# Inputs must already exist as directories; a plain file at either path is
# rejected here rather than failing later inside os.listdir.
if not os.path.isdir(original_image_dir):
    raise FileNotFoundError(f"Original image directory does not exist: {original_image_dir}")
if not os.path.isdir(ground_truth_labels_dir):
    raise FileNotFoundError(f"Ground truth labels directory does not exist: {ground_truth_labels_dir}")
# exist_ok=True makes creation idempotent without a separate existence check.
os.makedirs(adv_image_dir, exist_ok=True)


# Preprocessing for the attack: resize to 416x416 and convert to a tensor.
transform = transforms.Compose([
    transforms.Resize((416, 416)),
    transforms.ToTensor(),
])

#  dataset class to handle images without class folders
class OriginalDataset(Dataset):
    """Flat directory of PNG images paired with same-named ``.txt`` label files.

    Args:
        image_dir: Directory whose ``.png`` files become the samples.
        label_dir: Directory holding ``<name>.txt`` for every ``<name>.png``.
        transform: Optional callable applied to the RGB image after loading.
    """

    def __init__(self, image_dir, label_dir, transform=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        # Keep only regular .png files (a bare os.listdir also returns stray
        # files and sub-directories such as notebook checkpoint folders), and
        # sort them so the iteration order is reproducible.
        self.image_names = sorted(
            name
            for name in os.listdir(image_dir)
            if name.endswith('.png') and os.path.isfile(os.path.join(image_dir, name))
        )

    def __len__(self):
        """Return the number of PNG images found in ``image_dir``."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Return ``(image, labels, img_name)`` for the sample at position ``idx``."""
        img_name = self.image_names[idx]
        img_path = os.path.join(self.image_dir, img_name)
        # Swap only the extension; str.replace would also rewrite a '.png'
        # that appears in the middle of the name.
        label_path = os.path.join(self.label_dir, os.path.splitext(img_name)[0] + '.txt')

        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)

        labels = self.load_labels(label_path)
        return image, labels, img_name

    @staticmethod
    def load_labels(label_path):
        """Parse a label file into one list of floats per non-blank line."""
        with open(label_path, 'r') as f:
            # Blank lines are skipped so they do not produce empty rows.
            return [[float(value) for value in line.split()] for line in f if line.strip()]

# original dataset
original_dataset = OriginalDataset(
    image_dir=original_image_dir,
    label_dir=ground_truth_labels_dir,
    transform=transform,
)
# One image per batch, visited in dataset order.
data_loader = DataLoader(dataset=original_dataset, batch_size=1, shuffle=False)

# apply C&W attack on an image
def cw_attack(model, images, device, c=1e-4, kappa=0, steps=1000, lr=0.01):
    """Iteratively perturb ``images`` with Adam, driven by the model's top score.

    Every step minimises ``-outputs[0][..., 5:].max()`` with respect to the
    image pixels and then clamps the pixels back into ``[0, 1]``. No distance
    penalty between the perturbed and original images is applied.

    Args:
        model: Callable; ``model(x)[0]`` must be a tensor that can be sliced
            with ``[..., 5:]`` to a non-empty result.
        images: Image tensor to start from; it is not modified.
        device: Device the optimisation runs on.
        c: Accepted for interface compatibility; not used by the current loss.
        kappa: Accepted for interface compatibility; not used by the current loss.
        steps: Number of Adam updates.
        lr: Adam learning rate.

    Returns:
        A detached tensor with the same shape as ``images`` and values in
        ``[0, 1]``.
    """
    images = images.to(device)

    # Copy on the target device first and enable gradients last, so the tensor
    # handed to Adam is always a leaf.
    perturbed_images = images.detach().clone().requires_grad_(True)
    optimizer = torch.optim.Adam([perturbed_images], lr=lr)

    for _ in range(steps):
        outputs = model(perturbed_images)
        # NOTE(review): assumes class scores start at index 5 of the last axis
        # of outputs[0] - TODO confirm against the loaded detector's output
        # layout. Minimising this loss pushes the largest selected score
        # upward; confirm that sign is intended for the attack.
        loss = -outputs[0][..., 5:].max()

        # Differentiate with respect to the image only, so no gradients pile
        # up in the model's own parameters across steps and images.
        (pixel_grad,) = torch.autograd.grad(loss, perturbed_images)
        perturbed_images.grad = pixel_grad
        optimizer.step()

        # Clip the perturbed images to ensure they are within [0, 1]
        with torch.no_grad():
            perturbed_images.clamp_(0, 1)

    return perturbed_images.detach()

# Generate adversarial examples and save them
for img, ground_truth, img_name in data_loader:
    # The loader may hand the file name back wrapped in a tuple; unwrap it.
    img_name = img_name[0] if isinstance(img_name, tuple) else img_name

    adv_img_path = os.path.join(adv_image_dir, img_name)
    # Only images that do not yet have an output file are attacked.
    if not os.path.exists(adv_img_path):
        img = img.to(device)

        # Generate adversarial example using C&W attack
        perturbed_img = cw_attack(model.model, img, device)

        # Drop size-1 axes, move the first remaining axis last, scale to
        # 0-255 and store as 8-bit before writing the image file.
        adv_array = perturbed_img.squeeze().detach().cpu().numpy()
        adv_image_np = (adv_array.transpose(1, 2, 0) * 255).astype(np.uint8)
        adv_image = Image.fromarray(adv_image_np)
        adv_image.save(adv_img_path)

print("Adversarial images generated and saved.")


In [None]:
test_images_dir = '/content/datasets/bone_age/train/adversarial_images'
test_label_dir = '/content/datasets/bone_age/train/adversarial_labels'

# The adversarial-generation cell rebinds `transform` to a 416x416 pipeline
# without normalisation and `model` to the YOLO wrapper. Build the classifier's
# preprocessing and reload the classifier here, so this cell evaluates the
# right model with the right inputs regardless of what ran before it.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

model = ClassifierWithConfidence(num_classes)
model.load_state_dict(
    torch.load('/content/classifier_model_with_confidence.pth', map_location=device)
)
model = model.to(device)

test_dataset = ClassificationDataset(test_images_dir, test_label_dir, transform=eval_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model.eval()
all_labels_test = []
all_preds_test = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        # Zeros are supplied as the confidence input for every image.
        outputs = model(images, torch.zeros(len(images)).to(device))
        _, preds = torch.max(outputs, 1)
        all_labels_test.extend(labels.cpu().numpy())
        all_preds_test.extend(preds.cpu().numpy())


# Pin the label set so the matrix is always num_classes x num_classes and lines
# up with the num_classes tick labels used when plotting.
test_conf_matrix = confusion_matrix(
    all_labels_test, all_preds_test, labels=list(range(num_classes))
)
print(f"Test Confusion Matrix:\n{test_conf_matrix}")


def plot_confusion_matrix(conf_matrix, class_names):
    """Render ``conf_matrix`` as an annotated heatmap and show the figure.

    Args:
        conf_matrix: Matrix of counts; cells are annotated with the ``'d'``
            (integer) format.
        class_names: Labels placed on both axes.
    """
    _, axis = plt.subplots(figsize=(10, 8))
    tick_labels = dict(xticklabels=class_names, yticklabels=class_names)
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=axis, **tick_labels)
    axis.set_xlabel('Predicted')
    axis.set_ylabel('Actual')
    axis.set_title('Confusion Matrix')
    plt.show()


# Class indices as strings; replace with actual class names if available.
class_names = list(map(str, range(num_classes)))
plot_confusion_matrix(test_conf_matrix, class_names)


In [None]:
import json
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


class ClassifierWithoutConfidence(nn.Module):
    """ResNet-18 classifier with its last layer resized to ``num_classes`` outputs.

    Args:
        num_classes: Output width of the replacement fully connected layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        resnet = models.resnet18(weights='ResNet18_Weights.DEFAULT')
        head_inputs = resnet.fc.in_features
        # Fully connected layer for classification
        resnet.fc = nn.Linear(head_inputs, num_classes)
        self.cnn = resnet

    def forward(self, x):
        """Return the class scores the backbone produces for ``x``."""
        scores = self.cnn(x)
        return scores


num_classes = 9
model = ClassifierWithoutConfidence(num_classes)

# Decide the device (CPU or GPU) before loading so the checkpoint can be
# mapped onto it.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_path = '/content/classifier_model_without_confidence.pth'

# Load the state dictionary. map_location remaps tensors that were saved from
# a GPU session, so loading also works when only the CPU is available.
state_dict = torch.load(model_path, map_location=device)


model.load_state_dict(state_dict)

# Move the model to the appropriate device (CPU or GPU)
model = model.to(device)

In [None]:

test_images_dir = '/content/datasets/bone_age/train/adversarial_images'
test_label_dir = '/content/datasets/bone_age/train/adversarial_labels'

# The adversarial-generation cell rebinds `transform` to a 416x416 pipeline
# without normalisation. Build the classifier's preprocessing explicitly so
# the model is evaluated on inputs prepared the same way as in training.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_dataset = ClassificationDataset(test_images_dir, test_label_dir, transform=eval_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model.eval()
all_labels_test = []
all_preds_test = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        all_labels_test.extend(labels.cpu().numpy())
        all_preds_test.extend(preds.cpu().numpy())


# Pin the label set so the matrix is always num_classes x num_classes and lines
# up with the num_classes tick labels used when plotting.
test_conf_matrix = confusion_matrix(
    all_labels_test, all_preds_test, labels=list(range(num_classes))
)
print(f"Test Confusion Matrix:\n{test_conf_matrix}")

def plot_confusion_matrix(conf_matrix, class_names):
    """Display ``conf_matrix`` as an annotated heatmap.

    Args:
        conf_matrix: Matrix of counts; cells are annotated with the ``'d'``
            (integer) format.
        class_names: Tick labels for both the predicted and actual axes.
    """
    _, axis = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        conf_matrix,
        ax=axis,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    for setter, text in (
        (axis.set_xlabel, 'Predicted'),
        (axis.set_ylabel, 'Actual'),
        (axis.set_title, 'Confusion Matrix'),
    ):
        setter(text)
    plt.show()


# Axis labels are the class indices rendered as strings.
class_names = list(map(str, range(num_classes)))
plot_confusion_matrix(test_conf_matrix, class_names)
