In [1]:
from fgsm import perform_fgsm_attack
from dataloader import get_dataset

from __future__ import print_function, division
import argparse
import os
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import torchvision.utils as vutils
import torch.nn.functional as F
import numpy as np
import time
from torch.utils.tensorboard import SummaryWriter
# from datasets import __datasets__
from models import __models__, model_loss
from utils import *
from torch.utils.data import DataLoader, random_split
import gc
from torchvision import transforms

from dataloader import get_dataset
import mlflow

In [5]:
""" Implementation of:
Agnihotri, Shashank, Jung, Steffen, Keuper, Margret. "CosPGD: a unified white-box adversarial attack for pixel-wise prediction tasks." 
arXiv preprint arXiv:2302.02213 (2023).

A tool for benchmarking adversarial robustness of pixel-wise prediction tasks.

MIT License

Copyright (c) 2023 Shashank Agnihotri, Steffen Jung, Prof. Dr. Margret Keuper

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import torch

class Attack:
    def __init__(self):
        pass
    
    """
    Function to take one attack step in the l-infinity norm constraint

    perturbed_image: Float tensor of shape [batch size, channels, (image spatial resolution)]
    epsilon: Float tensor: permissible epsilon range
    data_grad: gradient on the image input to the model w.r.t. the loss backpropagated
    orig_image: Float tensor of shape [batch size, channels, (image spatial resolution)]: Original unattacked image, before adding any noise
    alpha: Float tensor: attack step size
    targeted: boolean: Targeted attack or not
    clamp_min: Float tensor: minimum clip value for clipping the perturbed image back to the permisible input space
    clamp_max: Float tensor: maximum clip value for clipping the perturbed image back to the permisible input space
    grad_scale: tensor either single value or of the same shape as data_grad: to scale the added noise
    """
    @staticmethod
    def step_inf(
            perturbed_image,
            epsilon,
            data_grad,
            orig_image,
            alpha,
            targeted,
            clamp_min = 0,
            clamp_max = 1,
            grad_scale = None
        ):
        # Collect the element-wise sign of the data gradient
        sign_data_grad = alpha*data_grad.sign()
        if targeted:
            sign_data_grad *= -1
        if grad_scale is not None:
            sign_data_grad *= grad_scale
        # Create the perturbed image by adjusting each pixel of the input image
        perturbed_image = perturbed_image.detach() + sign_data_grad
        # Adding clipping to maintain [0,1] range
        delta = torch.clamp(perturbed_image - orig_image, min=-epsilon, max=epsilon)
        perturbed_image = torch.clamp(orig_image + delta, clamp_min, clamp_max).detach()
        return perturbed_image
    
    
    """
    Function to take one attack step in the l2 norm constraint
    
    perturbed_image: Float tensor of shape [batch size, channels, (image spatial resolution)]
    epsilon: Float tensor: permissible epsilon range
    data_grad: gradient on the image input to the model w.r.t. the loss backpropagated
    orig_image: Float tensor of shape [batch size, channels, (image spatial resolution)]: Original unattacked image, before adding any noise
    alpha: Float tensor: attack step size
    targeted: boolean: Targeted attack or not
    clamp_min: Float tensor: minimum clip value for clipping the perturbed image back to the permisible input space
    clamp_max: Float tensor: maximum clip value for clipping the perturbed image back to the permisible input space
    grad_scale: tensor either single value or of the same shape as data_grad: to scale the added noise
    """
    @staticmethod
    def step_l2(
            perturbed_image,
            epsilon,
            data_grad,
            orig_image,
            alpha,
            targeted,
            clamp_min = 0,
            clamp_max = 1,
            grad_scale = None
        ):
        # normalize gradients
        if targeted:
            data_grad *= -1
        data_grad = Attack.lp_normalize(
            data_grad,
            p = 2,
            epsilon = 1.0,
            decrease_only = False
        )
        if grad_scale is not None:
            data_grad *= grad_scale
        # Create the perturbed image by adjusting each pixel of the input image
        perturbed_image = perturbed_image.detach() + alpha*data_grad
        # clip to l2 ball
        delta = Attack.lp_normalize(
            noise = perturbed_image - orig_image,
            p = 2,
            epsilon = epsilon,
            decrease_only = True
        )
        # Adding clipping to maintain [0,1] range
        perturbed_image = torch.clamp(orig_image + delta, clamp_min, clamp_max).detach()
        return perturbed_image
    
    """
    Clamping noise in the l-p norm constraint
    noise: tensor of shape [batch size, (image spatial resolution)]: the noise to be clamped
    p: int: the norm
    epsilon: Float tensor: permissible epsilon range
    decrease_only: boolean: to only clamp the upper bound and not the lower bound
    """
    @staticmethod
    def lp_normalize(
            noise,
            p,
            epsilon = None,
            decrease_only = False
        ):
        if epsilon is None:
            epsilon = torch.tensor(1.0)
        denom = torch.norm(noise, p=p, dim=(-1, -2, -3))
        denom = torch.maximum(denom, torch.tensor(1E-12)).unsqueeze(1).unsqueeze(1).unsqueeze(1)
        if decrease_only:
            denom = torch.maximum(denom/epsilon, torch.tensor(1))
        else:
            denom = denom / epsilon
        return noise / denom
    
    """
    Initializing noise in the l-infinity norm constraint

    epsilon: Float tensor: permissible epsilon range
    images: Float tensor of shape [batch size, channels, (image spatial resolution)]: Original unattacked image, before adding any noise    
    clamp_min: Float tensor: minimum clip value for clipping the perturbed image back to the permisible input space
    clamp_max: Float tensor: maximum clip value for clipping the perturbed image back to the permisible input space    
    """
    @staticmethod
    def init_linf(
            images,
            epsilon,
            clamp_min = 0,
            clamp_max = 1,
        ):
        noise = torch.FloatTensor(images.shape).uniform_(-epsilon, epsilon).to(images.device)
        images = images + noise
        images = images.clamp(clamp_min, clamp_max)
        return images
    
    
    """
    Initializing noise in the l-2 norm constraint

    epsilon: Float tensor: permissible epsilon range
    images: Float tensor of shape [batch size, channels, (image spatial resolution)]: Original unattacked image, before adding any noise    
    clamp_min: Float tensor: minimum clip value for clipping the perturbed image back to the permisible input space
    clamp_max: Float tensor: maximum clip value for clipping the perturbed image back to the permisible input space
    """
    @staticmethod
    def init_l2(
            images,
            epsilon,
            clamp_min = 0,
            clamp_max = 1,
        ):
        noise = torch.FloatTensor(images.shape).uniform_(-1, 1).to(images.device)
        noise = Attack.lp_normalize(
            noise = noise,
            p = 2,
            epsilon = epsilon,
            decrease_only = False
        )
        images = images + noise
        images = images.clamp(clamp_min, clamp_max)
        return images
    
    
    """
    Scaling of the pixel-wise loss as proposed by: 
    Gu, Jindong, et al. "Segpgd: An effective and efficient adversarial attack for evaluating and boosting segmentation robustness." 
    European Conference on Computer Vision. Cham: Springer Nature Switzerland, 2022.

    predictions: Float tensor of shape [batch size, channel, (image spatial resolution)]: Predictions made by the model
    labels: The ground truth/target labels, for semantic segmentation index tensor of the shape: [batch size, channel, (image spatial resolution)].
                                     for pixel-wise regression tasks, same shape as predictions
    loss: Float tensor: The loss between the predictions and the ground truth/target
    iteration: Current attack iteration for calculating lambda as used in SegPGD
    iterations: Total number of attack iterations for calculating lambda as used in SegPGD
    targeted: boolean: Targeted attack or not
    """
    @staticmethod
    def segpgd_scale(
            predictions,
            labels,
            loss,
            iteration,
            iterations,
            targeted=False,
        ):
        lambda_t = iteration/(2*iterations)
        output_idx = torch.argmax(predictions, dim=1)
        if targeted:
            loss = torch.sum(
                torch.where(
                    output_idx == labels,
                    lambda_t*loss,
                    (1-lambda_t)*loss
                )
            ) / (predictions.shape[-2]*predictions.shape[-1])
        else:
            loss = torch.sum(
                torch.where(
                    output_idx == labels,
                    (1-lambda_t)*loss,
                    lambda_t*loss
                )
            ) / (predictions.shape[-2]*predictions.shape[-1])
        return loss
    
    
    """
    Scaling of the pixel-wise loss as implemeted by: 
    Agnihotri, Shashank, et al. "CosPGD: a unified white-box adversarial attack for pixel-wise prediction tasks." 
    arXiv preprint arXiv:2302.02213 (2023).

    predictions: Float tensor of shape [batch size, channel, (image spatial resolution)]: Predictions made by the model
    labels: The ground truth/target labels, for semantic segmentation index tensor of the shape: [batch size, channel, (image spatial resolution)].
                                     for pixel-wise regression tasks, same shape as predictions
    loss: Float tensor: The loss between the predictions and the ground truth/target
    num_classes: int: For semantic segmentation the number of classes. None for pixel-wise regression tasks
    targeted: boolean: Targeted attack or not
    one_hot: boolean: To use one-hot encoding, SHOULD BE TRUE FOR SEMANTIC SEGMENTATION and FALSE FOR pixel-wise regression tasks
    """
    @staticmethod
    def cospgd_scale(
            predictions,
            labels,
            loss,
            num_classes=None,
            targeted=False,
            one_hot=True,
        ):
        if one_hot:
            transformed_target = torch.nn.functional.one_hot(
                torch.clamp(labels, labels.min(), num_classes-1),
                num_classes = num_classes
            ).permute(0,3,1,2)
        else:
            transformed_target = torch.nn.functional.softmax(labels, dim=1)
        cossim = torch.nn.functional.cosine_similarity(
            torch.nn.functional.softmax(predictions, dim=1),
            transformed_target,
            dim = 1
        )
        if targeted:
            cossim = 1 - cossim # if performing targeted attacks, we want to punish for dissimilarity to the target
        loss = cossim.detach() * loss
        return loss

In [2]:
import torch
import torch.nn.functional as F



""" Implementation of:
Agnihotri, Shashank, Jung, Steffen, Keuper, Margret. "CosPGD: a unified white-box adversarial attack for pixel-wise prediction tasks." 
arXiv preprint arXiv:2302.02213 (2023).

A tool for benchmarking adversarial robustness of pixel-wise prediction tasks.

MIT License

Copyright (c) 2023 Shashank Agnihotri, Steffen Jung, Prof. Dr. Margret Keuper

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import torch

class Attack:
    def __init__(self):
        pass
    
    """
    Function to take one attack step in the l-infinity norm constraint

    perturbed_image: Float tensor of shape [batch size, channels, (image spatial resolution)]
    epsilon: Float tensor: permissible epsilon range
    data_grad: gradient on the image input to the model w.r.t. the loss backpropagated
    orig_image: Float tensor of shape [batch size, channels, (image spatial resolution)]: Original unattacked image, before adding any noise
    alpha: Float tensor: attack step size
    targeted: boolean: Targeted attack or not
    clamp_min: Float tensor: minimum clip value for clipping the perturbed image back to the permisible input space
    clamp_max: Float tensor: maximum clip value for clipping the perturbed image back to the permisible input space
    grad_scale: tensor either single value or of the same shape as data_grad: to scale the added noise
    """
    @staticmethod
    def step_inf(
            perturbed_image,
            epsilon,
            data_grad,
            orig_image,
            alpha,
            targeted,
            clamp_min = 0,
            clamp_max = 1,
            grad_scale = None
        ):
        # Collect the element-wise sign of the data gradient
        sign_data_grad = alpha*data_grad.sign()
        if targeted:
            sign_data_grad *= -1
        if grad_scale is not None:
            sign_data_grad *= grad_scale
        # Create the perturbed image by adjusting each pixel of the input image
        perturbed_image = perturbed_image.detach() + sign_data_grad
        # Adding clipping to maintain [0,1] range
        delta = torch.clamp(perturbed_image - orig_image, min=-epsilon, max=epsilon)
        perturbed_image = torch.clamp(orig_image + delta, clamp_min, clamp_max).detach()
        return perturbed_image
    
    
    """
    Function to take one attack step in the l2 norm constraint
    
    perturbed_image: Float tensor of shape [batch size, channels, (image spatial resolution)]
    epsilon: Float tensor: permissible epsilon range
    data_grad: gradient on the image input to the model w.r.t. the loss backpropagated
    orig_image: Float tensor of shape [batch size, channels, (image spatial resolution)]: Original unattacked image, before adding any noise
    alpha: Float tensor: attack step size
    targeted: boolean: Targeted attack or not
    clamp_min: Float tensor: minimum clip value for clipping the perturbed image back to the permisible input space
    clamp_max: Float tensor: maximum clip value for clipping the perturbed image back to the permisible input space
    grad_scale: tensor either single value or of the same shape as data_grad: to scale the added noise
    """
    @staticmethod
    def step_l2(
            perturbed_image,
            epsilon,
            data_grad,
            orig_image,
            alpha,
            targeted,
            clamp_min = 0,
            clamp_max = 1,
            grad_scale = None
        ):
        # normalize gradients
        if targeted:
            data_grad *= -1
        data_grad = Attack.lp_normalize(
            data_grad,
            p = 2,
            epsilon = 1.0,
            decrease_only = False
        )
        if grad_scale is not None:
            data_grad *= grad_scale
        # Create the perturbed image by adjusting each pixel of the input image
        perturbed_image = perturbed_image.detach() + alpha*data_grad
        # clip to l2 ball
        delta = Attack.lp_normalize(
            noise = perturbed_image - orig_image,
            p = 2,
            epsilon = epsilon,
            decrease_only = True
        )
        # Adding clipping to maintain [0,1] range
        perturbed_image = torch.clamp(orig_image + delta, clamp_min, clamp_max).detach()
        return perturbed_image
    
    """
    Clamping noise in the l-p norm constraint
    noise: tensor of shape [batch size, (image spatial resolution)]: the noise to be clamped
    p: int: the norm
    epsilon: Float tensor: permissible epsilon range
    decrease_only: boolean: to only clamp the upper bound and not the lower bound
    """
    @staticmethod
    def lp_normalize(
            noise,
            p,
            epsilon = None,
            decrease_only = False
        ):
        if epsilon is None:
            epsilon = torch.tensor(1.0)
        denom = torch.norm(noise, p=p, dim=(-1, -2, -3))
        denom = torch.maximum(denom, torch.tensor(1E-12)).unsqueeze(1).unsqueeze(1).unsqueeze(1)
        if decrease_only:
            denom = torch.maximum(denom/epsilon, torch.tensor(1))
        else:
            denom = denom / epsilon
        return noise / denom
    
    """
    Initializing noise in the l-infinity norm constraint

    epsilon: Float tensor: permissible epsilon range
    images: Float tensor of shape [batch size, channels, (image spatial resolution)]: Original unattacked image, before adding any noise    
    clamp_min: Float tensor: minimum clip value for clipping the perturbed image back to the permisible input space
    clamp_max: Float tensor: maximum clip value for clipping the perturbed image back to the permisible input space    
    """
    @staticmethod
    def init_linf(
            images,
            epsilon,
            clamp_min = 0,
            clamp_max = 1,
        ):
        noise = torch.FloatTensor(images.shape).uniform_(-epsilon, epsilon).to(images.device)
        images = images + noise
        images = images.clamp(clamp_min, clamp_max)
        return images
    
    
    """
    Initializing noise in the l-2 norm constraint

    epsilon: Float tensor: permissible epsilon range
    images: Float tensor of shape [batch size, channels, (image spatial resolution)]: Original unattacked image, before adding any noise    
    clamp_min: Float tensor: minimum clip value for clipping the perturbed image back to the permisible input space
    clamp_max: Float tensor: maximum clip value for clipping the perturbed image back to the permisible input space
    """
    @staticmethod
    def init_l2(
            images,
            epsilon,
            clamp_min = 0,
            clamp_max = 1,
        ):
        noise = torch.FloatTensor(images.shape).uniform_(-1, 1).to(images.device)
        noise = Attack.lp_normalize(
            noise = noise,
            p = 2,
            epsilon = epsilon,
            decrease_only = False
        )
        images = images + noise
        images = images.clamp(clamp_min, clamp_max)
        return images
    
    
    """
    Scaling of the pixel-wise loss as proposed by: 
    Gu, Jindong, et al. "Segpgd: An effective and efficient adversarial attack for evaluating and boosting segmentation robustness." 
    European Conference on Computer Vision. Cham: Springer Nature Switzerland, 2022.

    predictions: Float tensor of shape [batch size, channel, (image spatial resolution)]: Predictions made by the model
    labels: The ground truth/target labels, for semantic segmentation index tensor of the shape: [batch size, channel, (image spatial resolution)].
                                     for pixel-wise regression tasks, same shape as predictions
    loss: Float tensor: The loss between the predictions and the ground truth/target
    iteration: Current attack iteration for calculating lambda as used in SegPGD
    iterations: Total number of attack iterations for calculating lambda as used in SegPGD
    targeted: boolean: Targeted attack or not
    """
    @staticmethod
    def segpgd_scale(
            predictions,
            labels,
            loss,
            iteration,
            iterations,
            targeted=False,
        ):
        lambda_t = iteration/(2*iterations)
        output_idx = torch.argmax(predictions, dim=1)
        if targeted:
            loss = torch.sum(
                torch.where(
                    output_idx == labels,
                    lambda_t*loss,
                    (1-lambda_t)*loss
                )
            ) / (predictions.shape[-2]*predictions.shape[-1])
        else:
            loss = torch.sum(
                torch.where(
                    output_idx == labels,
                    (1-lambda_t)*loss,
                    lambda_t*loss
                )
            ) / (predictions.shape[-2]*predictions.shape[-1])
        return loss
    
    
    """
    Scaling of the pixel-wise loss as implemeted by: 
    Agnihotri, Shashank, et al. "CosPGD: a unified white-box adversarial attack for pixel-wise prediction tasks." 
    arXiv preprint arXiv:2302.02213 (2023).

    predictions: Float tensor of shape [batch size, channel, (image spatial resolution)]: Predictions made by the model
    labels: The ground truth/target labels, for semantic segmentation index tensor of the shape: [batch size, channel, (image spatial resolution)].
                                     for pixel-wise regression tasks, same shape as predictions
    loss: Float tensor: The loss between the predictions and the ground truth/target
    num_classes: int: For semantic segmentation the number of classes. None for pixel-wise regression tasks
    targeted: boolean: Targeted attack or not
    one_hot: boolean: To use one-hot encoding, SHOULD BE TRUE FOR SEMANTIC SEGMENTATION and FALSE FOR pixel-wise regression tasks
    """
    @staticmethod
    def cospgd_scale(
            predictions,
            labels,
            loss,
            num_classes=None,
            targeted=False,
            one_hot=True,
        ):
        if one_hot:
            transformed_target = torch.nn.functional.one_hot(
                torch.clamp(labels, labels.min(), num_classes-1),
                num_classes = num_classes
            ).permute(0,3,1,2)
        else:
            transformed_target = torch.nn.functional.softmax(labels, dim=1)
        cossim = torch.nn.functional.cosine_similarity(
            torch.nn.functional.softmax(predictions, dim=1),
            transformed_target,
            dim = 1
        )
        if targeted:
            cossim = 1 - cossim # if performing targeted attacks, we want to punish for dissimilarity to the target
        loss = cossim.detach() * loss
        return loss

class CosPGDAttack:
    def __init__(self, model, epsilon, alpha, num_iterations, num_classes=None, targeted=False):
        self.model = model
        self.epsilon = epsilon
        self.alpha = alpha
        self.num_iterations = num_iterations
        self.num_classes = num_classes
        self.targeted = targeted
    
    def attack(self, left_image, right_image, labels):
        # Initialize perturbations for both left and right images
        perturbed_left = Attack.init_linf(left_image, self.epsilon)
        perturbed_right = Attack.init_linf(right_image, self.epsilon)
        
        for iteration in range(self.num_iterations):
            perturbed_left.requires_grad = True
            perturbed_right.requires_grad = True
            
            # Forward pass the perturbed images through the model
            outputs = self.model(perturbed_left, perturbed_right)
            
            # Compute the loss
            loss = F.mse_loss(outputs, labels)
            
            # Zero all existing gradients
            self.model.zero_grad()
            
            # Backward pass to compute gradients of the loss w.r.t the perturbed images
            loss.backward()
            
            # Collect the gradient data
            left_grad = perturbed_left.grad.data
            right_grad = perturbed_right.grad.data
            
            # Perform the attack step
            perturbed_left = Attack.step_inf(
                perturbed_image=perturbed_left,
                epsilon=self.epsilon,
                data_grad=left_grad,
                orig_image=left_image,
                alpha=self.alpha,
                targeted=self.targeted,
                clamp_min=0,
                clamp_max=1
            )
            
            perturbed_right = Attack.step_inf(
                perturbed_image=perturbed_right,
                epsilon=self.epsilon,
                data_grad=right_grad,
                orig_image=right_image,
                alpha=self.alpha,
                targeted=self.targeted,
                clamp_min=0,
                clamp_max=1
            )
        
        return perturbed_left, perturbed_right


In [None]:
# dataset_path = '/pfs/work7/workspace/scratch/ma_faroesch-team_project_fss2024/dataset/FlyingThings3D'
# train_dataset = get_dataset("sceneflow", dataset_path, architeture_name="CFNet", split='test') # maybe test -> test daten 

# model = __models__["cfnet"](284)
# model = nn.DataParallel(model)
# optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))
# best_val_loss = -1

# # load parameters
# start_epoch = 0

In [None]:
# image_left  = train_dataset.load_image(train_dataset.img_left_filenames[0])
# image_right = train_dataset.load_image(train_dataset.img_left_filenames[0])
# attack_epsilon = 0.03  # Epsilon value for the FGSM attack
# from torchvision import transforms
# transform = transforms.ToTensor()
# tensor_image_left= transform(image_left)
# tensor_image_left= torch.unsqueeze(tensor_image_left, 0)
# tensor_image_right= transform(image_right)
# tensor_image_righ= torch.unsqueeze(tensor_image_right, 0)
# preds, perturbed_inputs = perform_fgsm_attack(model, tensor_image_left ,tensor_image_right , attack_epsilon)

In [None]:
# image_left  = train_dataset.load_image(train_dataset.img_left_filenames[0])
# image_right = train_dataset.load_image(train_dataset.img_left_filenames[0])
# attack_epsilon = 0.03  # Epsilon value for the FGSM attack
# from torchvision import transforms
# transform = transforms.ToTensor()
# preds, perturbed_inputs = perform_fgsm_attack(model, transform(image_left), transform(image_right), attack_epsilon)

In [10]:
# # load images
# dataset_path = '/pfs/work7/workspace/scratch/ma_faroesch-team_project_fss2024/dataset/KITTI_2015'
# train_dataset = get_dataset("kitti2015", dataset_path, architeture_name="CFNet", split='train') # maybe test -> test daten 
# image_left  = train_dataset.load_image(train_dataset.img_left_filenames[0])
# image_right = train_dataset.load_image(train_dataset.img_left_filenames[0])
# transform = transforms.ToTensor()
# image_left = transform(image_left)
# image_right = transform(image_right)
# print(image_left)
# disparity = train_dataset.load_disp(train_dataset.disp_left_filenames[0])
# disparity = transform(disparity)

# model = __models__["cfnet"](284)
# model = nn.DataParallel(model)

# # disparity = disparity.unsqueeze(0) # how to get the labels ???? 

# epsilon = 0.03
# alpha = 0.01
# num_iterations = 10

# attacker = CosPGDAttack(model, epsilon, alpha, num_iterations, num_classes=None, targeted=False)
# perturbed_left_image, perturbed_right_image = attacker.attack(image_left.unsqueeze(0), image_right.unsqueeze(0), disparity.unsqueeze(0))

Loading kitti2015 dataset


AttributeError: 'KITTIBaseDataset' object has no attribute 'img_left_filenames'

In [3]:
from dataloader import cfnet, sttr, sttr_light, psmnet, hsmnet, gwcnet

In [5]:

# load images
dataset_path = '/pfs/work7/workspace/scratch/ma_faroesch-team_project_fss2024/dataset/FlyingThings3D'
train_dataset = get_dataset("sceneflow", dataset_path, architeture_name="CFNet", split='test') # maybe test -> test daten 
batch = next(iter(train_dataset))

# image_left  = train_dataset.load_image(train_dataset.img_left_filenames[0])
# image_right = train_dataset.load_image(train_dataset.img_left_filenames[0])
# transform = transforms.ToTensor()
# image_left = transform(image_left)
# image_right = transform(image_right)
# print(image_left.size())
# print(image_right.size())
# disparity = train_dataset.load_disp(train_dataset.disp_left_filenames[0])

# disparity = transform(disparity)
# th, tw = 256, 512
# print(cfnet.flow_transforms.RandomCrop((th, tw)).size)
# co_transform =  cfnet.flow_transforms.Compose([cfnet.flow_transforms.RandomCrop((th, tw)),
#         ])
# augmented, disparity = co_transform([image_left, image_right], disparity)
# image_left = augmented[0]
# image_right = augmented[1]
# print(image_left.size())
# print(image_right.size())
print(batch['left'].size())



# disparity = train_dataset.load_disp(train_dataset.disp_left_filenames[0])
# disparity = transform(disparity)

model = __models__["cfnet"](284)
model = nn.DataParallel(model)



epsilon = 0.03
alpha = 0.01
num_iterations = 10

attacker = CosPGDAttack(model, epsilon, alpha, num_iterations, num_classes=None, targeted=False)
# perturbed_left_image, perturbed_right_image = attacker.attack(image_left, image_right, disparity)
perturbed_left_image, perturbed_right_image = attacker.attack(batch['left'].unsqueeze(0), batch['right'].unsqueeze(0), batch['disparity'].unsqueeze(0))


Loading sceneflow dataset
torch.Size([3, 512, 960])
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activat

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 18 but got size 17 for tensor number 1 in the list.

In [None]:
## for loop einbauen - um verschiedene schritte zu machen 

In [4]:
dataset_path = '/pfs/work7/workspace/scratch/ma_faroesch-team_project_fss2024/dataset/FlyingThings3D'
train_dataset = get_dataset("sceneflow", dataset_path, architeture_name="CFNet", split='test') # maybe test -> test daten 

model = __models__["cfnet"](284)
model = nn.DataParallel(model)
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))
best_val_loss = -1

# load parameters
start_epoch = 0

Loading sceneflow dataset
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activation loaded...
Mish activat

In [11]:
image_left  = train_dataset.load_image(train_dataset.img_left_filenames[0])
image_right = train_dataset.load_image(train_dataset.img_left_filenames[0])
attack_epsilon = 0.03  # Epsilon value for the FGSM attack
from torchvision import transforms
transform = transforms.ToTensor()
preds, perturbed_inputs = perform_fgsm_attack(model, transform(image_left), transform(image_right), attack_epsilon)

ValueError: expected 4D input (got 3D input)

In [14]:
image_left  = train_dataset.load_image(train_dataset.img_left_filenames[0])
image_right = train_dataset.load_image(train_dataset.img_left_filenames[0])
attack_epsilon = 0.03  # Epsilon value for the FGSM attack
from torchvision import transforms
transform = transforms.ToTensor()
tensor_image_left= transform(image_left)
tensor_image_left= torch.unsqueeze(tensor_image_left, 0)
tensor_image_right= transform(image_right)
tensor_image_righ= torch.unsqueeze(tensor_image_right, 0)
preds, perturbed_inputs = perform_fgsm_attack(model, tensor_image_left ,tensor_image_right , attack_epsilon)



RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 135 but got size 136 for tensor number 1 in the list.

In [4]:
import torch
from torchattacks import FGSM
from torchvision import transforms
from typing import Optional

def perform_fgsm_attack(
    model,
    left: torch.Tensor,
    right: torch.Tensor,
    attack_epsilon: float,
    targeted_disparity: Optional[torch.Tensor] = None,
    targeted: bool = False,
):
    # Ensure the model is in evaluation mode
    model.eval()

    # If targeted, set the target, else use the model's own prediction as the target
    if targeted:
        labels = targeted_disparity.unsqueeze(0)
    else:
        with torch.no_grad():
            labels = model(left, right)["disparities"]

    # Instantiate the FGSM attack from torchattacks
    attack = FGSM(model, eps=attack_epsilon)
    
    # If targeted, set the target label for the attack
    if targeted:
        attack.set_targeted_mode()

    # Perform the attack
    adv_left = attack(left, labels)
    adv_right = attack(right, labels)

    # Prepare the perturbed inputs for the model
    perturbed_inputs = {"left": adv_left, "right": adv_right}

    # Get predictions on the perturbed images
    preds = model(perturbed_inputs["left"], perturbed_inputs["right"])

    return preds, perturbed_inputs

# Load and transform images
image_left = train_dataset.load_image(train_dataset.img_left_filenames[0])
image_right = train_dataset.load_image(train_dataset.img_right_filenames[0])
common_size = (256, 256)  # Define the common size for resizing and cropping

# Define the transformation including resize and center crop
transform = transforms.Compose([
    transforms.Resize(common_size),
    transforms.CenterCrop(common_size),
    transforms.ToTensor()
])

# Transform the images and add a batch dimension
image_left_tensor = transform(image_left).unsqueeze(0)  # Add batch dimension
image_right_tensor = transform(image_right).unsqueeze(0)  # Add batch dimension

# Epsilon value for the FGSM attack
attack_epsilon = 0.03

# Perform the FGSM attack
preds, perturbed_inputs = perform_fgsm_attack(model, image_left_tensor, image_right_tensor, attack_epsilon)




RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 18 but got size 17 for tensor number 1 in the list.

In [5]:
import torch
from torchattacks import FGSM
from torchvision import transforms
from typing import Optional

def perform_fgsm_attack(
    model,
    left: torch.Tensor,
    right: torch.Tensor,
    attack_epsilon: float,
    targeted_disparity: Optional[torch.Tensor] = None,
    targeted: bool = False,
):
    # Ensure the model is in evaluation mode
    model.eval()

    # If targeted, set the target, else use the model's own prediction as the target
    if targeted:
        labels = targeted_disparity.unsqueeze(0)
    else:
        with torch.no_grad():
            labels = model(left, right)["disparities"]

    # Instantiate the FGSM attack from torchattacks
    attack = FGSM(model, eps=attack_epsilon)
    
    # If targeted, set the target label for the attack
    if targeted:
        attack.set_targeted_mode()

    # Perform the attack
    adv_left = attack(left, labels)
    adv_right = attack(right, labels)

    # Prepare the perturbed inputs for the model
    perturbed_inputs = {"left": adv_left, "right": adv_right}

    # Get predictions on the perturbed images
    preds = model(perturbed_inputs["left"], perturbed_inputs["right"])

    return preds, perturbed_inputs

# Load and transform images
image_left = train_dataset.load_image(train_dataset.img_left_filenames[0])
image_right = train_dataset.load_image(train_dataset.img_right_filenames[0])

# Ensure the images have the same size before transforming them to tensors
resize_transform = transforms.Resize((256, 512))  # Adjust the size as needed to match model's expected input size
image_left = resize_transform(image_left)
image_right = resize_transform(image_right)

# Define the transformation including to tensor conversion
transform = transforms.Compose([
    transforms.ToTensor()
])

# Transform the images and add a batch dimension
image_left_tensor = transform(image_left).unsqueeze(0)  # Add batch dimension
image_right_tensor = transform(image_right).unsqueeze(0)  # Add batch dimension

# Ensure the dimensions are consistent
assert image_left_tensor.shape == image_right_tensor.shape, \
    f"Shapes are not matching: {image_left_tensor.shape} vs {image_right_tensor.shape}"

# Epsilon value for the FGSM attack
attack_epsilon = 0.03

# Perform the FGSM attack
preds, perturbed_inputs = perform_fgsm_attack(model, image_left_tensor, image_right_tensor, attack_epsilon)


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 18 but got size 17 for tensor number 1 in the list.

In [6]:
import torch
from torchattacks import FGSM
from torchvision import transforms
from typing import Optional

def perform_fgsm_attack(
    model,
    left: torch.Tensor,
    right: torch.Tensor,
    attack_epsilon: float,
    targeted_disparity: Optional[torch.Tensor] = None,
    targeted: bool = False,
):
    # Ensure the model is in evaluation mode
    model.eval()

    # If targeted, set the target, else use the model's own prediction as the target
    if targeted:
        labels = targeted_disparity.unsqueeze(0)
    else:
        with torch.no_grad():
            labels = model(left, right)["disparities"]

    # Instantiate the FGSM attack from torchattacks
    attack = FGSM(model, eps=attack_epsilon)
    
    # If targeted, set the target label for the attack
    if targeted:
        attack.set_targeted_mode()

    # Perform the attack
    adv_left = attack(left, labels)
    adv_right = attack(right, labels)

    # Prepare the perturbed inputs for the model
    perturbed_inputs = {"left": adv_left, "right": adv_right}

    # Get predictions on the perturbed images
    preds = model(perturbed_inputs["left"], perturbed_inputs["right"])

    return preds, perturbed_inputs

# Load and transform images
image_left = train_dataset.load_image(train_dataset.img_left_filenames[0])
image_right = train_dataset.load_image(train_dataset.img_right_filenames[0])

# Ensure the images have the same size before transforming them to tensors
resize_transform = transforms.Resize((256, 512))  # Adjust the size as needed to match model's expected input size
image_left = resize_transform(image_left)
image_right = resize_transform(image_right)

# Define the transformation including to tensor conversion
transform = transforms.Compose([
    transforms.ToTensor()
])

# Transform the images and add a batch dimension
image_left_tensor = transform(image_left).unsqueeze(0)  # Add batch dimension
image_right_tensor = transform(image_right).unsqueeze(0)  # Add batch dimension

# Ensure the dimensions are consistent
assert image_left_tensor.shape == image_right_tensor.shape, \
    f"Shapes are not matching: {image_left_tensor.shape} vs {image_right_tensor.shape}"

# Epsilon value for the FGSM attack
attack_epsilon = 0.03

# Perform the FGSM attack
try:
    preds, perturbed_inputs = perform_fgsm_attack(model, image_left_tensor, image_right_tensor, attack_epsilon)
except RuntimeError as e:
    print(f"RuntimeError: {e}")
    print(f"Left image tensor shape: {image_left_tensor.shape}")
    print(f"Right image tensor shape: {image_right_tensor.shape}")

    # Check intermediate shapes if possible
    with torch.no_grad():
        features_left = model.feature_extraction(image_left_tensor)
        features_right = model.feature_extraction(image_right_tensor)
        print(f"Features left shape: {features_left['gw4'].shape}")
        print(f"Features right shape: {features_right['gw4'].shape}")

    raise


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 18 but got size 17 for tensor number 1 in the list.
Left image tensor shape: torch.Size([1, 3, 256, 512])
Right image tensor shape: torch.Size([1, 3, 256, 512])


AttributeError: 'DataParallel' object has no attribute 'feature_extraction'