In [None]:
# Query GPU 0 via the IPython shell escape (notebook-only syntax, not plain Python).
gpu_info = !nvidia-smi -i 0
# Join the captured output lines back into a single printable string.
gpu_info = '\n'.join(gpu_info)
print(gpu_info)

from datetime import datetime
from functools import partial
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.models import resnet
from tqdm import tqdm
import argparse
import json
import math
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
import argparse
# Hyper-parameters of the MoCo run, declared through argparse so the same
# definitions can be parsed from a command line or from inside the notebook.
parser = argparse.ArgumentParser(description='Train MoCo on CIFAR-10')

parser.add_argument('-a', '--arch', default='resnet18')

# lr: 0.06 for batch 512 (or 0.03 for batch 256)
# NOTE(review): the default batch size below is 128, which the note above does
# not cover -- confirm the intended lr/batch-size pairing.
parser.add_argument('--lr', '--learning-rate', default=0.06, type=float, metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--epochs', default=200, type=int, metavar='N', help='number of total epochs to run')
parser.add_argument('--schedule', default=[120, 160], nargs='*', type=int, help='learning rate schedule (when to drop lr by 10x); does not take effect if --cos is on')
parser.add_argument('--cos', action='store_true', help='use cosine lr schedule')

parser.add_argument('--batch-size', default=128, type=int, metavar='N', help='mini-batch size')
parser.add_argument('--wd', default=5e-4, type=float, metavar='W', help='weight decay')

# moco specific configs:
parser.add_argument('--moco-dim', default=128, type=int, help='feature dimension')
parser.add_argument('--moco-k', default=4096, type=int, help='queue size; number of negative keys')
parser.add_argument('--moco-m', default=0.99, type=float, help='moco momentum of updating key encoder')
parser.add_argument('--moco-t', default=0.1, type=float, help='softmax temperature')

parser.add_argument('--bn-splits', default=8, type=int, help='simulate multi-gpu behavior of BatchNorm in one gpu; 1 is SyncBatchNorm in multi-gpu')

parser.add_argument('--symmetric', action='store_true', help='use a symmetric loss function that backprops to both crops')

# knn monitor
parser.add_argument('--knn-k', default=200, type=int, help='k in kNN monitor')
parser.add_argument('--knn-t', default=0.1, type=float, help='softmax temperature in kNN monitor; could be different with moco-t')

# utils
parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
parser.add_argument('--results-dir', default='', type=str, metavar='PATH', help='path to cache (default: none)')

# The triple-quoted string below is a disabled alternative (a no-op expression
# statement) kept for running the code as a command-line script.
'''
args = parser.parse_args()  # running in command line
'''
# Parsing an empty argument string yields every default declared above.
args = parser.parse_args('')  # running in ipynb

# set command line arguments here when running in ipynb
# (these assignments override the parsed defaults)
args.epochs = 200
args.cos = True
args.schedule = []  # cos in use
args.symmetric = False
# Fall back to a timestamped output directory when none was supplied.
if args.results_dir == '':
    args.results_dir = './cache-' + datetime.now().strftime("%Y-%m-%d-%H-%M-%S-moco")

print(args)

In [None]:
class CIFAR10Pair(CIFAR10):
    """CIFAR10 variant that returns two views of one image instead of (image, label).

    ``__getitem__`` applies ``self.transform`` twice to the same PIL image, so a
    random transform produces two independently augmented views. Labels are not
    returned.
    """
    def __getitem__(self, index):
        """Return ``(im_1, im_2)``, two transformed copies of the image at ``index``.

        When no transform is configured, the same untouched PIL image is
        returned for both positions (previously this path raised
        ``UnboundLocalError``).
        """
        img = Image.fromarray(self.data[index])

        if self.transform is None:
            return img, img

        # two separate calls so random augmentations are drawn independently
        im_1 = self.transform(img)
        im_2 = self.transform(img)
        return im_1, im_2

# Random augmentation pipeline; CIFAR10Pair applies it twice per image to
# obtain the two views fed to the model.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(32),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])])

# Deterministic pipeline (tensor conversion + the same normalisation constants)
# used for the feature bank and the test set.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])])

# data prepare
# Training pairs: shuffled, incomplete last batch dropped.
train_data = CIFAR10Pair(root='data', train=True, transform=train_transform, download=True)
train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=0, pin_memory=True, drop_last=True)

# Un-augmented training split, passed to the kNN monitor as its memory/feature bank.
memory_data = CIFAR10(root='data', train=True, transform=test_transform, download=True)
memory_loader = DataLoader(memory_data, batch_size=args.batch_size, shuffle=False, num_workers=0, pin_memory=True)

# Held-out split classified against the feature bank.
test_data = CIFAR10(root='data', train=False, transform=test_transform, download=True)
test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=False, num_workers=0, pin_memory=True)


In [5]:
# SplitBatchNorm: simulate multi-gpu behavior of BatchNorm in one gpu by splitting along the batch dimension
# implementation adapted from https://github.com/davidcpage/cifar10-fast/blob/master/torch_backend.py
class SplitBatchNorm(nn.BatchNorm2d):
    """BatchNorm2d whose batch statistics are computed per sub-batch.

    While batch statistics are in use, the (N, C, H, W) input is viewed as
    (N / num_splits, C * num_splits, H, W), so each of the ``num_splits``
    groups of samples is normalised with its own mean and variance. The
    per-group running statistics are then averaged back into the module's
    buffers. Otherwise the layer behaves like a plain BatchNorm2d using the
    running statistics.
    """

    def __init__(self, num_features, num_splits, **kw):
        super().__init__(num_features, **kw)
        self.num_splits = num_splits

    def forward(self, input):
        N, C, H, W = input.shape
        use_batch_stats = self.training or not self.track_running_stats

        if not use_batch_stats:
            # inference path: ordinary normalisation with the stored statistics
            return nn.functional.batch_norm(
                input, self.running_mean, self.running_var,
                self.weight, self.bias, False, self.momentum, self.eps)

        splits = self.num_splits
        # one copy of the running statistics / affine parameters per split
        mean_per_split = self.running_mean.repeat(splits)
        var_per_split = self.running_var.repeat(splits)
        folded = input.view(-1, C * splits, H, W)
        normalised = nn.functional.batch_norm(
            folded, mean_per_split, var_per_split,
            self.weight.repeat(splits), self.bias.repeat(splits),
            True, self.momentum, self.eps)
        outcome = normalised.view(N, C, H, W)

        # batch_norm updated the per-split copies in place; average them back
        self.running_mean.data.copy_(mean_per_split.view(splits, C).mean(dim=0))
        self.running_var.data.copy_(var_per_split.view(splits, C).mean(dim=0))
        return outcome

class ModelBase(nn.Module):
    """
    ResNet backbone adapted with the common CIFAR recipe.

    Relative to the ImageNet ResNet it:
    (i) swaps conv1 for a 3x3, stride-1 convolution
    (ii) drops the max-pool layer
    A Flatten is inserted in front of the final Linear layer so that the
    children can be chained in a single ``nn.Sequential``.

    Args:
        feature_dim: passed as ``num_classes`` to the ResNet constructor,
            i.e. the size of the output vector.
        arch: attribute name looked up on ``torchvision.models.resnet``.
        bn_splits: values > 1 select SplitBatchNorm with that many splits,
            otherwise ``nn.BatchNorm2d`` is used.
    """
    def __init__(self, feature_dim=128, arch=None, bn_splits=16):
        super(ModelBase, self).__init__()

        # pick the normalisation layer
        if bn_splits > 1:
            norm_layer = partial(SplitBatchNorm, num_splits=bn_splits)
        else:
            norm_layer = nn.BatchNorm2d
        backbone = getattr(resnet, arch)(num_classes=feature_dim, norm_layer=norm_layer)

        layers = []
        for name, module in backbone.named_children():
            if name == 'conv1':
                module = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
            if isinstance(module, nn.MaxPool2d):
                continue
            if isinstance(module, nn.Linear):
                layers.append(nn.Flatten(1))
            layers.append(module)

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        # note: the output is not L2-normalised here
        return self.net(x)

In [None]:
class ModelMoCo(nn.Module):
    """MoCo model: query encoder, momentum-updated key encoder and a key queue.

    Args:
        dim: feature dimension produced by both encoders.
        K: queue size, i.e. number of negative keys.
        m: momentum used when updating the key encoder from the query encoder.
        T: softmax temperature applied to the logits.
        arch: backbone name forwarded to ``ModelBase``.
        bn_splits: number of BatchNorm splits forwarded to ``ModelBase``.
        symmetric: when True the loss is computed in both directions
            (im1->im2 and im2->im1) and summed.
    """
    def __init__(self, dim=128, K=4096, m=0.99, T=0.1, arch='resnet18', bn_splits=8, symmetric=True):
        super(ModelMoCo, self).__init__()

        self.K = K
        self.m = m
        self.T = T
        self.symmetric = symmetric

        # create the encoders
        self.encoder_q = ModelBase(feature_dim=dim, arch=arch, bn_splits=bn_splits)
        self.encoder_k = ModelBase(feature_dim=dim, arch=arch, bn_splits=bn_splits)

        for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()):
            param_k.data.copy_(param_q.data)  # initialize
            param_k.requires_grad = False  # not update by gradient

        # create the queue: dim x K, every column L2-normalised
        self.register_buffer("queue", torch.randn(dim, K))
        self.queue = nn.functional.normalize(self.queue, dim=0)

        self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long))

    @torch.no_grad()
    def _momentum_update_key_encoder(self):
        """
        Momentum update of the key encoder: k <- m * k + (1 - m) * q
        """
        for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()):
            param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)

    @torch.no_grad()
    def _dequeue_and_enqueue(self, keys):
        """Overwrite the oldest queue columns with ``keys`` (N x dim) and advance the pointer."""
        batch_size = keys.shape[0]

        ptr = int(self.queue_ptr)
        # for simplicity the queue must hold a whole number of batches
        assert self.K % batch_size == 0, "queue size K must be divisible by the key batch size"

        # replace the keys at ptr (dequeue and enqueue)
        self.queue[:, ptr:ptr + batch_size] = keys.t()  # transpose
        ptr = (ptr + batch_size) % self.K  # move pointer

        self.queue_ptr[0] = ptr

    @torch.no_grad()
    def _batch_shuffle_single_gpu(self, x):
        """
        Batch shuffle, for making use of BatchNorm.

        Returns the shuffled batch and the index that restores the original order.
        """
        # random shuffle index, placed on the same device as the batch
        # (previously forced onto CUDA, which failed off-GPU)
        idx_shuffle = torch.randperm(x.shape[0]).to(x.device)

        # index for restoring
        idx_unshuffle = torch.argsort(idx_shuffle)

        return x[idx_shuffle], idx_unshuffle

    @torch.no_grad()
    def _batch_unshuffle_single_gpu(self, x, idx_unshuffle):
        """
        Undo batch shuffle.
        """
        return x[idx_unshuffle]

    def contrastive_loss(self, im_q, im_k):
        """Compute the InfoNCE loss of queries ``im_q`` against keys ``im_k`` and the queue.

        Returns:
            (loss, q, k): the scalar loss, the normalised query features (N x C)
            and the normalised key features (N x C, no gradient).
        """
        # compute query features
        q = self.encoder_q(im_q)  # queries: NxC
        q = nn.functional.normalize(q, dim=1)  # L2-normalise

        # compute key features
        with torch.no_grad():  # no gradient to keys
            # shuffle for making use of BN
            im_k_, idx_unshuffle = self._batch_shuffle_single_gpu(im_k)

            k = self.encoder_k(im_k_)  # keys: NxC
            k = nn.functional.normalize(k, dim=1)  # L2-normalise

            # undo shuffle
            k = self._batch_unshuffle_single_gpu(k, idx_unshuffle)

        # compute logits
        # positive logits: Nx1
        l_pos = torch.einsum('nc,nc->n', [q, k]).unsqueeze(-1)
        # negative logits: NxK
        l_neg = torch.einsum('nc,ck->nk', [q, self.queue.clone().detach()])

        # logits: Nx(1+K)
        logits = torch.cat([l_pos, l_neg], dim=1)

        # apply temperature
        logits /= self.T

        # labels: the positive key sits in column 0 of every row
        labels = torch.zeros(logits.shape[0], dtype=torch.long, device=logits.device)

        # functional form of nn.CrossEntropyLoss() with default settings
        loss = nn.functional.cross_entropy(logits, labels)

        return loss, q, k

    def forward(self, im1, im2):
        """
        Input:
            im1: a batch of images (first view)
            im2: a batch of images (second view)
        Output:
            loss
        """

        # update the key encoder
        with torch.no_grad():  # no gradient to keys
            self._momentum_update_key_encoder()

        # compute loss
        if self.symmetric:  # symmetric loss
            loss_12, q1, k2 = self.contrastive_loss(im1, im2)
            loss_21, q2, k1 = self.contrastive_loss(im2, im1)
            loss = loss_12 + loss_21
            k = torch.cat([k1, k2], dim=0)
        else:  # asymmetric loss
            loss, q, k = self.contrastive_loss(im1, im2)

        self._dequeue_and_enqueue(k)

        return loss

# create model
# Every hyper-parameter comes from the parsed args; the model is moved to the
# GPU right away, so this cell requires CUDA.
model = ModelMoCo(
        dim=args.moco_dim,
        K=args.moco_k,
        m=args.moco_m,
        T=args.moco_t,
        arch=args.arch,
        bn_splits=args.bn_splits,
        symmetric=args.symmetric,
    ).cuda()
# Print the query encoder's layer structure as a sanity check.
print(model.encoder_q)

In [7]:
# train for one epoch
def train(net, data_loader, train_optimizer, epoch, args):
    """Train ``net`` for one epoch and return the sample-weighted mean loss.

    Args:
        net: model called as ``net(im_1, im_2)`` and returning a scalar loss.
        data_loader: iterable yielding ``(im_1, im_2)`` batches.
        train_optimizer: optimizer stepped once per batch.
        epoch: current epoch, used for the lr schedule and the progress bar.
        args: namespace providing ``epochs`` plus the lr-schedule settings.
    """
    net.train()
    adjust_learning_rate(train_optimizer, epoch, args)

    total_loss, total_num, train_bar = 0.0, 0, tqdm(data_loader)
    for im_1, im_2 in train_bar:
        im_1, im_2 = im_1.cuda(non_blocking=True), im_2.cuda(non_blocking=True)

        loss = net(im_1, im_2)

        train_optimizer.zero_grad()
        loss.backward()
        train_optimizer.step()

        # weight by the real number of samples in this batch, so a partial
        # last batch (or a loader without a fixed batch_size) is averaged correctly
        batch_size = im_1.size(0)
        total_num += batch_size
        total_loss += loss.item() * batch_size
        train_bar.set_description('Train Epoch: [{}/{}], lr: {:.6f}, Loss: {:.4f}'.format(epoch, args.epochs, train_optimizer.param_groups[0]['lr'], total_loss / total_num))

    return total_loss / total_num

# lr scheduler for training
def adjust_learning_rate(optimizer, epoch, args):
    """Set the lr of every param group for ``epoch``.

    With ``args.cos`` the base rate ``args.lr`` follows a half-cosine over
    ``args.epochs``; otherwise it is multiplied by 0.1 for each milestone in
    ``args.schedule`` that ``epoch`` has reached.
    """
    lr = args.lr
    if not args.cos:
        # stepwise lr schedule
        for milestone in args.schedule:
            factor = 0.1 if epoch >= milestone else 1.
            lr = lr * factor
    else:
        # cosine lr schedule
        cosine_factor = 0.5 * (1. + math.cos(math.pi * epoch / args.epochs))
        lr = lr * cosine_factor

    for group in optimizer.param_groups:
        group['lr'] = lr

In [8]:
# test using a knn monitor
def test(net, memory_data_loader, test_data_loader, epoch, args):
    """Evaluate ``net`` with a weighted kNN monitor and return top-1 accuracy in percent.

    Features of ``memory_data_loader`` form the bank; every batch of
    ``test_data_loader`` is classified against it with ``knn_predict``.
    """
    net.eval()
    classes = len(memory_data_loader.dataset.classes)
    correct_top1, seen = 0.0, 0
    with torch.no_grad():
        # generate feature bank: one L2-normalised chunk per memory batch
        bank_chunks = [
            F.normalize(net(data.cuda(non_blocking=True)), dim=1)
            for data, _ in tqdm(memory_data_loader, desc='Feature extracting')
        ]
        # [D, N]
        feature_bank = torch.cat(bank_chunks, dim=0).t().contiguous()
        # [N]
        feature_labels = torch.tensor(memory_data_loader.dataset.targets, device=feature_bank.device)

        # predict test labels by weighted knn search
        test_bar = tqdm(test_data_loader)
        for data, target in test_bar:
            data = data.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            feature = F.normalize(net(data), dim=1)

            pred_labels = knn_predict(feature, feature_bank, feature_labels, classes, args.knn_k, args.knn_t)

            seen += data.size(0)
            correct_top1 += (pred_labels[:, 0] == target).float().sum().item()
            test_bar.set_description('Test Epoch: [{}/{}] Acc@1:{:.2f}%'.format(epoch, args.epochs, correct_top1 / seen * 100))

    return correct_top1 / seen * 100

# knn monitor as in InstDisc https://arxiv.org/abs/1805.01978
# implementation follows http://github.com/zhirongw/lemniscate.pytorch and https://github.com/leftthomas/SimCLR
def knn_predict(feature, feature_bank, feature_labels, classes, knn_k, knn_t):
    """Rank classes for each query by similarity-weighted kNN voting.

    Args:
        feature: query features, [B, D].
        feature_bank: bank features, [D, N].
        feature_labels: integer class label of every bank entry, [N].
        classes: number of classes.
        knn_k: number of neighbours; clamped to N so that a bank smaller
            than ``knn_k`` no longer makes ``topk`` raise.
        knn_t: temperature applied to the similarities before ``exp``.

    Returns:
        [B, classes] tensor of class indices sorted by descending score;
        column 0 is the top-1 prediction.
    """
    batch_size = feature.size(0)
    knn_k = min(knn_k, feature_bank.size(1))

    # similarity between each query and every bank entry ---> [B, N]
    sim_matrix = torch.mm(feature, feature_bank)
    # [B, K]
    sim_weight, sim_indices = sim_matrix.topk(k=knn_k, dim=-1)
    # [B, K]
    sim_labels = torch.gather(feature_labels.expand(batch_size, -1), dim=-1, index=sim_indices)
    sim_weight = (sim_weight / knn_t).exp()

    # counts for each class, [B*K, C]
    one_hot_label = torch.zeros(batch_size * knn_k, classes, device=sim_labels.device)
    one_hot_label = one_hot_label.scatter(dim=-1, index=sim_labels.view(-1, 1), value=1.0)
    # weighted score ---> [B, C]
    pred_scores = torch.sum(one_hot_label.view(batch_size, -1, classes) * sim_weight.unsqueeze(dim=-1), dim=1)

    return pred_scores.argsort(dim=-1, descending=True)

In [None]:
# define optimizer
# SGD over all model parameters (the key encoder's are frozen via requires_grad=False)
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.wd, momentum=0.9)

# load model if resume
epoch_start = 1
# compare by value: `is not ''` tested object identity against a literal
if args.resume:
    checkpoint = torch.load(args.resume)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    epoch_start = checkpoint['epoch'] + 1
    print('Loaded from: {}'.format(args.resume))

# logging
results = {'train_loss': [], 'test_acc@1': []}
# makedirs also creates missing parent directories and tolerates an existing one
os.makedirs(args.results_dir, exist_ok=True)
# dump args
with open(args.results_dir + '/args.json', 'w') as fid:
    json.dump(args.__dict__, fid, indent=2)

# training loop: train one epoch, run the kNN monitor, then persist log + checkpoint
for epoch in range(epoch_start, args.epochs + 1):
    train_loss = train(model, train_loader, optimizer, epoch, args)
    results['train_loss'].append(train_loss)
    test_acc_1 = test(model.encoder_q, memory_loader, test_loader, epoch, args)
    results['test_acc@1'].append(test_acc_1)
    # save statistics (the whole log is rewritten every epoch)
    data_frame = pd.DataFrame(data=results, index=range(epoch_start, epoch + 1))
    data_frame.to_csv(args.results_dir + '/log.csv', index_label='epoch')
    # save model (overwrites the previous checkpoint)
    torch.save({'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer' : optimizer.state_dict(),}, args.results_dir + '/model_last.pth')

In [None]:
import torchvision
from torchvision import transforms
from PIL import Image
from preact_resnet import PreActResNet18
def test_model_Acc(net_test, memory_data_loader, test_data_loader, args):
    """Print the kNN top-1 accuracy of ``net_test`` and return the feature bank.

    Returns:
        (feature_bank, feature_labels, classes): the [D, N] bank of normalised
        memory features, the [N] labels of the memory set and the class count.
    """
    net_test.eval()
    classes = len(memory_data_loader.dataset.classes)
    correct_top1, seen = 0.0, 0
    with torch.no_grad():
        # generate feature bank
        bank_chunks = [
            F.normalize(net_test(data.cuda(non_blocking=True)), dim=1)
            for data, _ in tqdm(memory_data_loader, desc='Feature extracting')
        ]
        # [D, N]
        feature_bank = torch.cat(bank_chunks, dim=0).t().contiguous()
        # [N]
        feature_labels = torch.tensor(memory_data_loader.dataset.targets, device=feature_bank.device)

        for data, target in test_data_loader:
            data = data.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            feature = F.normalize(net_test(data), dim=1)
            pred_labels = knn_predict(feature, feature_bank, feature_labels, classes, args.knn_k, args.knn_t)
            seen += data.size(0)
            correct_top1 += (pred_labels[:, 0] == target).float().sum().item()

        print('Test Acc@1:{:.2f}%'.format(correct_top1 / seen * 100))
    return feature_bank, feature_labels, classes

def knn_predict(feature, feature_bank, feature_labels, classes, knn_k, knn_t):
    """Weighted kNN classifier over a feature bank.

    ``feature`` is [B, D], ``feature_bank`` is [D, N] and ``feature_labels``
    holds the N integer labels. Returns a [B, classes] tensor of class indices
    ordered from most to least likely. ``knn_k`` is clamped to the bank size so
    a small bank does not make ``topk`` raise.
    """
    n_queries = feature.size(0)
    knn_k = min(knn_k, feature_bank.size(1))

    # [B, N] similarities, then the K best per query
    sim_matrix = torch.mm(feature, feature_bank)
    sim_weight, sim_indices = sim_matrix.topk(k=knn_k, dim=-1)
    sim_labels = torch.gather(feature_labels.expand(n_queries, -1), dim=-1, index=sim_indices)
    sim_weight = (sim_weight / knn_t).exp()

    # one-hot votes [B*K, C], weighted and summed into [B, C]
    one_hot_label = torch.zeros(n_queries * knn_k, classes, device=sim_labels.device)
    one_hot_label = one_hot_label.scatter(dim=-1, index=sim_labels.view(-1, 1), value=1.0)
    pred_scores = torch.sum(one_hot_label.view(n_queries, -1, classes) * sim_weight.unsqueeze(dim=-1), dim=1)

    pred_labels = pred_scores.argsort(dim=-1, descending=True)
    return pred_labels

# Restore MoCo weights from disk and measure the query encoder with the kNN monitor.
# NOTE(review): the training cell writes to a timestamped results directory --
# confirm that "cache-moco" is the intended checkpoint location.
model_file_path = "cache-moco/model_last.pth"
checkpoint = torch.load(model_file_path)
model.load_state_dict(checkpoint['state_dict'])
# Only the query encoder is used from here on.
net_test = model.encoder_q
print('Loaded model from: {}'.format(model_file_path),"\ntesting the ability of eccoder...")
# Keep the feature bank, its labels and the class count as module-level names;
# a later cell's train_step reads them.
feature_bank_, feature_labels_, classes_ = test_model_Acc(net_test, memory_loader, test_loader, args)

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from classifier_models import ResNet18
from dataloader1 import get_dataloader
from torchvision import transforms
from PIL import Image

# Restrict CUDA to device 0 for this process.
# NOTE(review): earlier cells already use CUDA; confirm this assignment still has an effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

def white():
    """Return the RGB triple of pure white."""
    return (255,) * 3
def white_f():
    """Create an 8x8 RGB image filled with the colour returned by ``white()``."""
    return Image.new('RGB', (8, 8), white())
def add_trigger(img):
    """Convert ``img`` to a PIL image and paste the white patch at offset (24, 24)."""
    to_pil = transforms.ToPILImage()
    stamped = to_pil(img)
    stamped.paste(white_f(), (24, 24))
    return stamped
def create_targets_bd(targets):
    """Return the backdoor labels for ``targets``: same shape and dtype, all class 0.

    The result is moved to the GPU when CUDA is available (the original
    behaviour); without CUDA it is returned on the input's device instead of
    raising.
    """
    bd_targets = torch.zeros_like(targets)
    if torch.cuda.is_available():
        return bd_targets.cuda()
    return bd_targets

def create_bd(inputs):
    """Stamp the trigger onto ``inputs`` and return the result as a tensor."""
    to_tensor = transforms.ToTensor()
    return to_tensor(add_trigger(inputs))

def create_bd1(inputs, targets):
    """Return ``(triggered_tensor, backdoor_targets)`` for one sample.

    The targets come from ``create_targets_bd``; the image is stamped by
    ``add_trigger`` and converted back with ``ToTensor``.
    """
    bd_targets = create_targets_bd(targets)
    to_tensor = transforms.ToTensor()
    return to_tensor(add_trigger(inputs)), bd_targets

def train_step(netC, optimizerC, schedulerC, train_dl1):
    """Train ``netC`` for one epoch on a partially poisoned stream of batches.

    Samples whose label is not 0 are stamped with the trigger and relabelled
    through ``create_bd1`` until 500 samples have been poisoned in this call;
    all other samples pass through unchanged. The scheduler is stepped once at
    the end of the epoch.

    Args:
        netC: classifier being trained.
        optimizerC: optimizer for ``netC``.
        schedulerC: lr scheduler stepped once per call.
        train_dl1: iterable of ``(inputs, targets)`` batches.
    """
    netC.train()
    print(" Training:")
    total = 0
    bd_num = 0
    total_correct = 0
    # Accumulated across batches. The original reset its total every batch,
    # so the printed average loss was wrong.
    running_loss = 0.0
    criterion = nn.CrossEntropyLoss()

    for batch_idx, (inputs1, targets1) in enumerate(train_dl1):
        optimizerC.zero_grad()
        inputs1, targets1 = inputs1.cuda(), targets1.cuda()
        bs = inputs1.shape[0]

        # Collect per-sample tensors and stack once instead of growing a
        # tensor with torch.cat per sample.
        samples = []
        labels = []
        for n in range(bs):
            if int(targets1[n]) != 0 and bd_num < 500:
                poisoned, bd_target = create_bd1(inputs1[n], targets1[n])
                samples.append(poisoned.cuda())
                labels.append(int(bd_target))
                bd_num += 1
            else:
                samples.append(inputs1[n])
                labels.append(int(targets1[n]))

        total_inputs = torch.stack(samples, 0)
        total_targets = torch.tensor(labels, dtype=torch.long).cuda()

        preds = netC(total_inputs)
        loss_ce = criterion(preds, total_targets)
        loss_ce.backward()
        optimizerC.step()

        total += bs
        # NOTE(review): reported as "Clean Accuracy" but measured against the
        # (partly relabelled) training targets.
        total_correct += torch.sum(torch.argmax(preds, 1) == total_targets).item()
        running_loss += loss_ce.item() * bs

        avg_loss = running_loss / total
        acc_clean = total_correct * 100.0 / total
        if not batch_idx % 50:
            print(batch_idx, len(train_dl1), "CE loss: {:.4f} - Clean Accuracy: {:.3f} - bd_num:{:.3f}".format(
                avg_loss, acc_clean, bd_num ))

        # Saving images for debugging
        if batch_idx == len(train_dl1) - 2:
            images = inputs1[:10]
            os.makedirs('./temps', exist_ok=True)  # save_image does not create the directory
            torchvision.utils.save_image(images, './temps/debugging.png', normalize=True, pad_value=1)
    schedulerC.step()

def eval(netC, optimizerC, schedulerC, test_dl1, epoch, best_acc_clean):
    """Evaluate ``netC`` on clean and triggered test data; checkpoint on improvement.

    For every batch the clean accuracy is measured first. Then selected samples
    are stamped with the trigger and the number of class-0 predictions on the
    resulting batch is accumulated as the backdoor hit count.

    Args:
        netC: classifier under evaluation.
        optimizerC, schedulerC: only their ``state_dict()`` is stored in the checkpoint.
        test_dl1: iterable of ``(inputs, targets)`` batches.
        epoch: stored in the checkpoint and returned unchanged.
        best_acc_clean: best clean accuracy seen so far.

    Returns:
        ``(best_acc_clean, epoch)`` with the possibly updated best accuracy.
    """
    netC.eval()
    print(" Eval:")
    total = 0.0
    bd_num = 0
    bd_true = 0
    total_correct_clean = 0.0
    # defaults so an empty loader reports zeros instead of raising NameError
    avg_acc_clean = 0.0
    avg_bd_acc = 0.0

    for batch_idx, (inputs1, targets1) in enumerate(test_dl1):
        with torch.no_grad():
            inputs1, targets1 = inputs1.cuda(), targets1.cuda()
            bs = inputs1.shape[0]
            target_bd = torch.zeros_like(targets1).cuda()

            preds_clean = netC(inputs1)
            total_correct_clean += torch.sum(torch.argmax(preds_clean, 1) == targets1)

            # Build the evaluation batch: one tensor per sample, stacked once.
            samples = []
            for n in range(bs):
                # NOTE(review): this tests the raw class-0 logit against 0, not
                # the predicted class -- confirm whether argmax was intended.
                if bool(preds_clean[n, 0] != 0) and int(targets1[n]) != 0:
                    poisoned, _ = create_bd1(inputs1[n], targets1[n])
                    samples.append(poisoned.cuda())
                    bd_num += 1
                else:
                    samples.append(inputs1[n])
            total_inputs = torch.stack(samples, 0)

            total += bs
            preds_bd = netC(total_inputs)
            # NOTE(review): counts class-0 predictions over the whole batch
            # (including untouched samples) but is divided by bd_num below.
            bd_true += torch.sum(torch.argmax(preds_bd, 1) == target_bd)

            avg_acc_clean = total_correct_clean * 100.0 / total
            avg_bd_acc = bd_true * 100.0 / bd_num

    print(
        " Result: Best Clean Accuracy: {:.3f}| Clean Accuracy: {:.3f}|  BD Accuracy: {:.3f}| BD num:{:.3f}  ".format(
            best_acc_clean, avg_acc_clean, avg_bd_acc, bd_num
        )
    )
    if avg_acc_clean > best_acc_clean:
        print(" Saving!!")
        best_acc_clean = avg_acc_clean
        state_dict = {
            "netC": netC.state_dict(),
            "optimizerC": optimizerC.state_dict(),
            "schedulerC": schedulerC.state_dict(),
            "best_acc_clean": best_acc_clean,
            "epoch": epoch,
        }
        ckpt_folder = os.path.join('all_checkpoints')
        os.makedirs(ckpt_folder, exist_ok=True)
        ckpt_path = os.path.join(ckpt_folder, "new_Resnet18_test_{}_1%_ckpt.pth.tar".format('cifar10'))
        torch.save(state_dict, ckpt_path)
    return best_acc_clean,  epoch

def train():
    """Train a fresh ResNet18 with ``train_step``/``eval`` for up to 120 epochs."""
    netC = ResNet18().cuda()
    optimizerC = torch.optim.SGD(netC.parameters(), 0.01, momentum=0.9, weight_decay=5e-4)
    schedulerC = torch.optim.lr_scheduler.MultiStepLR(optimizerC, [100, 200, 300, 400], 0.1)
    best_acc_clean = 0.0
    epoch = 1
    print("Training from scratch")

    # Prepare dataset
    train_dl1 = get_dataloader(train=True)
    test_dl1 = get_dataloader(train=False)

    for _ in range(120):
        print("Epoch", epoch)
        train_step(netC, optimizerC, schedulerC, train_dl1)
        # eval hands back the best accuracy so far together with the epoch it was given
        best_acc_clean, epoch = eval(netC, optimizerC, schedulerC, test_dl1, epoch, best_acc_clean)
        epoch += 1
        if epoch > 120:
            break
# Run the training routine defined in this cell.
train()

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from classifier_models import PreActResNet18, ResNet50, ResNet18

from dataloader1 import get_dataloader
from torchvision import transforms
from PIL import Image

# Restrict CUDA to device 0 for this process.
# NOTE(review): earlier cells already use CUDA; confirm this assignment still has an effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

def white():
    """Return pure white as an (R, G, B) tuple."""
    channel = 255
    return channel, channel, channel
def white_f():
    """Return a new 8x8 RGB image filled with ``white()``."""
    patch_size = (8, 8)
    return Image.new('RGB', patch_size, white())
def add_trigger(img):
    """Return ``img`` as a PIL image with the white patch pasted at (24, 24)."""
    pil_img = transforms.ToPILImage()(img)
    patch = white_f()
    pil_img.paste(patch, (24, 24))
    return pil_img

def create_bd1(inputs, targets):
    """Build one backdoored sample: the triggered image tensor and its replaced label."""
    bd_targets = create_targets_bd(targets)
    triggered = add_trigger(inputs)
    return transforms.ToTensor()(triggered), bd_targets

def create_targets_bd(targets):
    """Return an all-zero label tensor shaped like ``targets`` (backdoor class 0).

    The tensor is placed on the GPU when CUDA is available, as before;
    on a CPU-only machine it stays on the input's device instead of failing.
    """
    bd_targets = torch.zeros_like(targets)
    return bd_targets.cuda() if torch.cuda.is_available() else bd_targets

def train_step(netC, optimizerC, schedulerC, train_dl1):
    """Train ``netC`` for one epoch on poisoned data filtered by model agreement.

    Per batch:
      1. samples with a non-zero label are stamped with the trigger and
         relabelled via ``create_bd1`` until 500 injections were made in this call;
      2. each sample is labelled by the kNN monitor on ``net_test`` features and
         by a reference ResNet18 restored from
         ``./all_checkpoints/new_Resnet18_test_cifar10_1%_ckpt.pth.tar``;
      3. only samples on which both predictions agree are used to train ``netC``;
         rejected samples that the reference classifier assigns to class 0 are
         written to ``./temps`` for inspection.

    Reads the module-level names ``net_test``, ``feature_bank_``,
    ``feature_labels_``, ``classes_``, ``args`` and ``knn_predict``.

    Args:
        netC: classifier being trained.
        optimizerC: optimizer for ``netC``.
        schedulerC: lr scheduler stepped once per call.
        train_dl1: iterable of ``(inputs, targets)`` batches.
    """
    netC.train()
    print(" Training:")
    total = 0
    out_num = 0
    inj_num = 0
    img_index = 0
    total_correct = 0
    # Accumulated across batches (the original reset its total every batch).
    running_loss = 0.0
    criterion = nn.CrossEntropyLoss()
    # Reference classifier: loaded on the first batch and reused afterwards
    # instead of being re-read from disk for every batch.
    netC1 = None

    for batch_idx, (inputs1, targets1) in enumerate(train_dl1):
        optimizerC.zero_grad()
        inputs1, targets1 = inputs1.cuda(), targets1.cuda()
        bs = inputs1.shape[0]

        # Build the (partly poisoned) batch; stack once instead of repeated cat.
        samples = []
        labels = []
        for n in range(bs):
            if int(targets1[n]) != 0 and inj_num < 500:
                poisoned, bd_target = create_bd1(inputs1[n], targets1[n])
                samples.append(poisoned.cuda())
                labels.append(int(bd_target))
                inj_num += 1
            else:
                samples.append(inputs1[n])
                labels.append(int(targets1[n]))
        total_inputs = torch.stack(samples, 0)
        total_targets = torch.tensor(labels, dtype=torch.long).cuda()

        if netC1 is None:
            classifier1 = ResNet18()
            model_filepath = './all_checkpoints/new_Resnet18_test_cifar10_1%_ckpt.pth.tar'
            state_dict_clean = torch.load(model_filepath)
            classifier1.load_state_dict(state_dict_clean["netC"])
            # NOTE(review): never switched to eval(), so the reference model
            # runs in training mode as in the original -- confirm this is intended.
            netC1 = classifier1.cuda()

        # The two filter predictions need no gradients.
        with torch.no_grad():
            feature = F.normalize(net_test(total_inputs), dim=1)
            pred_labels = knn_predict(feature, feature_bank_, feature_labels_, classes_, args.knn_k, args.knn_t)
            sup_preds = torch.max(netC1(total_inputs), 1)[1]

        agree = sup_preds == pred_labels[:, 0]
        rejected = ~agree
        out_num += int(rejected.sum())

        # Dump rejected samples that the reference classifier puts in class 0.
        flagged = torch.nonzero(rejected & (sup_preds == 0)).flatten().tolist()
        if flagged:
            os.makedirs('./temps', exist_ok=True)
        for n in flagged:
            image = transforms.ToPILImage()(total_inputs[n].cpu())
            image.save(f"./temps/misclassified_image_{img_index}.png")
            img_index += 1

        kept = int(agree.sum())
        if kept == 0:
            # nothing survived the filter; the original crashed on this case
            continue
        total_inputs = total_inputs[agree]
        total_targets = total_targets[agree]

        preds = netC(total_inputs)
        loss_ce = criterion(preds, total_targets)
        loss_ce.backward()
        optimizerC.step()

        # Statistics are weighted by the samples actually trained on, so the
        # accuracy is no longer diluted by filtered-out samples.
        total += kept
        total_correct += torch.sum(torch.argmax(preds, 1) == total_targets).item()
        running_loss += loss_ce.item() * kept

        avg_loss = running_loss / total
        acc_clean = total_correct * 100.0 / total
        if not batch_idx % 50:
            print(batch_idx, len(train_dl1), "CE loss: {:.4f} - Clean Accuracy: {:.3f} - out_num:{:.3f}".format(
                avg_loss, acc_clean, out_num ))

        # Saving images for debugging
        if batch_idx == len(train_dl1) - 2:
            images = inputs1[:10]
            os.makedirs('./temps', exist_ok=True)
            torchvision.utils.save_image(images, './temps/debugging.png', normalize=True, pad_value=1)
    schedulerC.step()

def eval(netC, optimizerC, schedulerC, test_dl1, epoch, best_acc_clean):
    """Evaluate ``netC`` on clean and trigger-stamped test data; checkpoint on a new best.

    Each batch is scored twice: once unmodified, and once as a "mixed" batch in
    which every eligible sample has been replaced by the first value returned by
    ``create_bd1``. A sample is eligible when neither its ground-truth label nor
    its clean prediction is the backdoor target class (0).

    Reported metrics (both in percent):
      * Clean accuracy: correct clean predictions / number of samples.
      * BD accuracy: (target-class predictions on the mixed batches minus
        target-class predictions on the clean batches) / number of samples
        passed through ``create_bd1``. Reported as 0.0 when no sample was
        eligible.

    Args:
        netC: Classifier to evaluate; it is switched to eval mode and fed CUDA
            tensors.
        optimizerC: Optimizer whose ``state_dict()`` is written to the checkpoint.
        schedulerC: LR scheduler whose ``state_dict()`` is written to the
            checkpoint.
        test_dl1: Iterable yielding ``(inputs, targets)`` batches.
        epoch: Current epoch number; stored in the checkpoint and returned
            unchanged.
        best_acc_clean: Best clean accuracy (percent) observed so far.

    Returns:
        ``(best_acc_clean, epoch)``. ``best_acc_clean`` is replaced by this
        run's clean accuracy (a plain ``float``) when it is strictly higher,
        in which case a checkpoint is also written under ``all_checkpoints/``.
    """
    bd_target_class = 0  # label that triggered samples are expected to be classified as

    netC.eval()
    print(" Eval:")
    total = 0
    bd_num = 0
    bd_true = 0
    bd_clean = 0
    total_correct_clean = 0

    with torch.no_grad():
        for inputs1, targets1 in test_dl1:
            inputs1, targets1 = inputs1.cuda(), targets1.cuda()
            total += inputs1.shape[0]

            clean_labels = torch.argmax(netC(inputs1), 1)
            total_correct_clean += int(torch.sum(clean_labels == targets1))
            bd_clean += int(torch.sum(clean_labels == bd_target_class))

            # Eligibility must be decided on the predicted *label*; testing the
            # raw class-0 logit against 0 is true for practically every sample.
            poison_flags = (
                (clean_labels != bd_target_class) & (targets1 != bd_target_class)
            ).tolist()

            # Collect samples in a list and stack once instead of growing a
            # tensor with repeated torch.cat calls.
            mixed_samples = []
            for n, poison in enumerate(poison_flags):
                sample = inputs1[n]
                if poison:
                    # create_bd1 is defined elsewhere in this file; only its first
                    # return value (the modified image) is needed here.
                    bd_sample, _ = create_bd1(inputs1[n], targets1[n])
                    # Normalise to the per-sample shape so an extra leading batch
                    # axis (if any) does not break stacking.
                    sample = bd_sample.to(inputs1.device).reshape(inputs1[n].shape)
                    bd_num += 1
                mixed_samples.append(sample)

            total_inputs = torch.stack(mixed_samples)
            mixed_labels = torch.argmax(netC(total_inputs), 1)
            bd_true += int(torch.sum(mixed_labels == bd_target_class))

    # Metrics are computed once after the loop; defaults avoid a NameError on an
    # empty loader and a division by zero when nothing was poisoned.
    avg_acc_clean = total_correct_clean * 100.0 / total if total else 0.0
    total_bd_acc = bd_true - bd_clean
    avg_bd_acc = total_bd_acc * 100.0 / bd_num if bd_num else 0.0

    print(
        " Result: Best Clean Accuracy: {:.3f}| Clean Accuracy: {:.3f}|  BD Accuracy: {:.3f}| BD num:{:.3f}  ".format(
            best_acc_clean, avg_acc_clean, avg_bd_acc, bd_num
        )
    )
    if avg_acc_clean > best_acc_clean:
        print(" Saving!!")
        # Plain float, so the checkpoint metadata does not hold a CUDA tensor.
        best_acc_clean = avg_acc_clean
        state_dict = {
            "netC": netC.state_dict(),
            "optimizerC": optimizerC.state_dict(),
            "schedulerC": schedulerC.state_dict(),
            "best_acc_clean": best_acc_clean,
            "epoch": epoch,
        }
        ckpt_folder = os.path.join('all_checkpoints')
        os.makedirs(ckpt_folder, exist_ok=True)
        ckpt_path = os.path.join(ckpt_folder, "new_Resnet18-after1_{}_1%_ckpt.pth.tar".format('cifar10'))
        torch.save(state_dict, ckpt_path)
    return best_acc_clean, epoch

def train(num_epochs=120):
    """Train a ResNet18 classifier from scratch, evaluating after every epoch.

    Builds the model, an SGD optimizer (lr 0.01, momentum 0.9, weight decay
    5e-4) and a MultiStepLR scheduler, then alternates ``train_step`` and
    ``eval`` for ``num_epochs`` epochs. ``eval`` is responsible for tracking
    the best clean accuracy and writing checkpoints.

    Args:
        num_epochs: Number of train/eval epochs to run. Defaults to 120, the
            value that was previously hard-coded.
    """
    netC = ResNet18().cuda()
    optimizerC = torch.optim.SGD(netC.parameters(), 0.01, momentum=0.9, weight_decay=5e-4)
    # NOTE(review): with the default 120 epochs only the first milestone (100)
    # can be reached, assuming the scheduler is stepped once per epoch — confirm
    # the later milestones are intentional.
    schedulerC = torch.optim.lr_scheduler.MultiStepLR(optimizerC, [100, 200, 300, 400], 0.1)
    best_acc_clean = 0.0
    print("Training from scratch")

    # Prepare dataset
    train_dl1 = get_dataloader(train=True)
    test_dl1 = get_dataloader(train=False)

    # Epochs are numbered from 1 so logs and checkpoints match the printed value.
    for epoch in range(1, num_epochs + 1):
        print("Epoch", epoch)
        train_step(netC, optimizerC, schedulerC, train_dl1)
        # eval hands the epoch back unchanged; only the running best is needed.
        best_acc_clean, _ = eval(
            netC,
            optimizerC,
            schedulerC,
            test_dl1,
            epoch,
            best_acc_clean,
        )
            
            
# Entry point: build the model and run the full train/eval loop.
train()