# main.py 前面import

In [1]:
# -*- coding: UTF-8 -*-
# Python
import os
import random

# Torch
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from numpy.core._multiarray_umath import ndarray
from sklearn.cluster import KMeans
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F
from torch.autograd import Variable
from torch.distributions import Categorical

# Torchvision
import torchvision.transforms as T
import torchvision.models as models
from torchvision.datasets import CIFAR100, CIFAR10
# from influence import *

# Utils
# import visdom
# from tqdm import tqdm

# Custom
#import models.resnet as resnet
#from config import *
#from data.sampler import SubsetSequentialSampler

# import copy

#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
#os.environ["CUDA_VISIBLE_DEVICES"] = "0"


# config.py

In [2]:
##
# Learning Loss for Active Learning
# Experiment configuration. Several values are scaled down from the original
# settings (noted per line) so that a full run finishes quickly.
NUM_TRAIN = 50000 # N: size of the training index pool that is split into labeled/unlabeled
NUM_VAL   = 50000 - NUM_TRAIN
BATCH     = 128  # 128 # B: mini-batch size for every DataLoader (note: originally 128)
SUBSET    = 30 # M: unlabeled samples scored per cycle (originally 25000)
ADDENDUM  = 20  # K: samples moved to the labeled set per cycle (originally 2500)

# NOTE(review): MARGIN and WEIGHT are not read by any code visible in this file.
MARGIN = 1.0  # xi
WEIGHT = 1.0  # 1.0 # lambda

TRIALS = 2
CYCLES = 5   # originally supposed to be 7

EPOCH = 100   # originally 200
LR = 0.1    # 0.1 for SGD
# NOTE(review): the only milestone (160) is larger than EPOCH (100) and the
# scheduler is rebuilt every cycle, so the learning rate never decays - confirm.
MILESTONES = [160]
# NOTE(review): EPOCHL is only forwarded as `epoch_loss`, which the training
# code visible in this file never reads.
EPOCHL = 120 # After 120 epochs, stop the gradient from the loss prediction module propagated to the target model

MOMENTUM = 0.9
WDECAY = 5e-4

CLUSTER_NUMS = 10 # newly added: number of clusters used by the k-means step
CLUSTER_MAX_ITER = 10000 # newly added: number of k-means iterations (10000)


''' CIFAR-10 | ResNet-18 | 93.6%
NUM_TRAIN = 50000 # N
NUM_VAL   = 50000 - NUM_TRAIN
BATCH     = 128 # B
SUBSET    = NUM_TRAIN # M
ADDENDUM  = NUM_TRAIN # K

MARGIN = 1.0 # xi
WEIGHT = 0.0 # lambda

TRIALS = 1
CYCLES = 1

EPOCH = 50
LR = 0.1
MILESTONES = [25, 35]
EPOCHL = 40

MOMENTUM = 0.9
WDECAY = 5e-4
'''


' CIFAR-10 | ResNet-18 | 93.6%\nNUM_TRAIN = 50000 # N\nNUM_VAL   = 50000 - NUM_TRAIN\nBATCH     = 128 # B\nSUBSET    = NUM_TRAIN # M\nADDENDUM  = NUM_TRAIN # K\n\nMARGIN = 1.0 # xi\nWEIGHT = 0.0 # lambda\n\nTRIALS = 1\nCYCLES = 1\n\nEPOCH = 50\nLR = 0.1\nMILESTONES = [25, 35]\nEPOCHL = 40\n\nMOMENTUM = 0.9\nWDECAY = 5e-4\n'

# Set the device to CUDA (GPU) when available, otherwise CPU

In [3]:
#
# Shared compute device: the first CUDA GPU when one is available, else the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# sampler.py

In [4]:
class SubsetSequentialSampler(torch.utils.data.Sampler):
    r"""Samples elements sequentially from a given list of indices, without replacement.

    Unlike ``SubsetRandomSampler`` the indices are yielded in exactly the order
    in which they were supplied, so results computed batch by batch can be
    matched back to ``indices`` by position.

    Arguments:
        indices (sequence): a sequence of indices
    """

    def __init__(self, indices):
        self.indices = indices

    def __iter__(self):
        # Iterate the sequence directly instead of indexing via range(len(...)).
        return iter(self.indices)

    def __len__(self):
        return len(self.indices)



# main.py 里面的data

In [5]:
##
# Data
# Per-channel mean / std used to normalise CIFAR-10 images, shared by both pipelines.
_CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
_CIFAR10_STD = [0.2023, 0.1994, 0.2010]
_CIFAR10_ROOT = '../cifar10'  # specify data path here

# Training pipeline: random flip + padded random crop, then tensor conversion and normalisation.
train_transform = T.Compose([
    T.RandomHorizontalFlip(),
    T.RandomCrop(size=32, padding=4),
    T.ToTensor(),
    T.Normalize(_CIFAR10_MEAN, _CIFAR10_STD),
])

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
test_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(_CIFAR10_MEAN, _CIFAR10_STD),
])

# The training split is loaded twice: augmented for optimisation, and with the
# evaluation transform for scoring unlabeled samples.
cifar10_train = CIFAR10(_CIFAR10_ROOT, train=True, download=False, transform=train_transform)
cifar10_unlabeled = CIFAR10(_CIFAR10_ROOT, train=True, download=False, transform=test_transform)
cifar10_test = CIFAR10(_CIFAR10_ROOT, train=False, download=False, transform=test_transform)

##
# Train Utils
# Global counter of processed training mini-batches (incremented in train_epoch).
iters = 0


# resnet.py

In [6]:
#import torch
#import torch.nn as nn
#import torch.nn.functional as F


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    The skip connection is the identity unless the stride or the channel count
    changes, in which case a 1x1 convolution + batch norm projects the input.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution (and of the projection).
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already agree: an empty Sequential acts as the identity.
            projection = []
        else:
            projection = [
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return relu(F(x) + shortcut(x)) for the two-convolution branch F."""
        branch = F.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))
        return F.relu(branch + self.shortcut(x))


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions with batch norm.

    The last convolution widens the output to ``expansion * planes`` channels.
    The skip connection is the identity unless the stride or the channel count
    changes, in which case a 1x1 convolution + batch norm projects the input.

    Args:
        in_planes: number of input channels.
        planes: width of the two inner convolutions.
        stride: stride of the 3x3 convolution (and of the projection).
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already agree: an empty Sequential acts as the identity.
            projection = []
        else:
            projection = [
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return relu(F(x) + shortcut(x)) for the three-convolution branch F."""
        branch = F.relu(self.bn1(self.conv1(x)))
        branch = F.relu(self.bn2(self.conv2(branch)))
        branch = self.bn3(self.conv3(branch))
        return F.relu(branch + self.shortcut(x))


class ResNet(nn.Module):
    """ResNet backbone with a 3x3 stem, four residual stages and two linear heads.

    Args:
        block: residual block class; called as ``block(in_planes, planes, stride)``
            and must expose an ``expansion`` attribute.
        num_blocks: number of blocks in each of the four stages.
        num_classes: output size of both linear heads.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count consumed and updated by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        feature_dim = 512 * block.expansion
        self.linear = nn.Linear(feature_dim, num_classes)
        self.linear1 = nn.Linear(feature_dim, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Run the network.

        Returns:
            A 4-tuple ``(out, out_cons, pooled, stage_outputs)``: the outputs of
            the two linear heads, the flattened pooled feature they share, and
            the list of feature maps produced by the four stages.
        """
        hidden = F.relu(self.bn1(self.conv1(x)))

        stage_outputs = []
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            hidden = stage(hidden)
            stage_outputs.append(hidden)

        pooled = F.avg_pool2d(hidden, 4)
        pooled = pooled.view(pooled.size(0), -1)     # [batch, flattened features]

        return self.linear(pooled), self.linear1(pooled), pooled, stage_outputs


def ResNet18():
    """Build a ResNet of BasicBlocks with 2, 2, 2, 2 blocks per stage."""
    return ResNet(block=BasicBlock, num_blocks=[2, 2, 2, 2])


def ResNet18_student():
    """Build a reduced ResNet of BasicBlocks with a single block per stage."""
    return ResNet(block=BasicBlock, num_blocks=[1, 1, 1, 1])


def ResNet34():
    """Build a ResNet of BasicBlocks with 3, 4, 6, 3 blocks per stage."""
    return ResNet(block=BasicBlock, num_blocks=[3, 4, 6, 3])


def ResNet50():
    """Build a ResNet of Bottleneck blocks with 3, 4, 6, 3 blocks per stage."""
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 6, 3])


def ResNet101():
    """Build a ResNet of Bottleneck blocks with 3, 4, 23, 3 blocks per stage."""
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 23, 3])


def ResNet152():
    """Build a ResNet of Bottleneck blocks with 3, 8, 36, 3 blocks per stage."""
    return ResNet(block=Bottleneck, num_blocks=[3, 8, 36, 3])



# main.py 里面的train_epoch函数

In [7]:
#
def train_epoch(models, criterion, optimizers, dataloaders, epoch, epoch_loss):
    """Run one optimisation pass over ``dataloaders['train']``.

    Args:
        models: dict holding the network under the key ``'backbone'``.
        criterion: loss returning one value per sample (it is averaged here).
        optimizers: dict holding the optimiser under the key ``'backbone'``.
        dataloaders: dict holding the training loader under the key ``'train'``.
        epoch: unused; kept for interface compatibility.
        epoch_loss: unused; kept for interface compatibility.

    Side effects:
        Increments the module-level ``iters`` counter once per mini-batch.
    """
    global iters

    backbone = models['backbone']
    optimizer = optimizers['backbone']
    backbone.train()

    for batch in dataloaders['train']:
        inputs = batch[0].to(device)
        labels = batch[1].to(device)
        iters += 1

        optimizer.zero_grad()

        # Only the classification scores are needed; the other outputs are ignored.
        scores, _, _, _ = backbone(inputs)
        per_sample_loss = criterion(scores, labels)

        # Average the per-sample losses over the batch dimension.
        batch_loss = torch.sum(per_sample_loss) / per_sample_loss.size(0)
        batch_loss.backward()
        optimizer.step()


# main.py 里面的test函数

In [8]:
#
def test(models, dataloaders, mode='val'):
    """Return the top-1 accuracy (in percent) of the backbone on one split.

    Args:
        models: dict holding the network under the key ``'backbone'``.
        dataloaders: dict of loaders; ``dataloaders[mode]`` is evaluated.
        mode: ``'val'`` or ``'test'``.
    """
    assert mode in ('val', 'test')

    backbone = models['backbone']
    backbone.eval()

    n_seen = 0
    n_correct = 0
    with torch.no_grad():
        for inputs, labels in dataloaders[mode]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            scores, _, _, _ = backbone(inputs)
            # Index of the highest score per row is the predicted class.
            preds = torch.max(scores.data, 1)[1]

            n_seen += labels.size(0)
            n_correct += (preds == labels).sum().item()

    return 100 * n_correct / n_seen

# main.py里面的train函数

In [9]:
#
def train(models, criterion, optimizers, schedulers, dataloaders, num_epochs, epoch_loss, cycle):
    """Train the backbone for ``num_epochs`` epochs with periodic evaluation.

    Every 20th epoch and on the final epoch the model is evaluated on
    ``dataloaders['test']``; whenever that accuracy beats the best seen in this
    call, the backbone weights are written to
    ``./cifar10/train/weights/active_resnet18_cifar10.pth``.

    Args:
        models: dict holding the network under the key ``'backbone'``.
        criterion: per-sample loss forwarded to ``train_epoch``.
        optimizers: dict holding the optimiser under the key ``'backbone'``.
        schedulers: dict holding the LR scheduler under the key ``'backbone'``;
            stepped once per epoch.
        dataloaders: dict with the ``'train'`` and ``'test'`` loaders.
        num_epochs: number of epochs to run.
        epoch_loss: forwarded unchanged to ``train_epoch``.
        cycle: zero-based active-learning cycle, used only for logging.
    """
    print('>> Train a Model...')
    best_acc = 0.
    checkpoint_dir = os.path.join('./cifar10', 'train', 'weights')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_dir, 'active_resnet18_cifar10.pth')

    for epoch in range(num_epochs):

        train_epoch(models, criterion, optimizers, dataloaders, epoch, epoch_loss)
        schedulers['backbone'].step()

        # Evaluate on the final epoch of *this* call: compare against the
        # num_epochs argument rather than the global EPOCH constant, which is
        # only correct when the two happen to be equal.
        is_last_epoch = epoch == num_epochs - 1

        # Save a checkpoint
        if epoch % 20 == 0 or is_last_epoch:
            # NOTE(review): accuracy is measured on the 'test' loader although
            # the log line below labels it 'Val Acc'.
            acc = test(models, dataloaders, 'test')
            if best_acc < acc:
                best_acc = acc
                torch.save({
                    'epoch': epoch + 1,
                    'state_dict_backbone': models['backbone'].state_dict()
                    # 'state_dict_module': models['module'].state_dict()
                },
                    checkpoint_path)
            print('Cycle:', cycle + 1, 'Epoch:', epoch + 1, "---", 'Val Acc: {:.3f} \t Best Acc: {:.3f}'.format(acc, best_acc),flush=True)

    print('>> Finished.')


# main.py里面的get_uncertainty函数

In [10]:
#
def _kmeans_margin(features, n_clusters, max_iter):
    """Cluster ``features`` with k-means and return each sample's centroid margin.

    The margin of a sample is ``|d1 - d2|`` where ``d1`` and ``d2`` are its
    Euclidean distances to the nearest and second-nearest centroid.

    Args:
        features: float ndarray of shape (n_samples, n_features).
        n_clusters: requested number of clusters; capped at ``n_samples``.
        max_iter: upper bound on the number of k-means iterations.

    Returns:
        ndarray of shape (n_samples, 1) with the same dtype as ``features``.
        When fewer than two clusters can be formed there is no competing
        centroid, so every margin is ``inf``.
    """
    n_samples = features.shape[0]
    k = min(n_clusters, n_samples)
    if k < 2:
        return np.full((n_samples, 1), np.inf, dtype=features.dtype)

    def _distances(centroids):
        # (n_samples, k) matrix of sample-to-centroid Euclidean distances.
        return np.linalg.norm(features[:, None, :] - centroids[None, :, :], axis=2)

    # Initialise the centroids with k distinct, randomly chosen samples.
    centroids = features[np.random.choice(n_samples, k, replace=False)].copy()
    assignment = np.full(n_samples, -1, dtype=np.int64)

    for _ in range(max_iter):
        distance = _distances(centroids)
        nearest = distance.argmin(axis=1)
        if np.array_equal(nearest, assignment):
            # Assignments are stable, so the centroids cannot move any more.
            break
        assignment = nearest
        for cluster in range(k):
            members = features[assignment == cluster]
            # An empty cluster keeps its previous centroid; averaging an empty
            # set would yield NaN and poison every later distance.
            if len(members):
                centroids[cluster] = members.mean(axis=0)
    else:
        # Iteration budget exhausted (or zero): refresh the distances so they
        # reflect the last centroid update.
        distance = _distances(centroids)

    # Only the two smallest distances per row are needed.
    two_nearest = np.partition(distance, 1, axis=1)[:, :2]
    return np.abs(two_nearest[:, 1] - two_nearest[:, 0]).reshape(n_samples, 1)


def get_uncertainty(models, unlabeled_loader, n_clusters=None, max_iter=None):
    """Score unlabeled samples by how ambiguous their k-means assignment is.

    For every mini-batch the pooled backbone features are clustered, and each
    sample receives ``10 / |d1 - d2|`` where ``d1``/``d2`` are its distances to
    the nearest and second-nearest centroid. A sample lying between two
    centroids therefore gets a large score.

    NOTE(review): clustering is run independently on every mini-batch, so
    scores from different batches refer to different centroids - confirm that
    this is intended rather than clustering the whole subset at once.

    Args:
        models: dict holding the network under the key ``'backbone'``; its
            forward pass must return a 4-tuple whose third element is the
            (batch, features) matrix to cluster.
        unlabeled_loader: iterable of ``(inputs, labels)`` batches; the labels
            are ignored.
        n_clusters: number of clusters; defaults to ``CLUSTER_NUMS``.
        max_iter: k-means iteration limit; defaults to ``CLUSTER_MAX_ITER``.

    Returns:
        CPU tensor of shape (N, 1) with one score per sample, in loader order.
        Samples from a batch too small to form two clusters score 0.
    """
    if n_clusters is None:
        n_clusters = CLUSTER_NUMS
    if max_iter is None:
        max_iter = CLUSTER_MAX_ITER

    backbone = models['backbone']
    backbone.eval()

    # Feed the inputs on the device the backbone lives on; fall back to the
    # module-level `device` for a backbone without parameters.
    first_param = next(iter(backbone.parameters()), None)
    target_device = device if first_param is None else first_param.device

    batch_scores = []
    with torch.no_grad():
        for (inputs, labels) in unlabeled_loader:
            inputs = inputs.to(target_device)

            scores, _, total_feature, features = backbone(inputs)

            # NumPy cannot read GPU memory: move the features to the CPU first.
            input_x = total_feature.detach().cpu().numpy()

            line_num, column_num = input_x.shape
            print("line_num: ",line_num)
            print("column_num: ", column_num)

            print(">> Start clustering:")
            min_distance_differ = _kmeans_margin(input_x, n_clusters, max_iter)
            print(">> Clustering Over:")

            margin = torch.from_numpy(min_distance_differ)
            # Clamp so that a sample exactly equidistant from two centroids
            # yields a large finite score instead of a division by zero.
            batch_scores.append(10.00 / margin.clamp(min=1e-12))

    if not batch_scores:
        return torch.empty((0, 1))
    # Every piece is already a CPU tensor, so concatenation never mixes devices.
    return torch.cat(batch_scores, 0)


# main.py 里面的main函数

In [None]:

##
# Main
if __name__ == '__main__':

    for trial in range(TRIALS):
        # Initialize the labeled set with 2 * ADDENDUM randomly chosen training
        # indices; every remaining index starts out unlabeled.
        indices = list(range(NUM_TRAIN))
        random.shuffle(indices)
        START = 2 * ADDENDUM
        labeled_set = indices[:START]
        unlabeled_set = indices[START:]

        train_loader = DataLoader(cifar10_train, batch_size=BATCH,  # BATCH
                                  sampler=SubsetRandomSampler(labeled_set),
                                  pin_memory=True)
        test_loader = DataLoader(cifar10_test, batch_size=BATCH)

        dataloaders = {'train': train_loader, 'test': test_loader}

        # Model
        # NOTE(review): the backbone is created once per trial, so each cycle
        # continues training from the previous cycle's weights.
        backbone_net = ResNet18().to(device)

        models = {'backbone': backbone_net}
        torch.backends.cudnn.benchmark = True

        # Active learning cycles
        for cycle in range(CYCLES):
            # Loss, criterion and scheduler (re)initialization
            criterion = nn.CrossEntropyLoss(reduction='none')

            optim_backbone = optim.SGD(models['backbone'].parameters(), lr=LR, momentum=MOMENTUM, weight_decay=WDECAY)

            sched_backbone = lr_scheduler.MultiStepLR(optim_backbone, milestones=MILESTONES)

            optimizers = {'backbone': optim_backbone}
            schedulers = {'backbone': sched_backbone}

            # Training and test
            train(models, criterion, optimizers, schedulers, dataloaders, EPOCH, EPOCHL, cycle)
            acc = test(models, dataloaders, mode='test')
            print('Trial {}/{} || Cycle {}/{} || Label set size {}: Test acc {}'.format(trial + 1, TRIALS, cycle + 1,
                                                                                        CYCLES, len(labeled_set), acc),
                                                                                         flush=True)

            ##
            #  Update the labeled dataset via clustering-based uncertainty measurement

            # Randomly sample SUBSET unlabeled data points
            random.shuffle(unlabeled_set)
            subset = unlabeled_set[:SUBSET]

            # Create unlabeled dataloader for the unlabeled subset
            unlabeled_loader = DataLoader(cifar10_unlabeled, batch_size=BATCH,
                                          sampler=SubsetSequentialSampler(subset),
                                          # more convenient if we maintain the order of subset
                                          pin_memory=True)

            # Measure uncertainty of each data points in the subset
            uncertainty = get_uncertainty(models, unlabeled_loader)

            # Flatten the (N, 1) scores and rank them in ascending order. Using
            # a flat list of Python ints avoids depending on np.argsort
            # accepting a torch tensor and on legacy nested-list indexing.
            arg = torch.argsort(uncertainty.reshape(-1)).tolist()
            print("main arg: ", arg)

            # Subset indices ordered from least to most uncertain.
            ranked = [subset[position] for position in arg]

            # Update the labeled dataset and the unlabeled dataset, respectively
            labeled_set += ranked[-ADDENDUM:]  # select the most uncertain samples
            unlabeled_set = ranked[:-ADDENDUM] + unlabeled_set[SUBSET:]

            # Create a new dataloader for the updated labeled dataset
            dataloaders['train'] = DataLoader(cifar10_train, batch_size=BATCH,  # BATCH
                                              sampler=SubsetRandomSampler(labeled_set),
                                              pin_memory=True)

        # Save a checkpoint
        # The directory is normally created by train(); create it here as well
        # so the save cannot fail when no cycle has run.
        os.makedirs('./cifar10/train/weights', exist_ok=True)
        torch.save({
            'trial': trial + 1,
            'state_dict_backbone': models['backbone'].state_dict()
            # 'state_dict_module': models['module'].state_dict()
        },
            './cifar10/train/weights/active_resnet18_cifar10_trial{}.pth'.format(trial))

        print('---------------------------Current Trial is done-----------------------------',flush=True)


>> Train a Model...
Cycle: 1 Epoch: 1 --- Val Acc: 10.000 	 Best Acc: 10.000
Cycle: 1 Epoch: 21 --- Val Acc: 10.110 	 Best Acc: 10.110
Cycle: 1 Epoch: 41 --- Val Acc: 13.090 	 Best Acc: 13.090
Cycle: 1 Epoch: 61 --- Val Acc: 16.750 	 Best Acc: 16.750
Cycle: 1 Epoch: 81 --- Val Acc: 18.080 	 Best Acc: 18.080
Cycle: 1 Epoch: 100 --- Val Acc: 18.200 	 Best Acc: 18.200
>> Finished.
Trial 1/2 || Cycle 1/5 || Label set size 40: Test acc 18.2
line_num:  30
column_num:  512
>> Start clustering:
>> Clustering Over:
main arg:  [[3, 21, 23, 25, 26, 2, 28, 5, 1, 29, 6, 18, 8, 11, 19, 10, 16, 0, 13, 14, 27, 12, 17, 15, 22, 24, 7, 9, 4, 20]]
>> Train a Model...
Cycle: 2 Epoch: 1 --- Val Acc: 18.660 	 Best Acc: 18.660
Cycle: 2 Epoch: 21 --- Val Acc: 19.550 	 Best Acc: 19.550
Cycle: 2 Epoch: 41 --- Val Acc: 20.640 	 Best Acc: 20.640
Cycle: 2 Epoch: 61 --- Val Acc: 20.140 	 Best Acc: 20.640
Cycle: 2 Epoch: 81 --- Val Acc: 23.270 	 Best Acc: 23.270
Cycle: 2 Epoch: 100 --- Val Acc: 21.120 	 Best Acc: 23.