In [1]:
import matplotlib.pyplot as plt
import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from models.resnet import resnet50
from my_dataset import MyDataset
from my_lossfunc import JointLoss, MultilabelLoss, DiscriminativeLoss
from my_transform import data_transforms
from PIL import Image
from scipy.spatial.distance import pdist, cdist  # 一集合点距, 两集合点距
from torch.utils.data import DataLoader
from tqdm import tnrange
from tqdm import tqdm_notebook as tqdm
from utils import *

os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Device:', DEVICE)


BASE = '/home/zengrui/datasets'
DUKE_DIR_TRAIN = f'{BASE}/ReID_Duke/bounding_box_train'
DUKE_DIR_TEST = f'{BASE}/ReID_Duke/bounding_box_test'
DUKE_DIR_QUERY = f'{BASE}/ReID_Duke/query'
DUKE_IMG_AMOUNT = 16522
DUKE_ID_AMOUNT = 1404
MARKET_DIR_TRAIN = f'{BASE}/ReID_Market/bounding_box_train'
MARKET_DIR_TEST = f'{BASE}/ReID_Market/bounding_box_test'
MARKET_DIR_QUERY = f'{BASE}/ReID_Market/query'
MARKET_IMG_AMOUNT = 12936
MARKET_ID_AMOUNT = 1501

SOURCE_DIR_TRAIN = [DUKE_DIR_TRAIN, DUKE_DIR_TEST]
TARGET_DIR_TRAIN = MARKET_DIR_TRAIN
TARGET_DIR_GALLERY = MARKET_DIR_TEST
TARGET_DIR_PROBE = MARKET_DIR_QUERY
ML_PATH = 'data/ml_Market.dat'
PRETRAIN_PATH = 'data/pretrained_weight.pkl'
PRETRAIN_OUT_PATH = 'data/pretrained_weight_{}.pkl'

BATCH_SIZE = 96
EPOCH = 20
LR = 2e-4

BETA = 0.2
IMG_SIZE = (384, 128)
LAMB1 = 2e-4
LAMB2 = 50
MARGIN = 1
MINING_RATIO = 0.005
SCALA_CE = 30
WD = 2.5e-2

Device: cuda


# Prepare Work

In [2]:
# data loader
data_loader = {
    'source': DataLoader(
        dataset=MyDataset(SOURCE_DIR_TRAIN, 
                          transform=data_transforms('train', size=IMG_SIZE),
                          require_view=False,
                          encode_label=True),
        batch_size=BATCH_SIZE,
        shuffle=True,
    ),
    'target': DataLoader(
        dataset=MyDataset(TARGET_DIR_TRAIN,
                          transform=data_transforms('train', size=IMG_SIZE),
                          require_view=True,
                          encode_label=True),
        batch_size=BATCH_SIZE,
        shuffle=True,
    ),
    'gallery': DataLoader(
        dataset=MyDataset(TARGET_DIR_GALLERY,
                          transform=data_transforms('val', size=IMG_SIZE),
                          require_view=True),
        batch_size=BATCH_SIZE,
        shuffle=False,
    ),
    'probe': DataLoader(
        dataset=MyDataset(TARGET_DIR_PROBE,
                          transform=data_transforms('val', size=IMG_SIZE),
                          require_view=True),
        batch_size=BATCH_SIZE,
        shuffle=False,
    ),
}
SOURCE_ID_AMOUNT = len(set(data_loader['source'].dataset.label))
TARGET_IMG_AMOUNT = len(data_loader['target'].dataset.data)
'data_loader: ok.'

'data_loader: ok.'

# Trainer

In [3]:
%matplotlib inline
class Trainer(object):
    def __init__(self):
        
        # 网络
        self.net = resnet50(pretrained=True, 
                            num_classes=SOURCE_ID_AMOUNT)
        self.net = nn.DataParallel(self.net).to(DEVICE)
        if PRETRAIN_PATH is not None and os.path.exists(PRETRAIN_PATH):
            self.net.load_state_dict(torch.load(PRETRAIN_PATH))
            print('Pretrained model loaded.')
        else:
            print('Pretrained model not found. Train from scratch.')
            
        # 损失
        self.mdl_loss = DiscriminativeLoss(MINING_RATIO).to(DEVICE)
        self.al_loss = nn.CrossEntropyLoss().to(DEVICE)
        self.rj_loss = JointLoss(MARGIN).to(DEVICE)  # lack 1 param
        self.cml_loss = MultilabelLoss(BATCH_SIZE).to(DEVICE)
        
        # 优化器
        bn_params, other_params = partition_params(self.net, 'bn')
        self.optimizer = torch.optim.SGD([
                {'params': bn_params, 'weight_decay': 0},
                {'params': other_params},
            ], lr=LR, momentum=0.9, weight_decay=WD
        )
        self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=[int(EPOCH / 8 * 5), int(EPOCH / 8 * 7)])
        
        # 存储器
        self.ml_mem = torch.zeros(TARGET_IMG_AMOUNT, SOURCE_ID_AMOUNT)
        self.inited = self.ml_mem.sum(dim=1) != 0
    
    def train(self):
        '''进行一次完整训练.'''
        print('Training start. Epochs: %d' % EPOCH)
        self.net.train()
        for epoch in tnrange(EPOCH):
            self.train_epoch(epoch)
    
    def train_epoch(self, epoch):
        '''训练一个epoch.'''
        stats = ('total', 'src', 'st', 'ml', 'tgt')
        running_loss = {stat: AverageMeter() for stat in stats}
        
        if not self.mdl_loss.initialized:
            self.init_losses(data_loader['target'])
            self.net.train()
        
        with tqdm(total=len(data_loader['source'])) as pbar:
            tgt_iter = iter(data_loader['target'])
            for step, (ax, ay) in enumerate(data_loader['source']):
                # a - source, b - target
                ax = ax.to(DEVICE)
                ay = ay.to(DEVICE)
                try:
                    b = next(tgt_iter)
                except StopIteration:
                    tgt_iter = iter(data_loader['target'])
                    b = next(tgt_iter)
                (bx, by, b_view, b_idx) = b
                bx, by, b_view = bx.to(DEVICE), by.to(DEVICE), b_view.to(DEVICE)

                a_f, a_sim, _ = self.net(ax)
                b_f, b_sim, _ = self.net(bx)

                loss = {stat: torch.Tensor([0]).to(DEVICE) 
                        for stat in stats}
                
                loss['src'] = self.al_loss(a_sim * SCALA_CE, ay)  # 有监督 交叉熵
                
                agents = self.net.module.fc.weight.renorm(2, 0, 1e-5).mul(1e5)  # 归一化 shape=(1404, 2048)
                arange = torch.arange(len(agents)).cuda()
                loss['st'] = self.rj_loss(agents.detach(), a_f, a_sim.detach(), ay, 
                                          b_f, b_sim.detach())
                
                with torch.no_grad():
                    ml = F.softmax(b_f.mm(agents.t_() * SCALA_CE), dim=1)  # t_(): 转置并inplace
                loss['ml'] = self.cml_loss(torch.log(ml), b_view)
    
                if epoch > 0:  # 为什么第一轮不算 mdl_loss 呢
                    ml_cpu = ml.detach().cpu()
                    is_inited_batch = self.inited[b_idx]
                    inited_idx = b_idx[is_inited_batch]
                    uninited_idx = b_idx[~is_inited_batch]
                    self.ml_mem[uninited_idx] = ml_cpu[~is_inited_batch]  # 0标签满更新
                    self.inited[uninited_idx] = True
                    self.ml_mem[inited_idx] = 0.9 * self.ml_mem[inited_idx] \
                                            + 0.1 * ml_cpu[is_inited_batch]  # 非空标签小更新
                    loss['tgt'] = self.mdl_loss(b_f, self.ml_mem[b_idx], by)

                self.optimizer.zero_grad()
                loss['total'] = loss['tgt'] + LAMB1 * loss['ml'] \
                              + LAMB2 * (loss['src'] + BETA * loss['st'])
                loss['total'].backward()
                self.optimizer.step()

                for stat in stats:
                    loss_cpu = float(loss[stat].data.cpu().numpy())
                    running_loss[stat].update(loss_cpu, BATCH_SIZE)
                pbar.set_description('Loss: %.4f' % running_loss['total'].avg)
                pbar.update()

            self.lr_scheduler.step()
            pbar.set_description('Progress:')
            print('Epoch: %d, Loss: %.4f (%.4f + %.4f + %.4f + %.4f)' 
                  % (epoch, 
                     running_loss['total'].avg, 
                     running_loss['src'].avg * LAMB2, 
                     running_loss['st'].avg * LAMB2 * BETA, 
                     running_loss['ml'].avg * LAMB1, 
                     running_loss['tgt'].avg))
            
    def eval_performance(self, gallery_loader, probe_loader):
        stats = ('r1', 'r5', 'r10', 'MAP')
        val = {stat: AverageMeter() for stat in stats}
        self.net.eval()
        
        gallery_f, gallery_y, gallery_views = extract_features(
            gallery_loader, self.net, index_feature=0)
        probe_f, probe_y, probe_views = extract_features(
            probe_loader, self.net, index_feature=0)
        dist = cdist(gallery_f, probe_f, metric='cosine')  # 实际是 1-cos ∈ [0, 2], 越小越相似
        CMC, MAP, example = eval_cmc_map(
            dist, gallery_y, probe_y, gallery_views, probe_views, 
            ignore_MAP=False, show_example=True)
#         CMC, MAP, example = eval_cmc_map(
#             dist, gallery_y, probe_y, 
#             ignore_MAP=False, show_example=True)
        r1, r5, r10 = CMC[0], CMC[4], CMC[9]
        self.r1, self.r5, self.r10, self.MAP = r1, r5, r10, MAP
        self.example = example
        
        for stat in stats:
            val[stat].update(locals()[stat].item(), BATCH_SIZE)
            
        # 显示rank多图
#         if show_img_result:
#             plt.subplot(1, 11, 1)
#             plt.title('Query')
#             plt.imshow(Image.open(file.path, 'r'))

#             for i in range(10):
#                 plt.subplot(1, 11, i + 2)
#                 plt.imshow(Image.open(DATA_DIR_TEST +
#                                       '\\' + sort_list[i][0], 'r'))

#             plt.show()
            
        return val
            
    def init_losses(self, tgt_loader):
        '''训练前初始化loss参数.'''
        print('#' * 8, 'Initializing losses', '#' * 8)
        if os.path.isfile(ML_PATH):
            (ml, views, pairwise_agreements) = torch.load(ML_PATH)
            print('Ml loaded.')
        else:
            print('Ml not found, computing...')
            sim, _, views = extract_features(
                data_loader['target'], self.net, index_feature=1, return_numpy=False)
            ml = F.softmax(sim * SCALA_CE, dim=1)
            ml_np = ml.cpu().numpy()
            pairwise_agreements = 1 - pdist(ml_np, 'minkowski', p=1) / 2  # 相似比较特征
            print('Ml saving to %s...' % ML_PATH)
            torch.save((ml, views, pairwise_agreements), ML_PATH)

        self.cml_loss.init_centers(torch.log(ml), views)
        print('Cml_loss centers inited.')
        self.mdl_loss.init_threshold(pairwise_agreements)
        print('Mdl_loss threshold inited.')
        print('#' * 8, 'OK', '#' * 8)
        
    def save_model(self, cover=False):
        '''
        保存当前模型net的参数.
        
        :param cover: True覆盖默认文件, False新增带时间戳文件
        '''
        if cover:
            torch.save(self.net.state_dict(), PRETRAIN_PATH)
            print('Model weight saved(cover).')
        else:
            path = PRETRAIN_OUT_PATH.format(time.time())
            torch.save(self.net.state_dict(), path)
            print('Model weight saved(%s).' % path)

# Train

In [4]:
trainer = Trainer()
trainer.train()

Pretrained model not found. Train from scratch.
Training start. Epochs: 20


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

######## Initializing losses ########
Ml loaded.
Cml_loss centers inited.
Mdl_loss threshold inited.
######## OK ########


HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 0, Loss: 216.7824 (200.3234 + 16.2602 + 0.1988 + 0.0000)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 1, Loss: 65.3371 (52.0353 + 12.4865 + 0.1724 + 0.6430)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 2, Loss: 38.0503 (26.0716 + 11.1560 + 0.1771 + 0.6455)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 3, Loss: 29.6407 (18.3128 + 10.4959 + 0.1892 + 0.6429)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 4, Loss: 24.7294 (13.8834 + 10.0182 + 0.1932 + 0.6347)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 5, Loss: 21.6510 (11.1933 + 9.6326 + 0.1944 + 0.6307)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 6, Loss: 19.6254 (9.4209 + 9.3707 + 0.2015 + 0.6322)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 7, Loss: 18.6142 (8.6302 + 9.1543 + 0.2082 + 0.6215)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 8, Loss: 17.6679 (7.8582 + 8.9809 + 0.2112 + 0.6177)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 9, Loss: 16.9416 (7.2923 + 8.8260 + 0.2115 + 0.6119)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 10, Loss: 15.6966 (6.3012 + 8.5701 + 0.2171 + 0.6081)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 11, Loss: 16.4318 (6.9596 + 8.6485 + 0.2200 + 0.6037)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 12, Loss: 13.2272 (4.2945 + 8.1171 + 0.2210 + 0.5947)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 13, Loss: 12.1852 (3.4280 + 7.9485 + 0.2186 + 0.5900)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 14, Loss: 11.6150 (2.9697 + 7.8319 + 0.2204 + 0.5931)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 15, Loss: 11.4191 (2.8239 + 7.7844 + 0.2200 + 0.5907)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 16, Loss: 11.4085 (2.8624 + 7.7396 + 0.2195 + 0.5870)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 17, Loss: 11.0629 (2.5466 + 7.7114 + 0.2210 + 0.5838)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 18, Loss: 11.3101 (2.8073 + 7.6994 + 0.2171 + 0.5863)



HBox(children=(IntProgress(value=0, max=357), HTML(value='')))

Epoch: 19, Loss: 11.1661 (2.6742 + 7.6887 + 0.2203 + 0.5829)




In [5]:
trainer.save_model()
# trainer.save_model(cover=True)

Model weight saved(data/pretrained_weight_1579157817.5214608.pkl).


# Eval

In [6]:
# trainer = Trainer()

In [7]:
trainer.eval_performance(data_loader['gallery'], data_loader['probe'])

{'r1': <utils.AverageMeter at 0x7f588c16cd10>,
 'r5': <utils.AverageMeter at 0x7f588c16c310>,
 'r10': <utils.AverageMeter at 0x7f588c16cf90>,
 'MAP': <utils.AverageMeter at 0x7f588c16cc50>}

In [8]:
trainer.r1, trainer.r5, trainer.r10, trainer.MAP

(40.76603325415677, 58.2541567695962, 64.81591448931117, 21.38579113869162)

In [9]:
trainer.example

[{'tgt': 24, 'res': array([89, 89, 89, 89, 89, 89, 89, 24, 24, 89])},
 {'tgt': 1459,
  'res': array([ 932,  684,  293,  932,  684,  668,  932, 1143, 1035, 1077])},
 {'tgt': 764,
  'res': array([ 812,  189,  812,  764,  721,  721,  721, 1153, 1153, 1070])},
 {'tgt': 187,
  'res': array([187, 977, 187, 977, 977, 977, 278,  -1, 977, 778])},
 {'tgt': 935,
  'res': array([ 935,   55, 1214,  252, 1214, 1236,  935,  252, 1310,  825])},
 {'tgt': 1486,
  'res': array([1485, 1149,  336,  428,  316, 1149,  428, 1486, 1486,  183])},
 {'tgt': 1122,
  'res': array([  -1, 1190, 1190, 1190, 1016,  363, 1190,   -1,  363, 1016])},
 {'tgt': 1144,
  'res': array([1120,  567,  530,  746, 1185,   -1, 1120,  405, 1141,  302])},
 {'tgt': 1148,
  'res': array([1148, 1148, 1148, 1148, 1148, 1148, 1148, 1148, 1148, 1148])},
 {'tgt': 493,
  'res': array([  66,   66, 1089,  458,  538, 1089,   -1, 1354,  695,   51])}]

In [8]:
exit()