In [None]:
import os
import time
import torch
import numpy as np
from torch.autograd import Variable
import models
from config import cfg
from data_loader import data_loader
from loss import make_loss
from optimizer import make_optimizer
from scheduler import make_scheduler
from logger import make_logger
from evaluation import evaluation
from datasets import PersonReID_Dataset_Downloader
from utils import check_jupyter_run
if check_jupyter_run():
    from tqdm import tqdm_notebook as tqdm
else:
    from tqdm import tqdm

config_file = "./config/market_softmax.yaml"
cfg.merge_from_file(config_file)
cfg.freeze()

PersonReID_Dataset_Downloader('./datasets',cfg.DATASETS.NAMES)

output_dir = cfg.OUTPUT_DIR
if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

logger = make_logger("Reid_Baseline", output_dir)
logger.info("Using {} GPUS".format(1))
logger.info("Loaded configuration file {}".format(config_file))
logger.info("Running with config:\n{}".format(cfg))

train_loader, val_loader, num_query, num_classes = data_loader(cfg)
model = getattr(models, cfg.MODEL.NAME)(num_classes, cfg.MODEL.LAST_STRIDE, cfg.MODEL.PRETRAIN_PATH)
optimizer = make_optimizer(cfg, model)
scheduler = make_scheduler(cfg,optimizer)
loss_fn = make_loss(cfg)

log_period = cfg.SOLVER.LOG_PERIOD
checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
eval_period = cfg.SOLVER.EVAL_PERIOD
output_dir = cfg.OUTPUT_DIR
device = torch.device(cfg.MODEL.DEVICE)
epochs = cfg.SOLVER.MAX_EPOCHS
logger.info("Start training")

since = time.time()
for epoch in tqdm(range(epochs), desc='Epoch'):
    count = 0
    running_loss = 0.0
    running_acc = 0
    for data in tqdm(train_loader, desc='Iteration', leave=False):
        model.train()
        images, labels = data
        
        if device:
            model.to(device)
            images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
                
        scores, feats = model(images)
        loss = loss_fn(scores, feats, labels)
        
        loss.backward()
        optimizer.step()
        
        count = count + 1
        running_loss += loss.item()
        running_acc += (scores.max(1)[1] == labels).float().mean().item()
        
        if count % log_period == 0:
            logger.info("Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                                .format(epoch+1, count, len(train_loader),
                                running_loss/count, running_acc/count,
                                scheduler.get_lr()[0]))
    scheduler.step()
            
    if (epoch+1) % checkpoint_period == 0:
        model.save(output_dir,epoch+1)
    
    # Validation
    if (epoch+1) % eval_period == 0:
        all_feats = []
        all_pids = []
        all_camids = []
        
        for data in tqdm(val_loader, desc='Feature Extraction', leave=False):
            model.eval()
            with torch.no_grad():
                images, pids, camids = data

                if device:
                    model.to(device)
                    images = images.to(device)

                feats = model(images)

            all_feats.append(feats)
            all_pids.extend(np.asarray(pids))
            all_camids.extend(np.asarray(camids))

        all_feats = torch.cat(all_feats, dim=0)
        # query
        qf = all_feats[:num_query]
        q_pids = np.asarray(all_pids[:num_query])
        q_camids = np.asarray(all_camids[:num_query])
        # gallery
        gf = all_feats[num_query:]
        g_pids = np.asarray(all_pids[num_query:])
        g_camids = np.asarray(all_camids[num_query:])
        
        m, n = qf.shape[0], gf.shape[0]
        distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
                  torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
        distmat.addmm_(1, -2, qf, gf.t())
        distmat = distmat.cpu().numpy()
        cmc, mAP = evaluation(distmat, q_pids, g_pids, q_camids, g_camids)
        logger.info("Validation Results - Epoch: {}".format(epoch))
        logger.info("mAP: {:.1%}".format(mAP))
        for r in [1, 5, 10]:
            logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
        

time_elapsed = time.time() - since
logger.info('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
logger.info('-' * 10)

Dataset Check Success: Market1501 exists!
2019-02-06 10:10:38,183 Reid_Baseline INFO: Using 1 GPUS
2019-02-06 10:10:38,186 Reid_Baseline INFO: Loaded configuration file ./config/market_softmax.yaml
2019-02-06 10:10:38,188 Reid_Baseline INFO: Running with config:
DATALOADER:
  NUM_INSTANCE: 16
  NUM_WORKERS: 8
  SAMPLER: softmax
DATASETS:
  NAMES: Market1501
  STORE_DIR: ./datasets
INPUT:
  PADDING: 10
  PIXEL_MEAN: [0.485, 0.456, 0.406]
  PIXEL_STD: [0.229, 0.224, 0.225]
  PROB: 0.5
  SIZE_TEST: [384, 128]
  SIZE_TRAIN: [384, 128]
MODEL:
  DEVICE: cuda:7
  LAST_STRIDE: 1
  NAME: ResNet50
  PRETRAIN_PATH: /home/linshan/.torch/models/resnet50-19c8e357.pth
OUTPUT_DIR: ./checkpoint/Market1501/Softmax_BS64_384x128
SOLVER:
  BASE_LR: 0.00035
  BIAS_LR_FACTOR: 1
  CHECKPOINT_PERIOD: 20
  EVAL_PERIOD: 20
  GAMMA: 0.1
  IMS_PER_BATCH: 64
  LOG_PERIOD: 100
  MARGIN: 0.3
  MAX_EPOCHS: 120
  MOMENTUM: 0.9
  OPTIMIZER_NAME: Adam
  STEP: 40
  WARMUP: True
  WARMUP_FACTOR: 0.01
  WARMUP_ITERS: 5
  WA

HBox(children=(IntProgress(value=0, description='Epoch', max=120), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:12:11,653 Reid_Baseline INFO: Epoch[1] Iteration[100/203] Loss: 6.615, Acc: 0.004, Base Lr: -6.58e-05
2019-02-06 10:13:39,539 Reid_Baseline INFO: Epoch[1] Iteration[200/203] Loss: 6.605, Acc: 0.008, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:15:09,912 Reid_Baseline INFO: Epoch[2] Iteration[100/203] Loss: 6.556, Acc: 0.058, Base Lr: -6.58e-05
2019-02-06 10:16:37,722 Reid_Baseline INFO: Epoch[2] Iteration[200/203] Loss: 6.534, Acc: 0.070, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:18:07,850 Reid_Baseline INFO: Epoch[3] Iteration[100/203] Loss: 6.435, Acc: 0.111, Base Lr: -6.58e-05
2019-02-06 10:19:35,154 Reid_Baseline INFO: Epoch[3] Iteration[200/203] Loss: 6.397, Acc: 0.115, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:21:05,373 Reid_Baseline INFO: Epoch[4] Iteration[100/203] Loss: 6.258, Acc: 0.120, Base Lr: -6.58e-05
2019-02-06 10:22:32,421 Reid_Baseline INFO: Epoch[4] Iteration[200/203] Loss: 6.216, Acc: 0.117, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:24:03,050 Reid_Baseline INFO: Epoch[5] Iteration[100/203] Loss: 6.057, Acc: 0.121, Base Lr: -6.58e-05
2019-02-06 10:25:31,238 Reid_Baseline INFO: Epoch[5] Iteration[200/203] Loss: 6.020, Acc: 0.115, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:27:01,474 Reid_Baseline INFO: Epoch[6] Iteration[100/203] Loss: 5.872, Acc: 0.111, Base Lr: -6.58e-05
2019-02-06 10:28:27,461 Reid_Baseline INFO: Epoch[6] Iteration[200/203] Loss: 5.823, Acc: 0.110, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:29:56,313 Reid_Baseline INFO: Epoch[7] Iteration[100/203] Loss: 5.687, Acc: 0.108, Base Lr: -6.58e-05
2019-02-06 10:31:22,100 Reid_Baseline INFO: Epoch[7] Iteration[200/203] Loss: 5.635, Acc: 0.113, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:32:52,115 Reid_Baseline INFO: Epoch[8] Iteration[100/203] Loss: 5.479, Acc: 0.121, Base Lr: -6.58e-05
2019-02-06 10:34:19,037 Reid_Baseline INFO: Epoch[8] Iteration[200/203] Loss: 5.445, Acc: 0.121, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:35:47,786 Reid_Baseline INFO: Epoch[9] Iteration[100/203] Loss: 5.288, Acc: 0.139, Base Lr: -6.58e-05
2019-02-06 10:37:14,025 Reid_Baseline INFO: Epoch[9] Iteration[200/203] Loss: 5.259, Acc: 0.136, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:38:42,843 Reid_Baseline INFO: Epoch[10] Iteration[100/203] Loss: 5.114, Acc: 0.152, Base Lr: -6.58e-05
2019-02-06 10:40:08,867 Reid_Baseline INFO: Epoch[10] Iteration[200/203] Loss: 5.079, Acc: 0.154, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:41:37,446 Reid_Baseline INFO: Epoch[11] Iteration[100/203] Loss: 4.941, Acc: 0.167, Base Lr: -6.58e-05
2019-02-06 10:43:03,449 Reid_Baseline INFO: Epoch[11] Iteration[200/203] Loss: 4.908, Acc: 0.168, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:44:32,295 Reid_Baseline INFO: Epoch[12] Iteration[100/203] Loss: 4.782, Acc: 0.177, Base Lr: -6.58e-05
2019-02-06 10:45:58,117 Reid_Baseline INFO: Epoch[12] Iteration[200/203] Loss: 4.740, Acc: 0.187, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:47:27,069 Reid_Baseline INFO: Epoch[13] Iteration[100/203] Loss: 4.595, Acc: 0.208, Base Lr: -6.58e-05
2019-02-06 10:48:52,747 Reid_Baseline INFO: Epoch[13] Iteration[200/203] Loss: 4.577, Acc: 0.207, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:50:21,398 Reid_Baseline INFO: Epoch[14] Iteration[100/203] Loss: 4.466, Acc: 0.219, Base Lr: -6.58e-05
2019-02-06 10:51:47,349 Reid_Baseline INFO: Epoch[14] Iteration[200/203] Loss: 4.426, Acc: 0.229, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:53:16,911 Reid_Baseline INFO: Epoch[15] Iteration[100/203] Loss: 4.289, Acc: 0.255, Base Lr: -6.58e-05
2019-02-06 10:54:43,150 Reid_Baseline INFO: Epoch[15] Iteration[200/203] Loss: 4.269, Acc: 0.254, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:56:11,789 Reid_Baseline INFO: Epoch[16] Iteration[100/203] Loss: 4.152, Acc: 0.275, Base Lr: -6.58e-05
2019-02-06 10:57:37,634 Reid_Baseline INFO: Epoch[16] Iteration[200/203] Loss: 4.127, Acc: 0.278, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 10:59:06,380 Reid_Baseline INFO: Epoch[17] Iteration[100/203] Loss: 4.029, Acc: 0.298, Base Lr: -6.58e-05
2019-02-06 11:00:32,275 Reid_Baseline INFO: Epoch[17] Iteration[200/203] Loss: 3.993, Acc: 0.303, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:02:02,229 Reid_Baseline INFO: Epoch[18] Iteration[100/203] Loss: 3.882, Acc: 0.319, Base Lr: -6.58e-05
2019-02-06 11:03:29,588 Reid_Baseline INFO: Epoch[18] Iteration[200/203] Loss: 3.849, Acc: 0.329, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:04:59,773 Reid_Baseline INFO: Epoch[19] Iteration[100/203] Loss: 3.758, Acc: 0.343, Base Lr: -6.58e-05
2019-02-06 11:06:27,203 Reid_Baseline INFO: Epoch[19] Iteration[200/203] Loss: 3.724, Acc: 0.353, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:07:57,248 Reid_Baseline INFO: Epoch[20] Iteration[100/203] Loss: 3.644, Acc: 0.364, Base Lr: -6.58e-05
2019-02-06 11:09:24,529 Reid_Baseline INFO: Epoch[20] Iteration[200/203] Loss: 3.596, Acc: 0.378, Base Lr: -6.58e-05
Model:resnet50_epo20.pth saves successfully


HBox(children=(IntProgress(value=0, description='Feature Extraction', max=76), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Metric Computing', max=3368), HTML(value='')))

2019-02-06 11:14:20,938 Reid_Baseline INFO: Validation Results - Epoch: 19
2019-02-06 11:14:20,939 Reid_Baseline INFO: mAP: 44.9%
2019-02-06 11:14:20,939 Reid_Baseline INFO: CMC curve, Rank-1  :66.8%
2019-02-06 11:14:20,940 Reid_Baseline INFO: CMC curve, Rank-5  :83.2%
2019-02-06 11:14:20,941 Reid_Baseline INFO: CMC curve, Rank-10 :88.7%


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:15:49,084 Reid_Baseline INFO: Epoch[21] Iteration[100/203] Loss: 3.507, Acc: 0.400, Base Lr: -6.58e-05
2019-02-06 11:17:16,339 Reid_Baseline INFO: Epoch[21] Iteration[200/203] Loss: 3.472, Acc: 0.404, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:18:46,828 Reid_Baseline INFO: Epoch[22] Iteration[100/203] Loss: 3.382, Acc: 0.421, Base Lr: -6.58e-05
2019-02-06 11:20:14,136 Reid_Baseline INFO: Epoch[22] Iteration[200/203] Loss: 3.354, Acc: 0.428, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:21:45,019 Reid_Baseline INFO: Epoch[23] Iteration[100/203] Loss: 3.247, Acc: 0.457, Base Lr: -6.58e-05
2019-02-06 11:23:12,374 Reid_Baseline INFO: Epoch[23] Iteration[200/203] Loss: 3.236, Acc: 0.456, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:24:43,141 Reid_Baseline INFO: Epoch[24] Iteration[100/203] Loss: 3.158, Acc: 0.474, Base Lr: -6.58e-05
2019-02-06 11:26:10,450 Reid_Baseline INFO: Epoch[24] Iteration[200/203] Loss: 3.129, Acc: 0.476, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:27:40,949 Reid_Baseline INFO: Epoch[25] Iteration[100/203] Loss: 3.045, Acc: 0.494, Base Lr: -6.58e-05
2019-02-06 11:29:08,380 Reid_Baseline INFO: Epoch[25] Iteration[200/203] Loss: 3.019, Acc: 0.498, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:30:38,768 Reid_Baseline INFO: Epoch[26] Iteration[100/203] Loss: 2.951, Acc: 0.508, Base Lr: -6.58e-05
2019-02-06 11:32:06,025 Reid_Baseline INFO: Epoch[26] Iteration[200/203] Loss: 2.915, Acc: 0.520, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:33:35,727 Reid_Baseline INFO: Epoch[27] Iteration[100/203] Loss: 2.825, Acc: 0.543, Base Lr: -6.58e-05
2019-02-06 11:35:01,782 Reid_Baseline INFO: Epoch[27] Iteration[200/203] Loss: 2.813, Acc: 0.541, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:36:32,636 Reid_Baseline INFO: Epoch[28] Iteration[100/203] Loss: 2.735, Acc: 0.559, Base Lr: -6.58e-05
2019-02-06 11:38:00,073 Reid_Baseline INFO: Epoch[28] Iteration[200/203] Loss: 2.712, Acc: 0.563, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:39:31,345 Reid_Baseline INFO: Epoch[29] Iteration[100/203] Loss: 2.642, Acc: 0.579, Base Lr: -6.58e-05
2019-02-06 11:40:59,675 Reid_Baseline INFO: Epoch[29] Iteration[200/203] Loss: 2.615, Acc: 0.583, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:42:30,445 Reid_Baseline INFO: Epoch[30] Iteration[100/203] Loss: 2.545, Acc: 0.599, Base Lr: -6.58e-05
2019-02-06 11:43:57,560 Reid_Baseline INFO: Epoch[30] Iteration[200/203] Loss: 2.516, Acc: 0.605, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:45:28,245 Reid_Baseline INFO: Epoch[31] Iteration[100/203] Loss: 2.462, Acc: 0.620, Base Lr: -6.58e-05
2019-02-06 11:46:55,594 Reid_Baseline INFO: Epoch[31] Iteration[200/203] Loss: 2.431, Acc: 0.627, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:48:26,235 Reid_Baseline INFO: Epoch[32] Iteration[100/203] Loss: 2.349, Acc: 0.650, Base Lr: -6.58e-05
2019-02-06 11:49:53,738 Reid_Baseline INFO: Epoch[32] Iteration[200/203] Loss: 2.343, Acc: 0.644, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:51:24,348 Reid_Baseline INFO: Epoch[33] Iteration[100/203] Loss: 2.273, Acc: 0.654, Base Lr: -6.58e-05
2019-02-06 11:52:51,740 Reid_Baseline INFO: Epoch[33] Iteration[200/203] Loss: 2.260, Acc: 0.657, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:54:22,391 Reid_Baseline INFO: Epoch[34] Iteration[100/203] Loss: 2.195, Acc: 0.681, Base Lr: -6.58e-05
2019-02-06 11:55:49,654 Reid_Baseline INFO: Epoch[34] Iteration[200/203] Loss: 2.175, Acc: 0.678, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 11:57:20,208 Reid_Baseline INFO: Epoch[35] Iteration[100/203] Loss: 2.112, Acc: 0.691, Base Lr: -6.58e-05
2019-02-06 11:58:47,443 Reid_Baseline INFO: Epoch[35] Iteration[200/203] Loss: 2.098, Acc: 0.692, Base Lr: -6.58e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

In [3]:
import os
import time
import torch
import numpy as np
from torch.autograd import Variable
import models
from config import cfg
from data_loader import data_loader
from loss import make_loss
from optimizer import make_optimizer
from scheduler import make_scheduler
from logger import make_logger
from evaluation import evaluation
from datasets import PersonReID_Dataset_Downloader
from utils import check_jupyter_run
if check_jupyter_run():
    from tqdm import tqdm_notebook as tqdm
else:
    from tqdm import tqdm

config_file = "./config/market_softmax_stride2.yaml"
cfg.merge_from_file(config_file)
cfg.freeze()

PersonReID_Dataset_Downloader('./datasets',cfg.DATASETS.NAMES)

output_dir = cfg.OUTPUT_DIR
if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

logger = make_logger("Reid_Baseline", output_dir)
logger.info("Using {} GPUS".format(1))
logger.info("Loaded configuration file {}".format(config_file))
logger.info("Running with config:\n{}".format(cfg))

train_loader, val_loader, num_query, num_classes = data_loader(cfg)
model = getattr(models, cfg.MODEL.NAME)(num_classes, cfg.MODEL.LAST_STRIDE, cfg.MODEL.PRETRAIN_PATH)
optimizer = make_optimizer(cfg, model)
scheduler = make_scheduler(cfg,optimizer)
loss_fn = make_loss(cfg)

log_period = cfg.SOLVER.LOG_PERIOD
checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
eval_period = cfg.SOLVER.EVAL_PERIOD
output_dir = cfg.OUTPUT_DIR
device = torch.device(cfg.MODEL.DEVICE)
epochs = cfg.SOLVER.MAX_EPOCHS
logger.info("Start training")

since = time.time()
for epoch in tqdm(range(epochs), desc='Epoch'):
    count = 0
    running_loss = 0.0
    running_acc = 0
    for data in tqdm(train_loader, desc='Iteration', leave=False):
        model.train()
        images, labels = data
        
        if device:
            model.to(device)
            images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
                
        scores, feats = model(images)
        loss = loss_fn(scores, feats, labels)
        
        loss.backward()
        optimizer.step()
        
        count = count + 1
        running_loss += loss.item()
        running_acc += (scores.max(1)[1] == labels).float().mean().item()
        
    logger.info("Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                                .format(epoch+1, count, len(train_loader),
                                running_loss/count, running_acc/count,
                                scheduler.get_lr()[0]))
    scheduler.step()
            
    if (epoch+1) % checkpoint_period == 0:
        model.save(output_dir,epoch+1)
    
    # Validation
    if (epoch+1) % eval_period == 0:
        all_feats = []
        all_pids = []
        all_camids = []
        
        for data in tqdm(val_loader, desc='Feature Extraction', leave=False):
            model.eval()
            with torch.no_grad():
                images, pids, camids = data

                if device:
                    model.to(device)
                    images = images.to(device)

                feats = model(images)

            all_feats.append(feats)
            all_pids.extend(np.asarray(pids))
            all_camids.extend(np.asarray(camids))

        all_feats = torch.cat(all_feats, dim=0)
        # query
        qf = all_feats[:num_query]
        q_pids = np.asarray(all_pids[:num_query])
        q_camids = np.asarray(all_camids[:num_query])
        # gallery
        gf = all_feats[num_query:]
        g_pids = np.asarray(all_pids[num_query:])
        g_camids = np.asarray(all_camids[num_query:])
        
        m, n = qf.shape[0], gf.shape[0]
        distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
                  torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
        distmat.addmm_(1, -2, qf, gf.t())
        distmat = distmat.cpu().numpy()
        cmc, mAP = evaluation(distmat, q_pids, g_pids, q_camids, g_camids)
        logger.info("Validation Results - Epoch: {}".format(epoch))
        logger.info("mAP: {:.1%}".format(mAP))
        for r in [1, 5, 10]:
            logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
        

time_elapsed = time.time() - since
logger.info('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
logger.info('-' * 10)

Dataset Check Success: Market1501 exists!
2019-02-06 12:25:54,955 Reid_Baseline INFO: Using 1 GPUS
2019-02-06 12:25:54,957 Reid_Baseline INFO: Loaded configuration file ./config/market_softmax_stride2.yaml
2019-02-06 12:25:54,959 Reid_Baseline INFO: Running with config:
DATALOADER:
  NUM_INSTANCE: 16
  NUM_WORKERS: 8
  SAMPLER: softmax
DATASETS:
  NAMES: Market1501
  STORE_DIR: ./datasets
INPUT:
  PADDING: 10
  PIXEL_MEAN: [0.485, 0.456, 0.406]
  PIXEL_STD: [0.229, 0.224, 0.225]
  PROB: 0.5
  SIZE_TEST: [384, 128]
  SIZE_TRAIN: [384, 128]
MODEL:
  DEVICE: cuda:7
  LAST_STRIDE: 2
  NAME: ResNet50
  PRETRAIN_PATH: /home/linshan/.torch/models/resnet50-19c8e357.pth
OUTPUT_DIR: ./checkpoint/Market1501/Softmax_BS64_384x128_Stride2
SOLVER:
  BASE_LR: 0.00035
  BIAS_LR_FACTOR: 1
  CHECKPOINT_PERIOD: 20
  EVAL_PERIOD: 20
  GAMMA: 0.1
  IMS_PER_BATCH: 64
  LOG_PERIOD: 100
  MARGIN: 0.3
  MAX_EPOCHS: 120
  MOMENTUM: 0.9
  OPTIMIZER_NAME: Adam
  STEP: 40
  WARMUP: True
  WARMUP_FACTOR: 0.01
  WARM

HBox(children=(IntProgress(value=0, description='Epoch', max=120), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 12:28:11,802 Reid_Baseline INFO: Epoch[1] Iteration[203/203] Loss: 6.605, Acc: 0.008, Base Lr: 3.50e-06


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

2019-02-06 12:30:22,816 Reid_Baseline INFO: Epoch[2] Iteration[203/203] Loss: 6.544, Acc: 0.066, Base Lr: 7.28e-05


HBox(children=(IntProgress(value=0, description='Iteration', max=203), HTML(value='')))

KeyboardInterrupt: 

<scheduler.WarmupMultiStepLR.WarmupMultiStepLR at 0x7f67bc558748>

In [2]:
!python main.py train ./config/market_softmax_NoWarmUp.yaml

Dataset Check Success: Market1501 exists!
2019-02-06 12:18:22,459 Reid_Baseline INFO: Using 1 GPUS
2019-02-06 12:18:22,459 Reid_Baseline INFO: Loaded configuration file ./config/market_softmax_NoWarmUp.yaml
2019-02-06 12:18:22,460 Reid_Baseline INFO: Running with config:
DATALOADER:
  NUM_INSTANCE: 16
  NUM_WORKERS: 8
  SAMPLER: softmax
DATASETS:
  NAMES: Market1501
  STORE_DIR: ./datasets
INPUT:
  PADDING: 10
  PIXEL_MEAN: [0.485, 0.456, 0.406]
  PIXEL_STD: [0.229, 0.224, 0.225]
  PROB: 0.5
  SIZE_TEST: [384, 128]
  SIZE_TRAIN: [384, 128]
MODEL:
  DEVICE: cuda:7
  LAST_STRIDE: 1
  NAME: ResNet50
  PRETRAIN_PATH: /home/linshan/.torch/models/resnet50-19c8e357.pth
OUTPUT_DIR: ./checkpoint/Market1501/Softmax_BS64_384x128_NoWarmUp
SOLVER:
  BASE_LR: 0.00035
  BIAS_LR_FACTOR: 1
  CHECKPOINT_PERIOD: 20
  EVAL_PERIOD: 20
  GAMMA: 0.1
  IMS_PER_BATCH: 64
  LOG_PERIOD: 100
  MARGIN: 0.3
  MAX_EPOCHS: 120
  MOMENTUM: 0.9
  OPTIMIZER_NAME: Adam
  STEP: 2
  WARMUP: False
  WARMUP_FACTOR: 0.01
  WA