In [None]:
import warnings
from torch import nn,optim
from torch.autograd import Variable 
from torch.utils.data import DataLoader
from dataset.dataloader import *
from models.model import *
from utils import *
from visualizations.vis import Visualizer

#1. seed every random number generator in use, then configure CUDA / cuDNN
# Call order is Python `random`, NumPy, torch (CPU), torch (all CUDA devices).
for _seed_fn in (random.seed,
                 np.random.seed,
                 torch.manual_seed,
                 torch.cuda.manual_seed_all):
    _seed_fn(config.seed)
del _seed_fn  # keep the loop helper out of the module namespace

# Restrict the visible GPUs to the ones listed in the config.
os.environ["CUDA_VISIBLE_DEVICES"] = config.gpus
torch.backends.cudnn.benchmark = True
# Silences every Python warning for the rest of the process.
warnings.filterwarnings('ignore')
# Checkpoint that main() resumes from when this file exists.
best_model_path = "/home/ytzx/densenet_pytorch/12.15/checkpoints/best_model/densenet121/model_best.pth.tar"

#2. evaluate func
def evaluate(val_loader,model,criterion,epoch,vis):
    """Run one pass over ``val_loader`` and return ``[average loss, average top-1]``.

    The model is moved to CUDA and put into eval mode, and the whole pass runs
    under ``torch.no_grad()``. Both averages come from fresh ``AverageMeter``
    instances that are updated once per batch with the batch size as weight.

    Args:
        val_loader: iterable yielding ``(input, target)`` batches.
        model: network to evaluate; left in eval mode on return.
        criterion: loss callable invoked as ``criterion(output, target)``.
        epoch: epoch index, only forwarded to the progress bar.
        vis: visualizer handle; accepted for call compatibility but not used
            by the current body.

    Returns:
        A two-element list ``[losses.avg, top1.avg]``.
    """
    # Per-call meters, so every validation pass starts from zero.
    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    progress = ProgressBar(mode="Val  ",
                           epoch=epoch,
                           total_epoch=config.epochs,
                           model_name=config.model_name,
                           total=len(val_loader))

    # Make sure the model lives on the GPU and is in inference mode.
    model.cuda()
    model.eval()
    with torch.no_grad():
        for step, batch in enumerate(val_loader):
            images, labels = batch
            progress.current = step

            images = Variable(images).cuda()
            # Targets go through NumPy to become an int64 tensor before the GPU copy.
            labels = Variable(torch.from_numpy(np.array(labels)).long()).cuda()

            # Forward pass and loss.
            logits = model(images)
            batch_loss = criterion(logits, labels)

            # Only the first (top-1) accuracy is recorded; the top-2 value is discarded.
            top1_acc, _ = accuracy(logits, labels, topk=(1,2))
            batch_size = images.size(0)
            loss_meter.update(batch_loss.item(), batch_size)
            top1_meter.update(top1_acc[0], batch_size)

            # Refresh the progress bar with the running averages.
            progress.current_loss = loss_meter.avg
            progress.current_top1 = top1_meter.avg
            progress()
        progress.done()
    return [loss_meter.avg, top1_meter.avg]


def main():
    """Fine-tune an ImageNet-pretrained DenseNet-121 on fold 0, checkpointing every epoch.

    Steps, in order:
      1. create the output directories named in ``config``;
      2. build the model, the Adam optimizer and the cross-entropy loss;
      3. resume from the module-level ``best_model_path`` when that file exists;
      4. for every remaining epoch: train on the files under
         ``config.train_data``, validate on ``config.val_data`` via
         ``evaluate`` and pass the resulting state to ``save_checkpoint``.

    Takes no arguments and returns nothing; all settings are read from ``config``.
    """
    fold = 0

    #4.1 mkdirs
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pairs; it also
    # creates missing parents and tolerates a directory that already exists.
    fold_suffix = config.model_name + os.sep + str(fold) + os.sep
    for directory in (config.submit,
                      config.weights,
                      config.best_models,
                      config.logs,
                      config.weights + fold_suffix,
                      config.best_models + fold_suffix):
        os.makedirs(directory, exist_ok=True)

    vis = Visualizer(env=config.model_name)

    # Create the model.
    model = torchvision.models.densenet121(pretrained=True)
    # Replace the classification head. torchvision's DenseNet forwards through
    # `classifier`; assigning a new `fc` attribute would leave the 1000-way
    # ImageNet head in place and the new layer unused.
    # NOTE: checkpoints written with the previous layout (1000-way classifier
    # plus an extra `fc` entry) will not load into this model.
    model.classifier = nn.Linear(model.classifier.in_features, config.num_classes)
    model.cuda()

    optimizer = optim.Adam(model.parameters(),
                           lr = config.lr,
                           amsgrad=True,
                           weight_decay=config.weight_decay)
    # Cross-entropy loss.
    criterion = nn.CrossEntropyLoss().cuda()

    best_precision1 = 0
    if os.path.isfile(best_model_path):
        checkpoint = torch.load(best_model_path)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # The checkpoint written below stores `epoch + 1`, i.e. the next epoch
        # to run, so it is used as-is; adding 1 again would skip an epoch.
        start_epoch = checkpoint['epoch']
        # Carry the best score over so a resumed run does not overwrite the
        # best model with a worse one on its first epoch.
        best_precision1 = checkpoint.get("best_precision1", best_precision1)
        print("--continue training--")
    else:
        start_epoch = 0
        print("--start training--")

    # Read the file lists and wrap them in data loaders.
    train_data_list = get_files(config.train_data,"train")
    val_data_list = get_files(config.val_data,"val")

    train_dataloader = DataLoader(ChaojieDataset(train_data_list),
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn,
                                  pin_memory=True,
                                  num_workers=15)
    val_dataloader = DataLoader(ChaojieDataset(val_data_list,train=False),
                                batch_size=config.batch_size*2,
                                shuffle=True,
                                collate_fn=collate_fn,
                                pin_memory=False,
                                num_workers=15)

    # Multiply the learning rate by 0.1 every 10 epochs.
    scheduler =  optim.lr_scheduler.StepLR(optimizer,
                                           step_size = 10,
                                           gamma=0.1)

    #4.5.5 train
    for epoch in range(start_epoch,config.epochs):
        # The epoch index is passed explicitly; on a resumed run the first
        # value is start_epoch rather than 0.
        scheduler.step(epoch)

        #4.5.5.1 define metrics
        # Fresh meters per epoch so the progress bar reports this epoch's
        # averages (as evaluate() does), not an average over all epochs so far.
        train_losses = AverageMeter()
        train_top1 = AverageMeter()

        # Progress bar for this epoch.
        train_progressor = ProgressBar(mode="Train",epoch=epoch,
                                       total_epoch=config.epochs,
                                       model_name=config.model_name,
                                       total=len(train_dataloader))

        # evaluate() leaves the model in eval mode, so switch back each epoch.
        model.train()

        # Training pass.
        for step,(images,labels) in enumerate(train_dataloader):
            train_progressor.current = step

            # Input images.
            images = images.cuda()
            # Labels: converted through NumPy to an int64 tensor.
            labels = torch.from_numpy(np.array(labels)).long().cuda()
            # Network output.
            output = model(images)
            # Loss.
            loss = criterion(output,labels)

            # Only the top-1 accuracy is tracked; the top-2 value is discarded.
            precision1_train,_ = accuracy(output,labels,topk=(1,2))
            batch_size = images.size(0)
            train_losses.update(loss.item(),batch_size)
            train_top1.update(precision1_train[0],batch_size)

            train_progressor.current_loss = train_losses.avg
            train_progressor.current_top1 = train_top1.avg

            # Backpropagate and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Redraw the progress bar.
            train_progressor()

        train_progressor.done()

        # Evaluate once per epoch; valid_loss is [average loss, average top-1].
        valid_loss = evaluate(val_dataloader,model,criterion,epoch,vis)
        is_best = valid_loss[1] > best_precision1
        best_precision1 = max(valid_loss[1],best_precision1)

        # "epoch" holds the index of the NEXT epoch to run (see the resume
        # branch above). What save_checkpoint does with is_best/fold is defined
        # outside this file.
        save_checkpoint({
                    "epoch":epoch + 1,
                    "model_name":config.model_name,
                    "state_dict":model.state_dict(),
                    "best_precision1":best_precision1,
                    "optimizer":optimizer.state_dict(),
                    "fold":fold,
                    "valid_loss":valid_loss,
        },is_best,fold)

# Run the training pipeline only when this file is executed as a script.
if __name__ =="__main__":
    main()


Setting up a new session...
Traceback (most recent call last):
  File "/home/ytzx/anaconda3/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/home/ytzx/anaconda3/lib/python3.6/site-packages/urllib3/util/connection.py", line 83, in create_connection
    raise err
  File "/home/ytzx/anaconda3/lib/python3.6/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ytzx/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 601, in urlopen
    chunked=chunked)
  File "/home/ytzx/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py", line 357, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/home/ytzx/anaconda3/lib/python3.6/http/client.py", l

Exception in user code:
------------------------------------------------------------


100%|██████████| 32384/32384 [00:00<00:00, 669434.90it/s]
100%|██████████| 3977/3977 [00:00<00:00, 927635.80it/s]

--start training--
loading train dataset
loading train dataset



[1;32;40mTrain Epoch:  1/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.910460 Top1: 70.488518 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  1/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.493651 Top1: 83.882317 ]  199/199 [1;32;40m[ 100% ][0m


Get Better top1 : tensor(83.8823, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  2/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.716911 Top1: 76.377228 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  2/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.376053 Top1: 87.125969 ]  199/199 [1;32;40m[ 100% ][0m


Get Better top1 : tensor(87.1260, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  3/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.618680 Top1: 79.477524 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  3/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.349163 Top1: 88.634644 ]  199/199 [1;32;40m[ 100% ][0m


Get Better top1 : tensor(88.6346, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  4/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.554230 Top1: 81.564972 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  4/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.298802 Top1: 89.212975 ]  199/199 [1;32;40m[ 100% ][0m


Get Better top1 : tensor(89.2130, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  5/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.507787 Top1: 83.090416 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  5/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.264814 Top1: 91.023384 ]  199/199 [1;32;40m[ 100% ][0m


Get Better top1 : tensor(91.0234, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  6/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.472550 Top1: 84.217514 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  6/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.235207 Top1: 91.677139 ]  199/199 [1;32;40m[ 100% ][0m


Get Better top1 : tensor(91.6771, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  7/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.443615 Top1: 85.157127 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  7/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.258127 Top1: 91.375404 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  8/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.419845 Top1: 85.939049 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  8/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.244858 Top1: 91.576561 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  9/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.399029 Top1: 86.626114 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  9/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.217045 Top1: 92.607491 ]  199/199 [1;32;40m[ 100% ][0m


Get Better top1 : tensor(92.6075, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  10/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.381063 Top1: 87.213127 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  10/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.219405 Top1: 92.431480 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  11/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.360572 Top1: 87.918556 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  11/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.163432 Top1: 94.116165 ]  199/199 [1;32;40m[ 100% ][0m


Get Better top1 : tensor(94.1162, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  12/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.342188 Top1: 88.562767 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  12/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.161356 Top1: 94.443047 ]  199/199 [1;32;40m[ 100% ][0m


Get Better top1 : tensor(94.4430, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  13/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.325998 Top1: 89.129013 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  13/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.154673 Top1: 94.392754 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  14/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.311394 Top1: 89.634872 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  14/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.169223 Top1: 94.141312 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  15/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.298426 Top1: 90.086052 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  15/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.137545 Top1: 95.222527 ]  199/199 [1;32;40m[ 100% ][0m


Get Better top1 : tensor(95.2225, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  16/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.287194 Top1: 90.472733 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  16/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.147372 Top1: 95.021370 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  17/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.276928 Top1: 90.830078 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  17/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.154783 Top1: 94.669342 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  18/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.267718 Top1: 91.152695 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  18/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.146264 Top1: 94.744781 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:

Get Better top1 : tensor(95.4740, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  28/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.208075 Top1: 93.220055 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  28/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.148363 Top1: 94.845360 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  29/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.204214 Top1: 93.355873 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  29/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.147984 Top1: 94.795067 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  30/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.200650 Top1: 93.479462 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  30/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.133140 Top1: 95.323105 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:

Get Better top1 : tensor(95.8008, device='cuda:0') saving weights to ./checkpoints/best_model/densenet121/0/model_best.pth.tar


[1;32;40mTrain Epoch:  36/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.183226 Top1: 94.081757 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  36/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.135868 Top1: 95.323105 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  37/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.180837 Top1: 94.166046 ]  3239/3239 [1;32;40m[ 100% ][0m
[1;32;40mVal   Epoch:  37/40 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>][0m  [Current: Loss 0.143950 Top1: 95.297958 ]  199/199 [1;32;40m[ 100% ][0m
[1;32;40mTrain Epoch:  38/40 [>>>>>>>>>>>>>>>>>>>>>>                            ][0m  [Current: Loss 0.179887 Top1: 94.198578 ]  1445/3239 [1;32;40m[  44% ][0m