In [1]:
import os
import sys
import time
import glob
import numpy as np
import torch
import utils
import logging
import argparse
import torch.nn as nn
import torch.utils
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn

from torch.autograd import Variable
from model_search import Network
from architect import Architect

import genotypes

import copy
from default_option import TrainOptions

parser = argparse.ArgumentParser("cifar")
parser.add_argument('--data', type=str, default='../data', help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=32, help='batch size')
parser.add_argument('--learning_rate', type=float, default=0.025, help='init learning rate')
parser.add_argument('--learning_rate_min', type=float, default=0.001, help='min learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
parser.add_argument('--weight_decay', type=float, default=3e-3, help='weight decay')
parser.add_argument('--report_freq', type=float, default=50, help='report frequency')
parser.add_argument('--gpu', type=int, default=1, help='gpu device id')
parser.add_argument('--epochs', type=int, default=50, help='num of training epochs')
parser.add_argument('--init_channels', type=int, default=16, help='num of init channels')
parser.add_argument('--layers', type=int, default=8, help='total number of layers')
parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model')
parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
parser.add_argument('--drop_path_prob', type=float, default=0.3, help='drop path probability')
parser.add_argument('--save', type=str, default='EXP', help='experiment name')
parser.add_argument('--seed', type=int, default=100, help='random seed')
parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping')
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
parser.add_argument('--unrolled', action='store_true', default=f, help='use one-step unrolled validation loss')
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
args = parser.parse_args(args=[])

args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CIFAR_CLASSES = 10

def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = Variable(input, requires_grad=False).cuda()
        target = Variable(target, requires_grad=False).cuda(async=True)
        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = Variable(input_search, requires_grad=False).cuda()
        target_search = Variable(target_search, requires_grad=False).cuda(async=True)
        architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)
        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm(model.parameters(), args.grad_clip)
        optimizer.step()
        
        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)
        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg


def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    for step, (input, target) in enumerate(valid_queue):
        input = Variable(input, volatile=True).cuda()
        target = Variable(target, volatile=True).cuda(async=True)
        logits = model(input)
        loss = criterion(logits, target)
        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg

def random_arch_generate():
    num_ops = len(genotypes.PRIMITIVES)
    n_nodes = 4####model._step

    arch_gene = []
    for i in range(n_nodes):
        ops = np.random.choice(range(num_ops), 2)
        nodes_in_normal = np.random.choice(range(i+2), 2)##############################modify
        arch_gene.extend([(ops[0],nodes_in_normal[0]), (ops[1],nodes_in_normal[1])])
    return arch_gene  

def get_weights_from_arch(arch_comb):
    k = sum(1 for i in range(model._steps) for n in range(2+i))
    num_ops = len(genotypes.PRIMITIVES)
    n_nodes = model._steps

    alphas_normal = Variable(torch.zeros(k, num_ops).cuda(), requires_grad=False)
    alphas_reduce = Variable(torch.zeros(k, num_ops).cuda(), requires_grad=False)

    offset = 0
    for i in range(n_nodes):
        normal1 = np.int_(arch_comb[0][2*i])
        normal2 = np.int_(arch_comb[0][2*i+1])
        reduce1 = np.int_(arch_comb[1][2*i])
        reduce2 = np.int_(arch_comb[1][2*i+1])
        alphas_normal[offset+normal1[1],normal1[0]] = 1
        alphas_normal[offset+normal2[1],normal2[0]] = 1
        alphas_reduce[offset+reduce1[1],reduce1[0]] = 1
        alphas_reduce[offset+reduce2[1],reduce2[0]] = 1
        offset += (i+2)

    model_weights = [
      alphas_normal,
      alphas_reduce,
    ]
    return model_weights

def set_model_weights(model, weights):
    model.alphas_normal = weights[0]
    model.alphas_reduce = weights[1]
    model._arch_parameters = [model.alphas_normal, model.alphas_reduce]
    return model

def infer_val(valid_queue, model,arch_gen_compa, criterion):
    
    arch_param_save=model.arch_parameters()
    model_weights=get_weights_from_arch(arch_gen_compa)        ###########################
    model_save=set_model_weights(model,model_weights)#############
   
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model_save.eval() 

    for step, (input, target) in enumerate(valid_queue):
        input = Variable(input, volatile=True).cuda()
        target = Variable(target, volatile=True).cuda(async=True)

        logits = model_save(input)
        loss = criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    model=set_model_weights(model,arch_param_save)###########################set back
    return top1.avg, objs.avg

Experiment dir : search-EXP-20200710-181910


In [None]:
if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

np.random.seed(args.seed)
torch.cuda.set_device(args.gpu)
cudnn.benchmark = True
torch.manual_seed(args.seed)
cudnn.enabled=True
torch.cuda.manual_seed(args.seed)
logging.info('gpu device = %d' % args.gpu)
logging.info("args = %s", args)

criterion = nn.CrossEntropyLoss()
criterion = criterion.cuda()

model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
model = model.cuda()
logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

optimizer = torch.optim.SGD(
    model.parameters(),
    args.learning_rate,
    momentum=args.momentum,
    weight_decay=args.weight_decay)

train_transform, valid_transform = utils._data_transforms_cifar10(args)
train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

num_train = len(train_data)
indices = list(range(num_train))
split = int(np.floor(args.train_portion * num_train)*0.2)
split_end = int(num_train*0.2)

train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
      pin_memory=True, num_workers=2)

valid_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:split_end]),
      pin_memory=True, num_workers=2)

test_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

test_queue = torch.utils.data.DataLoader(
      test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
      optimizer, float(args.epochs), eta_min=args.learning_rate_min)

architect = Architect(model, args)


arch_archive=[]
arch_gen1=random_arch_generate()
arch_gen2=random_arch_generate()
#arch_gen_compa=[arch_gen_compa1,arch_gen_compa2]
#arch_gen_compa=[[(0,0),(7,1),(0,0),(6,1),(0,0),(7,3),(3,1),(5,4)],[(5,0),(7,1),(6,0),(4,0),(3,0),(7,1),(2,4),(4,2)]]
#logging.info('arch_gen_compa %f', arch_gen_compa)

#arch_gen_compa=[[(0, 0), (0, 1), (7, 1), (7, 2), (6, 3), (4, 0), (2, 3), (6, 4)], [(7, 0), (4, 1), (4, 2), (2, 1), (1, 0), (5, 1), (4, 0), (4, 2)]]

arch_gen_compa1 = [[(0, 0), (7, 1), (0, 0), (6, 1), (0, 0), (7, 3), (3, 1), (5, 4)], [(5, 0), (7, 1), (6, 0), (4, 1), (3, 0), (7, 1), (2, 4), (4, 2)]]
arch_gen_compa2 = [[(6, 0), (6, 1), (3, 1), (1, 0), (4, 1), (6, 3), (2, 4), (7, 0)], [(2, 1), (5, 0), (7, 2), (1, 0), (7, 1), (1, 3), (5, 3), (1, 0)]]
arch_gen_compa3 = [[(2, 0), (6, 1), (0, 1), (3, 2), (2, 1), (0, 3), (0, 0), (2, 3)], [(6, 1), (6, 0), (3, 0), (0, 1), (4, 2), (0, 0), (1, 4), (5, 3)]]
arch_gen_compa4 = [[(7, 0), (6, 1), (4, 1), (5, 0), (2, 1), (3, 2), (5, 2), (7, 0)], [(0, 1), (6, 0), (5, 0), (3, 2), (6, 3), (5, 1), (6, 3), (3, 4)]]


n_archive_recent=[arch_gen1]
r_archive_recent=[arch_gen2]

record_train_acc=[]
record_valid_acc=[]
record_valid_accs=[]



for epoch in range(args.epochs):
    scheduler.step()
    lr = scheduler.get_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    print(F.softmax(model.alphas_normal, dim=-1))
    print(F.softmax(model.alphas_reduce, dim=-1))

    # training
    train_acc, train_obj= train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
    logging.info('train_acc %f', train_acc)

    # validation    
    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', train_acc)        
    

    # validation seperate architecture
    valid_acc1, valid_obj = infer_val(valid_queue, model,arch_gen_compa1, criterion)
    logging.info('valid_acc1 %f', valid_acc1)
    
    
    valid_acc2, valid_obj = infer_val(valid_queue, model,arch_gen_compa2, criterion)
    logging.info('valid_acc2 %f', valid_acc2) 
    
    valid_acc3, valid_obj = infer_val(valid_queue, model,arch_gen_compa3, criterion)
    logging.info('valid_acc3 %f', valid_acc3)
    
    valid_acc4, valid_obj = infer_val(valid_queue, model,arch_gen_compa4, criterion)
    logging.info('valid_acc4 %f', valid_acc4)
    
    
    record_train_acc.extend([train_acc])
    record_valid_acc.extend([[valid_acc]])
    record_valid_accs.extend([[valid_acc1,valid_acc2,valid_acc3,valid_acc4]])
    
    
file=open('record_train_acc_DARTS.txt','w')    
file.write(str(record_train_acc))
file.close()
                             
file=open('record_valid_accs_DARTS.txt','w')    
file.write(str(record_valid_accs))
file.close()                             
                             
file=open('record_valid_acc_DARTS.txt','w')    
file.write(str(record_valid_acc))
file.close()                             

   # utils.save(model, os.path.join(args.save, 'weights.pt'))

07/10 06:19:10 PM gpu device = 1
07/10 06:19:10 PM args = Namespace(arch_learning_rate=0.0003, arch_weight_decay=0.001, batch_size=32, cutout=False, cutout_length=16, data='../data', drop_path_prob=0.3, epochs=50, gpu=1, grad_clip=5, init_channels=16, layers=8, learning_rate=0.025, learning_rate_min=0.001, model_path='saved_models', momentum=0.9, report_freq=50, save='search-EXP-20200710-181910', seed=100, train_portion=0.5, unrolled=True, weight_decay=0.003)
07/10 06:19:13 PM param size = 1.930618MB
Files already downloaded and verified
Files already downloaded and verified
07/10 06:19:15 PM epoch 0 lr 2.495266e-02
07/10 06:19:15 PM genotype = Genotype(normal=[('avg_pool_3x3', 0), ('dil_conv_5x5', 1), ('sep_conv_5x5', 0), ('dil_conv_3x3', 2), ('avg_pool_3x3', 0), ('avg_pool_3x3', 2), ('sep_conv_3x3', 4), ('sep_conv_5x5', 2)], normal_concat=range(2, 6), reduce=[('avg_pool_3x3', 1), ('dil_conv_5x5', 0), ('avg_pool_3x3', 0), ('dil_conv_5x5', 1), ('skip_connect', 3), ('avg_pool_3x3', 1), 



07/10 06:19:21 PM train 000 2.239793e+00 15.625000 68.750000




07/10 06:23:45 PM train 050 2.164941e+00 23.529413 73.039215
07/10 06:28:03 PM train 100 2.041115e+00 25.959158 77.413368
07/10 06:32:26 PM train 150 1.972051e+00 27.649006 79.925499
07/10 06:32:57 PM train_acc 27.980000
07/10 06:32:57 PM valid 000 1.920506e+00 25.000000 87.500000




07/10 06:33:06 PM valid 050 1.877861e+00 31.372551 85.723045
07/10 06:33:14 PM valid 100 1.862454e+00 32.518562 85.581680
07/10 06:33:22 PM valid 150 1.874344e+00 32.243378 85.326988
07/10 06:33:23 PM valid_acc 27.980000
07/10 06:33:24 PM valid 000 1.964596e+00 18.750000 84.375000




07/10 06:33:31 PM valid 050 1.929882e+00 28.370098 82.230392
07/10 06:33:39 PM valid 100 1.924605e+00 28.186880 81.961632
07/10 06:33:47 PM valid 150 1.926490e+00 28.166391 81.332779
07/10 06:33:48 PM valid_acc1 28.199999
07/10 06:33:48 PM valid 000 2.289657e+00 18.750000 84.375000
07/10 06:33:56 PM valid 050 2.032436e+00 29.840687 83.639709
07/10 06:34:04 PM valid 100 2.036143e+00 30.352722 84.003708
07/10 06:34:12 PM valid 150 2.029311e+00 30.070364 83.795532
07/10 06:34:13 PM valid_acc2 30.379999
07/10 06:34:13 PM valid 000 1.900132e+00 31.250000 87.500000
07/10 06:34:21 PM valid 050 2.001703e+00 30.024511 84.803925
07/10 06:34:29 PM valid 100 2.010486e+00 29.486385 83.879951
07/10 06:34:37 PM valid 150 2.011057e+00 29.573675 83.981789
07/10 06:34:38 PM valid_acc3 29.500000
07/10 06:34:38 PM valid 000 1.704269e+00 28.125000 87.500000
07/10 06:34:46 PM valid 050 1.985026e+00 28.308825 78.615196
07/10 06:34:54 PM valid 100 1.997343e+00 27.382425 78.743813
07/10 06:35:02 PM valid 150 1

07/10 06:59:46 PM train 100 1.601608e+00 40.655941 89.944305
07/10 07:04:08 PM train 150 1.596171e+00 41.245861 90.169701
07/10 07:04:40 PM train_acc 41.520000
07/10 07:04:41 PM valid 000 1.946936e+00 37.500000 84.375000
07/10 07:04:49 PM valid 050 1.512204e+00 43.259804 92.034317
07/10 07:04:57 PM valid 100 1.551858e+00 42.048267 91.491333
07/10 07:05:05 PM valid 150 1.555942e+00 42.591061 91.452812
07/10 07:05:06 PM valid_acc 41.520000
07/10 07:05:06 PM valid 000 1.786620e+00 28.125000 87.500000
07/10 07:05:14 PM valid 050 1.748761e+00 35.845589 86.764709
07/10 07:05:22 PM valid 100 1.757464e+00 35.210396 86.571777
07/10 07:05:30 PM valid 150 1.752637e+00 35.347683 86.713577
07/10 07:05:31 PM valid_acc1 35.160000
07/10 07:05:31 PM valid 000 1.789545e+00 37.500000 90.625000
07/10 07:05:39 PM valid 050 1.523920e+00 45.098042 91.789223
07/10 07:05:47 PM valid 100 1.547313e+00 43.966583 91.522278
07/10 07:05:55 PM valid 150 1.535993e+00 44.391556 91.370033
07/10 07:05:56 PM valid_acc2 44

07/10 07:22:51 PM train 000 1.366883e+00 43.750000 90.625000
07/10 07:27:13 PM train 050 1.427678e+00 47.855392 92.524513
07/10 07:31:42 PM train 100 1.393295e+00 49.071781 92.698021
07/10 07:36:05 PM train 150 1.387664e+00 48.820366 93.046356
07/10 07:36:36 PM train_acc 48.639999
07/10 07:36:37 PM valid 000 1.661203e+00 43.750000 87.500000
07/10 07:36:45 PM valid 050 1.523232e+00 44.240196 90.808830
07/10 07:36:53 PM valid 100 1.530440e+00 44.863861 90.594055
07/10 07:37:01 PM valid 150 1.515133e+00 45.509106 91.142387
07/10 07:37:02 PM valid_acc 48.639999
07/10 07:37:02 PM valid 000 1.743926e+00 37.500000 78.125000
07/10 07:37:10 PM valid 050 1.765729e+00 35.416668 85.232849
07/10 07:37:18 PM valid 100 1.763617e+00 35.365097 84.746284
07/10 07:37:26 PM valid 150 1.773552e+00 35.264900 84.768211
07/10 07:37:27 PM valid_acc1 35.299999
07/10 07:37:27 PM valid 000 1.246560e+00 56.250000 96.875000
07/10 07:37:36 PM valid 050 1.497644e+00 48.345589 92.892159
07/10 07:37:46 PM valid 100 1.5

In [None]:
valid_acc1, valid_obj = infer_val(valid_queue, model,arch_gen_compa1, criterion)