In [44]:
import os
import sys
import time
import glob
import numpy as np
import torch
import utils
import logging
import argparse
import torch.nn as nn
import torch.utils
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn

from torch.autograd import Variable
from model_search import Network
from architect import Architect
import genotypes

# ---------------------------------------------------------------------------
# Experiment configuration.
# All hyper-parameters are declared through argparse so the cell mirrors a
# command-line script; inside the notebook the defaults are used unchanged
# because parse_args() is given an empty argument list.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser("cifar")
parser.add_argument('--data', type=str, default='../data', help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=64, help='batch size')
parser.add_argument('--learning_rate', type=float, default=0.025, help='init learning rate')
parser.add_argument('--learning_rate_min', type=float, default=0.001, help='min learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
parser.add_argument('--weight_decay', type=float, default=3e-4, help='weight decay')
# report_freq is a step count used as a modulus (step % report_freq), so it is an int.
parser.add_argument('--report_freq', type=int, default=50, help='report frequency')
parser.add_argument('--gpu', type=int, default=0, help='gpu device id')
parser.add_argument('--epochs', type=int, default=50, help='num of training epochs')
parser.add_argument('--init_channels', type=int, default=16, help='num of init channels')
parser.add_argument('--layers', type=int, default=8, help='total number of layers')
parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model')
parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
parser.add_argument('--drop_path_prob', type=float, default=0.3, help='drop path probability')
parser.add_argument('--save', type=str, default='EXP', help='experiment name')
parser.add_argument('--seed', type=int, default=2, help='random seed')
parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping')
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
# NOTE(review): store_true combined with default=True means this flag can never
# be turned off from the command line; unrolled is always True.
parser.add_argument('--unrolled', action='store_true', default=True, help='use one-step unrolled validation loss')
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
# Empty list: ignore the kernel's own sys.argv and take every default.
args = parser.parse_args(args=[])

# Each run gets its own time-stamped directory; the *.py files in the working
# directory are handed to utils.create_exp_dir as scripts_to_save.
args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

# Log to stdout (visible in the notebook) and to <save dir>/log.txt.
log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)


# Number of output classes passed to the search network.
CIFAR_CLASSES = 10

Experiment dir : search-EXP-20190702-195936


In [45]:
# The rest of the notebook moves the model and every batch to the GPU with
# .cuda(), so stop here when no CUDA device is present.
if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

In [46]:
# --- Reproducibility and device selection --------------------------------
# Seed numpy and torch (CPU and CUDA) before the model is built.
np.random.seed(args.seed)
torch.cuda.set_device(args.gpu)
cudnn.benchmark = True
torch.manual_seed(args.seed)
cudnn.enabled=True
torch.cuda.manual_seed(args.seed)
logging.info('gpu device = %d' % args.gpu)
logging.info("args = %s", args)

# --- Search network and its weight optimizer -----------------------------
criterion = nn.CrossEntropyLoss()
criterion = criterion.cuda()
model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
model = model.cuda()
logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

# SGD over model.parameters(); the Architect object created at the end of this
# cell is handed to train(), which calls architect.step() on each batch.
optimizer = torch.optim.SGD(
  model.parameters(),
  args.learning_rate,
  momentum=args.momentum,
  weight_decay=args.weight_decay)

# --- Data ----------------------------------------------------------------
# Only the CIFAR-10 *training* set is loaded; it is split by index into a
# weight-training part and a search/validation part below. Note that both
# loaders wrap the same dataset object and therefore share train_transform.
train_transform, valid_transform = utils._data_transforms_cifar10(args)
train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

num_train = len(train_data)
indices = list(range(num_train))
# First `split` indices feed train_queue, the remainder feed valid_queue.
split = int(np.floor(args.train_portion * num_train))

train_queue = torch.utils.data.DataLoader(
  train_data, batch_size=args.batch_size,
  sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
  pin_memory=True, num_workers=2)

valid_queue = torch.utils.data.DataLoader(
  train_data, batch_size=args.batch_size,
  sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
  pin_memory=True, num_workers=2)

# Persistent iterators. valid_iter is passed to the search/evaluation helpers
# in later cells; train_iter is not referenced by any code visible here.
train_iter = iter(train_queue)
valid_iter = iter(valid_queue)



# Cosine learning-rate decay over args.epochs, bottoming out at learning_rate_min.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, float(args.epochs), eta_min=args.learning_rate_min)

# Helper from architect.py that performs the architecture update in train().
architect = Architect(model, args)

07/02 07:59:46 PM gpu device = 0
07/02 07:59:46 PM args = Namespace(arch_learning_rate=0.0003, arch_weight_decay=0.001, batch_size=64, cutout=False, cutout_length=16, data='../data', drop_path_prob=0.3, epochs=50, gpu=0, grad_clip=5, init_channels=16, layers=8, learning_rate=0.025, learning_rate_min=0.001, model_path='saved_models', momentum=0.9, report_freq=50, save='search-EXP-20190702-195936', seed=2, train_portion=0.5, unrolled=True, weight_decay=0.0003)
07/02 07:59:47 PM param size = 1.930618MB
Files already downloaded and verified


In [47]:
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr):
    """Run one epoch of alternating architecture / weight updates.

    For every batch of ``train_queue`` the function
      1. draws one batch from ``valid_queue`` and calls ``architect.step`` with
         both batches (the architecture update), then
      2. performs a regular SGD step on the network weights using the
         training batch.

    Args:
        train_queue: iterable of ``(input, target)`` batches for weight training.
        valid_queue: iterable of ``(input, target)`` batches for the
            architecture step; a fresh iterator is created on every step.
        model: the search network (callable on a batch, returns logits).
        architect: object exposing ``step(input, target, input_search,
            target_search, lr, optimizer, unrolled=...)``.
        criterion: loss function applied to ``(logits, target)``.
        optimizer: optimizer for the network weights.
        lr: current learning rate, forwarded to ``architect.step``.

    Returns:
        ``(top1.avg, objs.avg)``: running top-1 accuracy and running loss as
        accumulated by ``utils.AvgrageMeter``.

    Reads the module-level ``args`` for ``unrolled``, ``grad_clip`` and
    ``report_freq``.
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)

        # `async` became a reserved word in Python 3.7; the keyword argument
        # was renamed to `non_blocking` in PyTorch 0.4. Tensors no longer need
        # to be wrapped in Variable.
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)

        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)

        loss.backward()
        # In-place variant; the un-suffixed clip_grad_norm is deprecated.
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg


In [9]:
# Main search loop: one call to train() per epoch, then checkpoint the weights.
for epoch in range(args.epochs):
    # NOTE(review): the scheduler is stepped *before* the epoch's optimizer
    # steps. Newer PyTorch releases expect scheduler.step() after
    # optimizer.step() and expose get_last_lr() instead of get_lr() -- confirm
    # against the installed version before changing the order.
    scheduler.step()
    lr = scheduler.get_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)

    # Log the discrete architecture currently implied by the model.
    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    
    # train_acc / train_obj are returned but not used further in this cell.
    train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
    # The same file is overwritten every epoch, so only the latest weights are kept.
    utils.save(model, os.path.join(args.save, 'weights.pt'))



07/01 03:02:36 PM epoch 1 lr 2.490538e-02
07/01 03:02:36 PM genotype = Genotype(normal=[('avg_pool_3x3', 0), ('dil_conv_5x5', 1), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 1), ('avg_pool_3x3', 0), ('dil_conv_5x5', 1), ('avg_pool_3x3', 0)], normal_concat=range(2, 6), reduce=[('avg_pool_3x3', 1), ('avg_pool_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 2), ('sep_conv_3x3', 2), ('avg_pool_3x3', 3), ('max_pool_3x3', 4), ('dil_conv_5x5', 0)], reduce_concat=range(2, 6))
tensor([[0.1249, 0.1249, 0.1252, 0.1251, 0.1250, 0.1250, 0.1250, 0.1249],
        [0.1250, 0.1248, 0.1251, 0.1250, 0.1250, 0.1250, 0.1251, 0.1251],
        [0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1251, 0.1249],
        [0.1249, 0.1249, 0.1249, 0.1250, 0.1249, 0.1250, 0.1253, 0.1251],
        [0.1249, 0.1251, 0.1251, 0.1250, 0.1249, 0.1249, 0.1249, 0.1251],
        [0.1250, 0.1250, 0.1252, 0.1251, 0.1249, 0.1249, 0.1250, 0.1249],
        [0.1249, 0.1253, 0.1250, 0.1248, 0.1248, 0.1251, 0.1251, 0.1

07/01 03:03:45 PM train 000 2.484944e+00 18.750000 43.750000




07/01 03:07:58 PM train 050 2.371466e+00 19.362745 69.730392
07/01 03:12:13 PM train 100 2.212683e+00 21.782177 74.195541
07/01 03:16:25 PM train 150 2.151035e+00 22.392384 76.117546
07/01 03:20:39 PM train 200 2.076057e+00 24.347013 78.451492
07/01 03:24:52 PM train 250 2.044914e+00 24.327690 79.830681
07/01 03:29:05 PM train 300 2.015373e+00 25.373755 80.398666
07/01 03:33:17 PM train 350 1.986691e+00 26.371082 81.303421
07/01 03:37:29 PM train 400 1.957622e+00 27.462595 81.857857
07/01 03:41:42 PM train 450 1.940889e+00 27.896341 82.026054
07/01 03:45:56 PM train 500 1.922935e+00 28.468063 82.435127
07/01 03:50:09 PM train 550 1.898903e+00 29.378403 83.076225
07/01 03:54:23 PM train 600 1.880595e+00 30.147669 83.381859
07/01 03:58:37 PM train 650 1.868862e+00 30.741167 83.563751
07/01 04:02:50 PM train 700 1.851412e+00 31.339159 83.978249
07/01 04:07:04 PM train 750 1.838183e+00 31.849199 84.287613
07/01 04:11:18 PM train 800 1.821393e+00 32.529652 84.683205
07/01 04:15:30 PM train 

In [27]:
def get_weights_from_arch(model, arch):
    """Turn a discrete architecture into one-hot architecture weights.

    Args:
        model: object exposing ``_steps`` (number of intermediate nodes).
        arch: pair ``(normal, reduce)``; each element is a sequence holding two
            ``(input_node, op_index)`` entries per intermediate node.

    Returns:
        ``[alphas_normal, alphas_reduce]``: two CUDA tensors of shape
        ``(k, len(genotypes.PRIMITIVES))`` that are zero everywhere except for
        a 1 at ``(edge_row, op_index)`` for every edge listed in ``arch``.
    """
    n_nodes = model._steps
    num_ops = len(genotypes.PRIMITIVES)
    # Intermediate node i can read from 2 + i predecessors -> that many rows.
    k = sum(2 + i for i in range(n_nodes))

    alphas_normal = Variable(torch.zeros(k, num_ops).cuda(), requires_grad=False)
    alphas_reduce = Variable(torch.zeros(k, num_ops).cuda(), requires_grad=False)

    row_base = 0  # first row belonging to the current node
    for node in range(n_nodes):
        first, second = 2 * node, 2 * node + 1
        # Fetch all four edges before writing, like a plain lookup table.
        picked = (
            (alphas_normal, arch[0][first]),
            (alphas_normal, arch[0][second]),
            (alphas_reduce, arch[1][first]),
            (alphas_reduce, arch[1][second]),
        )
        for alphas, edge in picked:
            alphas[row_base + edge[0], edge[1]] = 1
        row_base += node + 2

    return [alphas_normal, alphas_reduce]


def set_model_weights(model, weights):
    """Install ``weights[0]`` / ``weights[1]`` as the model's architecture weights.

    Sets ``model.alphas_normal`` and ``model.alphas_reduce`` and rebuilds
    ``model._arch_parameters`` so it refers to the newly assigned objects.
    """
    model.alphas_normal, model.alphas_reduce = weights[0], weights[1]
    model._arch_parameters = [getattr(model, name) for name in ('alphas_normal', 'alphas_reduce')]
    
def random_sample_arch(model):
    """Sample a random discrete architecture.

    For each of the ``model._steps`` intermediate nodes, two distinct input
    nodes are drawn (without replacement) from the ``i + 2`` available
    predecessors, separately for the normal and the reduction cell, and four
    operation indices are drawn (with replacement) from
    ``range(len(genotypes.PRIMITIVES))``.

    Args:
        model: object exposing ``_steps``.

    Returns:
        ``(normal, reduction)``: two lists of ``(input_node, op_index)`` tuples,
        two tuples per intermediate node.
    """
    num_ops = len(genotypes.PRIMITIVES)
    n_nodes = model._steps

    normal = []
    reduction = []
    for i in range(n_nodes):
        # Draw order (ops, normal inputs, reduce inputs) is kept so that a
        # given numpy seed yields the same architectures as before.
        ops = np.random.choice(range(num_ops), 4)
        nodes_in_normal = np.random.choice(range(i+2), 2, replace=False)
        nodes_in_reduce = np.random.choice(range(i+2), 2, replace=False)
        normal.extend([(nodes_in_normal[0], ops[0]), (nodes_in_normal[1], ops[1])])
        reduction.extend([(nodes_in_reduce[0], ops[2]), (nodes_in_reduce[1], ops[3])])
    return (normal, reduction)
    
    


In [30]:
def evaluate(model,arch,valid_queue,valid_iter):
    """Score a discrete architecture on the validation queue.

    The architecture is written into the model as one-hot architecture
    weights (via ``get_weights_from_arch`` / ``set_model_weights``), the model
    is put in eval mode, and ``len(valid_queue)`` batches are evaluated.

    Args:
        model: the search network.
        arch: ``(normal, reduction)`` lists of ``(input_node, op_index)``.
        valid_queue: data loader used to (re)create the iterator once
            ``valid_iter`` is exhausted.
        valid_iter: iterator over ``valid_queue``; rebinding it here does not
            affect the caller's variable.

    Returns:
        ``1 - top1.avg / 100``, i.e. the top-1 error as a fraction (lower is
        better, so it can be minimised directly).

    Uses the module-level ``criterion``.
    """
    logging.info(arch)
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    weights = get_weights_from_arch(model,arch)
    set_model_weights(model,weights)

    model.eval()

    n_batches = len(valid_queue)

    # Pure inference: no autograd graph is needed, which saves memory and time.
    with torch.no_grad():
        for step in range(n_batches):
            try:
                input, target = next(valid_iter)
            except StopIteration:
                # Only iterator exhaustion triggers a restart; genuine loader
                # errors now propagate instead of being silently retried.
                logging.info('looping back over valid set')
                valid_iter = iter(valid_queue)
                input, target = next(valid_iter)
            # `async` is a reserved word since Python 3.7; PyTorch renamed the
            # keyword to `non_blocking`.
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)

    # Return error since we want to minimize the objective value.
    return 1-(top1.avg)/100




# Alternative settings left commented out (presumably from a different
# dataset/task -- TODO confirm where they came from):
#data_size = 929589
#time_steps = 35
# data_size is presumably the number of samples in the search split -- verify
# against train_portion and the dataset size.
data_size = 25000
time_steps = 1
# Total budget: epochs * data_size / batch_size / time_steps, truncated to int.
# random_selection() derives its default number of rounds from B.
B = int(args.epochs * data_size / args.batch_size / time_steps)   

def random_selection(model,valid_queue,valid_iter, rounds=None, samples_per_round=1000):
    """Random-search baseline: sample architectures and keep the best per round.

    Args:
        model: the search network, forwarded to ``random_sample_arch`` and
            ``evaluate``.
        valid_queue: validation data loader, forwarded to ``evaluate``.
        valid_iter: iterator over ``valid_queue``, forwarded to ``evaluate``.
        rounds: number of rounds; when ``None`` it defaults to
            ``max(1, int(B / 10000))`` using the module-level budget ``B``.
        samples_per_round: number of architectures sampled and evaluated in
            each round (default 1000, the previously hard-coded value).

    Returns:
        A list with one ``(arch, objective_value)`` tuple per round: the sample
        with the lowest objective value in that round. Rounds with no samples
        contribute nothing.
    """
    if rounds is None:
        n_rounds = max(1,int(B/10000))
    else:
        n_rounds = rounds
    best_rounds = []
    for r in range(n_rounds):
        sample_vals = []
        for _ in range(samples_per_round):
            arch = random_sample_arch(model)
            try:
                ppl = evaluate(model,arch,valid_queue,valid_iter)
            except Exception:
                # Best effort: a failing architecture gets a large penalty so
                # the search continues, but the failure is recorded with its
                # traceback instead of being silently discarded.
                logging.exception('evaluation failed; assigning penalty objective')
                ppl = 1000000
            logging.info(arch)
            logging.info('objective_val: %.3f' % ppl)
            sample_vals.append((arch, ppl))
        sample_vals = sorted(sample_vals, key=lambda x:x[1])

        if sample_vals:
            best_rounds.append(sample_vals[0])
    return best_rounds


In [31]:
def get_init_pop(model,num_pop,n_nodes,valid_queue,valid_iter):
    """Create and evaluate a random initial population for the evolutionary search.

    Each individual is a flat vector of length ``8 * n_nodes``:
      * entries ``[0, 4*n_nodes)`` describe the normal cell,
      * entries ``[4*n_nodes, 8*n_nodes)`` describe the reduction cell,
      * within each half, intermediate node ``i`` owns four consecutive
        entries ``[input_node, op_index, input_node, op_index]``.

    Args:
        model: the search network, forwarded to ``evaluate``.
        num_pop: number of individuals to generate.
        n_nodes: number of intermediate nodes per cell.
        valid_queue: validation data loader, forwarded to ``evaluate``.
        valid_iter: iterator over ``valid_queue``, forwarded to ``evaluate``.

    Returns:
        ``(pop, fitness)``: ``pop`` is a float array of shape
        ``(num_pop, 8 * n_nodes)``; ``fitness`` has shape ``(num_pop,)`` and
        holds the value returned by ``evaluate`` for each individual.
    """
    pop=np.empty((num_pop,8*n_nodes))
    fitness=np.zeros((num_pop,))
    num_ops = len(genotypes.PRIMITIVES)  # loop-invariant
    reduce_base = 4*n_nodes              # start of the reduction-cell half
    for m in range(num_pop):
        normal = []
        reduction = []
        for i in range(n_nodes):
            # Same draw order as random_sample_arch so seeds stay comparable.
            ops = np.random.choice(range(num_ops), 4)
            nodes_in_normal = np.random.choice(range(i+2), 2, replace=False)
            nodes_in_reduce = np.random.choice(range(i+2), 2, replace=False)
            normal.extend([(nodes_in_normal[0], ops[0]), (nodes_in_normal[1], ops[1])])
            reduction.extend([(nodes_in_reduce[0], ops[2]), (nodes_in_reduce[1], ops[3])])
            pop[m,4*i:4*i+4]=(nodes_in_normal[0], ops[0], nodes_in_normal[1], ops[1])
            pop[m,reduce_base+4*i:reduce_base+4*i+4]=(nodes_in_reduce[0], ops[2], nodes_in_reduce[1], ops[3])
        arch=(normal, reduction)
        fitness[m,]=evaluate(model,arch,valid_queue,valid_iter)
    return pop,fitness

In [32]:
# Run the random-search baseline with the default number of rounds (derived
# from the budget B). `archs` receives one (arch, objective) tuple per round.
n_nodes = model._steps    
archs =random_selection(model,valid_queue,valid_iter)

07/01 08:05:40 PM ([(1, 6), (0, 3), (2, 2), (1, 3), (0, 5), (3, 0), (0, 2), (1, 7)], [(1, 4), (0, 5), (0, 3), (2, 0), (0, 0), (3, 6), (3, 3), (4, 7)])
07/01 08:05:40 PM looping back over valid set
07/01 08:09:05 PM ([(1, 6), (0, 3), (2, 2), (1, 3), (0, 5), (3, 0), (0, 2), (1, 7)], [(1, 4), (0, 5), (0, 3), (2, 0), (0, 0), (3, 6), (3, 3), (4, 7)])
07/01 08:09:05 PM objective_val: 0.510
07/01 08:09:05 PM ([(1, 3), (0, 0), (1, 4), (0, 4), (1, 1), (0, 1), (1, 4), (0, 6)], [(0, 6), (1, 0), (0, 7), (2, 3), (2, 2), (0, 1), (2, 3), (1, 1)])
07/01 08:09:05 PM looping back over valid set
07/01 08:12:33 PM ([(1, 3), (0, 0), (1, 4), (0, 4), (1, 1), (0, 1), (1, 4), (0, 6)], [(0, 6), (1, 0), (0, 7), (2, 3), (2, 2), (0, 1), (2, 3), (1, 1)])
07/01 08:12:33 PM objective_val: 0.516
07/01 08:12:33 PM ([(1, 3), (0, 2), (2, 7), (0, 3), (2, 7), (3, 3), (1, 5), (3, 1)], [(0, 2), (1, 5), (1, 5), (0, 4), (3, 1), (1, 3), (0, 7), (2, 2)])
07/01 08:12:33 PM looping back over valid set
07/01 08:15:56 PM ([(1, 3), (

In [33]:
def corssover(model,pop,fitness,num_cross):
    """Two-point crossover among the ``num_cross`` fittest individuals.

    The ``num_cross`` individuals with the lowest fitness are shuffled and
    paired up. For every pair, two distinct cut points are drawn on
    (input_node, op_index) gene-pair boundaries and the segment between them
    is exchanged, producing two children.

    Args:
        model: object exposing ``_steps`` (number of intermediate nodes).
        pop: array of shape ``(n_individuals, 8 * model._steps)``.
        fitness: array of shape ``(n_individuals,)``; lower is better.
        num_cross: number of parents to select. If odd, the unpaired parent is
            copied unchanged into the last row.

    Returns:
        Array of shape ``(num_cross, pop.shape[1])`` holding the children.
        ``pop`` itself is not modified.
    """
    index=np.argsort(fitness)
    pop_select=pop[index[0:num_cross],]

    # Randomise the pairing of the selected parents.
    inde_cross=np.arange(num_cross)
    np.random.shuffle(inde_cross)
    pop_select=pop_select[inde_cross,]

    # Children start as copies of their parents; previously the result buffer
    # was uninitialised, leaving garbage in the last row for odd num_cross.
    pop_cross=pop_select.copy()

    for i in range(num_cross//2):
        parent1=pop_select[2*i,]
        parent2=pop_select[2*i+1,]

        # Two distinct cut points, expressed in gene pairs and then converted
        # to vector positions (each pair spans two entries).
        cross_points=np.arange(4*model._steps)
        np.random.shuffle(cross_points)
        cross_points=np.sort(cross_points[0:2])
        p1=2*cross_points[0]
        p2=2*cross_points[1]

        # Write into the children, read from the untouched parents. The old
        # code aliased child and parent, so the second child received its own
        # genes back and was returned unchanged.
        pop_cross[2*i,p1:p2]=parent2[p1:p2]
        pop_cross[2*i+1,p1:p2]=parent1[p1:p2]

    return pop_cross

def mutation(model,pop,fitness,num_mutation,mutation_prob=0.2):
    """Mutate copies of the ``num_mutation`` fittest individuals.

    Every gene is resampled independently with probability ``mutation_prob``.
    Even positions hold an input-node index and are redrawn from
    ``range(node + 2)`` (the predecessors available to that intermediate
    node); odd positions hold an operation index and are redrawn from
    ``range(len(genotypes.PRIMITIVES))``.

    Args:
        model: unused here; kept for a signature parallel to ``corssover``.
        pop: array of shape ``(n_individuals, n_genes)``; the first half of the
            genes encodes the normal cell, the second half the reduction cell.
        fitness: array of shape ``(n_individuals,)``; lower is better.
        num_mutation: number of individuals to select and mutate.
        mutation_prob: per-gene mutation probability (default 0.2).

    Returns:
        Array of shape ``(num_mutation, n_genes)`` with the mutated copies.
        ``pop`` itself is not modified.

    NOTE(review): a mutated input node may coincide with the node's other
    input, which random sampling (replace=False) never produces -- confirm
    whether duplicates are acceptable.
    """
    index=np.argsort(fitness)
    pop_select=pop[index[0:num_mutation],]
    n_genes=pop.shape[1]
    pop_mutation=np.empty((num_mutation,n_genes))
    num_ops = len(genotypes.PRIMITIVES)
    half=n_genes//2  # integer split point between normal and reduction genes

    for i in range(num_mutation):
        pop_mutation[i,]=pop_select[i,]

        for j in range(n_genes):
            # Position within the cell, then the intermediate node it belongs
            # to (four genes per node); node n has n + 2 possible inputs.
            # Integer arithmetic keeps the randint bound an int.
            q=j-half if j>=half else j
            m=q//4+2
            if np.random.rand()<mutation_prob:
                if j%2==0:
                    pop_mutation[i,j]=np.random.randint(m)
                else:
                    pop_mutation[i,j]=np.random.randint(num_ops)
    return pop_mutation


def get_fitness(model,pop):
    """Evaluate every individual of an encoded population.

    Each row of ``pop`` is decoded into ``(normal, reduction)`` lists of
    ``(input_node, op_index)`` tuples and scored with ``evaluate``.

    Args:
        model: the search network; ``model._steps`` gives the number of
            intermediate nodes per cell.
        pop: array of shape ``(n_individuals, 8 * model._steps)``; the first
            ``4 * model._steps`` entries encode the normal cell, the rest the
            reduction cell, four entries
            ``[input_node, op_index, input_node, op_index]`` per node.

    Returns:
        Array of shape ``(n_individuals,)`` with the value returned by
        ``evaluate`` for each individual.

    Uses the module-level ``valid_queue`` and ``valid_iter``.
    """
    num_pop=pop.shape[0]
    fitness=np.zeros((num_pop))
    n_nodes=model._steps
    # Offset of the reduction-cell half; previously hard-coded as 16, which is
    # only correct for 4 intermediate nodes.
    reduce_base=4*n_nodes
    for m in range(num_pop):
        indiv=pop[m,]
        normal=[]
        reduction=[]
        for i in range(n_nodes):
            # Built-in int(): the np.int alias is removed in recent NumPy.
            s1,s2,s3,s4=(int(v) for v in indiv[4*i:4*i+4])
            s5,s6,s7,s8=(int(v) for v in indiv[reduce_base+4*i:reduce_base+4*i+4])
            normal.extend([(s1,s2), (s3,s4)])
            reduction.extend([(s5,s6), (s7,s8)])
        arch=(normal, reduction)
        fitness[m,]=evaluate(model,arch,valid_queue,valid_iter)

    return fitness


In [42]:
def EA_arch_search(model,num_pop,num_ite,num_cross,num_mutation):
    """Evolutionary architecture search over the encoded cell space.

    Starts from a random population, then for ``num_ite`` iterations creates
    offspring by crossover and mutation, evaluates them, and keeps the
    ``num_pop`` individuals with the lowest fitness among parents and
    offspring.

    Args:
        model: the search network; ``model._steps`` gives the number of
            intermediate nodes per cell.
        num_pop: population size.
        num_ite: number of generations.
        num_cross: number of parents passed to ``corssover``.
        num_mutation: number of individuals passed to ``mutation``.

    Returns:
        A ``genotypes.Genotype`` describing the best individual of the final
        population, with ``(operation_name, input_node)`` entries.

    Uses the module-level ``valid_queue`` and ``valid_iter``.
    """
    n_nodes = model._steps
    # Offset of the reduction-cell half of an individual (was hard-coded 16).
    reduce_base = 4*n_nodes

    pop,fitness=get_init_pop(model,num_pop,n_nodes,valid_queue,valid_iter)

    for it in range(num_ite):
        pop_cross=corssover(model,pop,fitness,num_cross)
        fitness_cross=get_fitness(model,pop_cross)
        pop_mutate=mutation(model,pop,fitness,num_mutation)
        fitness_mutate=get_fitness(model,pop_mutate)
        pop_comb=np.concatenate((pop,pop_cross,pop_mutate),axis=0)
        fitness_comb=np.concatenate((fitness,fitness_cross,fitness_mutate),axis=0)
        # Survivor selection. Population and fitness must be reordered with the
        # SAME permutation; previously only the population was sorted, so the
        # kept fitness values no longer belonged to the kept individuals.
        index=np.argsort(fitness_comb)
        pop=pop_comb[index[0:num_pop],]
        fitness=fitness_comb[index[0:num_pop],]

    index=np.argsort(fitness)
    indi_final=pop[index[0],]

    normal_struc=[]
    reduction_struc=[]
    for i in range(n_nodes):
        # Per node: [input_node, op_index, input_node, op_index].
        # Built-in int(): the np.int alias is removed in recent NumPy.
        n1,o1,n2,o2=(int(v) for v in indi_final[4*i:4*i+4])
        n3,o3,n4,o4=(int(v) for v in indi_final[reduce_base+4*i:reduce_base+4*i+4])

        # Genotype entries are (operation_name, input_node). The operation
        # name must be looked up with the op index, not the node index.
        normal_struc.append((genotypes.PRIMITIVES[o1], n1))
        normal_struc.append((genotypes.PRIMITIVES[o2], n2))
        reduction_struc.append((genotypes.PRIMITIVES[o3], n3))
        reduction_struc.append((genotypes.PRIMITIVES[o4], n4))

    concat = range(2, n_nodes+2)
    genotype = genotypes.Genotype(normal=normal_struc, normal_concat=concat,reduce=reduction_struc, reduce_concat=concat)
    best_arch=genotype

    return best_arch

In [43]:
# Small smoke-test run of the evolutionary search (4 individuals, 2 generations).
# Note: this rebinds `archs`, replacing the random-search result from the earlier
# cell; EA_arch_search returns its `best_arch` value, not a list.
archs = EA_arch_search(model,num_pop=4,num_ite=2,num_cross=2,num_mutation=2)

07/01 09:59:06 PM ([(1, 0), (0, 3), (0, 1), (1, 4), (1, 4), (0, 5), (0, 0), (4, 1)], [(1, 2), (0, 7), (0, 7), (2, 4), (3, 1), (0, 2), (2, 0), (0, 2)])
07/01 09:59:06 PM looping back over valid set
07/01 10:02:30 PM ([(0, 2), (1, 2), (2, 2), (0, 1), (2, 2), (3, 2), (0, 2), (2, 5)], [(1, 0), (0, 2), (0, 6), (2, 6), (3, 3), (1, 2), (4, 7), (0, 0)])
07/01 10:02:30 PM looping back over valid set
07/01 10:05:53 PM ([(0, 2), (1, 1), (2, 5), (0, 6), (3, 4), (0, 1), (4, 3), (2, 3)], [(1, 7), (0, 6), (0, 5), (2, 0), (0, 3), (2, 5), (3, 4), (4, 4)])
07/01 10:05:53 PM looping back over valid set
07/01 10:09:15 PM ([(0, 0), (1, 5), (0, 5), (2, 4), (3, 0), (1, 6), (0, 5), (1, 2)], [(1, 5), (0, 3), (1, 1), (2, 0), (3, 1), (1, 1), (1, 6), (2, 2)])
07/01 10:09:15 PM looping back over valid set
07/01 10:12:36 PM ([(1, 0), (0, 3), (0, 1), (1, 4), (1, 4), (0, 5), (0, 0), (4, 1)], [(1, 5), (0, 7), (0, 7), (2, 4), (3, 1), (0, 2), (2, 0), (0, 2)])
07/01 10:12:36 PM looping back over valid set
07/01 10:15:58 

NameError: name 'self' is not defined