In [1]:
import os
import sys
import time
import glob
import numpy as np
import torch
import utils
import logging
import argparse
import torch.nn as nn
import torch.utils
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn

from torch.autograd import Variable
from model_search import Network
from architect import Architect

import genotypes


import copy


# ---------------------------------------------------------------------------
# Experiment configuration.
# The parser is evaluated with an empty argument list (``args=[]``) so that it
# works inside a notebook kernel; every option therefore takes its default
# unless ``args`` is modified afterwards.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser("cifar")
parser.add_argument('--data', type=str, default='../data', help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=48, help='batch size')
parser.add_argument('--learning_rate', type=float, default=0.025, help='init learning rate')
parser.add_argument('--learning_rate_min', type=float, default=0.001, help='min learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
parser.add_argument('--weight_decay', type=float, default=3e-4, help='weight decay')
# report_freq is a number of steps (used as ``step % args.report_freq``), so it
# is parsed as an integer rather than a float.
parser.add_argument('--report_freq', type=int, default=50, help='report frequency')
parser.add_argument('--gpu', type=int, default=0, help='gpu device id')
parser.add_argument('--epochs', type=int, default=50, help='num of training epochs')
parser.add_argument('--init_channels', type=int, default=16, help='num of init channels')
parser.add_argument('--layers', type=int, default=8, help='total number of layers')
parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model')
parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
parser.add_argument('--drop_path_prob', type=float, default=0.3, help='drop path probability')
parser.add_argument('--save', type=str, default='EXP', help='experiment name')
parser.add_argument('--seed', type=int, default=2, help='random seed')
parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping')
parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
# NOTE(review): ``store_true`` combined with ``default=True`` means this option
# is always True; left as is to keep the recorded configuration unchanged.
parser.add_argument('--unrolled', action='store_true', default=True, help='use one-step unrolled validation loss')
parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
args = parser.parse_args(args=[])

# Each run gets its own time-stamped experiment directory; the *.py files of
# the working directory are handed to utils.create_exp_dir as scripts to save.
args.save = 'EA_search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

# Log to stdout and, additionally, to <experiment dir>/log.txt.
log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

# Number of output classes passed to the network constructor.
CIFAR_CLASSES = 10

Experiment dir : EA_search-EXP-20191021-220816


In [2]:
def random_arch_generate(n_nodes=4):
    """Sample a random encoding for a single cell.

    For every intermediate node ``i`` two operation indices are drawn (with
    replacement) from ``range(len(genotypes.PRIMITIVES))`` and two distinct
    input-node indices are drawn from ``range(i + 2)``.

    Args:
        n_nodes: number of intermediate nodes in the cell. Defaults to 4,
            the value that used to be hard-coded here.

    Returns:
        A list with ``2 * n_nodes`` ``(op_index, input_node)`` tuples.
        The list describes ONE cell only; ``get_weights_from_arch`` expects a
        pair ``[normal_cell, reduce_cell]`` of such lists.
    """
    num_ops = len(genotypes.PRIMITIVES)

    arch_gene = []
    for i in range(n_nodes):
        ops = np.random.choice(range(num_ops), 2)
        # replace=False guarantees the two incoming edges use different nodes.
        input_nodes = np.random.choice(range(i + 2), 2, replace=False)
        arch_gene.extend([(ops[0], input_nodes[0]), (ops[1], input_nodes[1])])
    return arch_gene

def get_weights_from_arch(arch_comb):
    """Convert a discrete architecture into one-hot architecture weights.

    Reads ``_steps`` from the module-level ``model`` (not from an argument).

    Args:
        arch_comb: pair ``[normal, reduce]``; each entry is indexable by
            ``2*i`` and ``2*i + 1`` for every step ``i`` and yields an
            ``(op_index, input_node)`` pair.

    Returns:
        ``[alphas_normal, alphas_reduce]``: two CUDA tensors with one row per
        candidate edge and one column per primitive, zero everywhere except
        for a 1 at each selected ``(edge, op)`` position.
    """
    n_nodes = model._steps
    num_ops = len(genotypes.PRIMITIVES)
    # Step i contributes i + 2 rows (one per candidate input node).
    k = sum(i + 2 for i in range(model._steps))

    alphas_normal = Variable(torch.zeros(k, num_ops).cuda(), requires_grad=False)
    alphas_reduce = Variable(torch.zeros(k, num_ops).cuda(), requires_grad=False)

    row_start = 0
    for step in range(n_nodes):
        first, second = 2 * step, 2 * step + 1
        selected = (
            (alphas_normal, np.int_(arch_comb[0][first])),
            (alphas_normal, np.int_(arch_comb[0][second])),
            (alphas_reduce, np.int_(arch_comb[1][first])),
            (alphas_reduce, np.int_(arch_comb[1][second])),
        )
        for alphas, gene in selected:
            # gene[0] is the operation index, gene[1] the input node.
            alphas[row_start + gene[1], gene[0]] = 1
        row_start += step + 2

    return [alphas_normal, alphas_reduce]


def set_model_weights(model, weights):
    """Install architecture weights on ``model`` and return the same object.

    Args:
        model: object whose ``alphas_normal``, ``alphas_reduce`` and
            ``_arch_parameters`` attributes are overwritten.
        weights: sequence whose items 0 and 1 become ``alphas_normal`` and
            ``alphas_reduce`` respectively.

    Returns:
        ``model`` itself (modified in place).
    """
    for position, attr_name in enumerate(("alphas_normal", "alphas_reduce")):
        setattr(model, attr_name, weights[position])
    # Rebuild the list from the attributes that were just assigned.
    model._arch_parameters = [
        getattr(model, "alphas_normal"),
        getattr(model, "alphas_reduce"),
    ]
    return model



def train(train_queue, valid_queue, test_queue, model, architect, arch_archive, criterion, optimizer, lr):
    """Train the shared network weights for one pass over ``train_queue``.

    For every mini-batch a new random architecture (one random normal cell and
    one random reduction cell) is sampled, installed on ``model`` as one-hot
    architecture weights, and a single SGD step is taken on the network
    weights.

    Args:
        train_queue: iterable of ``(input, target)`` mini-batches.
        valid_queue, test_queue, architect, arch_archive, lr: not used by this
            function; kept so existing call sites keep working.
        model: network to train; called as ``model(input)``.
        criterion: loss called as ``criterion(logits, target)``.
        optimizer: optimizer stepping the network weights.

    Returns:
        ``(top1.avg, objs.avg)``: running top-1 accuracy and running loss.
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (inputs, target) in enumerate(train_queue):
        model.train()
        n = inputs.size(0)

        inputs = inputs.cuda()
        # ``non_blocking`` replaces the old ``async`` keyword, which is a
        # reserved word (SyntaxError) from Python 3.7 on.
        target = target.cuda(non_blocking=True)

        # get_weights_from_arch expects [normal_cell, reduce_cell]; sample one
        # random cell for each. (The generator must be *called* - passing the
        # function object itself cannot be indexed.)
        arch_gene = [random_arch_generate(), random_arch_generate()]
        model_weights = get_weights_from_arch(arch_gene)
        model = set_model_weights(model, model_weights)

        logits = model(inputs)
        loss = criterion(logits, target)

        optimizer.zero_grad()
        loss.backward()
        # In-place variant; the un-suffixed clip_grad_norm is deprecated.
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg



def infer(valid_queue, model,arch_gen_compa, criterion):
    """Evaluate one architecture on ``valid_queue`` with the current weights.

    The architecture is installed on ``model`` as one-hot architecture weights
    (via ``get_weights_from_arch`` / ``set_model_weights``) and the model is
    run in eval mode over every batch of ``valid_queue``.

    Args:
        valid_queue: iterable of ``(input, target)`` mini-batches.
        model: network to evaluate; its architecture weights are overwritten.
        arch_gen_compa: architecture as ``[normal, reduce]`` lists of
            ``(op_index, input_node)`` pairs.
        criterion: loss called as ``criterion(logits, target)``.

    Returns:
        The running top-1 accuracy (``top1.avg``) moved to the CPU and
        converted with ``.numpy()``. Higher is better.
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()

    model_weights = get_weights_from_arch(arch_gen_compa)
    model = set_model_weights(model, model_weights)

    # ``volatile=True`` no longer disables autograd; torch.no_grad() is the
    # supported way to evaluate without building a graph.
    with torch.no_grad():
        for step, (inputs, target) in enumerate(valid_queue):
            inputs = inputs.cuda()
            # ``non_blocking`` replaces the old ``async`` keyword, which is a
            # reserved word (SyntaxError) from Python 3.7 on.
            target = target.cuda(non_blocking=True)

            logits = model(inputs)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = inputs.size(0)
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    valid_acc = top1.avg

    return valid_acc.cpu().numpy()

In [3]:
# This notebook needs a CUDA device; stop right away when none is present.
if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

# Select the GPU and configure cuDNN.
torch.cuda.set_device(args.gpu)
cudnn.benchmark = True
cudnn.enabled = True

# Seed NumPy and PyTorch (CPU and CUDA) with the configured seed.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

logging.info('gpu device = %d' % args.gpu)
logging.info("args = %s", args)

# Loss function, placed on the GPU.
criterion = nn.CrossEntropyLoss().cuda()

# Search network, placed on the GPU.
model = Network(
    args.init_channels,
    CIFAR_CLASSES,
    args.layers,
    criterion,
).cuda()
logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

# SGD over everything returned by model.parameters().
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=args.learning_rate,
    momentum=args.momentum,
    weight_decay=args.weight_decay,
)

10/21 10:08:16 PM gpu device = 0
10/21 10:08:16 PM args = Namespace(arch_learning_rate=0.0003, arch_weight_decay=0.001, batch_size=48, cutout=False, cutout_length=16, data='../data', drop_path_prob=0.3, epochs=50, gpu=0, grad_clip=5, init_channels=16, layers=8, learning_rate=0.025, learning_rate_min=0.001, model_path='saved_models', momentum=0.9, report_freq=50, save='EA_search-EXP-20191021-220816', seed=2, train_portion=0.5, unrolled=True, weight_decay=0.0003)
10/21 10:08:18 PM param size = 1.930618MB


In [4]:
# CIFAR-10 training set with the training transform.
train_transform, valid_transform = utils._data_transforms_cifar10(args)
train_data = dset.CIFAR10(
    root=args.data, train=True, download=True, transform=train_transform)

# The first `split` indices feed train_queue, the remaining ones valid_queue.
num_train = len(train_data)
indices = list(range(num_train))
split = int(np.floor(args.train_portion * num_train))

# Both queues read from train_data (and therefore use train_transform).
train_queue = torch.utils.data.DataLoader(
    train_data,
    batch_size=args.batch_size,
    sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
    pin_memory=True,
    num_workers=2,
)

valid_queue = torch.utils.data.DataLoader(
    train_data,
    batch_size=args.batch_size,
    sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
    pin_memory=True,
    num_workers=2,
)

# CIFAR-10 test set with the validation transform, iterated in fixed order.
test_data = dset.CIFAR10(
    root=args.data, train=False, download=True, transform=valid_transform)

test_queue = torch.utils.data.DataLoader(
    test_data,
    batch_size=args.batch_size,
    shuffle=False,
    pin_memory=True,
    num_workers=2,
)

# Cosine learning-rate schedule over args.epochs, bounded below by
# args.learning_rate_min.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, float(args.epochs), eta_min=args.learning_rate_min)

architect = Architect(model, args)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Load the trained supernet weights.
# The checkpoint location can be overridden with the SUPERNET_CHECKPOINT
# environment variable; the original machine-specific path is the default.
supernet_checkpoint = os.environ.get(
    'SUPERNET_CHECKPOINT',
    '/data/jiahzhao/NSAS/lifelong-randomnas/cnn/CL_CNN_SUPERNET/randomNAS_CL_supernet/randomNAS_CL_constraint.pt')

model.load_state_dict(torch.load(supernet_checkpoint))

In [6]:
def EA_arch_search(model,num_pop,num_ite,num_cross,num_mutation):
    """Evolutionary architecture search using ``infer`` as the fitness function.

    Chromosome layout (one row of ``pop``): for each cell (normal first, then
    reduction) and each intermediate node ``i`` there are four genes
    ``(op_a, node_a, op_b, node_b)``; ops index ``genotypes.PRIMITIVES`` and
    nodes are drawn from ``range(i + 2)``.

    Fitness is the top-1 validation accuracy returned by ``infer``, so HIGHER
    is better: parents, survivors and the final result are all taken from the
    high end of the ranking.

    Uses the module-level ``valid_queue`` and ``criterion`` for evaluation.

    Args:
        model: network evaluated by ``infer``; its ``_steps`` attribute gives
            the number of intermediate nodes (falls back to 4 when absent).
        num_pop: population size kept after every generation.
        num_ite: number of generations.
        num_cross: number of top individuals used as crossover parents
            (and number of crossover children produced).
        num_mutation: number of top individuals that are mutated.

    Returns:
        List with the (up to) two best ``genotypes.Genotype`` objects of the
        final population, best first.
    """
    n_nodes = getattr(model, '_steps', 4)
    num_ops = len(genotypes.PRIMITIVES)
    cell_len = 4 * n_nodes          # genes that describe one cell
    mutation_prob = 0.2             # per-gene mutation probability

    def decode(indiv):
        """Turn one chromosome into [normal, reduce] lists of (op, node) ints."""
        genes = [int(g) for g in indiv]
        return [
            [(genes[start + 2 * e], genes[start + 2 * e + 1])
             for e in range(2 * n_nodes)]
            for start in (0, cell_len)
        ]

    def evaluate(indiv):
        """Validation accuracy of a single chromosome."""
        return infer(valid_queue, model, decode(indiv), criterion)

    def rank(fitness):
        """Indices ordered from best (highest accuracy) to worst."""
        return np.argsort(fitness)[::-1]

    def get_fitness(pop):
        """Evaluate every chromosome of ``pop``."""
        fitness = np.zeros(pop.shape[0])
        for m in range(pop.shape[0]):
            fitness[m] = evaluate(pop[m])
        return fitness

    def get_init_pop(num_pop):
        """Sample and evaluate ``num_pop`` random chromosomes."""
        pop = np.empty((num_pop, 2 * cell_len))
        fitness = np.zeros(num_pop)
        for m in range(num_pop):
            for i in range(n_nodes):
                ops = np.random.choice(range(num_ops), 4)
                nodes_in_normal = np.random.choice(range(i + 2), 2, replace=False)
                nodes_in_reduce = np.random.choice(range(i + 2), 2, replace=False)
                pop[m, 4 * i:4 * i + 4] = (
                    ops[0], nodes_in_normal[0], ops[1], nodes_in_normal[1])
                pop[m, cell_len + 4 * i:cell_len + 4 * i + 4] = (
                    ops[2], nodes_in_reduce[0], ops[3], nodes_in_reduce[1])
            fitness[m] = evaluate(pop[m])
        return pop, fitness

    def crossover(pop, fitness, num_cross):
        """Two-point crossover between randomly paired top individuals."""
        parents = pop[rank(fitness)[:num_cross]]
        parents = parents[np.random.permutation(parents.shape[0])]
        # Children start as copies of the parents, so the swap below reads
        # from untouched parent rows, and an unpaired last parent (odd count)
        # is carried over instead of leaving an uninitialised row.
        children = parents.copy()
        num_pairs = pop.shape[1] // 2   # number of (op, node) gene pairs
        for a in range(0, children.shape[0] - 1, 2):
            # Cut points are multiples of 2 so (op, node) pairs stay together.
            lo, hi = 2 * np.sort(np.random.choice(num_pairs, 2, replace=False))
            children[a, lo:hi] = parents[a + 1, lo:hi]
            children[a + 1, lo:hi] = parents[a, lo:hi]
        return children

    def mutation(pop, fitness, num_mutation):
        """Mutate each gene of the top individuals with ``mutation_prob``."""
        mutants = pop[rank(fitness)[:num_mutation]].copy()
        for i in range(mutants.shape[0]):
            for j in range(mutants.shape[1]):
                if np.random.rand() < mutation_prob:
                    if j % 2 == 0:
                        # even positions hold operation indices
                        mutants[i, j] = np.random.randint(num_ops)
                    else:
                        # odd positions hold input nodes; node i may read
                        # from range(i + 2)
                        step = (j % cell_len) // 4
                        mutants[i, j] = np.random.randint(step + 2)
        return mutants

    def regulize_pop(pop):
        """Resample the second input node wherever both inputs coincide."""
        for m in range(pop.shape[0]):
            for j in range(2 * n_nodes):
                candidates = (j % n_nodes) + 2
                first, second = 4 * j + 1, 4 * j + 3
                while pop[m, first] == pop[m, second]:
                    pop[m, second] = np.random.randint(candidates)
        return pop

    pop, fitness = get_init_pop(num_pop)

    for _ in range(num_ite):
        # Crossover and mutation can give a node the same input twice;
        # regulize_pop repairs that before the offspring are evaluated.
        pop_cross = regulize_pop(crossover(pop, fitness, num_cross))
        fitness_cross = get_fitness(pop_cross)
        pop_mutate = regulize_pop(mutation(pop, fitness, num_mutation))
        fitness_mutate = get_fitness(pop_mutate)

        pop_comb = np.concatenate((pop, pop_cross, pop_mutate), axis=0)
        fitness_comb = np.concatenate((fitness, fitness_cross, fitness_mutate), axis=0)

        # Keep the num_pop best; index pop AND fitness with the same ranking
        # so they stay aligned.
        survivors = rank(fitness_comb)[:num_pop]
        pop = pop_comb[survivors]
        fitness = fitness_comb[survivors]

    best_arch = []
    concat = range(2, 2 + n_nodes)
    for idx in rank(fitness)[:2]:
        normal, reduction = decode(pop[idx])
        genotype = genotypes.Genotype(
            normal=[(genotypes.PRIMITIVES[op], node) for op, node in normal],
            normal_concat=concat,
            reduce=[(genotypes.PRIMITIVES[op], node) for op, node in reduction],
            reduce_concat=concat)
        best_arch.append(genotype)

    return best_arch

In [None]:
# Run the evolutionary search on the loaded supernet.
p = EA_arch_search(
    model,
    num_pop=100,
    num_ite=50,
    num_cross=60,
    num_mutation=40,
)