In [9]:
import torch
import numpy as np
import pickle
#from torchsummary import summary
from collections import OrderedDict
from torch.utils.tensorboard import SummaryWriter
import datetime
import time
import copy
import torchvision.datasets as datasets
# Fetch MNIST into ./data (downloaded when missing): the training split is built first, then the test split.
mnist_trainset, mnist_testset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=None)
    for is_train in (True, False)
)

In [10]:
class DNN(torch.nn.Module):
    """Fully connected network: Linear layers separated by ReLU activations.

    `layers` lists the layer widths, input dimension first and output
    dimension last.  Every Linear layer except the final one is followed by
    an activation; the final Linear layer emits raw values.
    """

    def __init__(self, layers):
        super().__init__()
        # Number of Linear layers, and the activation class placed between them.
        self.depth = len(layers) - 1
        self.activation = torch.nn.ReLU

        final_index = self.depth - 1
        named_modules = OrderedDict()
        # Hidden part: 'layer_k' followed by 'activation_k'.
        for k in range(final_index):
            named_modules['layer_%d' % k] = torch.nn.Linear(layers[k], layers[k + 1])
            named_modules['activation_%d' % k] = self.activation()
        # Output layer: Linear only, no activation afterwards.
        named_modules['layer_%d' % final_index] = torch.nn.Linear(layers[-2], layers[-1])

        self.layers = torch.nn.Sequential(named_modules)

    def forward(self, x):
        """Run `x` through the sequential stack and return the result."""
        return self.layers(x)

In [11]:
## Assume the training and validation data are shaped [N,C,H,W]; DE_MLP flattens each sample to a vector before use.

class DE_MLP():
    def __init__(self, outdim=1,maxdepth=70,mindepth=5,minneuron=4,maxneuron=10,bsize=10,epoch=100,initSize=20,maxiter=10,stopcount=3,\
                 trainingset=None,validationset=None,trainingTarget=None,validateTarget=None,crossover=1):
        self.best=[]
        self.mean=[]
        self.outdim=outdim
        self.maxdepth=maxdepth
        self.mindepth=mindepth
        self.minneuron = minneuron
        self.maxneuron = maxneuron
        self.bsize = bsize
        self.epoch = epoch
        self.stopcount = stopcount
        self.pplSize = initSize
        self.maxiter = maxiter
        self.training = trainingset.reshape((trainingset.shape[0],-1))
        self.validationSet = validationset.reshape((validationset.shape[0],-1))
        self.target=trainingTarget
        self.validationTarget = validateTarget
        self.MLPlayerlist = []
        self.depthlist=np.random.choice(range(self.mindepth,self.maxdepth),self.pplSize,replace=True)
        self.crossover=crossover
        self.adap_conf = (0.1,0.1,0.9)
        self.tb = SummaryWriter('./Statistic/'+datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
        
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')

        # Generate initial population
        for i in range(self.pplSize):
            depth = self.depthlist[i]
            tmp = []
            # the number of neurons for the first layer is the dimension of the element in training data (in our case the size of the image)
            tmp.append(self.training.shape[1])
            for j in range(depth):
                # generate the number of neurons for each layer
                tmp.append(np.random.choice(range(self.minneuron,self.maxneuron),1,replace=False)[0])
            tmp.append(self.outdim) # last layer consist of 1 neuron by default
            tmp=np.array(tmp)
            self.MLPlayerlist.append(tmp)
    
    # define fit function - it calculates the fitness of one individual (one NN) 
    def fit(self,config,id_,p=None):
        dnn = DNN(config) # define DNN based on configurations (layers and neurons)
        dnn.layers.to(self.device)
        best = float('inf')
        stop=0
        opt = torch.optim.Adam(dnn.layers.parameters(), lr=0.001)
        loss = torch.nn.BCEWithLogitsLoss()
        batch = self.training.shape[0]//self.bsize
        vbatch = self.validationSet.shape[0]//self.bsize
        idxs = [x for x in range(self.training.shape[0])]
        vidxs = [x for x in range(self.validationSet.shape[0])]
        for e in range(self.epoch):
            start=time.time()
            # training
            np.random.shuffle(idxs)
            dnn.layers.train()
            batchloss=0
            for i in range(batch):
                idx=idxs[i*self.bsize:i*self.bsize+self.bsize]
                opt.zero_grad()
                data = torch.tensor(self.training[idx]).float().to(self.device)
                y = torch.tensor(self.target[idx]).float().to(self.device)
                yhat = dnn(data)
                l = loss(yhat,y)
                batchloss+=l.item()
                l.backward()
                opt.step()
            # validating
            dnn.layers.eval()
            np.random.shuffle(vidxs)
            vloss=0
            for i in range(vbatch):
                vidx=vidxs[i*self.bsize:i*self.bsize+self.bsize]
                vdata = torch.tensor(self.validationSet[vidx]).float().to(self.device)
                vy = torch.tensor(self.validationTarget[vidx]).float().to(self.device)
                vyhat = dnn(vdata)
                vl = loss(vyhat,vy)
                vloss += vl.item()
            vloss=vloss/vbatch
            # updating best loss
            if(vloss<best): best=vloss
            # updating stopping condition
            else: stop+=1
            end=time.time()
            if(p is not None):
                print(f'Process: {p:3d}, ConfigID: {id_:3d}, Epoch: {e:3d}, Training Loss: {(batchloss/batch):10.8f}, Validation Loss: {(vloss):10.8f},\
                    Best: {best:10.8f}, StopCount/Limit: {stop:3d}/{self.stopcount:3d}, Time:{(end-start):10.8f}')
            else:
                print(f'ConfigID: {id_:3d}, Epoch: {e:3d}, Training Loss: {(batchloss/batch):10.8f}, Validation Loss: {(vloss):10.8f},\
                    Best: {best:10.8f}, StopCount/Limit: {stop:3d}/{self.stopcount:3d}, Time:{(end-start):10.8f}')
            # stopping condition and stopping
            if(stop>=self.stopcount):
                return best,config,id_ 

    def self_adaptive_beta(self,beta):
        tau,beta1,betau = self.adap_conf
        r1,r2 = np.random.uniform(0,1,2)
        if(r2 < tau): beta = round(beta1 + r1 * betau,3) # else, keep the beta same
        return beta
    
    def mutation_rand_1_z(self,x1,xs,beta,debug=False):
        indim = x1[0]
        x1 = x1[1:-1] # remove in/out dim
        xs[0] = xs[0][1:-1]
        xs[1] = xs[1][1:-1]
        if(debug):
            print(f'M1 : x1 len {x1.shape[0]} xs0 len {xs[0].shape[0]} xs1 len {xs[1].shape[0]}')
            print(f'M1 : x1 {x1} \nM1 : xs0 {xs[0]} \nM1 : xs1 len {xs[1]}')
        #
        # A. Mutating the # of layers
        minlen = np.min([x1.shape[0],xs[0].shape[0],xs[1].shape[0]])
        if(debug): print(f'M1 : minlen {minlen}')          
        newminlen = minlen
        targetlen=int(np.floor((x1.shape[0]) + beta * (xs[0].shape[0] - xs[1].shape[0])))
        # check the sign of targetlen: if the new length == 0 , set it back to target len , if <0 , take abs
        if(targetlen==0): targetlen=x1.shape[0]
        elif(targetlen<0): targetlen=abs(targetlen)
        # check if new length is between mindepth and maxdepth
        if(targetlen < self.mindepth): targetlen = self.mindepth
        elif(targetlen > self.maxdepth): targetlen = self.maxdepth
        # new minimum length is min of minlen and targetlen
        if(targetlen < minlen): newminlen=targetlen
        if(debug): print(f'M1 : New Min Len :{newminlen}, Length Mutation :{targetlen}')
        #    
        # B. Mutating the # of neurons
        # As lengths of x1, xs[0], xs[1] and new length can possibly be different, 
        # 1) do the mutation for # of neurons for new minlen, 
        # 2) apply the same rule to remaining if needed
        xa = x1[:newminlen] + beta * (xs[0][:newminlen] - xs[1][:newminlen]) 
        if(targetlen>minlen):
            xaa = np.zeros((targetlen-minlen))
            a,b,c=None,None,None
            for i in range(targetlen-newminlen): # if number of neurons missing in vector, generate random from range (min)
                if(x1.shape[0]<=newminlen+i): a=np.random.choice(range(self.minneuron,self.maxneuron),1,replace=False)[0]
                elif(x1.shape[0]>newminlen+i): a=x1[newminlen+i] 
                if(xs[0].shape[0]<=newminlen+i): b=np.random.choice(range(self.minneuron,self.maxneuron),1,replace=False)[0]
                elif(xs[0].shape[0]>newminlen+i): b=xs[0][newminlen+i]  
                if(xs[1].shape[0]<=newminlen+i): c=np.random.choice(range(self.minneuron,self.maxneuron),1,replace=False)[0]
                elif(xs[1].shape[0]>newminlen+i): c=xs[1][newminlen+i]      
                xaa[i]=a + beta * (b - c)
            xa = np.concatenate((xa, xaa), axis=None)
        # check if numbers of neurons are in allowed range  
        for i in range(xa.shape[0]):
            if(xa[i]>self.maxneuron): xa[i]=self.maxneuron
            elif(xa[i]<self.minneuron): xa[i]=self.minneuron
            xa[i] = np.floor(xa[i])
        xa = np.concatenate((np.array(indim,dtype=int),np.array(xa,dtype=int),np.array(self.outdim,dtype=int)), axis=None,dtype=int)
        return xa
    
    def crossoverMean(self,parent,u):
        order = [parent[1:-1],u[1:-1]]
        if(parent.shape[0] > u.shape[0]): order = [u[1:-1],parent[1:-1]]
        order[0] = np.resize(order[0],order[1].shape[0])
        middle = np.mean(order,axis=0,dtype=int)
        child=np.insert(middle,0,parent[0])
        child=np.append(child,parent[-1])
        return child.copy()
    
    def crossoverRandomSwap(self,parent,u):
        # the first one is with min len
        order = [parent[1:-1],u[1:-1]]
        child = [parent[0]]
        if(parent.shape[0] > u.shape[0]): order = [u[1:-1],parent[1:-1]]
        order[0] = np.resize(order[0],order[1].shape[0])
        swap = np.random.randint(0,2,order[0].shape[0])
        for i in range(len(swap)):
            if(swap[i]==0): child.append(order[0][i])
            else: child.append(order[1][i])
        child.append(parent[-1])
        return np.array(child).copy()
    
    def run(self,beta=0.5):
        current_gen=self.MLPlayerlist
        scores = np.zeros((self.pplSize))
        #initial Run
        print('Initial Run Start')
        for i in range(len(current_gen)):
            b,_,_ = self.fit(current_gen[i],i)
            scores[i]=b
        print('Initial Run End')
        currentbest = np.min(scores)
        currentmean = np.mean(scores)
        currentbestidx = np.argmin(scores)
        print(f'Init Run Best: {currentbest}, Mean: {currentmean}, ID:{currentbestidx}, config: {current_gen[currentbestidx]}')
        #Generation Run
        for i in range(self.maxiter):
            structureStatistic=np.zeros((self.pplSize,5))
            updatecount=0
            start=time.time()
            print(f'Gen {i} Run Start')
            betas = np.ones(self.pplSize)*beta
            print(beta)
            for j in range(self.pplSize):
                parent = current_gen[j]
                idx0,idx1,idxt = np.random.choice(range(0,self.pplSize),3,replace=False)
                target = current_gen[idxt]
                diff = [current_gen[idx0],current_gen[idx1]]
                betas[j] = self.self_adaptive_beta(betas[j])
                unitvector = self.mutation_rand_1_z(target,diff,betas[j])
                if(self.crossover==1): nextGen = self.crossoverMean(parent,unitvector)
                else: nextGen = self.crossoverRandomSwap(parent,unitvector)
                print(f'Next Gen: {nextGen}')
                structureStatistic[j,0]= nextGen.shape[0]-2
                structureStatistic[j,1]= np.mean(nextGen[1:-1])
                structureStatistic[j,2]= np.median(nextGen[1:-1])
                structureStatistic[j,3]= np.quantile(nextGen[1:-1],0.25)
                structureStatistic[j,4]= np.quantile(nextGen[1:-1],0.75)
                s,_,_ = self.fit(nextGen,j)
                if(s<scores[j]):
                    updatecount+=1
                    scores[j]=s
                    current_gen[j]=nextGen
            print(f'Gen {i} Run End')
            end=time.time()
            currentbest = np.min(scores)
            currentmean = np.mean(scores)
            currentmedian = np.median(scores)
            currentq25 = np.quantile(scores,0.25)
            currentq75 = np.quantile(scores,0.75)
            currentbestidx = np.argmin(scores)
            genMeanLen = np.mean(structureStatistic[:,0])
            genMedianLen = np.median(structureStatistic[:,0])
            genq25Len = np.quantile(structureStatistic[:,0],0.25)
            genq75Len = np.quantile(structureStatistic[:,0],0.75)
            genMeanNode=np.median(structureStatistic[:,1])
            genMedianNode=np.median(structureStatistic[:,2])
            genq25Node = np.median(structureStatistic[:,3])
            genq75Node = np.median(structureStatistic[:,4])
            print(f'Run {i:3d} Best: {currentbest}, Mean: {currentmean}, ID:{currentbestidx}, config: {current_gen[currentbestidx]}, updatecount: {updatecount:3d}, Generation RunTime: {(end-start):10.8f}')
            self.tb.add_scalars("Scores Statistic (Generation)", {'best':currentbest,'mean':currentmean,'median':currentmedian,'q25':currentq25,'q75':currentq75}, i)
            self.tb.add_scalars("Structure Statistic (Generation) #HiddenLayer", {'mean':genMeanLen,'median':genMedianLen,'q25':genq25Len,'q75':genq75Len}, i)
            self.tb.add_scalars("Structure Statistic (Generation) #Node", {'mean':genMeanNode,'median':genMedianNode,'q25':genq25Node,'q75':genq75Node}, i)
            self.tb.add_scalar('Update Count',updatecount,i)
            self.tb.add_scalar('RunTime',(end-start),i)
        print(f'Run Completed : Best Score(loss): {np.min(scores)} , Config: {current_gen[np.argmin(scores)]}')    
        return 
                

In [12]:
# One-hot encode the digit labels of both splits (10 classes) to match the 10-unit output layer.
trainingt, validationt = (
    torch.nn.functional.one_hot(split.targets, num_classes=10)
    for split in (mnist_trainset, mnist_testset)
)
# Small search for demonstration: 3 individuals with hidden depths drawn below maxdepth=10;
# every other DE_MLP setting keeps its default value.
d = DE_MLP(
    outdim=10,
    maxdepth=10,
    initSize=3,
    trainingset=mnist_trainset.data,
    validationset=mnist_testset.data,
    trainingTarget=trainingt,
    validateTarget=validationt,
)

In [13]:
# Start the differential-evolution search; every candidate network is trained through DE_MLP.fit, so this cell is slow.
d.run()

Initial Run Start


  data = torch.tensor(self.training[idx]).float().to(self.device)
  y = torch.tensor(self.target[idx]).float().to(self.device)
  vdata = torch.tensor(self.validationSet[vidx]).float().to(self.device)
  vy = torch.tensor(self.validationTarget[vidx]).float().to(self.device)


ConfigID:   0, Epoch:   0, Training Loss: 0.20284305, Validation Loss: 0.14536845,                    Best: 0.14536845, StopCount/Limit:   0/  3, Time:30.09706712
ConfigID:   0, Epoch:   1, Training Loss: 0.13462636, Validation Loss: 0.12746175,                    Best: 0.12746175, StopCount/Limit:   0/  3, Time:21.23149705
ConfigID:   0, Epoch:   2, Training Loss: 0.12511847, Validation Loss: 0.12712025,                    Best: 0.12712025, StopCount/Limit:   0/  3, Time:25.90751696
ConfigID:   0, Epoch:   3, Training Loss: 0.12084876, Validation Loss: 0.12104005,                    Best: 0.12104005, StopCount/Limit:   0/  3, Time:16.60395718
ConfigID:   0, Epoch:   4, Training Loss: 0.11875900, Validation Loss: 0.11824605,                    Best: 0.11824605, StopCount/Limit:   0/  3, Time:17.59861898
ConfigID:   0, Epoch:   5, Training Loss: 0.11681714, Validation Loss: 0.11515583,                    Best: 0.11515583, StopCount/Limit:   0/  3, Time:16.48181105
ConfigID:   0, Epoch: 