# Learning rate/momentum schedules trained from scratch
The default to investigate will be resnet50 with CIFAR10

Look at a number of epochs just short of where we think it will converge, so we are aiming for a high accuracy, but not taking too long. (Aim for 94% accuracy.)

As a compromise between a fully flexible model for learning rates (which won't work with evolutionary algorithms) and something inflexible like a single global learning rate, we choose two learning rates for each epoch and linearly interpolate between them during the epoch.

Try:
* Find the optimal learning rate given a common momentum initialization.
* Find optimal momentum given a sensible learning rate schedule.
* Try optimizing both at the same time.

Maybe:
* Look at how the learning rates evolve from a bad initialization to a sensible one (gif)
* Look at how the optimal learning rate schedule changes based on the momentum used.


**Be careful to reinitialize the PyTorch/fastai model each time, so we don't start fine-tuning an existing model.**

In [1]:
import os
import sys
import glob
import random
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm

from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

from torchvision import datasets, models, transforms

# Local fastai checkout, appended to the module search path.
# NOTE(review): this append runs after the `from fastai... import *` lines
# earlier in this cell, so on a fresh kernel it cannot influence how those
# imports are resolved.
fast_ai_dir = '/media/rene/Data/fastai/'
sys.path.append(fast_ai_dir)

# Seed NumPy's global RNG. Only NumPy is seeded here; Python's `random`
# module and torch are not. The author flagged this seeding as a possible
# cause of an error (unconfirmed).
SEED = 101
np.random.seed(SEED)

%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Add the src directory for functions: `src` sits two directory levels above
# the notebook's current working directory.
src_dir = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), 'src')
print(src_dir)
# Must be appended before the `genetic` import below can be resolved.
sys.path.append(src_dir)

# import my functions:
# NOTE(review): names used later but not defined in this notebook (e.g.
# create_population, evolve, breed_slice, run_genetic) presumably come from
# this star import -- confirm against src/genetic.py.
from genetic import*

# Select CUDA device index 1 for this process, then report availability and
# the active device.
torch.cuda.set_device(1)
print(torch.cuda.is_available())
print(torch.cuda.current_device())

/media/rene/Data/learn-lr/src
True
1


In [2]:
# Root folder of the CIFAR-10 data; later cells assign PATH again before use.
PATH = "/media/rene/Data/data/cifar10/"

In [3]:
def conv_layer(ni, nf, ks=3, stride=1):
    """Build a Conv2d -> BatchNorm2d -> LeakyReLU unit.

    ni and nf are the input and output channel counts, ks the square kernel
    size and stride the convolution stride. The convolution has no bias and
    is padded by ks//2 on each side.
    """
    conv = nn.Conv2d(ni, nf, kernel_size=ks, bias=False, stride=stride, padding=ks//2)
    norm = nn.BatchNorm2d(nf, momentum=0.01)
    act = nn.LeakyReLU(negative_slope=0.1, inplace=True)
    return nn.Sequential(conv, norm, act)

class ResLayer(nn.Module):
    """Residual block: a 1x1 conv_layer that halves the channel count, a 3x3
    conv_layer that restores it, and a skip connection adding the input."""

    def __init__(self, ni):
        super().__init__()
        half = ni//2
        self.conv1 = conv_layer(ni, half, ks=1)
        self.conv2 = conv_layer(half, ni, ks=3)

    def forward(self, x):
        squeezed = self.conv1(x)
        residual = self.conv2(squeezed)
        # Out-of-place add, so x itself is not modified (an in-place add_
        # variant was left disabled here).
        return x.add(residual)

class Darknet(nn.Module):
    """Darknet-style classifier assembled from conv_layer and ResLayer units.

    num_blocks lists how many ResLayers each group contains, num_classes is
    the size of the final linear output and nf is the channel count produced
    by the first convolution (doubled by every group).
    """

    def make_group_layer(self, ch_in, num_blocks, stride=1):
        """Return one group as a list: a conv_layer doubling ch_in, followed
        by num_blocks ResLayers at the doubled width."""
        ch_out = ch_in*2
        group = [conv_layer(ch_in, ch_out, stride=stride)]
        for _ in range(num_blocks):
            group.append(ResLayer(ch_out))
        return group

    def __init__(self, num_blocks, num_classes, nf=32):
        super().__init__()
        body = [conv_layer(3, nf, ks=3, stride=1)]
        for idx, n_res in enumerate(num_blocks):
            # Every group uses stride 2 except the second one (idx == 1),
            # which uses stride 1.
            group_stride = 1 if idx == 1 else 2
            body.extend(self.make_group_layer(nf, n_res, stride=group_stride))
            nf *= 2
        head = [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
        self.layers = nn.Sequential(*body, *head)

    def forward(self, x):
        return self.layers(x)

In [4]:
def phases_linear(lr_sch, mom_sch):
    """Turn flat schedules into a list with one fastai TrainingPhase per epoch.

    lr_sch and mom_sch are read in consecutive pairs: entries 2k and 2k+1 are
    the start and end values for epoch k, decayed linearly in between. The
    number of phases is driven by len(lr_sch); mom_sch is indexed at the same
    positions.
    """
    def one_epoch(start):
        end = start + 1
        return TrainingPhase(epochs=1, opt_fn=optim.SGD,
                             lr=(lr_sch[start], lr_sch[end]), lr_decay=DecayType.LINEAR,
                             momentum=(mom_sch[start], mom_sch[end]), momentum_decay=DecayType.LINEAR)

    return [one_epoch(start) for start in range(0, len(lr_sch), 2)]

In [5]:
def _train_darknet_once(PATH, lr_sch, mom_sch, bs):
    """Train a freshly constructed Darknet on one schedule pair; return its accuracy."""
    sz = 32
    # Normalization stats handed to tfms_from_stats (presumably the CIFAR-10
    # per-channel mean and std -- confirm).
    stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))
    tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomFlip()], pad=32//8)
    data = ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms, bs=bs)

    # A new model for every schedule, so no run fine-tunes weights left
    # behind by a previous one.
    m = Darknet([1, 2, 4, 6, 3], num_classes=10, nf=32)
    learn = ConvLearner.from_model_data(m, data)
    learn.crit = nn.CrossEntropyLoss()
    learn.metrics = [accuracy]

    learn.fit_opt_sched(phases_linear(lr_sch, mom_sch))
    preds, y = learn.predict_with_targs()
    return accuracy_np(preds, y)


def get_darknet_perf(PATH, lr_sch_list, mom_sch_list, downsample, acc_dict=None, bs=512):
    """Score every (learning-rate, momentum) schedule pair in a population.

    Args:
        PATH: data folder passed to ImageClassifierData.from_paths (with
            val_name='test').
        lr_sch_list: list of learning-rate schedules, each a list of floats.
        mom_sch_list: list of momentum schedules, parallel to lr_sch_list.
        downsample: accepted for interface compatibility; not used here.
        acc_dict: optional cache mapping tuple(lr_sch + mom_sch) to accuracy.
            Pairs already in the cache are not retrained. The dict is updated
            in place; a fresh dict is created when None is given.
        bs: batch size used when training.

    Returns:
        (pop_perf, acc_dict), where pop_perf holds one
        [accuracy, lr_sch, mom_sch] entry per schedule pair in input order.
    """
    # A `{}` default would be one dict shared by every call that omits
    # acc_dict, silently leaking cached accuracies between unrelated runs.
    if acc_dict is None:
        acc_dict = {}

    pop_perf = []
    for ind, lr_sch in enumerate(lr_sch_list):
        mom_sch = mom_sch_list[ind]
        key = tuple(lr_sch + mom_sch)
        if key not in acc_dict:
            acc_dict[key] = _train_darknet_once(PATH, lr_sch, mom_sch, bs)
        pop_perf.append([acc_dict[key], lr_sch, mom_sch])

    return pop_perf, acc_dict

In [6]:
def _save_history(history, out_file):
    """Pickle history to out_file, closing the file handle afterwards."""
    with open(out_file, 'wb') as fh:
        pickle.dump(history, fh)


def _evolve_schedules(scored, downsample):
    """Evolve [accuracy, schedule] pairs on every downsample-th point, then
    repeat each evolved point downsample times to restore the full length."""
    coarse = [[acc, sch[::downsample]] for acc, sch in scored]
    evolved = evolve(coarse, breed_slice)
    return [np.repeat(np.array(sch), int(downsample)).tolist() for sch in evolved]


def run_genetic_darknet(PATH, out_loc, generations, epochs, init_lr_sch, init_mom_sch, downsample=1, evolve_lr=True, evolve_mom=False):
    """Run the genetic algorithm on schedules for a number of generations.

    Each generation scores the whole population with get_darknet_perf, sorts
    it by accuracy (best first), appends it to the history, pickles the
    history so far into out_loc, prints a summary, and evolves the next
    population. After the last generation the evolved population is scored
    once more, the top 5 are printed and the history is pickled again.

    Args:
        PATH: data folder forwarded to get_darknet_perf.
        out_loc: directory the history pickles are written to.
        generations: number of generations to run.
        epochs: only used in the name of the final output file.
        init_lr_sch: initial population of learning-rate schedules (lists).
            get_darknet_perf hands each to phases_linear, which reads two
            consecutive entries per training epoch.
        init_mom_sch: initial population of momentum schedules, parallel to
            init_lr_sch.
        downsample: evolution runs on every downsample-th schedule point and
            the result is repeated back to full length; training always uses
            the full-length schedules.
        evolve_lr: evolve the learning-rate schedules.
        evolve_mom: evolve the momentum schedules.

    Returns:
        None. Results are printed and written to out_loc.
    """
    bs=512

    lr_sch = init_lr_sch
    mom_sch = init_mom_sch

    # One sorted population ([accuracy, lr_sch, mom_sch] entries) per generation.
    history=[]
    # Accuracy cache keyed by schedule, so repeated schedules are not retrained.
    acc_dict = {}

    # Evolve the generation.
    for i in range(generations):
        print('Running generation: ', i)

        pop_perf, acc_dict = get_darknet_perf(PATH, lr_sch, mom_sch, downsample, acc_dict, bs)
        pop_perf = sorted(pop_perf, key=lambda x: x[0], reverse=True)
        history.append(pop_perf)

        # save the intermediate result every generation
        _save_history(history, os.path.join(out_loc, 'cifar_darknet_'+'on_gen_'+str(i)))

        # print average accuracy, best accuracy, and best schedule
        perf_only = [x[0] for x in pop_perf]
        avg = sum(perf_only)/len(perf_only)
        print('Avg acc: ', avg, 'best acc: ', pop_perf[0][0])
        print('LR Schedule: ',[ '%.5f' % elem for elem in pop_perf[0][1]])

        # Evolve
        if evolve_lr:
            lr_sch = _evolve_schedules([[x[0], x[1]] for x in pop_perf], downsample)
        if evolve_mom:
            mom_sch = _evolve_schedules([[x[0], x[2]] for x in pop_perf], downsample)

    # get final accuracy, and print the top 5 sorted.
    # get_darknet_perf returns (pop_perf, acc_dict); the tuple must be unpacked
    # before sorting. The cache and batch size are passed along so this final
    # evaluation is consistent with the per-generation ones.
    pop_perf, acc_dict = get_darknet_perf(PATH, lr_sch, mom_sch, downsample=downsample, acc_dict=acc_dict, bs=bs)
    pop_perf = sorted(pop_perf, key=lambda x: x[0], reverse=True)

    # Print out the top 5 networks.
    print('Final Results: ', pop_perf[:5])

    # save history as a pickle file
    # NOTE(review): the suffix is (index of the last generation) + 29, kept
    # from the original code; the offset looks like a leftover from resuming
    # an earlier run -- confirm. It is derived from `generations` so it is
    # also defined when generations == 0.
    final_suffix = generations + 28
    out_file = os.path.join(out_loc, 'cifar_dark_'+str(generations)+'_numsch_'+str(epochs)+'_on_gen_'+str(final_suffix))
    _save_history(history, out_file)

In [7]:
# PATH = "/media/rene/Data/data/cifar10/"
# out_loc = '/media/rene/Data/data/learn-lr/output'
# num_schedules = 12
# epochs= 50 # actually 25, but requires 2 lrs for each epoch
# generations = 30

# init_lr_sch = [[.001]*epochs]*num_schedules
# init_mom_sch = [[.9]*epochs]*num_schedules

# run_genetic(PATH, out_loc, generations, num_schedules, epochs, init_lr_sch, init_mom_sch)

## try evolving with a downsample to make it converge faster
* Only 10 epochs

In [None]:
# Experiment: evolve only the learning-rate schedules, with a downsample of 2.
PATH = "/media/rene/Data/data/cifar10/"
out_loc = '/media/rene/Data/data/learn-lr/output'
num_schedules = 10
epochs = 10
generations = 30
downsample = 2

# Initialize schedules. Need 2 points for every epoch.
# `size` is the number of points that are actually evolved: 2 per epoch,
# divided by the downsample factor.
size = int(2*epochs/downsample)
# NOTE(review): rate_range is presumably a pair of log10 bounds for the
# sampled learning rates -- confirm in create_population.
init_lr_sch = create_population(num_schedules, size=size, rate_range=(-3, -1.5))
init_mom_sch = [[.9]*size]*num_schedules

# Repeat every point `downsample` times so each schedule has 2*epochs entries.
init_lr_sch = [np.repeat(np.array(x), int(downsample)).tolist() for x in init_lr_sch]
init_mom_sch = [np.repeat(np.array(x), int(downsample)).tolist() for x in init_mom_sch]

# NOTE(review): run_genetic is not defined in this notebook (the function
# defined here is run_genetic_darknet); presumably it comes from the
# `genetic` star import -- confirm.
run_genetic(PATH, out_loc, generations, epochs, init_lr_sch, init_mom_sch,
            downsample=downsample, evolve_lr=True, evolve_mom=False)

Running generation:  0


HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

 20%|██        | 10/49 [00:10<00:39,  1.03s/it, loss=2.24]



epoch      trn_loss   val_loss   accuracy                 
    0      1.76272    2.336124   0.1       
    1      1.488335   2.464975   0.1                      
    2      1.302561   2.62875    0.1                      
    3      1.204306   2.850268   0.1                      
    4      1.154069   3.131063   0.1                      
    5      1.081753   3.50639    0.1335                   
    6      0.912411   3.344451   0.2092                    
    7      0.889544   1.522101   0.4901                    
    8      0.768719   0.836405   0.7167                    
    9      0.655722   0.774927   0.7384                    



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.750896   2.345319   0.1       
    1      1.502292   2.482707   0.1                      
    2      1.359872   2.665594   0.1                      
    3      1.289969   2.936185   0.1                      
    4      1.176245   3.557883   0.1                      
    5      0.966499   3.511549   0.1071                    
    6      0.853685   3.10309    0.154                     
    7      0.802646   2.362345   0.216                     
    8      0.764081   1.549014   0.4787                    
    9      0.680907   0.912557   0.6882                    



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.705489   2.359227   0.1       
    1      1.427817   2.542624   0.1                      
    2      1.262211   2.750753   0.1                      
    3      1.173734   3.003311   0.1                      
    4      1.107216   3.297558   0.1                      
    5      1.361537   2.523078   0.2047                   
    6      1.173647   2.689376   0.2295                   
    7      1.036351   2.608458   0.221                    
    8      0.946574   1.871012   0.3733                    
    9      0.839522   1.444472   0.5291                    



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.62957    2.353747   0.1       
    1      1.332967   2.470502   0.1                      
    2      1.115864   2.64283    0.1                      
    3      0.998165   2.863271   0.1                       
    4      1.023671   3.209939   0.1037                   
    5      0.873969   3.402843   0.1507                    
    6      0.811806   3.087544   0.1869                    
    7      0.740539   2.13206    0.3465                    
    8      0.635742   1.161597   0.6077                    
    9      0.648281   0.787671   0.7227                    



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      2.029406   2.307782   0.1       
    1      1.753898   2.349877   0.1                      
    2      1.528346   2.503102   0.1                      
 47%|████▋     | 23/49 [00:20<00:23,  1.11it/s, loss=1.43]

## More generations, initialized from the last generation saved by the previous 30-generation run

In [None]:
# Experiment: continue evolving from a previously saved population.
PATH = "/media/rene/Data/data/cifar10/"
out_loc = '/media/rene/Data/data/learn-lr/output'
num_schedules = 10
epochs = 10
generations = 100
downsample = 2

# Load the pickled history of an earlier run. The file is opened in a `with`
# block so the handle is closed once loading finishes.
# NOTE: pickle.load can execute arbitrary code; only load files you produced.
with open("/media/rene/Data/data/learn-lr/output/cifar_darknet_30_numsch_10on_gen_29", "rb") as f:
    data = pickle.load(f)
# get last lr initializations: every entry of the final saved generation is
# indexed as [accuracy, lr_schedule, mom_schedule].
last_generation = data[-1]
init_lr_sch = [x[1] for x in last_generation]
init_mom_sch = [x[2] for x in last_generation]

# NOTE(review): run_genetic is not defined in this notebook (the function
# defined here is run_genetic_darknet); presumably it comes from the
# `genetic` star import -- confirm.
run_genetic(PATH, out_loc, generations, epochs, init_lr_sch, init_mom_sch,
            downsample=downsample, evolve_lr=True, evolve_mom=False)

Running generation:  0


HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

 14%|█▍        | 7/49 [00:09<00:59,  1.40s/it, loss=2.35]



epoch      trn_loss   val_loss   accuracy                 
    0      2.283515   2.305108   0.1       
    1      1.999742   2.336768   0.1                      
    2      1.718451   2.557775   0.1                      
    3      1.365937   2.782262   0.13                     
    4      1.157016   3.111579   0.1273                   
    5      1.808099   2601986.54 0.1                      
    6      1.766698   42.128527  0.1104                   
    7      1.645405   2.863282   0.2313                   
    8      1.454962   1.704333   0.3907                   
    9      1.33504    1.338116   0.5076                   



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      2.262697   2.30328    0.1       
    1      2.050677   2.317272   0.1                      
    2      2.12572    2.150761   0.1761                   
    3      1.887874   2.111128   0.2042                   
    4      1.735553   2.135879   0.2025                   
    5      2.351773   27546304.8768 0.1                   
    6      2.195941   480.970344 0.1065                   
    7      2.104715   13.629117  0.1169                   
    8      2.052984   2.875001   0.1747                   
    9      2.024229   2.157213   0.1893                   



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      2.247615   2.304168   0.1       
    1      2.145903   2.312491   0.1                      
    2      2.139246   236.702749 0.136                    
    3      1.92858    3.06865    0.1906                   
    4      1.802234   2.181834   0.1781                   
    5      1.728671   2.053153   0.2152                   
    6      1.691644   1.947203   0.2984                   
    7      1.61286    1.984522   0.3204                   
    8      1.540898   2.031003   0.3007                   
    9      1.435026   1.852354   0.3533                   



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      2.290313   2.304025   0.0997    
    1      1.931442   2.359478   0.1                      
    2      1.646011   2.629134   0.1276                   
    3      1.308254   2.863509   0.1097                   
    4      1.100997   3.140386   0.1292                   
    5      1.734123   156095.919525 0.1                   
    6      1.636671   259.029155 0.1597                   
    7      1.593059   2248.519328 0.1001                  
    8      1.431822   5.0846     0.1679                   
    9      1.333839   1.606923   0.4221                   



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      2.27029    2.304161   0.1       
    1      1.980644   2.3326     0.1                      
    2      1.919986   2.448439   0.1566                   
    3      1.637478   2.741674   0.1326                   
    4      1.462859   2.697503   0.2098                   
    5      463.16035  1.7570913218788446e+19 0.1          
    6      653.742768 1967362.622 0.1                    
    7      413.399526 89432.0833 0.1101                  
    8      221.86745  334.733305 0.1152                  
    9      95.882285  110.823814 0.113                    



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      2.252823   2.30379    0.1       
    1      1.926873   2.353877   0.1                      
    2      1.824195   2.632744   0.1047                   
    3      1.442593   2.798235   0.1335                   
    4      1.221251   3.019904   0.1248                   
    5      11.752094  43622084896.3584 0.1                
    6      8.108212   264505.05995 0.1                    
    7      4.714086   1057.362969 0.094                   
    8      3.236821   21.51793   0.1007                   
    9      2.668886   3.282271   0.1214                   

Avg acc:  0.25525 best acc:  0.5076
LR Schedule:  ['0.00014', '0.00014', '0.00160', '0.00160', '0.03087', '0.03087', '0.00913', '0.00913', '0.00617', '0.00617', '0.07097', '0.07097', '0.01987', '0.01987', '0.12157', '0.12157', '0.02205', '0.02205', '0.00362', '0.00362']
Running generation:  1


HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.795454   2.325986   0.1       
    1      1.555438   2.41376    0.0999                   
    2      1.452129   2.563471   0.1                      
    3      1.180025   2.731787   0.1                      
    4      1.009069   2.929607   0.1001                   
    5      1.848306   52156.878862 0.0892                 
    6      1.867432   388.992247 0.1412                   
    7      1.810211   7.021652   0.1566                   
    8      1.633434   2.246714   0.2513                   
    9      1.526381   1.641023   0.389                    



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      2.308778   2.303168   0.1       
    1      1.980542   2.322263   0.1                      
    2      1.705143   2.370481   0.1071                   
    3      1.336987   2.46793    0.1                      
    4      1.133111   2.560596   0.1182                   
    5      1.440676   80.417326  0.0993                   
    6      1.23687    1.369675   0.4924                   
    7      1.18844    5.434407   0.3557                   
    8      0.979861   1.30236    0.5806                    
    9      0.858317   0.884255   0.6887                    



HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      2.296268   2.303198   0.1       
    1      1.935737   2.349488   0.1                      
    2      1.686553   2.558487   0.1092                   
    3      1.364555   2.744092   0.1033                   
    4      1.162883   2.987279   0.1755                   
    5      1.760678   225444.742675 0.1                   
    6      1.678678   50.542768  0.1048                   
 94%|█████████▍| 46/49 [00:38<00:02,  1.18it/s, loss=1.56]

## From scratch - I don't know what's going on

In [None]:
# now try with bs of 512, instead of 2048. Seems large batch might be less stable?
# The batch size is not set in this cell: run_genetic_darknet hard-codes bs=512.

PATH = "/media/rene/Data/data/cifar10"
out_loc = '/media/rene/Data/data/learn-lr/output/cifar_dark_10epoch_ds2_more_var'
num_schedules = 12
epochs = 10
generations = 100
downsample = 2

# Initialize schedules. Need 2 points for every epoch.
# `size` is the number of evolved points: 2 per epoch, divided by downsample.
size = int(2*epochs/downsample)
# NOTE(review): rate_range is presumably a pair of log10 bounds for the
# sampled learning rates -- confirm in create_population.
init_lr_sch = create_population(num_schedules, size=size, rate_range=(-3, -1))
init_mom_sch = [[.9]*size]*num_schedules

# Repeat every point `downsample` times so each schedule has 2*epochs entries.
init_lr_sch = [np.repeat(np.array(x), int(downsample)).tolist() for x in init_lr_sch]
init_mom_sch = [np.repeat(np.array(x), int(downsample)).tolist() for x in init_mom_sch]

run_genetic_darknet(PATH, out_loc, generations, epochs, init_lr_sch, init_mom_sch,
            downsample=downsample, evolve_lr=True, evolve_mom=False)

Running generation:  0


HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

 22%|██▏       | 22/98 [00:09<00:34,  2.20it/s, loss=1.98]



epoch      trn_loss   val_loss   accuracy                 
    0      1.483868   2.485877   0.1       
    1      1.174265   2.991347   0.1                      
    2      0.919863   3.290612   0.1014                    
    3      0.82754    2.822511   0.2439                    
    4      0.913501   1.354136   0.6017                    
    5      0.884934   13.35806   0.2856                    
                                                           

# Only a Test:

In [None]:
# Sanity check: score a population of identical constant schedules
# (lr=.003, momentum=.9) with batch size 256, without any evolution.
# out_loc and generations are set but not used by the call below.
PATH = "/media/rene/Data/data/cifar10/"
out_loc = '/media/rene/Data/data/learn-lr/output_test'# /cifar_dark_10epoch_ds2'
num_schedules = 4
epochs = 10
generations = 100
downsample = 2

size = int(2*epochs/downsample)
init_lr_sch = [[.003]*size]*num_schedules
init_mom_sch = [[.9]*size]*num_schedules

# Repeat every point `downsample` times so each schedule has 2*epochs entries.
init_lr_sch = [np.repeat(np.array(x), int(downsample)).tolist() for x in init_lr_sch]
init_mom_sch = [np.repeat(np.array(x), int(downsample)).tolist() for x in init_mom_sch]

print(init_lr_sch)
print(init_mom_sch)

# All schedules are identical, so after the first is trained the remaining
# ones hit the accuracy cache inside get_darknet_perf.
get_darknet_perf(PATH, init_lr_sch, init_mom_sch, downsample, acc_dict={}, bs=256)

In [11]:
# Sanity check: score a population of identical constant schedules
# (lr=.01, momentum=.9) with batch size 512, without any evolution.
# out_loc and generations are set but not used by the call below.
PATH = "/media/rene/Data/data/cifar10/"
out_loc = '/media/rene/Data/data/learn-lr/output_test'# /cifar_dark_10epoch_ds2'
num_schedules = 4
epochs = 10
generations = 100
downsample = 2

size = int(2*epochs/downsample)
init_lr_sch = [[.01]*size]*num_schedules
init_mom_sch = [[.9]*size]*num_schedules

# Repeat every point `downsample` times so each schedule has 2*epochs entries.
init_lr_sch = [np.repeat(np.array(x), int(downsample)).tolist() for x in init_lr_sch]
init_mom_sch = [np.repeat(np.array(x), int(downsample)).tolist() for x in init_mom_sch]

print(init_lr_sch)
print(init_mom_sch)

# All schedules are identical, so after the first is trained the remaining
# ones hit the accuracy cache inside get_darknet_perf.
get_darknet_perf(PATH, init_lr_sch, init_mom_sch, downsample, acc_dict={}, bs=512)

[[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01], [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01], [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01], [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01]]
[[0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9], [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9], [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9], [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9]]




learning rate:  [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01]
momentum rate:  [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9]


HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.498015   2.536074   0.1       
    1      1.197061   3.03305    0.1                      
    2      0.991927   3.265555   0.1652                    
    3      0.848125   1.896418   0.373                     
    4      0.740859   0.813936   0.7063                    
    5      0.657433   0.738824   0.7399                    
 86%|████████▌ | 84/98 [00:32<00:05,  2.60it/s, loss=0.604]

KeyboardInterrupt: 

In [15]:
# Sanity check: score a population of learning-rate schedules produced by
# create_population (momentum fixed at .9) with batch size 512, without any
# evolution.
# out_loc and generations are set but not used by the call below.
PATH = "/media/rene/Data/data/cifar10/"
out_loc = '/media/rene/Data/data/learn-lr/output_test'# /cifar_dark_10epoch_ds2'
num_schedules = 4
epochs = 10
generations = 100
downsample = 2

size = int(2*epochs/downsample)

# NOTE(review): rate_range is presumably a pair of log10 bounds for the
# sampled learning rates -- confirm in create_population.
init_lr_sch = create_population(num_schedules, size=size, rate_range=(-3, -1))
init_mom_sch = [[.9]*size]*num_schedules

# Repeat every point `downsample` times so each schedule has 2*epochs entries.
init_lr_sch = [np.repeat(np.array(x), int(downsample)).tolist() for x in init_lr_sch]
init_mom_sch = [np.repeat(np.array(x), int(downsample)).tolist() for x in init_mom_sch]

print(init_lr_sch)
print(init_mom_sch)

get_darknet_perf(PATH, init_lr_sch, init_mom_sch, downsample, acc_dict={}, bs = 512)

[[0.0025140611366266216, 0.0025140611366266216, 0.026619074181636178, 0.026619074181636178, 0.04904760368374687, 0.04904760368374687, 0.005461262881954834, 0.005461262881954834, 0.0074810100472590185, 0.0074810100472590185, 0.0051664868523782145, 0.0051664868523782145, 0.0030163019417040566, 0.0030163019417040566, 0.020265451432025683, 0.020265451432025683, 0.006037795530203566, 0.006037795530203566, 0.020680738149151282, 0.020680738149151282], [0.005303306287370897, 0.005303306287370897, 0.005164211587438329, 0.005164211587438329, 0.002457554744482135, 0.002457554744482135, 0.09432471486941595, 0.09432471486941595, 0.00111329686162439, 0.00111329686162439, 0.01247845714104641, 0.01247845714104641, 0.013237799189890483, 0.013237799189890483, 0.0014109426947985428, 0.0014109426947985428, 0.060889445339874344, 0.060889445339874344, 0.0016643544559448587, 0.0016643544559448587], [0.017846247104587633, 0.017846247104587633, 0.0015911064527667493, 0.0015911064527667493, 0.02640617993584234,



learning rate:  [0.0025140611366266216, 0.0025140611366266216, 0.026619074181636178, 0.026619074181636178, 0.04904760368374687, 0.04904760368374687, 0.005461262881954834, 0.005461262881954834, 0.0074810100472590185, 0.0074810100472590185, 0.0051664868523782145, 0.0051664868523782145, 0.0030163019417040566, 0.0030163019417040566, 0.020265451432025683, 0.020265451432025683, 0.006037795530203566, 0.006037795530203566, 0.020680738149151282, 0.020680738149151282]
momentum rate:  [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9]


HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                 
    0      1.654705   2.392083   0.1       
    1      1.426524   2.80177    0.1332                   
    2      1.147152   1.808245   0.372                    
    3      0.853652   1.19573    0.588                     
    4      0.757423   0.994693   0.644                     
    5      0.690759   0.731386   0.7455                    
    6      0.640746   0.706621   0.749                     
 93%|█████████▎| 91/98 [00:34<00:02,  2.61it/s, loss=0.694]

KeyboardInterrupt: 