## Simulate DaskClassifier based on Timing Data

In [1]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
import csv
import importlib
import pandas as pd
from time import time
from copy import copy
from model import Wide_ResNet

In [2]:
# Import the simulator class.
# Move the working directory up one level; the relative paths used later in
# this notebook ('./exp-dask/...', 'sim-results/...') are resolved from there.
os.chdir('..')
# "exp-dask" contains a hyphen, so it cannot be named in a plain `import`
# statement; importlib loads it from its string name instead.
classifier = importlib.import_module("exp-dask.classifier")
# NOTE(review): this statement resolves a module called `classifier` through
# the import system; it does not read the `classifier` variable bound on the
# previous line. Confirm both refer to the same file.
from classifier import DaskClassifierSimulator

In [3]:
# Dask client used by the simulator (passed to it as `client=` in run_sim).
from dask.distributed import Client
# training client
def _prep():
    """Import distributed's torch protocol module (executed via client.run below)."""
    # presumably registers torch (de)serialisation with distributed -- TODO confirm
    from distributed.protocol import torch

# processes=False keeps scheduler and workers inside this process (the client
# summary printed by this cell shows an inproc:// scheduler with 16 workers).
client = Client(processes=False, n_workers=16, threads_per_worker=1, memory_limit="32GB")
client.run(_prep)

# Wide_ResNet is already imported at the top of the notebook; here the module
# file itself is uploaded to the workers.
#from model import Wide_ResNet
client.upload_file("./exp-dask/model.py")

# Bare expression: the notebook renders the client/cluster summary.
client

0,1
Client  Scheduler: inproc://172.31.16.127/4804/1  Dashboard: http://172.31.16.127:8787/status,Cluster  Workers: 16  Cores: 16  Memory: 512.00 GB


In [4]:
# Gather and prepare data
# Training pipeline: random 32x32 crop after 4 px padding and a random
# horizontal flip, then tensor conversion and per-channel normalisation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    # (mean, std) per channel -- presumably CIFAR-10 statistics; TODO confirm source
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Test pipeline: no augmentation, same normalisation constants as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# Both splits live under ./exp-dask/data (relative to the working directory
# set earlier); download=True fetches them if they are not already there.
# train_set / test_set are read as globals by run_sim.
train_set = torchvision.datasets.CIFAR10(root='./exp-dask/data', train=True, download=True, transform=transform_train)
test_set = torchvision.datasets.CIFAR10(root='./exp-dask/data', train=False, download=True, transform=transform_test)
# Label names; run_sim uses len(classes) as the network's num_classes.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [5]:
def write_stats(hist, directory, filename):
    """
    Write per-epoch stats to ``./exp-dask/<directory>/<filename>`` as CSV.

    hist: list of dicts, one per epoch. The keys of the first dict become the
          CSV header, so every row is expected to share those keys.
    directory: sub-directory of ``./exp-dask`` to write into (created if missing).
    filename: name of the CSV file inside that directory.

    An empty ``hist`` is a no-op: there is no row to derive a header from, so
    no file is created.
    """
    if not hist:
        return

    path = './exp-dask/{}/{}'.format(directory, filename)
    # make sure the destination exists so a missing folder cannot lose results
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # newline='' lets the csv module control line endings itself
    with open(path, 'w', encoding='utf8', newline='') as output_file:
        fc = csv.DictWriter(output_file, fieldnames=hist[0].keys())
        fc.writeheader()
        fc.writerows(hist)

In [6]:
def train(model, train_set, test_set, stats_df, n_epochs=200, log_interval=1, stats_dir=''):
    """
    Replay recorded per-epoch stats through the simulator, scoring after each epoch.

    model: object exposing preprocess, set_sim, partial_fit, score, get_params
           and a ``meta_`` mapping.
    train_set / test_set: datasets; both are passed through ``model.preprocess``
           once before the first epoch.
    stats_df: DataFrame with one row of recorded stats per epoch. Each row is
           handed to ``model.set_sim`` as a dict before that epoch is run.
    n_epochs: upper bound on epochs; at most ``len(stats_df)`` epochs are run.
    log_interval / stats_dir: kept in the signature; the periodic checkpoint
           write they would drive is currently disabled.

    Returns a list with one dict per epoch containing ``epoch``, ``score``,
    the entries of ``model.get_params()`` and the entries of ``model.meta_``.
    """

    # one dict of recorded stats per row of the frame
    per_epoch_stats = list(stats_df.T.to_dict().values())
    print("[SETUP] Loaded {} epochs of stats".format(len(per_epoch_stats)))

    # preprocess both splits once up front
    setup_t0 = time()
    train_set = model.preprocess(train_set)
    test_set = model.preprocess(test_set)
    print("[SETUP] Pre-Processed in {} seconds".format(time() - setup_t0))

    epochs_to_run = min(n_epochs, len(per_epoch_stats))
    history = []

    # pair each epoch number with its recorded stats
    for epoch, stat in zip(range(epochs_to_run), per_epoch_stats):
        epoch_t0 = time()

        # load this epoch's recorded stats into the simulator
        model.set_sim(stat)

        # simulate one pass over the training data, then evaluate
        print("[Epoch {}]".format(epoch), end="")
        model.partial_fit(train_set)
        score = model.score(test_set)

        # collect the record (later sources override earlier keys)
        record = {"epoch": epoch, "score": score}
        record.update(model.get_params())
        record.update(model.meta_)
        print(" Score: {} in {} seconds".format(score, time() - epoch_t0))
        history.append(record)

        if epoch % log_interval == 0:
            # periodic checkpoint is disabled:
            # write_stats(history, stats_dir, 'results-ep{}.csv'.format(epoch))
            pass

    return history

In [7]:
def run_sim(stats_df_loc, out_folder, timings, n_epochs=200, grads_per_worker=128, max_bs=99999999):
    """
    Sets up the simulator model, transforms stats (to account for changes in the base classifier and how
    it saves data), and calls the train function.

    stats_df_loc: path of the CSV holding per-epoch stats from a real training run.
    out_folder: despite the name, this is the path of the CSV *file* the
                simulated history is written to. Its parent directory is
                created up front so a long simulation cannot be lost to a
                missing folder.
    timings: dict with keys 'mult', 'score', 'deepcopy' and 'grad128'; the
             values are forwarded, in that order, to ``model.set_times``.
    n_epochs: maximum number of epochs to simulate (forwarded to ``train``).
    grads_per_worker: forwarded to the simulator constructor.
    max_bs: forwarded to the simulator constructor as ``max_batch_size``.

    Relies on the notebook globals ``client``, ``classes``, ``train_set`` and
    ``test_set``. Returns None; the history is written to ``out_folder``.
    """

    print("Running simulation....\n\n")
    start = time()

    # fail fast on an unusable destination instead of after the whole run
    out_dir = os.path.dirname(out_folder)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # stats from an actual training run
    stats_df = pd.read_csv(stats_df_loc)

    # old versions of the DaskClassifier had different column names
    if 'lr_' in stats_df.columns:
        stats_df = stats_df.rename(columns={"lr_": "partial_fit__lr", "batch_size_": "partial_fit__batch_size"})

    # pull initial stats from the first row of the CSV (positional, so it does
    # not depend on the index labels)
    init_lr = stats_df['partial_fit__lr'].iloc[0]
    init_bs = stats_df['partial_fit__batch_size'].iloc[0]
    momentum = stats_df['optimizer__momentum'].iloc[0]
    device = "cpu" if not torch.cuda.is_available() else "cuda:0"

    # args
    # NOTE(review): max_epochs is fixed at 200 regardless of n_epochs -- confirm
    # the simulator does not need it to follow n_epochs.
    args = dict(
        module=Wide_ResNet,
        module__depth=16,
        module__widen_factor=4,
        module__dropout_rate=0.3,
        module__num_classes=len(classes),
        loss=torch.nn.CrossEntropyLoss,
        optimizer=torch.optim.SGD,
        optimizer__lr=init_lr,
        optimizer__momentum=momentum,
        optimizer__nesterov=True,
        optimizer__weight_decay=0.5e-3,
        batch_size=init_bs,
        max_epochs=200,
        device=device,
        grads_per_worker=grads_per_worker,
        client=client,
        lr=init_lr,
        max_batch_size=max_bs
    )

    # create model and set initial timings
    model = DaskClassifierSimulator(**args)
    model.set_times(timings['mult'], timings['score'], timings['deepcopy'], timings['grad128'])

    # train based on normal train function
    hist = train(
        model,
        train_set,
        test_set,
        stats_df,
        n_epochs=n_epochs,
        log_interval=20,
        stats_dir=out_folder,
    )

    # write
    if hist:
        with open(out_folder, 'w', encoding='utf8', newline='') as output_file:
            fc = csv.DictWriter(output_file, fieldnames=hist[0].keys())
            fc.writeheader()
            fc.writerows(hist)
    else:
        # no epoch ran (n_epochs <= 0 or an empty stats file): there is no row
        # to take the CSV header from
        print("[WARN] No epochs were simulated; nothing written to {}".format(out_folder))

    print('Finished simulation in {} seconds'.format(time() - start))
    

In [None]:
# test exp
# Quick check of the whole pipeline: a single epoch (n_epochs=1) with every
# simulated cost set to zero.
timings = { 'mult': True, 'score': 0.0, 'deepcopy': 0.0, 'grad128': 0.0 }
stats_path = './exp-dask/stats/increasing-bs-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/test'
run_sim(stats_path, out_path, timings, n_epochs=1)

# Simulating grads per worker
Simulate the bs=512 experiments with different numbers of grads per worker

In [8]:
## Timings for new simulation
# Scoring cost is set to zero; the trailing number looks like a previously
# measured value kept for reference -- TODO confirm.
SCORE_TIME = 0.0 # 5.39407711148262
DEEPCOPY_TIME = 0.127488e-3  # seconds
# Half of the 78.32e-3 s value used by the other timing cells in this notebook.
GRAD_TIME_128 = 0.5 * 78.32e-3
# Keys are the ones run_sim forwards to model.set_times (mult, score, deepcopy, grad128).
timings = { 'mult': True, 'score': SCORE_TIME, 'deepcopy': DEEPCOPY_TIME, 'grad128': GRAD_TIME_128 }

In [9]:
# Sweep grads_per_worker over the same recorded run; each setting gets its own
# output CSV, and every call simulates up to 200 epochs with the `timings`
# dict currently bound in the kernel.
stats_path = './exp-dask/stats/dec-lr-512bs-final.csv'
for grads_per_worker in [32, 64, 128, 256, 512]:
    # run_sim opens this path directly as the output file
    out_path = 'exp-dask/sim-results/grads_per_worker/grads_per_worker_{}-final.csv'.format(grads_per_worker)
    run_sim(stats_path, out_path, timings, n_epochs=200, grads_per_worker=grads_per_worker)

Running simulation....


[SETUP] Loaded 200 epochs of stats
[SETUP] Pre-Processed in 19.061187744140625 seconds
[Epoch 0] Score: 0.489799976348877 in 13.980453729629517 seconds
[Epoch 1] Score: 0.6232999563217163 in 10.961429119110107 seconds
[Epoch 2] Score: 0.6603999733924866 in 10.977453470230103 seconds
[Epoch 3] Score: 0.7120999693870544 in 11.118677377700806 seconds
[Epoch 4] Score: 0.7465999722480774 in 11.09188437461853 seconds
[Epoch 5] Score: 0.7512999773025513 in 11.020839929580688 seconds
[Epoch 6] Score: 0.7569999694824219 in 10.94118857383728 seconds
[Epoch 7] Score: 0.7965999841690063 in 11.161171913146973 seconds
[Epoch 8] Score: 0.7978999614715576 in 11.282478094100952 seconds
[Epoch 9] Score: 0.8180999755859375 in 10.946798086166382 seconds
[Epoch 10] Score: 0.8219999670982361 in 10.951257944107056 seconds
[Epoch 11] Score: 0.8235999941825867 in 11.395576477050781 seconds
[Epoch 12] Score: 0.8337999582290649 in 11.041080236434937 seconds
[Epoch 13] Score: 0.8313999772



 Score: 0.905299961566925 in 12.330555200576782 seconds
[Epoch 95] Score: 0.9052000045776368 in 10.896064519882202 seconds
[Epoch 96]



 Score: 0.9052000045776368 in 12.31101393699646 seconds
[Epoch 97] Score: 0.9028999805450441 in 10.958357572555542 seconds
[Epoch 98] Score: 0.9045999646186828 in 10.958157300949097 seconds
[Epoch 99]



 Score: 0.9059000015258788 in 12.35818362236023 seconds
[Epoch 100] Score: 0.9034000039100648 in 11.077972173690796 seconds
[Epoch 101]



 Score: 0.9061999917030334 in 12.395473718643188 seconds
[Epoch 102] Score: 0.9066999554634094 in 10.973371982574463 seconds
[Epoch 103] Score: 0.9053999781608582 in 11.012974262237549 seconds
[Epoch 104]



 Score: 0.9042999744415284 in 12.379153966903687 seconds
[Epoch 105] Score: 0.9081999659538268 in 10.937299966812134 seconds
[Epoch 106]



 Score: 0.9077000021934508 in 12.352702379226685 seconds
[Epoch 107] Score: 0.9039999842643738 in 10.99967908859253 seconds
[Epoch 108] Score: 0.9030999541282654 in 10.991552591323853 seconds
[Epoch 109]



 Score: 0.9081999659538268 in 12.521826982498169 seconds
[Epoch 110] Score: 0.9027999639511108 in 10.9948148727417 seconds
[Epoch 111] Score: 0.9048999547958374 in 10.98786973953247 seconds
[Epoch 112]



 Score: 0.9065999984741212 in 12.502586126327515 seconds
[Epoch 113] Score: 0.9044999480247498 in 11.014880180358887 seconds
[Epoch 114]



 Score: 0.9016000032424928 in 12.789852619171143 seconds
[Epoch 115] Score: 0.901099979877472 in 11.078612089157104 seconds
[Epoch 116] Score: 0.904699981212616 in 10.97553563117981 seconds
[Epoch 117]



 Score: 0.9018999934196472 in 12.523133754730225 seconds
[Epoch 118] Score: 0.9063999652862548 in 10.961885929107666 seconds
[Epoch 119] Score: 0.9024999737739564 in 11.023693323135376 seconds
[Epoch 120]



 Score: 0.9045999646186828 in 12.653643131256104 seconds
[Epoch 121] Score: 0.9104999899864196 in 10.97364854812622 seconds
[Epoch 122]



 Score: 0.9047999978065492 in 12.61484956741333 seconds
[Epoch 123] Score: 0.9095999598503112 in 10.973676919937134 seconds
[Epoch 124] Score: 0.9077999591827391 in 10.97212553024292 seconds
[Epoch 125]



 Score: 0.9114999771118164 in 12.630521535873413 seconds
[Epoch 126] Score: 0.9092999696731568 in 11.047952890396118 seconds
[Epoch 127] Score: 0.9088000059127808 in 11.007213115692139 seconds
[Epoch 128]



 Score: 0.9088000059127808 in 12.714829683303833 seconds
[Epoch 129] Score: 0.9088000059127808 in 11.090700149536133 seconds
[Epoch 130] Score: 0.9072999954223632 in 11.03724479675293 seconds
[Epoch 131]



 Score: 0.910099983215332 in 12.637057542800903 seconds
[Epoch 132] Score: 0.9099999666213988 in 10.995158672332764 seconds
[Epoch 133] Score: 0.9073999524116516 in 11.019011974334717 seconds
[Epoch 134]



 Score: 0.9126999974250792 in 12.683862686157227 seconds
[Epoch 135] Score: 0.9103999733924866 in 10.972424268722534 seconds
[Epoch 136] Score: 0.9085999727249146 in 10.942176342010498 seconds
[Epoch 137]



 Score: 0.9103999733924866 in 12.763888835906982 seconds
[Epoch 138] Score: 0.9097999930381776 in 10.978268146514893 seconds
[Epoch 139] Score: 0.9088999629020692 in 10.988025426864624 seconds
[Epoch 140]



 Score: 0.9089999794960022 in 12.69076418876648 seconds
[Epoch 141] Score: 0.910099983215332 in 10.990357160568237 seconds
[Epoch 142] Score: 0.9107999801635742 in 11.002029657363892 seconds
[Epoch 143]



 Score: 0.9115999937057496 in 13.41951060295105 seconds
[Epoch 144] Score: 0.9121999740600586 in 11.023685216903687 seconds
[Epoch 145] Score: 0.9092999696731568 in 10.98939561843872 seconds
[Epoch 146]



 Score: 0.9103999733924866 in 12.859052181243896 seconds
[Epoch 147] Score: 0.910099983215332 in 10.96691632270813 seconds
[Epoch 148] Score: 0.9122999906539916 in 10.972348690032959 seconds
[Epoch 149]



 Score: 0.909500002861023 in 12.814122200012207 seconds
[Epoch 150] Score: 0.909500002861023 in 10.992818832397461 seconds
[Epoch 151] Score: 0.9085999727249146 in 11.100997924804688 seconds
[Epoch 152]



 Score: 0.9068999886512756 in 12.802420854568481 seconds
[Epoch 153] Score: 0.9056999683380128 in 10.955113410949707 seconds
[Epoch 154] Score: 0.9092999696731568 in 10.98620080947876 seconds
[Epoch 155]



 Score: 0.9095999598503112 in 13.004130363464355 seconds
[Epoch 156] Score: 0.911199986934662 in 11.089705467224121 seconds
[Epoch 157] Score: 0.9109999537467957 in 10.97256588935852 seconds
[Epoch 158]



 Score: 0.9084999561309814 in 12.93846583366394 seconds
[Epoch 159] Score: 0.9098999500274658 in 11.081862688064575 seconds
[Epoch 160] Score: 0.9091999530792236 in 10.997564792633057 seconds
[Epoch 161]



 Score: 0.9091999530792236 in 13.027574300765991 seconds
[Epoch 162] Score: 0.9093999862670898 in 11.06058955192566 seconds
[Epoch 163] Score: 0.90829998254776 in 11.001221895217896 seconds
[Epoch 164] Score: 0.9067999720573424 in 10.992085933685303 seconds
[Epoch 165]



 Score: 0.9075999855995178 in 13.017246007919312 seconds
[Epoch 166] Score: 0.9106999635696412 in 10.92345404624939 seconds
[Epoch 167] Score: 0.9101999998092652 in 10.970479488372803 seconds
[Epoch 168]



 Score: 0.9084999561309814 in 13.016010999679565 seconds
[Epoch 169] Score: 0.9075999855995178 in 10.973253965377808 seconds
[Epoch 170] Score: 0.9084999561309814 in 10.98985242843628 seconds
[Epoch 171] Score: 0.9079999923706056 in 10.96037769317627 seconds
[Epoch 172]



 Score: 0.910099983215332 in 13.22722601890564 seconds
[Epoch 173] Score: 0.9072999954223632 in 10.982634782791138 seconds
[Epoch 174] Score: 0.9102999567985536 in 11.001189708709717 seconds
[Epoch 175]



 Score: 0.9085999727249146 in 13.117083311080933 seconds
[Epoch 176] Score: 0.9095999598503112 in 10.969146728515625 seconds
[Epoch 177] Score: 0.9098999500274658 in 10.991974115371704 seconds
[Epoch 178]



 Score: 0.9089999794960022 in 13.065093755722046 seconds
[Epoch 179] Score: 0.911899983882904 in 11.026676416397095 seconds
[Epoch 180] Score: 0.9106999635696412 in 11.0737464427948 seconds
[Epoch 181] Score: 0.9107999801635742 in 11.084790229797363 seconds
[Epoch 182]



 Score: 0.911199986934662 in 13.100131273269653 seconds
[Epoch 183] Score: 0.9102999567985536 in 11.06227159500122 seconds
[Epoch 184] Score: 0.9080999493598938 in 11.083775043487549 seconds
[Epoch 185] Score: 0.9108999967575072 in 10.981946229934692 seconds
[Epoch 186]



 Score: 0.9093999862670898 in 13.233626127243042 seconds
[Epoch 187] Score: 0.9106000065803528 in 10.971113681793213 seconds
[Epoch 188] Score: 0.9110999703407288 in 10.991002798080444 seconds
[Epoch 189]



 Score: 0.911300003528595 in 13.301803588867188 seconds
[Epoch 190] Score: 0.9133999943733216 in 10.977530479431152 seconds
[Epoch 191] Score: 0.9126999974250792 in 11.074278354644775 seconds
[Epoch 192] Score: 0.9115999937057496 in 11.034584522247314 seconds
[Epoch 193]



 Score: 0.9107999801635742 in 13.369499683380127 seconds
[Epoch 194] Score: 0.9101999998092652 in 11.104374170303345 seconds
[Epoch 195] Score: 0.9081999659538268 in 11.003286123275757 seconds
[Epoch 196] Score: 0.9106999635696412 in 10.973462343215942 seconds
[Epoch 197]



 Score: 0.9104999899864196 in 13.319203853607178 seconds
[Epoch 198] Score: 0.9109999537467957 in 11.072362422943115 seconds
[Epoch 199] Score: 0.9107999801635742 in 11.104134798049927 seconds
Finished simulation in 2317.1177775859833 seconds
Running simulation....


[SETUP] Loaded 200 epochs of stats
[SETUP] Pre-Processed in 19.227740049362183 seconds
[Epoch 0] Score: 0.489799976348877 in 7.876420736312866 seconds
[Epoch 1] Score: 0.6232999563217163 in 7.625124454498291 seconds
[Epoch 2]



 Score: 0.6603999733924866 in 10.022790908813477 seconds
[Epoch 3] Score: 0.7120999693870544 in 7.685020446777344 seconds
[Epoch 4] Score: 0.7465999722480774 in 7.628223180770874 seconds
[Epoch 5] Score: 0.7512999773025513 in 7.647035837173462 seconds
[Epoch 6] Score: 0.7569999694824219 in 7.633426189422607 seconds
[Epoch 7] Score: 0.7965999841690063 in 7.710443735122681 seconds
[Epoch 8] Score: 0.7978999614715576 in 7.650815725326538 seconds
[Epoch 9] Score: 0.8180999755859375 in 7.754627704620361 seconds
[Epoch 10] Score: 0.8219999670982361 in 7.652550220489502 seconds
[Epoch 11] Score: 0.8235999941825867 in 7.634932041168213 seconds
[Epoch 12] Score: 0.8337999582290649 in 7.67616081237793 seconds
[Epoch 13] Score: 0.8313999772071838 in 7.654984951019287 seconds
[Epoch 14] Score: 0.8382999897003174 in 7.671922445297241 seconds
[Epoch 15]



 Score: 0.8403999805450441 in 10.078736782073975 seconds
[Epoch 16] Score: 0.8459999561309814 in 7.691287517547607 seconds
[Epoch 17] Score: 0.8436999917030334 in 7.66588830947876 seconds
[Epoch 18] Score: 0.8467999696731567 in 7.636392831802368 seconds
[Epoch 19] Score: 0.8513999581336975 in 7.673330783843994 seconds
[Epoch 20] Score: 0.8542999625205994 in 7.597484827041626 seconds
[Epoch 21] Score: 0.8543999791145325 in 7.64453649520874 seconds
[Epoch 22] Score: 0.8554999828338623 in 7.640585899353027 seconds
[Epoch 23] Score: 0.863599956035614 in 7.658650159835815 seconds
[Epoch 24] Score: 0.8666999936103821 in 7.614661693572998 seconds
[Epoch 25] Score: 0.8707999587059021 in 7.603116750717163 seconds
[Epoch 26] Score: 0.8693000078201294 in 7.630862474441528 seconds
[Epoch 27] Score: 0.8611999750137329 in 7.663468837738037 seconds
[Epoch 28]



 Score: 0.8693999648094177 in 10.130000829696655 seconds
[Epoch 29] Score: 0.8661999702453613 in 7.703763723373413 seconds
[Epoch 30] Score: 0.8639999628067017 in 7.717268228530884 seconds
[Epoch 31] Score: 0.8593999743461609 in 7.67634391784668 seconds
[Epoch 32] Score: 0.8725999593734741 in 7.6994616985321045 seconds
[Epoch 33] Score: 0.8689999580383301 in 7.628260135650635 seconds
[Epoch 34] Score: 0.8748999834060669 in 7.642248868942261 seconds
[Epoch 35] Score: 0.8736000061035156 in 7.663224935531616 seconds
[Epoch 36] Score: 0.8833000063896179 in 7.628540992736816 seconds
[Epoch 37] Score: 0.8728999495506287 in 7.656515836715698 seconds
[Epoch 38] Score: 0.8733999729156494 in 7.6348876953125 seconds
[Epoch 39] Score: 0.8812999725341797 in 7.634474992752075 seconds
[Epoch 40] Score: 0.8770999908447266 in 7.6316211223602295 seconds
[Epoch 41] Score: 0.8791999816894531 in 7.625760555267334 seconds
[Epoch 42]



 Score: 0.8736000061035156 in 10.237916707992554 seconds
[Epoch 43] Score: 0.8817999958992004 in 7.649866819381714 seconds
[Epoch 44] Score: 0.8812999725341797 in 7.647878408432007 seconds
[Epoch 45] Score: 0.8811999559402466 in 7.631601572036743 seconds
[Epoch 46] Score: 0.8750999569892883 in 7.655789852142334 seconds
[Epoch 47] Score: 0.8782999515533447 in 7.6479387283325195 seconds
[Epoch 48] Score: 0.8828999996185303 in 7.642312288284302 seconds
[Epoch 49] Score: 0.8728999495506287 in 7.6317079067230225 seconds
[Epoch 50] Score: 0.8775999546051025 in 7.625086307525635 seconds
[Epoch 51] Score: 0.8805999755859375 in 7.6573286056518555 seconds
[Epoch 52] Score: 0.8757999539375305 in 7.639531373977661 seconds
[Epoch 53] Score: 0.8779000043869019 in 7.6245810985565186 seconds
[Epoch 54] Score: 0.8847999572753906 in 7.667242765426636 seconds
[Epoch 55]



 Score: 0.8851000070571899 in 10.249308109283447 seconds
[Epoch 56] Score: 0.8819999694824219 in 7.6733479499816895 seconds
[Epoch 57] Score: 0.882099986076355 in 7.661723375320435 seconds
[Epoch 58] Score: 0.8784999847412109 in 7.652313232421875 seconds
[Epoch 59] Score: 0.8831999897956848 in 7.660414934158325 seconds
[Epoch 60] Score: 0.9025999903678894 in 7.6720263957977295 seconds
[Epoch 61] Score: 0.9042999744415284 in 7.65195369720459 seconds
[Epoch 62] Score: 0.9083999991416932 in 7.640193462371826 seconds
[Epoch 63] Score: 0.9065999984741212 in 7.699592590332031 seconds
[Epoch 64] Score: 0.9041999578475952 in 7.669189453125 seconds
[Epoch 65] Score: 0.9070999622344972 in 7.629862308502197 seconds
[Epoch 66] Score: 0.9077999591827391 in 7.670933246612549 seconds
[Epoch 67] Score: 0.9038999676704408 in 7.683232307434082 seconds
[Epoch 68] Score: 0.9078999757766724 in 7.678803205490112 seconds
[Epoch 69]



 Score: 0.90829998254776 in 10.335681915283203 seconds
[Epoch 70] Score: 0.9052000045776368 in 7.691906690597534 seconds
[Epoch 71] Score: 0.9041000008583068 in 7.692845582962036 seconds
[Epoch 72] Score: 0.9052000045776368 in 7.628974676132202 seconds
[Epoch 73] Score: 0.9065999984741212 in 7.64960503578186 seconds
[Epoch 74] Score: 0.9071999788284302 in 7.670116662979126 seconds
[Epoch 75] Score: 0.903599977493286 in 7.673335790634155 seconds
[Epoch 76] Score: 0.9041000008583068 in 7.698579788208008 seconds
[Epoch 77] Score: 0.9049999713897704 in 7.676590919494629 seconds
[Epoch 78] Score: 0.9048999547958374 in 7.669848918914795 seconds
[Epoch 79] Score: 0.9080999493598938 in 7.6497344970703125 seconds
[Epoch 80] Score: 0.9048999547958374 in 7.6478564739227295 seconds
[Epoch 81] Score: 0.9057999849319458 in 7.631267309188843 seconds
[Epoch 82] Score: 0.9047999978065492 in 7.629146337509155 seconds
[Epoch 83]



 Score: 0.901799976825714 in 10.408070087432861 seconds
[Epoch 84] Score: 0.9053999781608582 in 7.69789457321167 seconds
[Epoch 85] Score: 0.9061999917030334 in 7.652296304702759 seconds
[Epoch 86] Score: 0.9088000059127808 in 7.645434379577637 seconds
[Epoch 87] Score: 0.9049999713897704 in 7.636725664138794 seconds
[Epoch 88] Score: 0.9025999903678894 in 7.676135301589966 seconds
[Epoch 89] Score: 0.9055999517440796 in 7.650813817977905 seconds
[Epoch 90] Score: 0.9070000052452089 in 7.64389705657959 seconds
[Epoch 91] Score: 0.9059999585151672 in 7.663691520690918 seconds
[Epoch 92] Score: 0.9067999720573424 in 7.676034688949585 seconds
[Epoch 93] Score: 0.9062999486923218 in 7.624976396560669 seconds
[Epoch 94] Score: 0.905299961566925 in 7.6332173347473145 seconds
[Epoch 95] Score: 0.9052000045776368 in 7.6653220653533936 seconds
[Epoch 96] Score: 0.9052000045776368 in 7.6248719692230225 seconds
[Epoch 97] Score: 0.9028999805450441 in 7.684947490692139 seconds
[Epoch 98] Score: 0.

## Simulations with faster network bandwidth #2 (comment).

In [11]:
## Timings for new simulation
# Scoring cost is set to zero; the trailing number looks like a previously
# measured value kept for reference -- TODO confirm.
SCORE_TIME = 0.0 # 5.39407711148262
# Much smaller than the 0.05855 s used by the single-machine timing cell.
DEEPCOPY_TIME = 0.127488e-3  # seconds
GRAD_TIME_128 = 78.32e-3  # seconds
# Keys are the ones run_sim forwards to model.set_times (mult, score, deepcopy, grad128).
timings = { 'mult': True, 'score': SCORE_TIME, 'deepcopy': DEEPCOPY_TIME, 'grad128': GRAD_TIME_128 }

In [None]:
# exp 2.1 - dec-lr-P-machine-fast
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/decreasing-lr/exp--final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/dec-lr-mult-machines-fast'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 2.2 - increasing batch p machines
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/increasing-bs/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/inc-bs-mult-machines-fast'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 2.3 - hybrid 1 P machine
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/hybrid/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/hybrid1-mult-machines-fast'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 2.4 - hybrid 2 P machine
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/hybrid-2/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/hybrid2-mult-machines-fast'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 11 - large bs 1 P machine
# NOTE(review): the label "exp 11" is also used by the normal-bandwidth
# multi-machine cell; the neighbouring cells here are numbered 2.x -- confirm
# the intended label.
stats_path = './exp-dask/stats/large-bs-0/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/large-bs1-mult-machines-fast'
# max_bs is forwarded to the simulator as max_batch_size
run_sim(stats_path, out_path, timings, max_bs=5120)

In [None]:
# exp 12 - large bs 2 P machine
# NOTE(review): the label "exp 12" is also used by the normal-bandwidth
# multi-machine cell; the neighbouring cells here are numbered 2.x -- confirm
# the intended label.
stats_path = './exp-dask/stats/large-bs-1/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/large-bs2-mult-machines-fast'
# max_bs is forwarded to the simulator as max_batch_size
run_sim(stats_path, out_path, timings, max_bs=5120)







## Simulations for 1 machine with normal network bandwidth

In [11]:
## TIMINGS FOR NORMAL EXPERIMENTS
# Scoring cost is set to zero; the trailing number looks like a previously
# measured value kept for reference -- TODO confirm.
SCORE_TIME = 0.0 # 5.39407711148262
DEEPCOPY_TIME = 0.05855  # seconds
GRAD_TIME_128 = 0.07832  # seconds
# 'mult' is False only in this single-machine section; the values are
# forwarded by run_sim to model.set_times (mult, score, deepcopy, grad128).
timings = { 'mult': False, 'score': SCORE_TIME, 'deepcopy': DEEPCOPY_TIME, 'grad128': GRAD_TIME_128 }

In [None]:
# exp 1 - dec-lr-1-machine
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/decreasing-lr/exp--final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/dec-lr-1-machine'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 5 - large bs 1 1 machine
# Uses the `timings` dict currently bound in the kernel; other run_sim
# arguments keep their defaults (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/large-bs-0/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/large-bs1-1-machine'
# max_bs is forwarded to the simulator as max_batch_size
run_sim(stats_path, out_path, timings, max_bs=5120)

In [None]:
# exp 2 - inc-bs
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/increasing-bs/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/inc-bs-1-machine'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 3 - hybrid 1 1 machine
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/hybrid/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/hybrid1-1-machine'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 4 - hybrid 2 1 machine
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/hybrid-2/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/hybrid2-1-machine'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 6 - large bs 2 1 machine
# Uses the `timings` dict currently bound in the kernel; other run_sim
# arguments keep their defaults (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/large-bs-1/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/large-bs2-1-machine'
# max_bs is forwarded to the simulator as max_batch_size
run_sim(stats_path, out_path, timings, max_bs=5120)

## Simulations for multiple machines with normal network bandwidth

In [16]:
# timings for multi machine set ups
# Scoring cost is set to zero; the trailing number looks like a previously
# measured value kept for reference -- TODO confirm.
SCORE_TIME = 0.0 # 5.39407711148262
DEEPCOPY_TIME = 0.0 # set internally based on N workers
GRAD_TIME_128 = 0.07832  # seconds
# Keys are the ones run_sim forwards to model.set_times (mult, score, deepcopy, grad128).
timings = { 'mult': True, 'score': SCORE_TIME, 'deepcopy': DEEPCOPY_TIME, 'grad128': GRAD_TIME_128 }

In [None]:
# exp 11 - large bs 1 P machine
# Uses the `timings` dict currently bound in the kernel; other run_sim
# arguments keep their defaults (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/large-bs-0/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/large-bs1-mult-machines'
# max_bs is forwarded to the simulator as max_batch_size
run_sim(stats_path, out_path, timings, max_bs=5120)

In [None]:
# exp 7 - dec-lr-P-machine
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/decreasing-lr/exp--final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/dec-lr-mult-machines'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 8 - increasing batch p machines
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/increasing-bs/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/inc-bs-mult-machines'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 9 - hybrid 1 P machine
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/hybrid/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/hybrid1-mult-machines'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 10 - hybrid 2 P machine
# Uses the `timings` dict currently bound in the kernel and run_sim's defaults
# (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/hybrid-2/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/hybrid2-mult-machines'
run_sim(stats_path, out_path, timings)

In [None]:
# exp 12 - large bs 2 P machine
# Uses the `timings` dict currently bound in the kernel; other run_sim
# arguments keep their defaults (n_epochs=200, grads_per_worker=128).
stats_path = './exp-dask/stats/large-bs-1/exp-final.csv'
# run_sim opens out_path itself as the CSV file (no extension is appended)
out_path = 'sim-results/large-bs2-mult-machines'
# max_bs is forwarded to the simulator as max_batch_size
run_sim(stats_path, out_path, timings, max_bs=5120)