It is critically important to define `resources` so that Dask does not assign multiple tasks (i.e. TensorFlow training simulations) to a single worker at the same time; otherwise the worker will die because it runs out of RAM. See the Dask docs on [Resources](https://distributed.dask.org/en/stable/resources.html) and the relevant *Stack Overflow* question [one task per worker](https://stackoverflow.com/questions/45052535/dask-distributed-how-to-run-one-task-per-worker-making-that-task-running-on-a).

In [1]:
from distributed import LocalCluster
import dask

# Every worker started while this config is active advertises one unit of the
# custom `PROCESS` resource; tasks submitted with resources={'PROCESS': 1}
# can therefore only run one at a time per worker.
worker_resource_config = {"distributed.worker.resources.PROCESS": 1}

# Local cluster layout: 4 worker processes, 2 threads each, 4GB memory limit per worker.
cluster_options = dict(n_workers=4, threads_per_worker=2, memory_limit='4GB')

with dask.config.set(worker_resource_config):
    cluster = LocalCluster(**cluster_options)

In [2]:
# Show the cluster summary (dashboard link, workers, threads, memory).
cluster

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 8,Total memory: 14.90 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:55983,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 14.90 GiB

0,1
Comm: tcp://127.0.0.1:56036,Total threads: 2
Dashboard: http://127.0.0.1:56038/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:55986,
Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-wvqm0rdm,Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-wvqm0rdm

0,1
Comm: tcp://127.0.0.1:56033,Total threads: 2
Dashboard: http://127.0.0.1:56034/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:55987,
Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-m04ww1jx,Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-m04ww1jx

0,1
Comm: tcp://127.0.0.1:56042,Total threads: 2
Dashboard: http://127.0.0.1:56045/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:55988,
Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-t4fon4cz,Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-t4fon4cz

0,1
Comm: tcp://127.0.0.1:56037,Total threads: 2
Dashboard: http://127.0.0.1:56043/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:55989,
Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-tvf9dtn4,Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-tvf9dtn4


In [3]:
from dask.distributed import Client

# Connect a client to the local cluster so that tasks can be submitted to its workers.
client = Client(cluster)

# Show the client summary.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 8,Total memory: 14.90 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:55983,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 14.90 GiB

0,1
Comm: tcp://127.0.0.1:56036,Total threads: 2
Dashboard: http://127.0.0.1:56038/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:55986,
Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-wvqm0rdm,Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-wvqm0rdm

0,1
Comm: tcp://127.0.0.1:56033,Total threads: 2
Dashboard: http://127.0.0.1:56034/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:55987,
Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-m04ww1jx,Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-m04ww1jx

0,1
Comm: tcp://127.0.0.1:56042,Total threads: 2
Dashboard: http://127.0.0.1:56045/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:55988,
Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-t4fon4cz,Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-t4fon4cz

0,1
Comm: tcp://127.0.0.1:56037,Total threads: 2
Dashboard: http://127.0.0.1:56043/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:55989,
Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-tvf9dtn4,Local directory: C:\Users\miket\AppData\Local\Temp\dask-worker-space\worker-tvf9dtn4


In [5]:
import tensorflow as tf
from dask import delayed

@delayed
def load_data():
    """Lazily fetch MNIST through Keras and divide the images by 255.

    Because of the ``@delayed`` decorator, calling this function only builds a
    task; the download/scaling happens when ``.compute()`` is invoked on it.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` where the image arrays
        have been divided by ``255.0`` and the labels are returned untouched.
    """
    train_split, test_split = tf.keras.datasets.mnist.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split

    # Rescale the raw pixel values.
    scaled_train_images = train_images / 255.0
    scaled_test_images = test_images / 255.0

    return scaled_train_images, train_labels, scaled_test_images, test_labels

In [6]:
# Build the lazy loading task; nothing is loaded until `.compute()` is called on it.
data_delayed = load_data()

In [7]:
# Send the simulation module to the workers so tasks can `import TF_Simulation_FDA_CNN`.
client.upload_file('TF_Simulation_FDA_CNN.py')

{'tcp://127.0.0.1:56033': {'status': 'OK'},
 'tcp://127.0.0.1:56036': {'status': 'OK'},
 'tcp://127.0.0.1:56037': {'status': 'OK'},
 'tcp://127.0.0.1:56042': {'status': 'OK'}}

In [8]:
def worker_training_function(data_delayed, num_clients):
    """Run one simulation configuration inside a Dask worker and return its metrics.

    Memory is released in a ``finally`` block, so the worker process is cleaned
    up (garbage collection + Keras session reset) even when loading the data or
    running the simulation raises.

    Args:
        data_delayed: Object whose ``.compute()`` yields the tuple
            ``(X_train, y_train, X_test, y_test)``.
        num_clients: Number of clients; passed to ``sim.run_tests`` as the only
            entry of ``num_clients_list``.

    Returns:
        Tuple ``(all_epoch_metrics, all_round_metrics)`` as produced by
        ``sim.run_tests``.

    Raises:
        Whatever ``data_delayed.compute()``, ``sim.convert_to_tf_dataset`` or
        ``sim.run_tests`` raise; the exception propagates after cleanup.
    """
    # Imported inside the function so the names are resolved on the worker
    # process that executes the task.
    import gc

    import TF_Simulation_FDA_CNN as sim

    X_train = y_train = X_test = y_test = None
    train_dataset = test_dataset = None
    try:
        X_train, y_train, X_test, y_test = data_delayed.compute()

        train_dataset, test_dataset = sim.convert_to_tf_dataset(X_train, y_train, X_test, y_test)

        # The raw arrays are no longer needed once the datasets exist; drop
        # them before training to keep peak memory down.
        del X_train, y_train, X_test, y_test

        all_epoch_metrics, all_round_metrics = sim.run_tests(
            train_dataset=train_dataset,
            test_dataset=test_dataset,
            num_clients_list=[num_clients],
            batch_size_list=[32],
            num_steps_until_rtc_check_list=[1],
            theta_list=[1.],
            num_epochs=1,
            sketch_width=500,
            sketch_depth=7
        )

        return all_epoch_metrics, all_round_metrics
    finally:
        # Drop every reference to the data (rebinding works whether or not the
        # names were already deleted/assigned), then clean up the process.
        X_train = y_train = X_test = y_test = None
        train_dataset = test_dataset = None

        gc.collect()  # force garbage collection
        sim.tf.keras.backend.clear_session()  # Clear TensorFlow session

In [10]:
# Holds the Dask futures of the submitted training tasks.
futures = []

In [11]:
# Submit one training task per client count.
#
# The list is (re)built inside this cell so re-running the cell does not keep
# appending duplicate futures to a list created elsewhere.
#
# `21` appears twice below. With the default `pure=True`, Dask derives the task
# key from the function and its arguments, so the second submission would
# simply reuse the first task's result. `pure=False` gives every submission its
# own key so the repeated configuration is actually executed again.
futures = [
    client.submit(
        worker_training_function,
        data_delayed=data_delayed,
        num_clients=num_clients,
        resources={'PROCESS': 1},  # Tell Dask that the resource `PROCESS` is consumed in one task!
        pure=False,
    )
    for num_clients in [20, 5, 9, 15, 4, 3, 21, 12, 11, 16, 21]
]

In [12]:
# Wait for the submitted tasks and collect their return values, in the order of `futures`.
results = client.gather(futures)

In [13]:
from itertools import chain

# Each task returned (epoch_metrics, round_metrics); regroup into one tuple of
# epoch-metric collections and one tuple of round-metric collections.
all_tests_epoch_metrics, all_tests_round_metrics = zip(*results)

# Flatten across tasks. Materialised as lists (not one-shot iterators) so that
# cells consuming them can be re-run without silently getting empty data.
all_epoch_metrics = list(chain.from_iterable(all_tests_epoch_metrics))
all_round_metrics = list(chain.from_iterable(all_tests_round_metrics))

In [14]:
import pandas as pd

# Build the result tables from the flattened epoch-level and round-level metrics.
epoch_metrics_df = pd.DataFrame(all_epoch_metrics)
round_metrics_df = pd.DataFrame(all_round_metrics)

In [15]:
# Display the epoch-level metrics table.
epoch_metrics_df

Unnamed: 0,dataset_name,fda_name,num_clients,batch_size,num_steps_until_rtc_check,theta,nn_num_weights,sketch_width,sketch_depth,epoch,total_rounds,total_fda_steps,accuracy
0,EMNIST,naive,20,32,1,1.0,2592202,-1,-1,1,1,5,0.1009
1,EMNIST,linear,20,32,1,1.0,2592202,-1,-1,1,1,5,0.1135
2,EMNIST,sketch,20,32,1,1.0,2592202,500,7,1,1,5,0.1135
3,EMNIST,naive,5,32,1,1.0,2592202,-1,-1,1,1,5,0.1135
4,EMNIST,linear,5,32,1,1.0,2592202,-1,-1,1,1,5,0.101
5,EMNIST,sketch,5,32,1,1.0,2592202,500,7,1,1,5,0.0958
6,EMNIST,naive,9,32,1,1.0,2592202,-1,-1,1,1,5,0.0982
7,EMNIST,linear,9,32,1,1.0,2592202,-1,-1,1,1,5,0.0982
8,EMNIST,sketch,9,32,1,1.0,2592202,500,7,1,1,5,0.0982
9,EMNIST,naive,15,32,1,1.0,2592202,-1,-1,1,1,5,0.1135


In [16]:
# Display the round-level metrics table.
round_metrics_df

Unnamed: 0,dataset_name,fda_name,num_clients,batch_size,num_steps_until_rtc_check,theta,nn_num_weights,sketch_width,sketch_depth,epoch,round,total_fda_steps,est_var,actual_var
0,EMNIST,naive,20,32,1,1.0,2592202,-1,-1,2,1,5,0.170307,0.161004
1,EMNIST,linear,20,32,1,1.0,2592202,-1,-1,2,1,5,0.179762,0.169987
2,EMNIST,sketch,20,32,1,1.0,2592202,500,7,2,1,5,0.168098,0.167949
3,EMNIST,naive,5,32,1,1.0,2592202,-1,-1,2,1,5,0.707836,0.558714
4,EMNIST,linear,5,32,1,1.0,2592202,-1,-1,2,1,5,0.711119,0.565493
5,EMNIST,sketch,5,32,1,1.0,2592202,500,7,2,1,5,0.600189,0.594663
6,EMNIST,naive,9,32,1,1.0,2592202,-1,-1,2,1,5,0.331107,0.292949
7,EMNIST,linear,9,32,1,1.0,2592202,-1,-1,2,1,5,0.357298,0.316725
8,EMNIST,sketch,9,32,1,1.0,2592202,500,7,2,1,5,0.306861,0.305906
9,EMNIST,naive,15,32,1,1.0,2592202,-1,-1,2,1,5,0.215757,0.200694


In [18]:
# Shut down: close the client connection first, then the local cluster.
client.close()
cluster.close()

# TODO:
1. Fix loop
2. Comments
3. Test func in module TF_Sim*
4. TF_*.ipynb back to the modules
5. Go Dask and .py to simulation dir


**Think about the approach!** Try to run many tests from one task instead of submitting many single-test tasks. 4 GB of RAM handled 20 clients just fine!