## Create the 2k and full factorial experiments

With the help of pyDOE2, create all the needed combinations and save them as CSV files
so they are easy to read and launch from the testing notebook.

In [None]:
import csv
import os

import numpy as np
import pandas as pd
import pyDOE2 as doe


### Create the 2k factorial

The factors are:

- Number of CPUs (or machines, we need to talk about that)
- Batch size
- Number of jobs
- Network (`lenet5` or `simplenet`)

In [None]:
# Header of every design table built in this notebook: one column per factor
columns = ['cpu', 'batch', 'njobs', 'network']

# Levels available for each factor (numeric ones in ascending order).
# The 2k design only uses the first and last entry of each list, whereas the
# full factorial design enumerates every entry.
network_levels = ['lenet5', 'simplenet']
njobs_levels = [1, 3, 5]
batch_levels = [64, 128, 256, 512]
cpu_levels = [1, 2, 4, 8]

In [None]:
# Map the coded levels of the 2k design (-1 = low, +1 = high) to the extreme
# (first and last) entries of each factor's level list.
cpu = {-1: cpu_levels[0], 1: cpu_levels[-1]}
batch = {-1: batch_levels[0], 1: batch_levels[-1]}
jobs = {-1: njobs_levels[0], 1: njobs_levels[-1]}
networks = {-1: network_levels[0], 1: network_levels[-1]}
coded_to_level = {'cpu': cpu, 'batch': batch, 'njobs': jobs, 'network': networks}

# Coded design matrix: one row per run, one column per factor
design = doe.ff2n(len(columns))
df = pd.DataFrame(design, columns=columns)

# Decode each column in one pass and build the table once, instead of growing
# a DataFrame row by row. The codes are cast to int before the lookup in case
# the design matrix holds them as floats.
experiments = pd.DataFrame({
    factor: [coded_to_level[factor][int(code)] for code in df[factor]]
    for factor in columns
})

# to_csv does not create missing directories, so make sure the target exists
os.makedirs('./experiment_designs', exist_ok=True)
experiments.to_csv('./experiment_designs/2k_design.csv')

### Create the Full Factorial Design with all the levels of every factor

In [None]:
# Level lists in the same order as `columns`, so that each design column can
# be decoded with the matching list.
factor_levels = [cpu_levels, batch_levels, njobs_levels, network_levels]

# Full factorial design: each entry of `ff` is used below as a position into
# the corresponding factor's level list.
ff = doe.fullfact([len(levels) for levels in factor_levels])
ff_df = pd.DataFrame(ff, columns=columns)

# Translate every position into its actual level and build the table once,
# instead of growing a DataFrame row by row. Positions are cast to int because
# they are used as list indices.
experiments = pd.DataFrame({
    factor: [levels[int(position)] for position in ff_df[factor]]
    for factor, levels in zip(columns, factor_levels)
})

# save to csv file (to_csv does not create missing directories)
os.makedirs('experiment_designs', exist_ok=True)
experiments.to_csv('experiment_designs/fullfact.csv')


## Create the final experiments to test Courier

We will have a dataframe with interarrival times for `NUM_TASKS` jobs sampled from an exponential distribution.
We couple that with a random sample of the levels and the number of CPUs. We compare Courier with a random batch choice.



In [None]:
# Fixed seed so the sampled workload is identical on every run
np.random.seed(0)

NUM_TASKS = 5

# All draws below share the seeded global generator, so their order determines
# the sampled values; keep the order stable to reproduce the same experiment.
# Inter-arrival times: exponential with mean 20 (units not stated here -- TODO confirm)
inter_arrival = np.random.exponential(scale=20, size=NUM_TASKS)
# CPUs are drawn from the same levels used by the factorial designs
cpus = np.random.choice(cpu_levels, NUM_TASKS)
# Number of jobs: any integer from 1 to 5 (not restricted to njobs_levels)
njobs = np.random.choice(range(1, 6), NUM_TASKS)
network = np.random.choice(network_levels, NUM_TASKS)
# Latency: normal(mean=300, std=100) cast to int (units not stated here -- TODO confirm)
latency = np.random.normal(300, 100, size=NUM_TASKS).astype(int)

exp = {
    'inter_arrival': inter_arrival,
    'cpu': cpus,
    'njobs': njobs,
    'network': network,
    'latency': latency,
}

final_exp = pd.DataFrame(exp)

# to_csv does not create missing directories, so make sure the target exists
os.makedirs('./experiment_designs', exist_ok=True)
final_exp.to_csv('./experiment_designs/courier_exp.csv')
final_exp