In [3]:
from dataclasses import dataclass
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 


## Read the final dataset

In [4]:
final_df = pd.read_pickle('./experiments/fullfact/complete_data.pkl')

##  Extract the train and test features

In [5]:
scaler = StandardScaler()

# Create the interaction column
final_df['processes'] = final_df['cpu'] * final_df['njobs']

# Get the data split into train and test
train_df = final_df[['processes', 'cpu', 'njobs', 'batch', 'network']]
train_df['network'] = train_df['network'].apply(lambda net: 1 if net == 'lenet5' else -1)

labels_time = final_df['time']
labels_acc = final_df['final_accuracy']

train_df = scaler.fit_transform(train_df)
train_df

# Fit just the Random Forest regressor with a GridSearch crossval to find the best hyperparams
x_train, x_test, y_train_acc, y_test_acc = train_test_split(train_df, labels_acc, test_size=0.2, random_state = 42)
x_train, x_test, y_train_time, y_test_time = train_test_split(train_df, labels_time, test_size=0.2, random_state = 42)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


## Train the random forests with the test and train sets

In [6]:
# Fit the regressor for time with the best params
r_t = RandomForestRegressor(n_estimators=2000, 
                            criterion='mae', 
                            max_features='auto', 
                            max_depth=6, 
                            random_state=42, n_jobs=6).fit(x_train, y_train_time)

# Fit the regressor for Accuracy with the best params
r_acc= RandomForestRegressor(n_estimators=150, 
                             criterion='mae', 
                             max_features='auto', 
                             max_depth=9, 
                             random_state=42, n_jobs=6).fit(x_train, y_train_acc)

## Define the courier classes

In [7]:
@dataclass
class Job:
    cpu: int
    njobs: int
    network: int


class Courier:
    """ Courier has 3 parameters to account for utilization, accuracy and response time,
    based on which, and their weights, it chooses the optimal batch size for the task

    labels are in format dict
    accuracy -> labels
    time -> labels
    utilization -> labels
    """

    def __init__(self, models: dict, scaler, alpha: float = 0.5, beta: float = 0.5, batches=[64, 128, 256, 512]):

        self.batches = np.array(batches).reshape(len(batches), 1)

        if alpha + beta != 1:
            raise ValueError('The hyperparameters need to add up to 1')

        # Configure the hyperparams and the training data
        self.alpha = alpha
        self.beta = beta

        self.scaler = scaler

        # Compute the predictors
        self.acc_model = models['accuracy']
        self.time_model = models['time']

    def optimize(self, job: Job, latency=None):
        acc, t = self._predict(job)

        b = self.batches

        if latency:
            # Just take the options that satisfy the requirements
            fit = t[t < latency]
            if len(fit) == 0:
                print('Not a single value fulfills the '
                      'time requirements, selecting minimum time')
                # Return the minimum time
                min_t_idx = np.where(t == t.min())[0][0]
                return self.batches[min_t_idx], (acc[min_t_idx],
                                                 t[min_t_idx])

            else:
                acc = acc[t < latency]
                b = self.batches[t < latency]
                t = t[t < latency]

                # Return the best accuracy under that latency
                idx = np.where(acc == acc.max())[0][0]
                return b[idx], (acc[idx], t[idx])

        # Return the batch that better fulfills the requirements and
        # also return a tuple with the predictions
        sc_a = acc / np.max(acc)
        sc_t = 1 - (t / np.max(t))

        sc = self.alpha * sc_a + self.beta * sc_t

        # Get the index of the max score
        max_sc_idx = np.where(sc == sc.max())[0][0]
        return b[max_sc_idx], (acc[max_sc_idx],
                               t[max_sc_idx])

    def _fit_model(self, labels):
        """Fit the model to particular labels"""
        reg = self.model
        reg.fit(self.X, labels)
        return reg

    def _preprocess_data(self, X):
        """Standardizes the data"""
        scaler = StandardScaler()
        return scaler.fit_transform(X)

    def _predict(self, job: Job):
        """Predicts the time, accuracy and util with different batches
        and returns the best one given the optims"""

        acc = []
        t = []

        # predict the performance of the job with different batches
        for b in self.batches:
            data_point = self.scaler.transform([[job.cpu * job.njobs, job.cpu, job.njobs, b, job.network]])
            _acc = self.acc_model.predict(data_point)
            _t = self.time_model.predict(data_point)
            print(f'Batch {b}, acc = {_acc} and t = {_t}')

            acc.append(_acc)
            t.append(_t)

        return np.array(acc), np.array(t)

## Create the courier with the models and the fitted scaler

In [8]:
models = {'accuracy': r_acc, 'time': r_t}

c = Courier(models, scaler)

In [9]:
experiments = pd.read_csv('./experiment_designs/courier_exp2.csv')
experiments

Unnamed: 0.1,Unnamed: 0,inter_arrival,cpu,njobs,network,latency
0,0,15.91749,4,2,simplenet,666
1,1,25.118615,8,5,simplenet,413
2,2,18.464463,8,4,simplenet,348
3,3,15.744023,4,1,simplenet,258
4,4,11.02097,1,4,lenet5,564
5,5,20.763186,2,1,simplenet,373
6,6,11.510384,2,3,lenet5,441
7,7,44.470488,2,4,simplenet,111
8,8,66.298244,2,1,simplenet,470
9,9,9.672042,1,2,simplenet,467


In [14]:
for idx, row in experiments.iterrows():
    j = Job(cpu= row.cpu, njobs = row.njobs, network= 1 if row.network == 'lenet5' else -1)
    best_batch, (acc, t) = c.optimize(j, latency=row.latency)
    print(j, best_batch, acc, t, row.latency)



Batch [64], acc = [0.948773] and t = [491.51971142]




Batch [128], acc = [0.93950367] and t = [262.86111188]




Batch [256], acc = [0.92640033] and t = [135.35441678]




Batch [512], acc = [0.91327533] and t = [77.43176466]
Job(cpu=4, njobs=2, network=-1) 64 0.9487729999999993 491.51971141630327 666




Batch [64], acc = [0.94939567] and t = [948.40955838]




Batch [128], acc = [0.938567] and t = [487.52589785]




Batch [256], acc = [0.92693733] and t = [234.91161999]




Batch [512], acc = [0.91433067] and t = [132.07748953]
Job(cpu=8, njobs=5, network=-1) 256 0.9269373333333344 234.91161999072884 413




Batch [64], acc = [0.949366] and t = [697.69231777]




Batch [128], acc = [0.93917267] and t = [368.79799073]




Batch [256], acc = [0.92734367] and t = [190.23914562]




Batch [512], acc = [0.91435667] and t = [102.64413852]
Job(cpu=8, njobs=4, network=-1) 256 0.9273436666666665 190.2391456172209 348




Batch [64], acc = [0.94781867] and t = [462.53476099]




Batch [128], acc = [0.93886967] and t = [240.56504316]




Batch [256], acc = [0.92701467] and t = [130.2199139]




Batch [512], acc = [0.91293433] and t = [72.42922182]
Job(cpu=4, njobs=1, network=-1) 128 0.938869666666668 240.5650431586837 258




Batch [64], acc = [0.96293033] and t = [532.95802037]




Batch [128], acc = [0.94699233] and t = [325.45537262]




Batch [256], acc = [0.92645567] and t = [225.40031269]




Batch [512], acc = [0.88876867] and t = [174.84621277]
Job(cpu=1, njobs=4, network=1) 64 0.9629303333333339 532.958020370519 564




Batch [64], acc = [0.94911767] and t = [456.04946752]




Batch [128], acc = [0.93816033] and t = [241.78534338]




Batch [256], acc = [0.926803] and t = [130.44412309]




Batch [512], acc = [0.912939] and t = [74.14611834]
Job(cpu=2, njobs=1, network=-1) 128 0.9381603333333337 241.78534338455273 373




Batch [64], acc = [0.95874367] and t = [546.4496712]




Batch [128], acc = [0.94807933] and t = [312.90964436]




Batch [256], acc = [0.926017] and t = [191.27093362]




Batch [512], acc = [0.88270833] and t = [123.81628796]
Job(cpu=2, njobs=3, network=1) 128 0.9480793333333336 312.9096443607596 441




Batch [64], acc = [0.94942167] and t = [514.89185945]




Batch [128], acc = [0.93892767] and t = [272.24502248]




Batch [256], acc = [0.92745767] and t = [143.64801889]




Batch [512], acc = [0.913337] and t = [81.22967146]
Job(cpu=2, njobs=4, network=-1) 512 0.9133369999999995 81.22967145921956 111




Batch [64], acc = [0.94911767] and t = [456.04946752]




Batch [128], acc = [0.93816033] and t = [241.78534338]




Batch [256], acc = [0.926803] and t = [130.44412309]




Batch [512], acc = [0.912939] and t = [74.14611834]
Job(cpu=2, njobs=1, network=-1) 64 0.9491176666666663 456.04946752380647 470




Batch [64], acc = [0.94925867] and t = [445.43145259]




Batch [128], acc = [0.93825567] and t = [246.76851279]




Batch [256], acc = [0.92628367] and t = [130.26957132]




Batch [512], acc = [0.91302133] and t = [74.40285912]
Job(cpu=1, njobs=2, network=-1) 64 0.9492586666666664 445.4314525930379 467
