In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import pandas as pd
import numpy as np
from imblearn.under_sampling import RandomUnderSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.model_selection import cross_val_score

from autosklearn.classification import AutoSklearnClassifier
from autosklearn.experimental.askl2 import AutoSklearn2Classifier

In [12]:
# Both the feature catalogue (`root`) and the per-testbed logs (`data_root`)
# currently live under the same directory.
root = "../../data/other/"
data_root = "../../data/other/"

# Every AutoML candidate gets the same search budget and resource limits.
automl_settings = dict(time_left_for_this_task=30, n_jobs=-1, memory_limit=4096)
models = [
    estimator_cls(**automl_settings)
    for estimator_cls in (AutoSklearnClassifier, AutoSklearn2Classifier)
]

In [5]:
"""
Expanse >>  From TACC to SDSC (rtt: 58ms)
Chameleon >>  From TACC to UC (rtt: 32ms)
Labels 
Code <> Name
1    <> Normal
2    <> Duplicate
3    <> Reorder
4    <> Corrupt
5    <> Loss
6    <> Jitter
"""

# Per-testbed values used to build the "b{bw}d{rtt}" file stems that the loaders read.
bws = {"expanse": [1000], "chameleon": [1000]}
rtts = {"expanse": [60], "chameleon": [35]}

# Congestion-control variants; each one has its own sub-folder of logs.
cc_algos = ["cubic", "htcp", "bbr"]

# Label codes kept for classification (see the table in the string above).
labels = list(range(1, 7))

# Feature catalogue: the `feature` column names the model inputs; the loader
# additionally reads a `normalizer` column from the same table.
feature_file = root + "ss_7_features.csv"
features = pd.read_csv(feature_file)
feature_list = list(features["feature"].values)

In [6]:
def read_file(env, transform=True):
    """Load every log of testbed ``env`` into the global ``data`` cache.

    For each congestion-control algorithm in ``cc_algos`` and each
    bandwidth/RTT pair configured for ``env`` the matching CSV is read,
    restricted to the rows whose ``label`` is listed in ``labels``, and stored
    as ``data[env][cc_algo]["b{bw}d{rtt}"]``.

    Missing numeric values are imputed with the mean of the rows sharing the
    same ``label``.  (The previous ``fillna(groupby(...).mean())`` aligned the
    group means on the row index, so it only touched the first few rows and
    used the wrong group's mean; everything else silently became 0.)

    Parameters
    ----------
    env : str
        Testbed name; must be a key of ``bws`` and ``rtts``.
    transform : bool, default True
        When true, every column named in ``features['feature']`` is divided by
        the column named in the same row's ``normalizer`` field.

    Returns
    -------
    None
        The result is written to the module-level ``data`` dict.
    """
    global data

    for cc_algo in cc_algos:
        folder = "{0}/sslog_ps/{1}/".format(env, cc_algo)

        for bw in bws[env]:
            for rtt in rtts[env]:
                combo = "b{0}d{1}".format(bw, rtt)
                df = pd.read_csv(data_root + "{0}/{1}.csv".format(folder, combo))
                df = df.sort_values(by=['label']).reset_index(drop=True)
                # Copy so the assignments below never write into a view of the raw frame.
                df = df[df.label.isin(labels)].copy()

                # Per-label mean imputation; `transform` keeps the original row
                # index, so fillna aligns every row with its own group's mean.
                numeric_cols = [
                    col for col in df.select_dtypes(include="number").columns
                    if col != "label"
                ]
                if numeric_cols:
                    group_means = df.groupby("label")[numeric_cols].transform("mean")
                    df[numeric_cols] = df[numeric_cols].fillna(group_means)

                if transform:
                    # Rows are applied in catalogue order, so a column that is
                    # both a feature and a later normalizer is used in its
                    # already-normalized form.  (An earlier variant divided
                    # rtt-normalized features by the nominal rtt instead.)
                    for feature, normalizer in zip(features['feature'], features['normalizer']):
                        df[feature] = df[feature] / df[normalizer]

                # Division by zero and groups that were entirely NaN end up here.
                df = df.replace([np.inf, -np.inf, np.nan], 0)

                data.setdefault(env, {}).setdefault(cc_algo, {})[combo] = df

In [7]:
def feature_score(env, transform=True, size=0.8):
    """Print hold-out F1 and mean feature importances per CC algorithm.

    The testbed is (re)loaded through ``read_file``.  For every configured
    bandwidth/RTT pair the ``report_sec == 10`` rows are split into train and
    test sets, the training part is class-balanced with ``RandomUnderSampler``,
    and each estimator in the global ``models`` list is fitted and scored.

    Parameters
    ----------
    env : str
        Testbed name understood by ``read_file``.
    transform : bool, default True
        Forwarded to ``read_file``.
    size : float, default 0.8
        ``train_size`` handed to ``train_test_split``.

    Notes
    -----
    * Importances are keyed by ``str(model)`` taken before fitting.  The old
      code wrote under ``str(model)`` but read back under ``model`` and then
      under the feature name, which raised ``KeyError``.
    * Estimators that do not expose ``feature_importances_`` are still scored
      but contribute no importance rows.
    * The estimators in ``models`` are fitted in place, so they stay fitted
      after this call.
    """
    read_file(env, transform)
    for cc_algo in cc_algos:
        print("CC Algorithms: {0}".format(cc_algo))
        results = []
        score = {}
        for bw in bws[env]:
            for rtt in rtts[env]:
                combo = "b{0}d{1}".format(bw, rtt)
                df = data[env][cc_algo][combo].copy()
                df = df[df.report_sec == 10]
                y = df.label
                X = df[feature_list]
                X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=size)
                X_train, y_train = RandomUnderSampler(sampling_strategy="all").fit_resample(X_train, y_train)
                for model in models:
                    # Accumulate across combos instead of overwriting the entry.
                    per_feature = score.setdefault(str(model), {})
                    clf = model
                    clf.fit(X_train, y_train)
                    y_pred = clf.predict(X_test)
                    f1_score = np.round(metrics.f1_score(y_test, y_pred, average='micro') * 100, 2)
                    results.append(f1_score)

                    importances = getattr(clf, "feature_importances_", None)
                    if importances is None:
                        continue
                    for column, value in zip(X.columns, np.round(importances, 2)):
                        per_feature.setdefault(column, []).append(value)

        print("Sample Size: {0}, Average: {1}, Stdv: {2}".format(size, np.round(np.mean(results), 2),
                                                                 np.round(np.std(results), 2)))
        print("Importance Score:")
        for model_name, per_feature in score.items():
            print('\t', model_name)
            for key, values in per_feature.items():
                print("\t\t{0}: {1}".format(key, np.round(np.mean(values), 2)))
        print()

In [8]:
from copy import deepcopy
# Mirror of the most recent tl_evaluate() results, kept for cells that inspect it.
output = {}
def tl_evaluate(env1, env2, model, t_limit=10, verbose=True, file=None):
    """Fit on testbed ``env1`` and score the fitted pipeline on testbed ``env2``.

    For every algorithm in ``cc_algos`` a ``PowerTransformer`` + ``model``
    pipeline is fitted on each training combo of ``env1`` and evaluated on
    every combo of ``env2``.

    Parameters
    ----------
    env1, env2 : str
        Source and target testbed names (keys of ``data``, ``bws``, ``rtts``).
    model : estimator
        Used as the last pipeline step and fitted in place.
    t_limit : int, default 10
        ``report_sec`` value selecting the evaluation rows.
    verbose : bool, default True
        When true, print one line per training combo; otherwise print a
        per-algorithm summary at the end.
    file : writable text stream, optional
        Receives one CSV line per evaluation:
        ``env1,env2,train,combo,cc_algo,t_limit,f_score``.

    Returns
    -------
    dict
        ``{cc_algo: {"<env1> -> <env2>": stats}}`` where ``stats`` holds
        "Scores", "Average", "Std Dev" and, when the model provides them,
        "Final Ensemble" and "Leaderboard".  A fresh outer dict is built on
        every call, so earlier return values are not overwritten by later
        calls.  (The old code returned the shared global ``output``.)  The
        global ``output`` is still updated with the same per-algorithm entries.

    Notes
    -----
    If ``env1`` has several training combos, only the last one's stats are
    kept per algorithm (unchanged behaviour).
    """
    transfer = f"{env1} -> {env2}"
    summary = {}

    for cc_algo in cc_algos:
        print("CC Algorithms: {0}".format(cc_algo), end='\t')
        summary[cc_algo] = {transfer: {}}
        output[cc_algo] = summary[cc_algo]

        for bw in bws[env1]:
            for rtt in rtts[env1]:
                train = "b{0}d{1}".format(bw, rtt)
                df = data[env1][cc_algo][train].copy()
                # NOTE(review): training always uses the report_sec == 10 rows,
                # independent of t_limit -- confirm this is intended.
                df = df[df.report_sec == 10]
                y = df.label
                X = df[feature_list]
                stats = {}
                summary[cc_algo][transfer] = stats
                clf = make_pipeline(PowerTransformer(), model)
                clf.fit(X, y)
                scores = []

                for bw1 in bws[env2]:
                    for rtt1 in rtts[env2]:
                        combo = "b{0}d{1}".format(bw1, rtt1)
                        # Only skip the training combo itself; an identically
                        # named combo on another testbed is different data.
                        if env1 == env2 and train == combo:
                            continue
                        if rtt1 == 0:
                            continue

                        df = data[env2][cc_algo][combo].copy()
                        df = df[df.report_sec == t_limit]
                        y = df.label
                        X = df[feature_list]
                        y_pred = clf.predict(X)
                        accuracy = np.round(metrics.accuracy_score(y, y_pred) * 100, 2)
                        f_score = np.round(metrics.f1_score(y, y_pred, average='micro') * 100, 2)

                        scores.append(accuracy)
                        if file is not None:
                            msg = "{0},{1},{2},{3},{4},{5},{6}\n".format(env1, env2, train, combo, cc_algo,
                                                                            t_limit, f_score)
                            file.write(msg)

                if verbose:
                    print(model, train, np.round(np.mean(scores), 2), np.round(np.std(scores), 2), scores)
                stats["Scores"] = list(scores)
                stats["Average"] = np.mean(scores)
                stats["Std Dev"] = np.std(scores)
                # Best effort: only AutoML estimators offer these reports.
                for label, method in (("Final Ensemble", "show_models"), ("Leaderboard", "leaderboard")):
                    try:
                        stats[label] = str(getattr(clf[1], method)())
                    except Exception as e:
                        print(e)

    if not verbose:
        # One summary block per algorithm (previously repeated once per stats key).
        for cc, transfers in summary.items():
            for name, stats in transfers.items():
                if "Average" not in stats:
                    continue
                print('\t', cc, name, end='')
                print("\tAverage:{0}\n\tStd Dev: {1}\n".format(stats["Average"], stats["Std Dev"]))

    return summary

# Before Transformation Applied

In [13]:
# Reset the cache, then load both testbeds with their raw
# (un-normalized) feature values.
data = {testbed_name: {} for testbed_name in ("chameleon", "expanse")}
read_file("chameleon", False)
read_file("expanse", False)

### Chameleon

In [None]:
# Testbed selector for the per-feature importance report on raw features.
# The report call below is currently disabled.
testbed = "chameleon"
#feature_score(testbed, transform=False)

### Expanse

In [None]:
# Testbed selector for the per-feature importance report on raw features.
# The report call below is currently disabled.
testbed = "expanse"
#feature_score(testbed, transform=False)

## Same Network

In [9]:
# Same-network baseline on the currently loaded `data`: 3-fold cross-validation
# of every AutoML candidate, per testbed / CC algorithm / bandwidth-RTT combo.
mySameNetworkNoTransformation = {}
for env in bws:
    env_scores = mySameNetworkNoTransformation[env] = {}
    for cc_algo in cc_algos:
        algo_scores = env_scores[cc_algo] = {}
        print(env, cc_algo)
        for bw in bws[env]:
            for rtt in rtts[env]:
                train = "b{0}d{1}".format(bw, rtt)
                combo_scores = algo_scores[train] = {}
                # Only the rows with report_sec == 10 are evaluated.
                df = data[env][cc_algo][train].copy()
                df = df[df.report_sec == 10]
                X, y = df[feature_list], df.label
                for model in models:
                    print('\t', model)
                    clf = make_pipeline(PowerTransformer(), model)
                    # Fold scores are rounded first; the summary is computed on the rounded values.
                    cv_res = np.round(cross_val_score(clf, X, y, cv=3), 2)
                    fold_mean = np.mean(cv_res)
                    fold_std = np.std(cv_res)
                    combo_scores[str(model)] = {"Accuracy": fold_mean, "Std Dev": fold_std}

                    print("\t\t", train, cv_res)
                    print("\t\t Average: {0}, Std dev: {1}".format(np.round(fold_mean, 2),
                                                                np.round(fold_std, 2)))
                      

expanse cubic
	 AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.97 0.97 0.98]
		 Average: 0.97, Std dev: 0.0
	 AutoSklearn2Classifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.97 0.97 0.98]
		 Average: 0.97, Std dev: 0.0
expanse htcp
	 AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.93 0.94 0.97]
		 Average: 0.95, Std dev: 0.02
	 AutoSklearn2Classifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.95 0.95 0.97]
		 Average: 0.96, Std dev: 0.01
expanse bbr
	 AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.98 0.99 0.99]
		 Average: 0.99, Std dev: 0.0
	 AutoSklearn2Classifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.99 0.99 0.99]
		 Average: 0.99, Std dev: 0.0
chameleon cubic
	 AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b10

In [10]:
import json

# Persist the raw-feature same-network scores next to the notebook.
with open("Same Network No Transformation.json", "w") as json_out:
    json.dump(mySameNetworkNoTransformation, json_out)

## Transfer Learning 

### Expanse to Chameleon

In [14]:
# Train on Expanse, evaluate on Chameleon, once per AutoML candidate.
# The key is taken after fitting, and the result is copied so later runs
# cannot overwrite it.
expanseToChameleon = {}
for model in models:
    transfer_result = tl_evaluate("expanse", "chameleon", model)
    expanseToChameleon[str(model)] = deepcopy(transfer_result)

CC Algorithms: cubic	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=120,
                      time_left_for_this_task=300) b1000d60 58.1 0.0 [58.1]
CC Algorithms: htcp	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=120,
                      time_left_for_this_task=300) b1000d60 33.11 0.0 [33.11]
CC Algorithms: bbr	

Process ForkServerProcess-650:
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/nick/.local/lib/python3.8/site-packages/autosklearn/util/logging_.py", line 320, in start_log_server
    receiver.serve_until_stopped()
  File "/home/nick/.local/lib/python3.8/site-packages/autosklearn/util/logging_.py", line 350, in serve_until_stopped
    rd, wr, ex = select.select([self.socket.fileno()],
KeyboardInterrupt


KeyboardInterrupt: 

In [10]:
import json

# Persist the raw-feature Expanse -> Chameleon transfer scores.
with open("Expanse to Chameleon No Transformation.json", "w") as json_out:
    json.dump(expanseToChameleon, json_out)

### Chameleon to Expanse

In [11]:
# Train on Chameleon, evaluate on Expanse, once per AutoML candidate.
# The key is taken after fitting, and the result is copied so later runs
# cannot overwrite it.
chameleonToExpanse = {}
for model in models:
    transfer_result = tl_evaluate("chameleon", "expanse", model)
    chameleonToExpanse[str(model)] = deepcopy(transfer_result)

CC Algorithms: cubic	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30) b1000d35 55.59 0.0 [55.59]
CC Algorithms: htcp	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30) b1000d35 73.93 0.0 [73.93]
CC Algorithms: bbr	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30) b1000d35 63.07 0.0 [63.07]
CC Algorithms: cubic	AutoSklearn2Classifier(memory_limit=4096, metric=accuracy, n_jobs=-1,
                       per_run_time_limit=12, time_left_for_this_task=30) b1000d35 54.67 0.0 [54.67]
CC Algorithms: htcp	AutoSklearn2Classifier(memory_limit=4096, metric=accuracy, n_jobs=-1,
                       per_run_time_limit=12, time_left_for_this_task=30) b1000d35 57.52 0.0 [57.52]
CC Algorithms: bbr	AutoSklearn2Classifier(memory_limit=4096, metric=accuracy, n_jobs=-1,
            

In [12]:
import json

# Persist the raw-feature Chameleon -> Expanse transfer scores.
with open("Chameleon to Expanse No Transformation.json", "w") as json_out:
    json.dump(chameleonToExpanse, json_out)

# After Transformation Applied

In [9]:
# Reset the cache so no raw frames survive, then reload both testbeds with
# every feature divided by its normalizer column.
data = {testbed_name: {} for testbed_name in ("chameleon", "expanse")}

read_file("chameleon", True)
read_file("expanse", True)

### Chameleon

In [None]:
# Testbed selector for the per-feature importance report on transformed features.
# The report call below is currently disabled.
testbed = "chameleon"
#feature_score(testbed, transform=True)

### Expanse

In [None]:
# Testbed selector for the per-feature importance report on transformed features.
# The report call below is currently disabled.
testbed = "expanse"
#feature_score(testbed, transform=True)

### Same Network

In [10]:
# Same-network baseline on the transformed features: 3-fold cross-validation
# of every AutoML candidate, per testbed / CC algorithm / bandwidth-RTT combo.
# Each result is stored as {"Accuracy": mean, "Std Dev": std}, the same schema
# the raw-feature run writes, so the two JSON files can be compared key by key.
# (Previously only the bare mean was stored here.)
mySameNetworkWithTransformation = dict()
for env in bws:
    mySameNetworkWithTransformation[env] = dict()
    for cc_algo in cc_algos:
        mySameNetworkWithTransformation[env][cc_algo] = dict()
        print(env, cc_algo)
        for bw in bws[env]:
            for rtt in rtts[env]:
                train = "b{0}d{1}".format(bw,rtt)
                mySameNetworkWithTransformation[env][cc_algo][train] = dict()
                # Only the rows with report_sec == 10 are evaluated.
                df = data[env][cc_algo][train].copy()
                df = df[df.report_sec == 10]
                y = df.label
                X = df[feature_list]
                for model in models:
                    print('\t', model)
                    clf = make_pipeline(PowerTransformer(), model)
                    # Fold scores are rounded first; mean/std use the rounded values,
                    # matching the raw-feature cell.
                    cv_res = np.round(cross_val_score(clf, X, y, cv=3),2)
                    mySameNetworkWithTransformation[env][cc_algo][train][str(model)] = {"Accuracy": np.mean(cv_res), "Std Dev": np.std(cv_res)}

                    print("\t\t", train, cv_res)
                    print("\t\t Average: {0}, Std dev: {1}".format(np.round(np.mean(cv_res), 2),
                                                                np.round(np.std(cv_res), 2)))
                      

expanse cubic
	 AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.91 0.88 0.92]
		 Average: 0.9, Std dev: 0.02
	 AutoSklearn2Classifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.92 0.88 0.94]
		 Average: 0.91, Std dev: 0.02
expanse htcp
	 AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.93 0.93 0.95]
		 Average: 0.94, Std dev: 0.01
	 AutoSklearn2Classifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.94 0.95 0.95]
		 Average: 0.95, Std dev: 0.0
expanse bbr
	 AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.98 0.97 0.97]
		 Average: 0.97, Std dev: 0.0
	 AutoSklearn2Classifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b1000d60 [0.98 0.98 0.98]
		 Average: 0.98, Std dev: 0.0
chameleon cubic
	 AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30)
		 b10

In [11]:
import json

# Persist the transformed-feature same-network scores next to the notebook.
with open("Same Network With Transformation.json", "w") as json_out:
    json.dump(mySameNetworkWithTransformation, json_out)

### Chameleon to Expanse

In [14]:
# Train on Chameleon, evaluate on Expanse, using the transformed features.
# The key is taken after fitting, and the result is copied so later runs
# cannot overwrite it.
chameleonToExpanse2 = {}
for model in models:
    transfer_result = tl_evaluate("chameleon", "expanse", model)
    chameleonToExpanse2[str(model)] = deepcopy(transfer_result)

CC Algorithms: cubic	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30) b1000d35 83.52 0.0 [83.52]
CC Algorithms: htcp	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30) b1000d35 84.81 0.0 [84.81]
CC Algorithms: bbr	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30) b1000d35 94.41 0.0 [94.41]
CC Algorithms: cubic	AutoSklearn2Classifier(memory_limit=4096, metric=accuracy, n_jobs=-1,
                       per_run_time_limit=12, time_left_for_this_task=30) b1000d35 80.11 0.0 [80.11]
CC Algorithms: htcp	AutoSklearn2Classifier(memory_limit=4096, metric=accuracy, n_jobs=-1,
                       per_run_time_limit=12, time_left_for_this_task=30) b1000d35 85.93 0.0 [85.93]
CC Algorithms: bbr	AutoSklearn2Classifier(memory_limit=4096, metric=accuracy, n_jobs=-1,
            

In [15]:
import json

# Persist the transformed-feature Chameleon -> Expanse transfer scores.
with open("Chameleon to Expanse With Transformation.json", "w") as json_out:
    json.dump(chameleonToExpanse2, json_out)

### Expanse to Chameleon

In [16]:
# Train on Expanse, evaluate on Chameleon, using the transformed features.
# tl_evaluate() hands back its shared module-level `output` dict, so each
# result must be deep-copied (as the sibling transfer cells do); without the
# copy, every model's entry would point at the same object and show only the
# last model's scores.
expanseToChameleon2 = dict()
for model in models:
    expanseToChameleon2[str(model)] = deepcopy(tl_evaluate("expanse", "chameleon", model))

CC Algorithms: cubic	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30) b1000d60 85.33 0.0 [85.33]
CC Algorithms: htcp	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30) b1000d60 67.37 0.0 [67.37]
CC Algorithms: bbr	AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30) b1000d60 93.52 0.0 [93.52]
CC Algorithms: cubic	AutoSklearn2Classifier(memory_limit=4096, metric=accuracy, n_jobs=-1,
                       per_run_time_limit=12, time_left_for_this_task=30) b1000d60 83.92 0.0 [83.92]
CC Algorithms: htcp	AutoSklearn2Classifier(memory_limit=4096, metric=accuracy, n_jobs=-1,
                       per_run_time_limit=12, time_left_for_this_task=30) b1000d60 70.22 0.0 [70.22]
CC Algorithms: bbr	AutoSklearn2Classifier(memory_limit=4096, metric=accuracy, n_jobs=-1,
            

In [17]:
import json

# Persist the transformed-feature Expanse -> Chameleon transfer scores.
with open("Expanse to Chameleon With Transformation.json", "w") as json_out:
    json.dump(expanseToChameleon2, json_out)