In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np
from imblearn.under_sampling import RandomUnderSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.model_selection import cross_val_score

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier

In [3]:
# `root` is where the feature-list CSV lives; `data_root` is the base directory
# for the per-environment log CSVs. Both currently point at the same folder.
root = "../../data/other/"
data_root = root

# Estimators compared throughout the notebook, all with default
# hyper-parameters. str(estimator) is used as the result key in later cells.
models = [
    RandomForestClassifier(),
    GradientBoostingClassifier(),
    DecisionTreeClassifier(),
    LinearSVC(),
    MLPClassifier(),
]

In [4]:
"""
Expanse >>  From TACC to SDSC (rtt: 58ms)
Chameleon >>  From TACC to UC (rtt: 32ms)
Labels 
Code <> Name
1    <> Normal
2    <> Duplicate
3    <> Reorder
4    <> Corrupt
5    <> Loss
6    <> Jitter
"""

# Bandwidth and RTT values per testbed. Each (bw, rtt) pair is rendered as
# "b<bw>d<rtt>" and used both as the CSV file stem and as a key in `data`.
bws = dict(expanse=[1000], chameleon=[1000])
rtts = dict(expanse=[60], chameleon=[35])

# Congestion-control algorithms; each has its own sub-folder of logs.
cc_algos = ["cubic", "htcp", "bbr"]

# Label codes kept for classification (see the legend in the string above).
labels = list(range(1, 7))

# Feature table: the `feature` column lists the model inputs. read_file() also
# uses its `normalizer` column when transform=True.
feature_file = root + "ss_7_features.csv"
features = pd.read_csv(feature_file)
feature_list = [name for name in features["feature"].values]

In [5]:
def read_file(env, transform=True):
    """Load every configured CSV of ``env`` into the module-level ``data`` dict.

    For each algorithm in ``cc_algos`` and each bandwidth/RTT pair configured
    for ``env`` in ``bws``/``rtts``, this reads
    ``<data_root><env>/sslog_ps/<cc_algo>/b<bw>d<rtt>.csv``, keeps the rows
    whose ``label`` is in ``labels``, imputes missing values, optionally
    normalizes the features, and stores the resulting frame at
    ``data[env][cc_algo]["b<bw>d<rtt>"]`` (replacing any previous entry).

    Parameters
    ----------
    env : str
        Environment name; must be a key of ``bws`` and ``rtts``.
    transform : bool, default True
        When True, each column named in ``features.feature`` is divided by the
        column named in the same row's ``normalizer``.

    Returns
    -------
    None
        Results are written into ``data``.
    """
    env_store = data.setdefault(env, {})

    for cc_algo in cc_algos:
        for bw in bws[env]:
            for rtt in rtts[env]:
                combo = "b{0}d{1}".format(bw, rtt)
                csv_path = data_root + "{0}/sslog_ps/{1}/{2}.csv".format(env, cc_algo, combo)

                df = pd.read_csv(csv_path)
                df = df.sort_values(by=["label"]).reset_index(drop=True)
                df = df[df.label.isin(labels)]

                # Fill each missing value with the mean of that column within
                # the row's own label. transform("mean") returns one value per
                # row, aligned on df's index. The previous
                # fillna(groupby(...).mean()) aligned on a 0..k-1 group index
                # and therefore only touched the first k rows, using the
                # wrong label's mean.
                numeric_cols = df.select_dtypes(include="number").columns.drop("label", errors="ignore")
                if len(numeric_cols) > 0:
                    label_means = df.groupby("label")[numeric_cols].transform("mean")
                    df = df.fillna(label_means)

                if transform:
                    # Applied in table order, one column at a time, exactly as
                    # listed in `features`. (An earlier variant that divided
                    # "rtt"-normalized features by the configured rtt is
                    # disabled.)
                    for feature_name, normalizer_name in zip(features["feature"], features["normalizer"]):
                        df[feature_name] = df[feature_name] / df[normalizer_name]

                # Division by zero yields +/-inf or NaN; groups with no
                # observed value keep NaN after imputation. Map all to 0.
                df = df.replace([np.inf, -np.inf, np.nan], 0)

                env_store.setdefault(cc_algo, {})[combo] = df

In [6]:
def feature_score(env, transform=True, size=0.8):
    """Print hold-out F1 statistics and mean feature importances for ``env``.

    Calls ``read_file(env, transform)`` first, which rewrites the entries of
    ``data[env]``. Then, for each algorithm in ``cc_algos`` and each configured
    bandwidth/RTT combination, the ``report_sec == 10`` rows are split into
    train/test parts, the training part is passed through
    ``RandomUnderSampler(sampling_strategy="all")``, and every estimator in
    ``models`` is fitted (in place) and scored with micro-averaged F1.

    Parameters
    ----------
    env : str
        Environment name; a key of ``bws``, ``rtts`` and ``data``.
    transform : bool, default True
        Forwarded to ``read_file``.
    size : float, default 0.8
        ``train_size`` passed to ``train_test_split``.

    Returns
    -------
    None
        All results are printed. The printed average/stdv covers every
        model and combination of the current algorithm.
    """
    read_file(env, transform)
    for cc_algo in cc_algos:
        print("CC Algorithms: {0}".format(cc_algo))
        results = []
        # score[str(model)][feature] -> list of importances, one per fit.
        score = {}
        for bw in bws[env]:
            for rtt in rtts[env]:
                combo = "b{0}d{1}".format(bw, rtt)
                df = data[env][cc_algo][combo].copy()
                df = df[df.report_sec == 10]
                y = df.label
                X = df[feature_list]
                X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=size)
                X_train, y_train = RandomUnderSampler(sampling_strategy="all").fit_resample(X_train, y_train)

                for model in models:
                    clf = model
                    clf.fit(X_train, y_train)
                    y_pred = clf.predict(X_test)
                    f1_score = np.round(metrics.f1_score(y_test, y_pred, average='micro') * 100, 2)
                    results.append(f1_score)

                    # Not every estimator in `models` exposes
                    # feature_importances_ (e.g. LinearSVC, MLPClassifier);
                    # those are scored but left out of the importance report.
                    importances = getattr(clf, "feature_importances_", None)
                    if importances is None:
                        continue

                    # Key by str(model) consistently and accumulate across
                    # combinations instead of resetting on every iteration.
                    model_scores = score.setdefault(str(model), {})
                    for column, value in zip(X.columns, np.round(importances, 2)):
                        model_scores.setdefault(column, []).append(value)

        print("Sample Size: {0}, Average: {1}, Stdv: {2}".format(size, np.round(np.mean(results), 2),
                                                                 np.round(np.std(results), 2)))
        print("Importance Score:")
        for model_name, model_scores in score.items():
            print('\t', model_name)
            for key, values in model_scores.items():
                print("\t\t{0}: {1}".format(key, np.round(np.mean(values), 2)))
        print()

In [11]:
from copy import deepcopy
# Shared result store: tl_evaluate() rewrites and returns this same dict on
# every call, so callers that want to keep a result must copy it.
output = {}
def tl_evaluate(env1, env2, model, t_limit=10, verbose=True, file=None):
    """Train ``model`` on ``env1`` data and score it on ``env2`` data.

    For every algorithm in ``cc_algos`` a ``PowerTransformer`` + ``model``
    pipeline is fitted on the ``report_sec == 10`` rows of each ``env1``
    bandwidth/RTT combination and evaluated on the ``report_sec == t_limit``
    rows of each ``env2`` combination. ``model`` itself is fitted in place.

    Parameters
    ----------
    env1, env2 : str
        Source (training) and target (evaluation) environments; keys of
        ``bws``, ``rtts`` and ``data``.
    model : estimator
        Final step of the pipeline.
    t_limit : int, default 10
        ``report_sec`` value selecting the evaluation rows. Training always
        uses ``report_sec == 10``.
    verbose : bool, default True
        True prints one line per training combination; False prints a
        per-algorithm summary at the end instead.
    file : writable text handle or None
        When given, one CSV line
        ``env1,env2,train,combo,cc_algo,t_limit,f_score`` is written per
        evaluation.

    Returns
    -------
    dict
        The module-level ``output`` dict:
        ``output[cc_algo]["<env1> -> <env2>"]`` holds ``"Scores"`` (accuracy
        percentages), ``"Average"`` and ``"Std Dev"``. If ``env1`` has several
        bandwidth/RTT combinations, the entry reflects the last one trained.
    """
    transfer = f"{env1} -> {env2}"

    for cc_algo in cc_algos:
        print("CC Algorithms: {0}".format(cc_algo), end='\t')
        output[cc_algo] = {transfer: {}}

        for bw in bws[env1]:
            for rtt in rtts[env1]:
                train = "b{0}d{1}".format(bw, rtt)
                train_df = data[env1][cc_algo][train]
                train_df = train_df[train_df.report_sec == 10]

                clf = make_pipeline(PowerTransformer(), model)
                clf.fit(train_df[feature_list], train_df.label)

                results = []
                for bw1 in bws[env2]:
                    for rtt1 in rtts[env2]:
                        combo = "b{0}d{1}".format(bw1, rtt1)

                        # Skip the training set only when source and target are
                        # the same environment. An equal combo name in a
                        # different environment is different data and must
                        # still be evaluated.
                        if env1 == env2 and train == combo:
                            continue
                        if rtt1 == 0:
                            continue

                        test_df = data[env2][cc_algo][combo]
                        test_df = test_df[test_df.report_sec == t_limit]
                        y_true = test_df.label
                        y_pred = clf.predict(test_df[feature_list])

                        accuracy = np.round(metrics.accuracy_score(y_true, y_pred) * 100, 2)
                        f_score = np.round(metrics.f1_score(y_true, y_pred, average='micro') * 100, 2)
                        results.append(accuracy)

                        if file is not None:
                            msg = "{0},{1},{2},{3},{4},{5},{6}\n".format(env1, env2, train, combo, cc_algo,
                                                                         t_limit, f_score)
                            file.write(msg)

                if verbose:
                    print(model, train, np.round(np.mean(results), 2), np.round(np.std(results), 2), results)

                output[cc_algo][transfer] = {
                    "Scores": list(results),
                    "Average": np.mean(results),
                    "Std Dev": np.std(results),
                }

    if not verbose:
        # One summary per algorithm/transfer pair. The earlier loop iterated
        # over the stat keys and printed the same numbers once per key.
        for cc, transfers in output.items():
            for transfer_name, stats in transfers.items():
                if not stats:
                    continue
                print('\t', cc, transfer_name, end='')
                print("\tAverage:{0}\n\tStd Dev: {1}\n".format(stats["Average"], stats["Std Dev"]))

    return output

# Before Transformation Applied

In [33]:
# Start from an empty store and load both testbeds WITHOUT feature
# normalization; read_file() fills data[<env>][<cc_algo>][<combo>].
data = {name: {} for name in ("chameleon", "expanse")}
for loaded_env in tuple(data):
    read_file(loaded_env, transform=False)

### Chameleon

In [None]:
testbed = "chameleon"
# Feature-importance run on the raw features is disabled; uncomment to execute it.
#feature_score(testbed, transform=False)

### Expanse

In [None]:
testbed = "expanse"
# Feature-importance run on the raw features is disabled; uncomment to execute it.
#feature_score(testbed, transform=False)

## Same Network

In [15]:
# Same-network baseline: 10-fold cross-validation of every model on each
# environment / algorithm / bandwidth-RTT combination currently held in `data`.
# NOTE(review): in the saved notebook this cell's execution count (15) is lower
# than that of the cell that loads `data` (33); re-run top to bottom to confirm
# these scores were computed on the untransformed data.
mySameNetworkNoTransformation = {}
for env in bws:
    per_env = mySameNetworkNoTransformation[env] = {}
    for cc_algo in cc_algos:
        per_algo = per_env[cc_algo] = {}
        print(env, cc_algo)
        for bw in bws[env]:
            for rtt in rtts[env]:
                train = "b{0}d{1}".format(bw,rtt)
                per_combo = per_algo[train] = {}

                # Only the rows reported at second 10 are used.
                samples = data[env][cc_algo][train].copy()
                samples = samples[samples.report_sec == 10]
                targets = samples.label
                inputs = samples[feature_list]

                for model in models:
                    print('\t', model)
                    pipeline = make_pipeline(PowerTransformer(), model)
                    # Fold scores are rounded to two decimals before the
                    # mean / std dev are taken.
                    cv_res = np.round(cross_val_score(pipeline, inputs, targets, cv=10),2)
                    fold_mean = np.mean(cv_res)
                    fold_std = np.std(cv_res)
                    per_combo[str(model)] = {"Accuracy": fold_mean, "Std Dev": fold_std}

                    print("\t\t", train, cv_res)
                    print("\t\t Average: {0}, Std dev: {1}".format(np.round(fold_mean, 2),
                                                                np.round(fold_std, 2)))
                      

expanse cubic
	 RandomForestClassifier()
		 b1000d60 [0.92 0.91 0.94 0.95 0.94 0.91 0.9  0.93 0.99 0.94]
		 Average: 0.93, Std dev: 0.02
	 GradientBoostingClassifier()
		 b1000d60 [0.93 0.92 0.94 0.96 0.94 0.93 0.89 0.94 0.99 0.93]
		 Average: 0.94, Std dev: 0.02
	 DecisionTreeClassifier()
		 b1000d60 [0.91 0.91 0.9  0.94 0.9  0.88 0.89 0.93 0.99 0.9 ]
		 Average: 0.92, Std dev: 0.03
	 LinearSVC()
		 b1000d60 [0.89 0.91 0.87 0.91 0.88 0.84 0.79 0.9  0.96 0.86]
		 Average: 0.88, Std dev: 0.04
	 MLPClassifier()
		 b1000d60 [0.91 0.9  0.94 0.93 0.93 0.9  0.88 0.94 0.98 0.92]
		 Average: 0.92, Std dev: 0.03
expanse htcp
	 RandomForestClassifier()
		 b1000d60 [0.96 0.95 0.95 0.94 0.95 0.96 0.97 0.98 0.97 0.98]
		 Average: 0.96, Std dev: 0.01
	 GradientBoostingClassifier()
		 b1000d60 [0.96 0.95 0.96 0.95 0.96 0.96 0.97 0.97 0.97 0.97]
		 Average: 0.96, Std dev: 0.01
	 DecisionTreeClassifier()
		 b1000d60 [0.92 0.94 0.93 0.92 0.93 0.95 0.95 0.95 0.96 0.96]
		 Average: 0.94, Std dev: 0.01
	 L

In [19]:
# Rebuild the same-network results as a fresh nested dict whose innermost
# (model) keys are passed through str().
copyResults = deepcopy(mySameNetworkNoTransformation)
c2 = {}
for env, per_algo in copyResults.items():
    env_out = c2[env] = {}
    for cc_algo, per_train in per_algo.items():
        algo_out = env_out[cc_algo] = {}
        for train, per_model in per_train.items():
            train_out = algo_out[train] = {}
            for model, result in per_model.items():
                train_out[str(model)] = result

In [20]:
import json
# Persist the same-network scores (no transformation) as JSON in the working directory.
with open("Same Network No Transformation.json", "w") as json_file:
    json.dump(mySameNetworkNoTransformation, json_file)

## Transfer Learning 

### Expanse to Chameleon

In [34]:
# Train on Expanse, evaluate on Chameleon, once per model.
expanseToChameleon = {}
for model in models:
    transfer_scores = tl_evaluate("expanse", "chameleon", model)
    # tl_evaluate returns its shared module-level dict, which the next call
    # overwrites, so keep an independent copy.
    expanseToChameleon[str(model)] = deepcopy(transfer_scores)

CC Algorithms: cubic	RandomForestClassifier() b1000d60 59.32 0.0 [59.32]
CC Algorithms: htcp	RandomForestClassifier() b1000d60 29.0 0.0 [29.0]
CC Algorithms: bbr	RandomForestClassifier() b1000d60 48.78 0.0 [48.78]
CC Algorithms: cubic	GradientBoostingClassifier() b1000d60 50.02 0.0 [50.02]
CC Algorithms: htcp	GradientBoostingClassifier() b1000d60 41.89 0.0 [41.89]
CC Algorithms: bbr	GradientBoostingClassifier() b1000d60 50.89 0.0 [50.89]
CC Algorithms: cubic	DecisionTreeClassifier() b1000d60 64.25 0.0 [64.25]
CC Algorithms: htcp	DecisionTreeClassifier() b1000d60 50.0 0.0 [50.0]
CC Algorithms: bbr	DecisionTreeClassifier() b1000d60 49.96 0.0 [49.96]
CC Algorithms: cubic	LinearSVC() b1000d60 65.36 0.0 [65.36]
CC Algorithms: htcp	LinearSVC() b1000d60 16.67 0.0 [16.67]
CC Algorithms: bbr	LinearSVC() b1000d60 16.67 0.0 [16.67]
CC Algorithms: cubic	MLPClassifier() b1000d60 64.62 0.0 [64.62]
CC Algorithms: htcp	MLPClassifier() b1000d60 16.67 0.0 [16.67]
CC Algorithms: bbr	MLPClassifier() b1000

In [35]:
import json
# Persist the Expanse -> Chameleon scores (no transformation) as JSON.
with open("Expanse to Chameleon No Transformation.json", "w") as json_file:
    json.dump(expanseToChameleon, json_file)

### Chameleon to Expanse

In [36]:
# Train on Chameleon, evaluate on Expanse, once per model.
chameleonToExpanse = {}
for model in models:
    transfer_scores = tl_evaluate("chameleon", "expanse", model)
    # tl_evaluate returns its shared module-level dict, which the next call
    # overwrites, so keep an independent copy.
    chameleonToExpanse[str(model)] = deepcopy(transfer_scores)

CC Algorithms: cubic	RandomForestClassifier() b1000d35 59.52 0.0 [59.52]
CC Algorithms: htcp	RandomForestClassifier() b1000d35 65.37 0.0 [65.37]
CC Algorithms: bbr	RandomForestClassifier() b1000d35 71.89 0.0 [71.89]
CC Algorithms: cubic	GradientBoostingClassifier() b1000d35 57.0 0.0 [57.0]
CC Algorithms: htcp	GradientBoostingClassifier() b1000d35 74.04 0.0 [74.04]
CC Algorithms: bbr	GradientBoostingClassifier() b1000d35 79.7 0.0 [79.7]
CC Algorithms: cubic	DecisionTreeClassifier() b1000d35 70.67 0.0 [70.67]
CC Algorithms: htcp	DecisionTreeClassifier() b1000d35 67.52 0.0 [67.52]
CC Algorithms: bbr	DecisionTreeClassifier() b1000d35 79.15 0.0 [79.15]
CC Algorithms: cubic	LinearSVC() b1000d35 16.67 0.0 [16.67]
CC Algorithms: htcp	LinearSVC() b1000d35 16.67 0.0 [16.67]
CC Algorithms: bbr	LinearSVC() b1000d35 16.67 0.0 [16.67]
CC Algorithms: cubic	MLPClassifier() b1000d35 16.67 0.0 [16.67]
CC Algorithms: htcp	MLPClassifier() b1000d35 16.67 0.0 [16.67]
CC Algorithms: bbr	MLPClassifier() b1000

In [37]:
import json
# Persist the Chameleon -> Expanse scores (no transformation) as JSON.
with open("Chameleon to Expanse No Transformation.json", "w") as json_file:
    json.dump(chameleonToExpanse, json_file)

# After Transformation Applied

In [38]:
# Reset the store and reload both testbeds WITH feature normalization
# (read_file divides each feature by its configured normalizer column).
data = {name: {} for name in ("chameleon", "expanse")}
for loaded_env in tuple(data):
    read_file(loaded_env, transform=True)

### Chameleon

In [None]:
testbed = "chameleon"
# Feature-importance run on the transformed features is disabled; uncomment to execute it.
#feature_score(testbed, transform=True)

### Expanse

In [None]:
testbed = "expanse"
# Feature-importance run on the transformed features is disabled; uncomment to execute it.
#feature_score(testbed, transform=True)

### Same Network

In [16]:
# Same-network evaluation on the data currently held in `data`: 10-fold
# cross-validation of every model on each environment / algorithm /
# bandwidth-RTT combination.
# NOTE(review): in the saved notebook this cell's execution count (16) is lower
# than that of the cell that loads the transformed `data` (38); re-run top to
# bottom to confirm these scores were computed on the transformed data.
mySameNetworkWithTransformation = dict()
for env in bws:
    mySameNetworkWithTransformation[env] = dict()
    for cc_algo in cc_algos:
        mySameNetworkWithTransformation[env][cc_algo] = dict()
        print(env, cc_algo)
        for bw in bws[env]:
            for rtt in rtts[env]:
                train = "b{0}d{1}".format(bw,rtt)
                mySameNetworkWithTransformation[env][cc_algo][train] = dict()
                # Only the rows reported at second 10 are used.
                df = data[env][cc_algo][train].copy()
                df = df[df.report_sec == 10]
                y = df.label
                X = df[feature_list]
                for model in models:
                    print('\t', model)
                    clf = make_pipeline(PowerTransformer(), model)
                    cv_res = np.round(cross_val_score(clf, X, y, cv=10),2)
                    # Store mean AND spread, using the same
                    # {"Accuracy", "Std Dev"} layout as the no-transformation
                    # results, so both JSON files share one schema and the
                    # std dev is not lost.
                    mySameNetworkWithTransformation[env][cc_algo][train][str(model)] = {
                        "Accuracy": np.mean(cv_res),
                        "Std Dev": np.std(cv_res),
                    }

                    print("\t\t", train, cv_res)
                    print("\t\t Average: {0}, Std dev: {1}".format(np.round(np.mean(cv_res), 2),
                                                                np.round(np.std(cv_res), 2)))
                      

expanse cubic
	 RandomForestClassifier()
		 b1000d60 [0.89 0.94 0.94 0.89 0.93 0.91 0.92 0.92 0.93 0.94]
		 Average: 0.92, Std dev: 0.02
	 GradientBoostingClassifier()
		 b1000d60 [0.91 0.94 0.94 0.89 0.91 0.9  0.91 0.93 0.94 0.94]
		 Average: 0.92, Std dev: 0.02
	 DecisionTreeClassifier()
		 b1000d60 [0.89 0.9  0.92 0.88 0.9  0.87 0.88 0.89 0.91 0.92]
		 Average: 0.9, Std dev: 0.02
	 LinearSVC()
		 b1000d60 [0.89 0.9  0.89 0.85 0.86 0.88 0.91 0.89 0.92 0.92]
		 Average: 0.89, Std dev: 0.02
	 MLPClassifier()
		 b1000d60 [0.9  0.93 0.91 0.87 0.89 0.89 0.89 0.91 0.92 0.92]
		 Average: 0.9, Std dev: 0.02
expanse htcp
	 RandomForestClassifier()
		 b1000d60 [0.92 0.93 0.94 0.92 0.94 0.96 0.94 0.97 0.97 0.96]
		 Average: 0.94, Std dev: 0.02
	 GradientBoostingClassifier()
		 b1000d60 [0.93 0.94 0.96 0.94 0.94 0.96 0.96 0.99 0.97 0.96]
		 Average: 0.96, Std dev: 0.02
	 DecisionTreeClassifier()
		 b1000d60 [0.93 0.91 0.94 0.93 0.93 0.96 0.95 0.96 0.95 0.96]
		 Average: 0.94, Std dev: 0.02
	 Lin

In [19]:
import json
# Persist the same-network scores (with transformation) as JSON in the working directory.
with open("Same Network With Transformation.json", "w") as json_file:
    json.dump(mySameNetworkWithTransformation, json_file)

### Chameleon to Expanse

In [39]:
# Train on Chameleon, evaluate on Expanse, once per model, using whatever
# `data` currently holds.
chameleonToExpanse2 = {}
for model in models:
    transfer_scores = tl_evaluate("chameleon", "expanse", model)
    # tl_evaluate returns its shared module-level dict, which the next call
    # overwrites, so keep an independent copy.
    chameleonToExpanse2[str(model)] = deepcopy(transfer_scores)

CC Algorithms: cubic	RandomForestClassifier() b1000d35 82.37 0.0 [82.37]
CC Algorithms: htcp	RandomForestClassifier() b1000d35 86.15 0.0 [86.15]
CC Algorithms: bbr	RandomForestClassifier() b1000d35 94.48 0.0 [94.48]
CC Algorithms: cubic	GradientBoostingClassifier() b1000d35 76.7 0.0 [76.7]
CC Algorithms: htcp	GradientBoostingClassifier() b1000d35 85.33 0.0 [85.33]
CC Algorithms: bbr	GradientBoostingClassifier() b1000d35 93.59 0.0 [93.59]
CC Algorithms: cubic	DecisionTreeClassifier() b1000d35 82.59 0.0 [82.59]
CC Algorithms: htcp	DecisionTreeClassifier() b1000d35 70.85 0.0 [70.85]
CC Algorithms: bbr	DecisionTreeClassifier() b1000d35 93.0 0.0 [93.0]
CC Algorithms: cubic	LinearSVC() b1000d35 82.22 0.0 [82.22]
CC Algorithms: htcp	LinearSVC() b1000d35 83.48 0.0 [83.48]
CC Algorithms: bbr	LinearSVC() b1000d35 92.0 0.0 [92.0]
CC Algorithms: cubic	MLPClassifier() b1000d35 82.63 0.0 [82.63]
CC Algorithms: htcp	MLPClassifier() b1000d35 69.44 0.0 [69.44]
CC Algorithms: bbr	MLPClassifier() b1000d3

In [40]:
import json
# Persist the Chameleon -> Expanse scores (with transformation) as JSON.
with open("Chameleon to Expanse With Transformation.json", "w") as json_file:
    json.dump(chameleonToExpanse2, json_file)

### Expanse to Chameleon

In [41]:
# Train on Expanse, evaluate on Chameleon, once per model, using whatever
# `data` currently holds.
expanseToChameleon2 = {}
for model in models:
    transfer_scores = tl_evaluate("expanse", "chameleon", model)
    # tl_evaluate returns its shared module-level dict, which the next call
    # overwrites, so keep an independent copy.
    expanseToChameleon2[str(model)] = deepcopy(transfer_scores)

CC Algorithms: cubic	RandomForestClassifier() b1000d60 80.96 0.0 [80.96]
CC Algorithms: htcp	RandomForestClassifier() b1000d60 79.44 0.0 [79.44]
CC Algorithms: bbr	RandomForestClassifier() b1000d60 96.96 0.0 [96.96]
CC Algorithms: cubic	GradientBoostingClassifier() b1000d60 77.92 0.0 [77.92]
CC Algorithms: htcp	GradientBoostingClassifier() b1000d60 78.78 0.0 [78.78]
CC Algorithms: bbr	GradientBoostingClassifier() b1000d60 96.59 0.0 [96.59]
CC Algorithms: cubic	DecisionTreeClassifier() b1000d60 62.06 0.0 [62.06]
CC Algorithms: htcp	DecisionTreeClassifier() b1000d60 78.7 0.0 [78.7]
CC Algorithms: bbr	DecisionTreeClassifier() b1000d60 93.74 0.0 [93.74]
CC Algorithms: cubic	LinearSVC() b1000d60 81.4 0.0 [81.4]
CC Algorithms: htcp	LinearSVC() b1000d60 78.96 0.0 [78.96]
CC Algorithms: bbr	LinearSVC() b1000d60 96.59 0.0 [96.59]
CC Algorithms: cubic	MLPClassifier() b1000d60 83.92 0.0 [83.92]
CC Algorithms: htcp	MLPClassifier() b1000d60 82.22 0.0 [82.22]
CC Algorithms: bbr	MLPClassifier() b1000

In [42]:
import json
# Persist the Expanse -> Chameleon scores (with transformation) as JSON.
with open("Expanse to Chameleon With Transformation.json", "w") as json_file:
    json.dump(expanseToChameleon2, json_file)