# AutoML trained on every file, tested on one at a time (30 seconds, 4 cores)
All models are untuned. The model is trained on file A and tested on files B, C, D, ...; then trained on file B and tested on files A, C, D, ...; and so on for every file.

In [1]:
import sys
sys.path.append("../../")
from time import process_time
from os import listdir, chdir

def warn(*args, **kwargs):
  """Accept any arguments and do nothing; a silent stand-in for warnings.warn."""
  pass
import warnings
# Rebind warnings.warn to the no-op above. This is done before the project and
# library imports that follow in this cell.
warnings.warn = warn

from modules.NetworkTraffic import NetworkTraffic
from sklearn import model_selection, metrics

from autosklearn.experimental.askl2 import AutoSklearn2Classifier

# Switch into the data directory and collect the name of every CSV file in it.
# Later cells use these bare names when loading data and write their result
# files with relative paths.
chdir("../../data")
FilesToTest = [entry for entry in listdir() if entry.endswith(".csv")]

# Presumably the test split fraction (single-element list) -- TODO confirm.
TestSize = [0.4]
# NOTE(review): the notebook title mentions "30 seconds, 4 cores", while the
# settings below request 300 seconds and n_jobs=-1 -- confirm which is intended.
ModelsToTest = [
  AutoSklearn2Classifier(
    time_left_for_this_task=300,
    n_jobs=-1,
    memory_limit=4096,
  ),
]
# Filled in by the evaluation cell further down.
OutputResults = {}

In [2]:
def trainModel(model, x_train, y_train):
  """Fit ``model`` on the given training data and time the fit.

  The model is fitted in place; no copy is made.

  Args:
    model: object exposing ``fit(x, y)``.
    x_train: training features, passed straight to ``fit``.
    y_train: training labels, passed straight to ``fit``.

  Returns:
    A two-element list ``[model, elapsed]`` where ``model`` is the same object
    that was passed in and ``elapsed`` is the ``process_time()`` difference
    measured around the ``fit`` call.
  """
  # NOTE(review): process_time() counts CPU time of the current process only
  # (no sleep, no child processes) -- confirm this is the intended "runtime".
  began = process_time()
  model.fit(x_train, y_train)
  elapsed = process_time() - began
  return [model, elapsed]


def testModel(model, x_test, y_test, runtime):
  """Score a fitted model on one test set and collect the results.

  Args:
    model: fitted estimator exposing ``predict``; ``show_models()`` and
      ``leaderboard()`` are queried on a best-effort basis.
    x_test: test features, passed to ``model.predict``.
    y_test: true labels compared against the predictions.
    runtime: training time to record alongside the scores (stored as given).

  Returns:
    ``{str(model): results}`` where ``results`` maps "Accuracy",
    "Balanced Accuracy", "F1 Micro", "Precision Micro", "Recall Micro" and
    "Runtime" to their values, plus "Final Ensemble" (``None`` when
    ``show_models()`` raises ``KeyError``) and, when available, "Leaderboard".
  """
  y_pred = model.predict(x_test)
  # Results
  tempDict = {
    "Accuracy": metrics.accuracy_score(y_test, y_pred),
    "Balanced Accuracy": metrics.balanced_accuracy_score(y_test, y_pred),
    "F1 Micro": metrics.f1_score(y_test, y_pred, average='micro'),
    # Precision is computed with precision_score (previously f1_score was
    # called here by mistake).
    "Precision Micro": metrics.precision_score(y_test, y_pred, average='micro'),
    "Recall Micro": metrics.recall_score(y_test, y_pred, average='micro'),
    "Runtime": runtime,
  }
  try:
    tempDict["Final Ensemble"] = model.show_models()
  except KeyError:
    tempDict["Final Ensemble"] = None
  try:
    tempDict["Leaderboard"] = str(model.leaderboard())
  except Exception:
    # The leaderboard is optional: leave the key out when it cannot be built,
    # but let KeyboardInterrupt/SystemExit propagate.
    pass
  return {str(model): tempDict}

In [3]:
# Load every data file once up front and keep the loaded object under its
# file name, so the evaluation loop can reuse it for training and testing.
FileObjects = {
  name: NetworkTraffic(name, testSize=0.4, doNorm=True, doNormAll=True)
  for name in FilesToTest
}

In [4]:
from copy import deepcopy

# Start from an empty result table so re-running this cell does not mix runs.
OutputResults.clear()

# For each file: fit the (single) configured model on that file's data, then
# score it on every other file. Results are stored as
# OutputResults[trained_on][tested_on] = {model description: scores}.
# Progress is printed as "trained_on [tested_on tested_on ... ]".
for index, file in enumerate(FilesToTest):
  print(file, end=' ')
  OutputResults[file] = dict()
  currentFileData = FileObjects[file]
  # Every file except the one currently used for training.
  restOfFiles = FilesToTest[:index] + FilesToTest[index + 1:]
  # The same estimator object (ModelsToTest[0]) is refitted on each iteration.
  api, runtime = trainModel(ModelsToTest[0], currentFileData.data, currentFileData.target)
  print('[', end='')
  for file2 in restOfFiles:
    print(file2, end=' ')
    testFileData = FileObjects[file2]
    x_test, y_test = testFileData.data, testFileData.target
    OutputResults[file][file2] = testModel(api, x_test, y_test, runtime)
  print(']')

b5000d100.csv [b5000d30.csv b100d10.csv b1000d10.csv b1000d100.csv b100d100.csv b5000d10.csv b1000d30.csv b100d30.csv ]
b5000d30.csv [b5000d100.csv b100d10.csv b1000d10.csv b1000d100.csv b100d100.csv b5000d10.csv b1000d30.csv b100d30.csv ]
b100d10.csv [b5000d100.csv b5000d30.csv b1000d10.csv b1000d100.csv b100d100.csv b5000d10.csv b1000d30.csv b100d30.csv ]
b1000d10.csv [b5000d100.csv b5000d30.csv b100d10.csv b1000d100.csv b100d100.csv b5000d10.csv b1000d30.csv b100d30.csv ]
b1000d100.csv [b5000d100.csv b5000d30.csv b100d10.csv b1000d10.csv b100d100.csv b5000d10.csv b1000d30.csv b100d30.csv ]
b100d100.csv [b5000d100.csv b5000d30.csv b100d10.csv b1000d10.csv b1000d100.csv b5000d10.csv b1000d30.csv b100d30.csv ]
b5000d10.csv [b5000d100.csv b5000d30.csv b100d10.csv b1000d10.csv b1000d100.csv b100d100.csv b1000d30.csv b100d30.csv ]
b1000d30.csv [b5000d100.csv b5000d30.csv b100d10.csv b1000d10.csv b1000d100.csv b100d100.csv b5000d10.csv b100d30.csv ]
b100d30.csv [b5000d100.csv b5000d30.csv 

In [5]:
# Make every stored result value JSON-friendly: anything that is not a str,
# int or float (or a subclass of one of them) is replaced by its str() form.
#
# NOTE: copyOfOutput is an alias of OutputResults, not a copy -- the
# conversion is applied to OutputResults itself, which is the object the JSON
# export cell serialises.
copyOfOutput = OutputResults
for perTestFile in OutputResults.values():
  for perModel in perTestFile.values():
    for resultsByName in perModel.values():
      for attribute, value in resultsByName.items():
        # isinstance (rather than an exact type() match) keeps float/int
        # subclasses such as numpy.float64 as numbers instead of turning
        # them into strings.
        if not isinstance(value, (str, int, float)):
          # Replacing the value of an existing key is safe while iterating.
          resultsByName[attribute] = str(value)

In [6]:
import json
# Write the complete nested result table as indented JSON.
with open("EveryFileTransfer_Untuned_AllTestResults.json", "w") as jsonFile:
  json.dump(OutputResults, jsonFile, indent=2)

In [7]:
# Flatten the nested results into one CSV row per (trained on, tested on, model).
with open("EveryFileTransfer_Untuned_ModelResults.csv", "w") as f3:
  f3.write("Trained On,Tested On,Model,Accuracy,Runtime\n")
  for trainedOn, perTestFile in OutputResults.items():
    for testedOn, perModel in perTestFile.items():
      for modelName, scores in perModel.items():
        # Commas are stripped from the model description so that it does not
        # add extra columns to the row.
        f3.write(f"{trainedOn},{testedOn},{modelName.replace(',', '')},{scores['Accuracy']},{scores['Runtime']}\n")