# AutoML (askl) trained on every file individually, tested on the rest (4 cores, 30 seconds per file)
AutoML's data preprocessing is applied to these tests. The model will be trained on file A, tested on files B,C,D.., then on B, tested on A,C,D.. etc

In [1]:
import sys
sys.path.append("../../")
from time import process_time
from os import listdir, chdir, environ

def warn(*args, **kwargs):
  pass
import warnings
warnings.warn = warn

if not sys.warnoptions:
    warnings.simplefilter("ignore")
    environ["PYTHONWARNINGS"] = "ignore"

from modules.NetworkTraffic import NT2
from sklearn import model_selection, metrics
from autosklearn.classification import AutoSklearnClassifier

FilesToTest = list()
chdir("../../data")
for file in listdir():
  if file.endswith(".csv"):
    FilesToTest.append(file)

TestSize = [0.4]
ModelsToTest = [AutoSklearnClassifier(time_left_for_this_task=30, memory_limit=4096, n_jobs=-1)]
OutputResults = dict()
#ModelResults = dict()

In [2]:
import pandas as pd
from copy import deepcopy
df_leader = None

def testMe():
  global df_leader
  df_leader = pd.DataFrame()
  OutputResults.clear()
  #ModelResults.clear()

  for index, file in enumerate(FilesToTest):
    print(file, end=', ')
    OutputResults[file] = dict()
    currentFileData = NT2(file, transform=True, drop=True)
    restOfFiles = deepcopy(FilesToTest)
    restOfFiles.pop(index)
    restOfTheFilesData = NT2(restOfFiles, transform=True, drop=True)

    for model in ModelsToTest:
      print(f"{file} : {str(model)}", end=", ")
      model.fit(currentFileData.data, currentFileData.target)
      y_pred = model.predict(restOfTheFilesData.data)
      score = metrics.accuracy_score(restOfTheFilesData.target, y_pred)
      OutputResults[file].update({str(model): {"Accuracy": score}})
      try:
        OutputResults[file][str(model)]["Final Ensemble"] = str(model.show_models())
      except:
        pass
      try:
        if df_leader.empty: df_leader = model.leaderboard()
        else: df_leader = pd.concat([df_leader, model.leaderboard()], ignore_index=False)
      except: pass
      try:
        OutputResults[file][str(model)]["Sprint"] = str(model.sprint_statistics())
      except KeyError:
        pass

In [3]:
import json
from os import path

def writeMe():
  with open("EveryFileTransfer_Untuned_AllTestResults.json", "a") as f:
    f.write(json.dumps(OutputResults, indent=2))

  df_leader.to_csv("Leaderboard.csv", mode='a')

  with open("EveryFileTransfer_Untuned_ModelResults.csv", "a") as f3:
    if path.exists("EveryFileTransfer_Untuned_ModelResults.csv"): f3.write("File Trained On,Model,Accuracy,Runtime\n")
    for file in OutputResults:
      for model in OutputResults[file]:
        f3.write(f"{file},{model},{OutputResults[file][model]['Accuracy']}\n")

In [4]:
REPEATS = 2
for _ in range(0, REPEATS):
  print(f"\n------{_}------")
  testMe()
  writeMe()


------0------
b5000d100.csv, b5000d100.csv : AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, time_left_for_this_task=30), b5000d30.csv, b5000d30.csv : AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30), b100d10.csv, b100d10.csv : AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30), b1000d10.csv, b1000d10.csv : AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30), b1000d100.csv, b1000d100.csv : AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30), b100d100.csv, b100d100.csv : AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limit=12,
                      time_left_for_this_task=30), b5000d10.csv, b5000d10.csv : AutoSklearnClassifier(memory_limit=4096, n_jobs=-1, per_run_time_limi