# Traditional Methods trained on every file, tested on one at a time (untuned)
MinMaxScaler is applied in these tests. All models are untuned. Each model is trained on file A and tested on files B, C, D, ...; then trained on file B and tested on files A, C, D, ...; and so on.

In [1]:
import sys
sys.path.append("../../")
from time import process_time
from os import listdir, chdir

def warn(*args, **kwargs):
  """Accept any arguments and do nothing.

  Assigned over ``warnings.warn`` right after this definition, so calls
  routed through that name are silently discarded.
  """
  return None
import warnings
warnings.warn = warn

from modules.NetworkTraffic import NetworkTraffic
from sklearn import model_selection, metrics

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier

# Work from the data directory: the dataset names collected below, and the
# result files written later in this notebook, are all relative to it.
chdir("../../data")

# This notebook writes its own results into this same directory, and one of
# those outputs is a .csv. Skip them so a re-run does not treat a previous
# result file as a dataset.
RESULT_FILE_PREFIX = "EveryFileTransfer_"

# listdir() returns entries in arbitrary order; sort so that the order in which
# files are trained/tested is reproducible between runs.
FilesToTest = sorted(
  name for name in listdir()
  if name.endswith(".csv") and not name.startswith(RESULT_FILE_PREFIX)
)

# NOTE(review): TestSize is not referenced in the cells visible here; the
# NetworkTraffic calls pass a literal testSize=0.4 instead.
TestSize = [0.4]

# Estimators under test, all constructed with default ("untuned") parameters.
# These instances are re-fitted in place for every training file.
ModelsToTest = [RandomForestClassifier(), GradientBoostingClassifier(), DecisionTreeClassifier(), MLPClassifier(), LinearSVC()]

# Nested results: training file -> tested file -> model name -> metrics.
OutputResults = dict()

In [2]:
def trainModel(model, x_train, y_train):
  """Fit ``model`` on the training data and measure how long the fit took.

  The estimator is fitted in place (no copy is made), so the object that is
  returned is the very instance that was passed in.

  Args:
    model: Object exposing ``fit(x, y)``.
    x_train: Training features, forwarded unchanged to ``fit``.
    y_train: Training targets, forwarded unchanged to ``fit``.

  Returns:
    A two-element list ``[fitted_model, seconds]`` where ``seconds`` is the
    difference between two ``process_time()`` readings taken immediately
    before and after the ``fit`` call.
  """
  began = process_time()
  model.fit(x_train, y_train)
  elapsed = process_time() - began
  return [model, elapsed]


def testModel(model, x_test, y_test, runtime):
  """Score an already-fitted model on one test set.

  Args:
    model: Fitted estimator exposing ``predict``.
    x_test: Features to predict on.
    y_test: Ground-truth targets compared against the predictions.
    runtime: Training time to record alongside the scores; it is stored
      as-is and not measured here.

  Returns:
    A single-entry dict ``{str(model): metrics}`` where ``metrics`` maps
    "Accuracy", "Balanced Accuracy", "F1 Micro", "Precision Micro",
    "Recall Micro" and "Runtime" to their values.
  """
  y_pred = model.predict(x_test)
  # Results
  tempDict = {
    "Accuracy": metrics.accuracy_score(y_test, y_pred),
    "Balanced Accuracy": metrics.balanced_accuracy_score(y_test, y_pred),
    "F1 Micro": metrics.f1_score(y_test, y_pred, average='micro'),
    # Previously computed with f1_score, which mislabelled the F1 value as
    # precision; use the precision metric itself.
    "Precision Micro": metrics.precision_score(y_test, y_pred, average='micro'),
    "Recall Micro": metrics.recall_score(y_test, y_pred, average='micro'),
    "Runtime": runtime,
  }

  # Keyed by the model's string form so callers can merge several models'
  # results into one dict.
  return {str(model): tempDict}

In [3]:
# Load every dataset once up front and keep it keyed by file name, so the
# train/test loop can reuse the loaded objects instead of re-reading each file.
FileObjects = {
  name: NetworkTraffic(name, testSize=0.4, doNorm=True, doNormAll=True, doTransform=True)
  for name in FilesToTest
}

In [4]:
from copy import deepcopy

# Start from a clean slate so re-running this cell does not mix in old results.
OutputResults.clear()

# For every file: train each model on that file, then test the fitted model on
# every other file. Results end up as
#   OutputResults[trained_on][tested_on][str(model)] -> metrics dict
for index, file in enumerate(FilesToTest):
  print(file, end=', ')
  currentFileData = FileObjects[file]

  # Every file except the one currently used for training.
  restOfFiles = FilesToTest[:index] + FilesToTest[index + 1:]

  # Create the per-test-file dicts once, BEFORE the model loop. Re-creating
  # them inside it (as was done previously) discarded the results of every
  # model except the last one.
  OutputResults[file] = {file2: dict() for file2 in restOfFiles}

  print('[', end='')
  for model in ModelsToTest:
    # The model is fitted on the training file's full data/target.
    api, runtime = trainModel(model, currentFileData.data, currentFileData.target)
    print(str(model), end=', ')
    for file2 in restOfFiles:
      testFileData = FileObjects[file2]
      x_test, y_test = testFileData.data, testFileData.target
      results = testModel(api, x_test, y_test, runtime)
      # testModel returns {str(model): metrics}; merge it so results from
      # all models accumulate for this (trained on, tested on) pair.
      OutputResults[file][file2].update(results)
  print(']')

b5000d100.csv, [RandomForestClassifier(), GradientBoostingClassifier(), DecisionTreeClassifier(), MLPClassifier(), LinearSVC(), ]
b5000d30.csv, [RandomForestClassifier(), GradientBoostingClassifier(), DecisionTreeClassifier(), MLPClassifier(), LinearSVC(), ]
b100d10.csv, [RandomForestClassifier(), GradientBoostingClassifier(), DecisionTreeClassifier(), MLPClassifier(), LinearSVC(), ]
b1000d10.csv, [RandomForestClassifier(), GradientBoostingClassifier(), DecisionTreeClassifier(), MLPClassifier(), LinearSVC(), ]
b1000d100.csv, [RandomForestClassifier(), GradientBoostingClassifier(), DecisionTreeClassifier(), MLPClassifier(), LinearSVC(), ]
b100d100.csv, [RandomForestClassifier(), GradientBoostingClassifier(), DecisionTreeClassifier(), MLPClassifier(), LinearSVC(), ]
b5000d10.csv, [RandomForestClassifier(), GradientBoostingClassifier(), DecisionTreeClassifier(), MLPClassifier(), LinearSVC(), ]
b1000d30.csv, [RandomForestClassifier(), GradientBoostingClassifier(), DecisionTreeClassifier(),

In [5]:
import json
# Persist the complete nested results dict as indented JSON. The path is
# relative to the current working directory.
with open("EveryFileTransfer_Untuned_AllTestResults.json", "w") as results_file:
  serialized = json.dumps(OutputResults, indent=2)
  results_file.write(serialized)

In [6]:
import csv

# Flatten the nested results into one CSV row per
# (training file, tested file, model), keeping only accuracy and runtime.
# csv.writer quotes any field that contains a comma or quote (e.g. a model
# repr listing several parameters), which hand-built f-string rows did not.
# newline="" is what the csv module expects for files it writes, and
# lineterminator="\n" keeps the same "\n" row endings as before.
with open("EveryFileTransfer_Untuned_ModelResults.csv", "w", newline="") as f3:
  writer = csv.writer(f3, lineterminator="\n")
  writer.writerow(["Trained On", "Tested On", "Model", "Accuracy", "Runtime"])
  for file, testedFiles in OutputResults.items():
    for file2, modelResults in testedFiles.items():
      for model, scores in modelResults.items():
        writer.writerow([file, file2, model, scores["Accuracy"], scores["Runtime"]])