# Traditional Methods trained on every file, tested on the rest (untuned)
MinMaxScaler is applied in these tests. All models are untuned. Each model is trained on file A and tested on files B, C, D, ...; then trained on file B and tested on files A, C, D, ...; and so on for every file.

In [1]:
import sys
sys.path.append("../../")
from time import process_time
from os import listdir, chdir

import warnings


def warn(*args, **kwargs):
  """Accept any arguments and do nothing.

  Installed below as a replacement for ``warnings.warn`` so that anything
  calling ``warnings.warn`` after this point is silenced.
  """
  return None


# Swap the real implementation for the no-op above.
warnings.warn = warn

from modules.NetworkTraffic import NetworkTraffic
from sklearn import model_selection, metrics

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier

# Every relative path used from here on resolves against the data directory.
chdir("../../data")

# Each CSV file in the data directory takes a turn as the training file.
FilesToTest = [entry for entry in listdir() if entry.endswith(".csv")]

TestSize = [0.4]

# One default-constructed (untuned) instance of every classifier under test.
ModelsToTest = [
  RandomForestClassifier(),
  GradientBoostingClassifier(),
  DecisionTreeClassifier(),
  MLPClassifier(),
  LinearSVC(),
]

# Filled in by the experiment loop: OutputResults[size][file][model name] -> metrics dict.
OutputResults = {}
#ModelResults = dict()

In [2]:
def testModel(model, x_train, x_test, y_train, y_test):
  """Fit ``model`` on the training data, predict the test data and score it.

  Args:
    model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is fitted
      in place (no copy is made), so the caller's instance is left fitted.
    x_train: Training features passed to ``fit``.
    x_test: Features passed to ``predict``.
    y_train: Training labels passed to ``fit``.
    y_test: True labels the predictions are scored against.

  Returns:
    dict with the keys "Accuracy", "Balanced Accuracy", "F1 Micro",
    "Precision Micro", "Recall Micro" and "Runtime". "Runtime" is the
    ``process_time()`` difference measured around fit + predict.
  """
  print(f"Testing {str(model)}", end=', ')

  # Only fitting and prediction are timed; scoring is excluded.
  start = process_time()
  model.fit(x_train, y_train)
  y_pred = model.predict(x_test)
  stop = process_time()

  return {
    "Accuracy": metrics.accuracy_score(y_test, y_pred),
    "Balanced Accuracy": metrics.balanced_accuracy_score(y_test, y_pred),
    "F1 Micro": metrics.f1_score(y_test, y_pred, average='micro'),
    # Previously computed with f1_score by mistake; precision needs precision_score.
    "Precision Micro": metrics.precision_score(y_test, y_pred, average='micro'),
    "Recall Micro": metrics.recall_score(y_test, y_pred, average='micro'),
    "Runtime": stop - start,
  }

In [3]:
def updateModelResults(size, model, results):
  """Accumulate one run's metrics into the global ``ModelResults`` totals.

  Args:
    size: Key of an existing ``ModelResults`` entry; an unknown key raises
      ``KeyError``.
    model: Key identifying the model inside ``ModelResults[size]``; its
      entry is created on first use.
    results: Mapping of metric name to value. Each value is added to the
      running total for that metric, or stored as-is the first time the
      metric is seen.
  """
  for metric, value in results.items():
    # Looked up inside the loop so an empty ``results`` touches nothing.
    totals = ModelResults[size].setdefault(model, {})
    if metric in totals:
      totals[metric] += value
    else:
      totals[metric] = value

def findAveragesForModelResults(fileCount):
  """Turn the accumulated totals in ``ModelResults`` into averages, in place.

  Args:
    fileCount: Divisor applied to every stored metric (the total number of
      files tested).
  """
  for per_model in ModelResults.values():
    for totals in per_model.values():
      # Only values are reassigned, so iterating the keys stays safe.
      for metric in totals:
        totals[metric] /= fileCount

In [4]:
from copy import deepcopy

# Drop results from any earlier run of this cell before collecting new ones.
OutputResults.clear()
#ModelResults.clear()

for size in TestSize:
  print(f"\nSearching with test size of {size*100}%...")
  OutputResults[size] = {}
  #ModelResults[size] = dict()

  for index, file in enumerate(FilesToTest):
    print(file, end=', ')
    OutputResults[size][file] = {}

    # Training data: the current file on its own.
    currentFileData = NetworkTraffic(file, testSize=size, doNorm=True, doNormAll=True, doTransform=True)

    # Test data: every other file, loaded together.
    restOfFiles = FilesToTest[:index] + FilesToTest[index + 1:]
    restOfTheFilesData = NetworkTraffic(restOfFiles, testSize=size, doNorm=True, doNormAll=True, doTransform=True)

    x_train, y_train = currentFileData.data, currentFileData.target
    x_test, y_test = restOfTheFilesData.data, restOfTheFilesData.target

    # The same estimator instances are reused for every file; testModel fits them again each time.
    for model in ModelsToTest:
      results = testModel(model, x_train, x_test, y_train, y_test)
      OutputResults[size][file][str(model)] = results
      #updateModelResults(size, str(model), results)

#findAveragesForModelResults(len(FilesToTest))


Searching with test size of 40.0%...
b5000d100.csv, 
Testing RandomForestClassifier(), Testing GradientBoostingClassifier(), Testing DecisionTreeClassifier(), Testing MLPClassifier(), Testing LinearSVC(), b5000d30.csv, 
Testing RandomForestClassifier(), Testing GradientBoostingClassifier(), Testing DecisionTreeClassifier(), Testing MLPClassifier(), Testing LinearSVC(), b100d10.csv, 
Testing RandomForestClassifier(), Testing GradientBoostingClassifier(), Testing DecisionTreeClassifier(), Testing MLPClassifier(), Testing LinearSVC(), b1000d10.csv, 
Testing RandomForestClassifier(), Testing GradientBoostingClassifier(), Testing DecisionTreeClassifier(), Testing MLPClassifier(), Testing LinearSVC(), b1000d100.csv, 
Testing RandomForestClassifier(), Testing GradientBoostingClassifier(), Testing DecisionTreeClassifier(), Testing MLPClassifier(), Testing LinearSVC(), b100d100.csv, 
Testing RandomForestClassifier(), Testing GradientBoostingClassifier(), Testing DecisionTreeClassifier(), Testi

In [5]:
import json
# Persist the full nested results (size -> training file -> model -> metrics) as JSON.
with open("EveryFileTransfer_Untuned_AllTestResults.json", "w") as results_file:
  serialized = json.dumps(OutputResults, indent=2)
  results_file.write(serialized)

In [6]:
import csv  # local to this cell: nothing else in the notebook needs it

# Flatten OutputResults into one CSV row per (training file, model) pair,
# keeping only the accuracy and runtime columns.
# The file is opened exactly as before and rows end in "\n", so the output is
# unchanged for plain values; csv.writer only adds quoting when a file name or
# model repr contains a comma, quote or newline, which used to corrupt the columns.
with open("EveryFileTransfer_Untuned_ModelResults.csv", "w") as f3:
  writer = csv.writer(f3, lineterminator="\n")
  writer.writerow(["File Trained On", "Model", "Accuracy", "Runtime"])
  for size in OutputResults:
    for file in OutputResults[size]:
      for model, scores in OutputResults[size][file].items():
        writer.writerow([file, model, scores["Accuracy"], scores["Runtime"]])