# Imports

In [1]:
import math
import matplotlib
import numpy as np
import pandas as pd
import pickle
import sklearn
import tensorflow as tf
import random
import shutil
import os
from enum import Enum
import imblearn
import time
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

# https://towardsdatascience.com/still-saving-your-data-in-csv-try-these-other-options-9abe8b83db3a

2023-01-05 17:16:28.398669: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-05 17:16:29.040137: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-01-05 17:16:29.040216: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


# Helper Functions

In [2]:
def makeCNNModel(evalMetrics, learningRate, inputSize):
  """Build and compile the CNN used for binary classification.

  Layers, in order: Conv2D(32 filters, 3x3, relu) -> AveragePooling2D(3x3)
  -> Flatten -> Dense(128, relu) -> Dense(128, relu) -> Dense(1, sigmoid).
  The model is compiled with Adam and binary cross-entropy.

  Args:
    evalMetrics: metrics forwarded to `model.compile(metrics=...)`.
    learningRate: learning rate handed to the Adam optimizer.
    inputSize: shape of a single sample (without the batch dimension),
      forwarded to the first layer as `input_shape`.

  Returns:
    The compiled `tf.keras.Sequential` model.
  """
  # NOTE(review): Conv2D's data_format is left at the Keras default, so the
  # LAST axis of inputSize is presumably treated as channels (channels_last
  # unless the Keras config overrides it) - confirm this matches the data.
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.Conv2D(32, (3, 3), activation = 'relu', input_shape = inputSize))
  model.add(tf.keras.layers.AveragePooling2D((3, 3)))
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(128, activation = 'relu'))
  model.add(tf.keras.layers.Dense(128, activation = 'relu'))
  model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
  # The learning rate must be passed explicitly; a bare Adam() falls back to
  # the optimizer's default and ignores the caller's learningRate.
  model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = learningRate),
                loss = tf.keras.losses.BinaryCrossentropy(),
                metrics = evalMetrics)
  return model

def calculateMetrics(tp, fp, tn, fn):
  """Derive summary metrics from confusion-matrix counts.

  Args:
    tp: number of true positives.
    fp: number of false positives.
    tn: number of true negatives.
    fn: number of false negatives.

  Returns:
    A tuple (precision, recall, accuracy, fscore, mcc).
    When tp == 0 the result is treated as meaningless and the fixed tuple
    (0, 0, 0, 0, -1) is returned. mcc is also -1 whenever the MCC
    denominator evaluates to zero.
  """
  # Guard: no true positives (model never predicts the positive class, or
  # the labels contain none) - report the sentinel values.
  if tp == 0:
    return 0, 0, 0, 0, -1

  predictedPositives = tp + fp
  actualPositives = tp + fn

  precision = tp/predictedPositives
  recall = tp/actualPositives
  accuracy = (tp + tn)/(tp + fn + tn + fp)
  fscore = (2 * precision * recall)/(precision + recall)

  # Matthews correlation coefficient; -1 doubles as the "undefined" marker.
  denominator = math.sqrt(predictedPositives*actualPositives*(tn+fp)*(tn+fn))
  if denominator == 0:
    mcc = -1
  else:
    mcc = ((tp * tn) - (fp * fn))/denominator
  return precision, recall, accuracy, fscore, mcc

In [3]:
# Read the headerless CSV at this path into a DataFrame, parsing every column
# as float32.
# NOTE(review): `df` is not referenced again in the cells visible here -
# confirm it is still needed before keeping this (potentially large) load.
df = pd.read_csv('/data/CSV/bp/cts_bp_80_viol.csv', dtype=np.float32, header=None)


# Training

# Generate Train and Test CSVs

# One directory path (with trailing slash) per entry found under /data/Pickle/.
allCircuits = ['/data/Pickle/' + x + '/' for x in os.listdir('/data/Pickle/')]

# Gather the path of every file in those directories whose name does not
# contain '.pkl'.
allCSVs = []
for circuit in allCircuits:
  for file in os.listdir(circuit):
    if '.pkl' not in file:
      allCSVs.append(circuit + file)

# Circuit directories assigned to each data split.
trainingCircuits = [
  '/data/Pickle/aes/',
  '/data/Pickle/jpeg/',
  '/data/Pickle/tinyRocket/',
  '/data/Pickle/bp/',
  '/data/Pickle/ibex/',
  '/data/Pickle/dynamic_node/',
  '/data/Pickle/bp_be/',
  '/data/Pickle/bp_fe/',
  '/data/Pickle/bp_multi/',
  '/data/Pickle/gcd/',
]
validationCircuits = ['/data/Pickle/swerv/']
testCircuits = ['/data/Pickle/swerv_wrapper/']

In [None]:
# Circuit directories whose pickles are used for training in this cell.
# NOTE(review): this list includes '/data/Pickle/swerv/' in addition to the
# other ten circuits - confirm that training on swerv is intended.
trainingCircuits = ['/data/Pickle/aes/', '/data/Pickle/jpeg/', '/data/Pickle/tinyRocket/',
  '/data/Pickle/bp/', '/data/Pickle/ibex/', '/data/Pickle/dynamic_node/',
  '/data/Pickle/bp_be/', '/data/Pickle/bp_fe/', '/data/Pickle/bp_multi/', '/data/Pickle/gcd/', '/data/Pickle/swerv/']

# --- Configuration -----------------------------------------------------------
numEpochs = 100
inputSize = (22, 33, 33)      # shape of one sample, without the batch axis
learningRate = 0.005
batchSize = 32
valFraction = 0.2             # share of each pickle held out for validation
seed = 42
outputDir = 'model'
historyPath = os.path.join(outputDir, 'history.csv')

# Seed every RNG involved so the shuffle order and weight init are repeatable.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

evalMetrics = [tf.keras.metrics.TruePositives(name='tp'),
               tf.keras.metrics.FalsePositives(name='fp'),
               tf.keras.metrics.TrueNegatives(name='tn'),
               tf.keras.metrics.FalseNegatives(name='fn'),
               tf.keras.metrics.BinaryAccuracy(name='accuracy'),
               tf.keras.metrics.Precision(name='precision'),
               tf.keras.metrics.Recall(name='recall'),
               tf.keras.metrics.AUC(name='auc')]
model = makeCNNModel(evalMetrics, learningRate, inputSize)

# --- Collect training pickles ------------------------------------------------
trainingPickles = sorted(circuit + pkl
                         for circuit in trainingCircuits
                         for pkl in os.listdir(circuit)
                         if '.pkl' in pkl)

# One fixed split seed per file (its position in the sorted list). Re-drawing
# the validation rows with a fresh random sample every epoch would let rows
# validated in one epoch be trained on in the next, contaminating val_* metrics.
splitSeeds = {path: idx for idx, path in enumerate(trainingPickles)}

# --- Training ----------------------------------------------------------------
# to_csv does not create missing directories.
os.makedirs(outputDir, exist_ok=True)

historyFrames = []            # one single-row history frame per fit() call
historyDf = pd.DataFrame()
for epoch in range(numEpochs):
  random.shuffle(trainingPickles)
  for trainPickle in trainingPickles:
    # SECURITY: unpickling can execute arbitrary code - only load pickles
    # produced by a trusted pipeline.
    sampleDf = pd.read_pickle(trainPickle, compression='gzip')
    valDf = sampleDf.sample(frac=valFraction, random_state=splitSeeds[trainPickle])
    trainDf = sampleDf.drop(valDf.index)

    # The last column holds the label; pop() removes it from the features.
    labels = trainDf.pop(trainDf.columns[-1])
    valLabels = valDf.pop(valDf.columns[-1])
    print('labels', labels.sum(), 'valLabels', valLabels.sum())

    # Each remaining row is one flattened sample; restore it to inputSize.
    trainHyperImages = trainDf.to_numpy().reshape((len(trainDf),) + inputSize)
    valHyperImages = valDf.to_numpy().reshape((len(valDf),) + inputSize)

    train_history = model.fit(x=trainHyperImages,
                              y=labels,
                              batch_size=batchSize,
                              validation_data=(valHyperImages, valLabels))
    history = pd.DataFrame(train_history.history)

    # Append just the new row(s) instead of rewriting the whole file each
    # step; the first write of a run truncates the file and emits the header.
    isFirstWrite = not historyFrames
    history.to_csv(historyPath,
                   mode='w' if isFirstWrite else 'a',
                   header=isFirstWrite,
                   index=False)
    historyFrames.append(history)

  # Rebuild the in-memory history once per epoch rather than once per fit().
  if historyFrames:
    historyDf = pd.concat(historyFrames)
  # Checkpoint the full model and its weights at the end of every epoch.
  model.save('model/savedModel')
  model.save_weights('model/modelWeights/model.ckpt')