# Imports

In [None]:
import math
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import sklearn
import tensorflow as tf
import random
import shutil
import os
import time

# Learning Model

In [None]:
def makeCNNModel(evalMetrics, learningRate, inputSize):
  """Build and compile a small CNN for binary classification.

  Architecture: Conv2D (128 filters, 3x3 kernel, ReLU) -> 3x3 max pooling
  -> flatten -> one sigmoid output unit. The model is compiled with the Adam
  optimizer and binary cross-entropy loss.

  Args:
    evalMetrics: metrics forwarded to ``model.compile(metrics=...)``.
    learningRate: learning rate given to the Adam optimizer.
    inputSize: shape of a single sample (no batch dimension), forwarded to
      the first layer as ``input_shape``.

  Returns:
    The compiled ``tf.keras.Sequential`` model.
  """
  # NOTE(review): Conv2D is used with its default data_format. If that is
  # channels_last, an inputSize of (22, 33, 33) is read as a 22x33 image with
  # 33 channels rather than 22 channels of 33x33 -- confirm this is intended.
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.Conv2D(128, (3, 3), activation = 'relu', input_shape = inputSize))
  model.add(tf.keras.layers.MaxPooling2D((3, 3)))
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
  # learningRate used to be accepted but silently ignored (Adam() was built
  # with its default); it is now actually passed to the optimizer.
  # Alternative optimizer left by the original author:
  #   tf.keras.optimizers.experimental.SGD()
  model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = learningRate),
                loss = tf.keras.losses.BinaryCrossentropy(),
                metrics = evalMetrics)
  return model

# Load Data

In [None]:
# Collect the training pickles: every file under /data/CSV/<circuit>/ whose
# name contains '.pkl' and whose two characters right before the first '.'
# parse to the integer 79 or 81 (e.g. 'cts_swerv_79.pkl').
# Only sub-directories are kept, so a stray file in /data/CSV/ no longer makes
# the inner os.listdir() raise NotADirectoryError.
allCircuits = ['/data/CSV/'+x+'/' for x in os.listdir('/data/CSV/')
               if os.path.isdir('/data/CSV/'+x)]
allPkls = []
for circuit in allCircuits:
  for pkl in os.listdir(circuit):
    if '.pkl' not in pkl:
      continue
    dotPos = pkl.find('.')
    try:
      density = int(pkl[dotPos-2:dotPos])
    except ValueError:
      # File name does not carry a two-digit numeric tag; skip it instead of
      # aborting the whole scan.
      continue
    if density in (79, 81):
      allPkls.append(circuit+pkl)

# Training

In [None]:
# ---- Hyper-parameters -------------------------------------------------------
# Original author's note: a sample has almost a 10% chance of being a
# violation, so the batch must be large enough that each batch has a decent
# chance of containing a few positive samples.
sizeBatch = 64
numEpochs = 50
# Passed to model.fit(class_weight=...): class 1 weighs 100x more than class 0.
weights = {0: 0.5, 1: 50}
learningRate = 0.001
evalMetrics = [tf.keras.metrics.TruePositives(name='tp'),
               tf.keras.metrics.FalsePositives(name='fp'),
               tf.keras.metrics.TrueNegatives(name='tn'),
               tf.keras.metrics.FalseNegatives(name='fn'),
               tf.keras.metrics.BinaryAccuracy(name='accuracy'),
               tf.keras.metrics.Precision(name='precision'),
               tf.keras.metrics.Recall(name='recall'),
               tf.keras.metrics.AUC(name='auc')]

# ---- Resume from the latest checkpoint, or start fresh ----------------------
os.makedirs('models/', exist_ok=True)

models = os.listdir('models/')
lastRunEpoch = 0
inputSize = (22, 33, 33)
model = None
trainResultDF = pd.DataFrame()

# Parse the epoch number out of every 'model_<epoch>.pkl' file and pick the
# numeric maximum. Sorting the file names as strings (the previous approach)
# ranks 'model_9.pkl' after 'model_10.pkl' and resumed from the wrong epoch.
# Files that do not match the checkpoint naming scheme are ignored.
checkpointEpochs = []
for name in models:
  epochTag = name[len('model_'):-len('.pkl')]
  if name.startswith('model_') and name.endswith('.pkl') and epochTag.isdecimal():
    checkpointEpochs.append(int(epochTag))

if checkpointEpochs:
  lastRunEpoch = max(checkpointEpochs)
  # NOTE(review): pickle executes arbitrary code on load -- only load
  # checkpoints written by this notebook. Whether a Keras model can be pickled
  # depends on the TensorFlow version; confirm, or consider model.save().
  with open('models/model_'+str(lastRunEpoch)+'.pkl', 'rb') as checkpointFile:
    model = pickle.load(checkpointFile)
  with open('trainResultDF.pkl', 'rb') as resultFile:
    trainResultDF = pickle.load(resultFile)
else:
  model = makeCNNModel(evalMetrics, learningRate, inputSize)

# ---- Training ----------------------------------------------------------------
# NOTE(review): epochs are numbered from lastRunEpoch+1 up to numEpochs-1, so a
# fresh run (lastRunEpoch == 0) performs numEpochs-1 passes and the final
# checkpoint is model_<numEpochs-1>.pkl. Kept as-is to preserve file naming.
for epoch in range(lastRunEpoch+1, numEpochs):
  random.shuffle(allPkls)  # visit the designs in a different order every epoch
  for pkl in allPkls:
    trainDf = pd.read_pickle(pkl, compression='zip')
    trainDf = trainDf.reset_index(drop=True)
    # Hold out a random 20% of this file's rows for validation.
    valDf = trainDf.sample(frac=0.2)
    trainDf = trainDf.drop(valDf.index)

    # The last column holds the label; pop() removes it from the features.
    labels = trainDf.pop(trainDf.columns.values[-1])
    valLabels = valDf.pop(valDf.columns.values[-1])
    # Each remaining row is reshaped to one sample of shape inputSize.
    trainHyperImages = np.array(trainDf).reshape((len(trainDf),) + inputSize)
    valHyperImages = np.array(valDf).reshape((len(valDf),) + inputSize)
    print('Epoch: ',epoch,' Training with:', pkl)
    # No `epochs` argument: one pass over this file per call.
    train_history = model.fit(x=trainHyperImages,
                             y=labels,
                             verbose=2, #0 = silent, 1 = progress bar, 2 = one line per epoch
                             batch_size=sizeBatch,
                             validation_data=(valHyperImages, valLabels),
                             class_weight=weights)
    historyDf = pd.DataFrame(train_history.history)
    historyDf['epoch'] = epoch
    # Design tag = file name without its first four characters and without
    # everything from the first '.' of the path onwards.
    historyDf['design'] = pkl[pkl.rfind('/')+5:pkl.find('.')]
    trainResultDF = pd.concat([trainResultDF, historyDf])
  # Checkpoint the model and the accumulated history after every epoch.
  with open('models/model_'+str(epoch)+'.pkl', 'wb') as checkpointFile:
    pickle.dump(model, checkpointFile)
  with open('trainResultDF.pkl', 'wb') as resultFile:
    pickle.dump(trainResultDF, resultFile)

# Save Model

In [None]:
# Write the in-memory model to 'model.pkl'. The context manager guarantees the
# file is flushed and closed even if pickling fails part-way.
with open('model.pkl', 'wb') as modelFile:
  pickle.dump(model, modelFile)

# Load Model

In [None]:
# Load a previously pickled model checkpoint; the file is closed as soon as
# loading finishes.
# NOTE(review): pickle.load runs arbitrary code from the file -- only load
# checkpoints from a trusted source.
with open('exp5Models/model_49.pkl', 'rb') as modelFile:
  model = pickle.load(modelFile)

In [None]:
# Load the pickled training-history table and display its shape; the file is
# closed as soon as loading finishes.
with open('EXP5trainResultDF.pkl', 'rb') as resultFile:
  trainResultDF = pickle.load(resultFile)
trainResultDF.shape
# plt.plot(train_history.history['loss'][0:50])
# plt.plot(train_history.history['val_loss'][0:50])

In [None]:
# Plot the 'loss' and 'val_loss' columns of trainResultDF for one design,
# with the rows ordered by their 'epoch' value.
design = 'jpeg_79'
isDesign = trainResultDF['design'] == design
sortedDF = trainResultDF.loc[isDesign].sort_values(by=['epoch'])
ytrain = list(sortedDF['loss'])
yval = list(sortedDF['val_loss'])
for curve, curveName in ((ytrain, "ytrain"), (yval, "yval")):
  plt.plot(curve, label = curveName)
plt.legend()
plt.title(design)
plt.show()

# Compress All CSVs

# Evaluate

In [None]:
def calculate_test_metrics(model, results):
  """Label evaluation results by metric name and add F-score and MCC.

  Args:
    model: object exposing ``metrics_names``, the names matching ``results``.
    results: metric values in the same order as ``model.metrics_names``.
      Must include 'precision', 'recall', 'tp', 'fp', 'tn' and 'fn'.

  Returns:
    A dict mapping each metric name to its value. 'F-score' is added when
    precision + recall is non-zero, and 'MCC' (Matthews correlation
    coefficient) when its denominator is non-zero; otherwise the
    corresponding key is left out.
  """
  m = dict(zip(model.metrics_names, results))

  # F-score: harmonic mean of precision and recall.
  precision, recall = m['precision'], m['recall']
  if precision + recall != 0:
    m['F-score'] = 2 * ((precision * recall)/(precision + recall))

  # MCC from the four confusion-matrix counts.
  tp, fp, fn, tn = m['tp'], m['fp'], m['fn'], m['tn']
  denominator = math.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
  if denominator != 0:
    m['MCC'] = ((tp * tn) - (fp * fn))/denominator
  return m

In [None]:
# Read the zip-compressed pickle that the evaluation cell uses as its test
# DataFrame.
testDF = pd.read_pickle(
  '/data/CSV/swerv/cts_swerv_80.pkl',
  compression='zip',
)

In [None]:
# Evaluate `model` on testDF and print the resulting metrics.
# Caution: the label column is popped out of testDF below, so testDF must be
# reloaded before this cell is run a second time.
sizeBatch = 64 # is important to ensure that each batch has a decent chance of containing a few positive samples
# testDF = %time pd.read_pickle('/data/CSV/swerv/cts_swerv_70.pkl', compression='zip')

# Shuffle all rows, then renumber the index from 0.
testDF = testDF.sample(frac=1)
testDF = testDF.reset_index(drop=True)

# The last column is the label; every remaining row becomes one 22x33x33 sample.
labelColumn = testDF.columns.values[-1]
testLabels = testDF.pop(labelColumn)
numSamples = len(testDF)
testHyperImages = np.array(testDF).reshape(numSamples,22,33,33)

batch_size = 32  # not used below; evaluate() is given sizeBatch
baseline_results = model.evaluate(
  x=testHyperImages,
  y=testLabels,
  batch_size=sizeBatch,
)
test_metrics = calculate_test_metrics(model, baseline_results)
print(test_metrics)

# Backup

In [None]:
# scaler = sklearn.preprocessing.StandardScaler()
# trainHyperImages = scaler.fit_transform(trainHyperImages)
# valHyperImages = scaler.transform(valHyperImages)

# scaler = sklearn.preprocessing.StandardScaler()
# labels = df[33*33*22]
# df[0:33*33*22-1] = scaler.fit_transform(df[0:33*33*22-1]).round(decimals=2)
# df[33*33*22] = labels

In [None]:
# df = df.sample(frac=1).reset_index(drop=True) #Shuffle all rows
# dfVal = df.sample(frac=0.2)
# df = df.drop(dfVal.index)

# labels = df.pop(df.columns.values[-1])
# valLabels = dfVal.pop(dfVal.columns.values[-1])
# trainHyperImages = np.array(df).reshape(len(df),22,33,33)
# valHyperImages = np.array(dfVal).reshape(len(dfVal),22,33,33)