# Imports

In [None]:
import math
import numpy as np
import pandas as pd
import pickle
import sklearn
import tensorflow as tf
import random
import shutil
import os
import time
import subprocess

# Learning Models

In [None]:
# Metrics handed to every model.compile() call in this notebook:
# the four confusion-matrix counts first, then the derived scores.
evalMetrics = [
  metricClass(name=metricName)
  for metricClass, metricName in (
    (tf.keras.metrics.TruePositives, 'tp'),
    (tf.keras.metrics.FalsePositives, 'fp'),
    (tf.keras.metrics.TrueNegatives, 'tn'),
    (tf.keras.metrics.FalseNegatives, 'fn'),
    (tf.keras.metrics.BinaryAccuracy, 'accuracy'),
    (tf.keras.metrics.Precision, 'precision'),
    (tf.keras.metrics.Recall, 'recall'),
    (tf.keras.metrics.AUC, 'auc'),
  )
]

# FCN Model
def makeFCNModel():
  """Build and compile the smaller of the two classifiers.

  The network is a 3x3 convolution with 64 ReLU filters, a 3x3 max pooling,
  a flatten, and a single sigmoid output unit. It is compiled with Adam,
  binary cross-entropy and the module-level ``evalMetrics``.

  Returns:
    The compiled ``tf.keras.Sequential`` model.
  """
  print('Making FCN model.')
  # NOTE(review): Conv2D presumably runs with the channels_last default, in
  # which case (22, 33, 33) is read as a 22x33 image with 33 channels --
  # confirm this matches the intended layout of the hyper-images.
  inputShape = (22, 33, 33)
  layers = tf.keras.layers
  network = tf.keras.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=inputShape),
    layers.MaxPooling2D((3, 3)),
    layers.Flatten(),
    layers.Dense(1, activation='sigmoid'),
  ])
  network.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=evalMetrics,
  )
  return network

# CNN Model
def makeCNNModel():
  """Build and compile the larger classifier with two hidden dense layers.

  Same convolution / pooling / flatten front end as ``makeFCNModel``, followed
  by two 128-unit ReLU dense layers and a single sigmoid output unit. It is
  compiled with Adam, binary cross-entropy and the module-level
  ``evalMetrics``.

  Returns:
    The compiled ``tf.keras.Sequential`` model.
  """
  print('Making CNN model.')
  # NOTE(review): Conv2D presumably runs with the channels_last default, in
  # which case (22, 33, 33) is read as a 22x33 image with 33 channels --
  # confirm this matches the intended layout of the hyper-images.
  inputShape = (22, 33, 33)
  layers = tf.keras.layers
  network = tf.keras.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=inputShape),
    layers.MaxPooling2D((3, 3)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
  ])
  network.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=evalMetrics,
  )
  return network

# Select Training Data (requires benchmarkInfo/ufscbenchmark.pkl)

In [None]:
# Directory that holds one sub-directory per design with its cts_*.pkl files.
csvPath = '/home/sheiny/workspace/data/CSVS/'
df = pd.read_pickle('benchmarkInfo/ufscbenchmark.pkl', compression='zip')
kFold = 4      # number of circuits grouped into each test run (see testRuns)
testSize = 20  # number of circuits held out of the training set


def _ctsPklPath(design, density):
  # Data file of one (design, density) circuit below csvPath.
  return csvPath+design+'/cts_'+design+'_'+str(density)+'.pkl'


if testSize % kFold != 0:
  print('Warning: testSize % kFold != 0')

# Keep only the benchmark rows whose FDRVTotal is zero.
df = df.loc[df['FDRVTotal'] == 0]
df.reset_index(inplace=True)

# Drop every circuit whose data file is missing on disk. After the reset the
# index labels are 0..len(df)-1, so the enumerate position is the row label.
inexistingFiles = [index
                   for index, (design, density) in enumerate(zip(df['Design'], df['Density']))
                   if not os.path.exists(_ctsPklPath(design, density))]
df.drop(inexistingFiles, inplace=True)
df.reset_index(inplace=True)
df.sort_values('IDRVShort', ascending=False, inplace=True, ignore_index=True)
totalSize = len(df)

# The testSize circuits with the largest IDRVShort are held out for testing.
testCircuits = [(df['Design'][x], df['Density'][x]) for x in range(testSize)]
# The shuffled order is written once and reused afterwards so that the test
# runs stay the same between sessions.
# NOTE(review): a cached testCircuits.pkl is not re-checked against the
# current df; only the length check below guards against a stale file.
if not os.path.exists('testCircuits.pkl'):
  random.shuffle(testCircuits)
  with open('testCircuits.pkl', 'wb') as handle:
    pickle.dump(testCircuits, handle)
else:
  with open('testCircuits.pkl', 'rb') as handle:
    testCircuits = pickle.load(handle)

# Consecutive chunks of kFold circuits; the last chunk may be shorter.
testRuns = [testCircuits[x:x+kFold] for x in range(0, len(testCircuits), kFold)]
trainingCircuits = [(df['Design'][x], df['Density'][x]) for x in range(testSize, len(df))]
if totalSize != (len(testCircuits) + len(trainingCircuits)):
  print('Error: len(df) should be equals len(trainingCircuits) + len(testCircuits)')
trainingPkls = [_ctsPklPath(design, density) for design, density in trainingCircuits]

# Compute Class Weights

# Train for K-Fold Cross Validation

In [None]:
# Class balancing: each class is weighted by total / (2 * count), so that
# count * weight is the same (total / 2) for both classes.
# pos / neg are presumably the positive / negative sample counts of the
# training data -- confirm where these numbers come from.
pos = 7661
neg = 1782339
total = pos + neg
weights = {label: total / (2 * count) for label, count in ((0, neg), (1, pos))}
w0, w1 = weights[0], weights[1]
sizeBatch = 64  # batch_size passed to fit() in train()

def train(pklsForTraining, learningModel, modelPath, epochStart, epochEnd, trainResultDF = None):
  """Fit ``learningModel`` file by file and checkpoint after every epoch.

  For each epoch in ``range(epochStart, epochEnd)`` the files are visited in a
  fresh random order. Every file is split 80/20 at random into training and
  validation rows and passed to a single ``fit`` call that uses the
  module-level ``sizeBatch`` and ``weights``. After each epoch the model and
  the accumulated history are pickled below ``modelPath``.

  Args:
    pklsForTraining: list of zip-compressed DataFrame pickles. The first
      column is dropped as the node id, the last column is the label, and the
      remaining columns must reshape to (rows, 22, 33, 33).
    learningModel: compiled model exposing a Keras-style ``fit``.
    modelPath: prefix (normally a directory ending in '/') for the
      ``model_<epoch>.pkl`` and ``trainResultDF.pkl`` files.
    epochStart: first epoch index to run.
    epochEnd: epoch index to stop before.
    trainResultDF: history collected by earlier epochs; ``None`` starts empty.

  Returns:
    The accumulated history DataFrame (one row per file and epoch).
  """
  if trainResultDF is None:
    trainResultDF = pd.DataFrame()
  pkls = pklsForTraining.copy()  # shuffle a copy, never the caller's list
  for epoch in range(epochStart, epochEnd):
    random.shuffle(pkls)
    for pkl in pkls:
      trainDf = pd.read_pickle(pkl, compression='zip')
      trainDf = trainDf.reset_index(drop=True)
      # Hold out a random 20% of the rows of this file for validation.
      valDf = trainDf.sample(frac=0.2)
      trainDf = trainDf.drop(valDf.index)

      labels = trainDf.pop(trainDf.columns.values[-1])
      valLabels = valDf.pop(valDf.columns.values[-1])
      trainDf.pop(trainDf.columns.values[0])#drop first column which contains the nodeIds
      valDf.pop(valDf.columns.values[0])#drop first column which contains the nodeIds
      trainHyperImages = np.array(trainDf).reshape(len(trainDf),22,33,33)
      valHyperImages = np.array(valDf).reshape(len(valDf),22,33,33)
      print('Epoch: ',epoch,' Training with:', pkl)
      train_history = learningModel.fit(x=trainHyperImages,
                                       y=labels,
                                       verbose=2, #0 = silent, 1 = progress bar, 2 = one line per epoch
                                       batch_size=sizeBatch,
                                       validation_data=(valHyperImages, valLabels),
                                       class_weight=weights)
      historyDf = pd.DataFrame(train_history.history)
      historyDf['epoch'] = epoch
      # File name without directory, extension and the 4-character 'cts_'
      # prefix. Working on the base name keeps dots elsewhere in the path
      # from truncating the design name.
      historyDf['design'] = os.path.splitext(os.path.basename(pkl))[0][len('cts_'):]
      trainResultDF = pd.concat([trainResultDF, historyDf])
    # NOTE(review): pickling a Keras model depends on the installed
    # TensorFlow version; the loaders in this notebook expect this format.
    with open(modelPath+'model_'+str(epoch)+'.pkl', 'wb') as modelFile:
      pickle.dump(learningModel, modelFile)
    with open(modelPath+'trainResultDF.pkl', 'wb') as resultFile:
      pickle.dump(trainResultDF, resultFile)
  return trainResultDF

In [None]:
numEpochs = 15
numTestRuns = len(testRuns)
useFCN = True

# One training run per test fold. Each run trains on the regular training
# circuits plus the circuits of every other fold, and resumes from the newest
# checkpoint found in its result directory.
for run in range(numTestRuns):
  modelPath = 'results/fcn'+str(run)+'/'
  # makedirs also creates the parent 'results' directory when it is missing.
  os.makedirs(modelPath, exist_ok=True)

  # Only 'model_<epoch>.pkl' checkpoints count; trainResultDF.pkl and any
  # other file in the directory are ignored.
  models = [x for x in os.listdir(modelPath)
            if x.startswith('model_') and x.endswith('.pkl')]
  lastRunEpoch = 0
  learningModel = None
  trainResultDF = pd.DataFrame()
  if models:
    lastRunEpoch = max(int(x[len('model_'):-len('.pkl')]) for x in models)
    if lastRunEpoch + 1 == numEpochs:
      # Every epoch of this run is already on disk; skip loading the model.
      continue
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(modelPath+'model_'+str(lastRunEpoch)+'.pkl', 'rb') as handle:
      learningModel = pickle.load(handle)
    lastRunEpoch += 1  # resume with the epoch after the newest checkpoint
    if os.path.exists(modelPath+'trainResultDF.pkl'):
      with open(modelPath+'trainResultDF.pkl', 'rb') as handle:
        trainResultDF = pickle.load(handle)
  else:
    learningModel = makeFCNModel() if useFCN else makeCNNModel()

  allPkls = trainingPkls.copy()
  allPkls += [csvPath+design+'/cts_'+design+'_'+str(density)+'.pkl'
              for foldIndex, fold in enumerate(testRuns) if foldIndex != run
              for design, density in fold]

  train(allPkls, learningModel, modelPath, lastRunEpoch, numEpochs, trainResultDF)

# Cross Validation

In [None]:
def predict(model, pkl):
  """Evaluate ``model`` on one zip-compressed DataFrame pickle.

  The last column of the frame is used as the label and the first column
  (node ids) is discarded. The remaining columns are reshaped to
  (rows, 22, 33, 33) and passed to ``model.evaluate``.

  Returns:
    dict mapping each entry of ``model.metrics_names`` to its evaluated value.
  """
  frame = pd.read_pickle(pkl, compression='zip')
  targets = frame.pop(frame.columns.values[-1])
  # The first column holds the node ids and is not a feature.
  frame.pop(frame.columns.values[0])
  rowCount = len(frame)
  hyperImages = np.array(frame).reshape(rowCount, 22, 33, 33)
  scores = model.evaluate(hyperImages, targets)
  return dict(zip(model.metrics_names, scores))

def predictPkls(design, density, modelPath, pkls):
  """Evaluate one pickled model on several data files.

  Args:
    design: value stored under the 'Design' key of every result.
    density: value stored under the 'Density' key of every result.
    modelPath: path of the pickled model to load.
    pkls: data files, each evaluated with ``predict``.

  Returns:
    list with one metrics dict per entry of ``pkls``, in the same order.
  """
  # NOTE: pickle.load can execute arbitrary code; only load trusted files.
  # The context manager closes the model file right after it is read.
  with open(modelPath, 'rb') as modelFile:
    model = pickle.load(modelFile)
  results = []
  for pkl in pkls:
    result = predict(model, pkl)
    result['Design'] = design
    result['Density'] = density
    results.append(result)
  return results

In [None]:
testPath = '/home/sheiny/workspace/data/WholeCSV/'
modelPaths = '/home/sheiny/workspace/Predictor/results/'

# Evaluate each fold with the model_14.pkl checkpoint of the run that left
# that fold out of training, and store one result table per fold.
for testRun in range(len(testRuns)):
  modelPath = modelPaths+'fcn'+str(testRun)+'/model_14.pkl'
  frames = []
  for design, density in testRuns[testRun]:
    designDir = testPath+design+'/'
    pklsToTest = [designDir+x for x in os.listdir(designDir) if '_'+str(density)+'_' in x]
    # Order the files by the integer between the last '_' and the first '.'.
    pklsToTest.sort(key = lambda x : int(x[x.rfind('_')+1:x.find('.')]))
    resultDicts = predictPkls(design, density, modelPath, pklsToTest)
    df = pd.DataFrame.from_dict(resultDicts)
    frames.append(df)
  # Concatenate once instead of growing the frame inside the loop; an empty
  # fold still yields an empty DataFrame.
  resultDf = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()
  with open('results/fcn'+str(testRun)+'_CV.pkl', 'wb') as handle:
    pickle.dump(resultDf, handle)

# Trained Model Experiment

In [None]:
testPath = '/home/sheiny/workspace/data/WholeCSV/'
modelPath = 'results/fcn2/model_14.pkl'
# All test circuits of all folds, flattened into one list.
predictRuns = [circuit
               for fold in testRuns
               for circuit in fold]

# Evaluate the single model at modelPath on every test circuit and store all
# results in one table.
frames = []
for design, density in predictRuns:
  designDir = testPath+design+'/'
  pklsToTest = [designDir+x
                for x in os.listdir(designDir)
                if '_'+str(density)+'_' in x]
  # Order the files by the integer between the last '_' and the first '.'.
  pklsToTest.sort(key = lambda x : int(x[x.rfind('_')+1:x.find('.')]))
  resultDicts = predictPkls(design, density, modelPath, pklsToTest)
  df = pd.DataFrame.from_dict(resultDicts)
  frames.append(df)
# Concatenate once instead of growing the frame inside the loop; an empty
# circuit list still yields an empty DataFrame.
resultDf = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()
with open('results/predictRuns.pkl', 'wb') as handle:
  pickle.dump(resultDf, handle)

# Benchmark Info (benchmark.pkl)

# Compress and merge CSVs into .pkls