# Imports

In [None]:
import math
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import sklearn
import tensorflow as tf
import random
import shutil
import os
import time
import re
import subprocess

# Learning Model

In [None]:
def makeCNNModel(evalMetrics, learningRate, inputSize):
  """Build and compile a small CNN for binary classification.

  Architecture: Conv2D(64 filters, 3x3, relu) -> MaxPooling2D(3x3) ->
  Flatten -> Dense(1, sigmoid). The model is compiled with the Adam
  optimizer and binary cross-entropy loss.

  Args:
    evalMetrics: Metrics forwarded to ``model.compile(metrics=...)``.
    learningRate: Learning rate handed to the Adam optimizer.
    inputSize: Shape of a single sample (no batch dimension); forwarded to
      the first layer as ``input_shape``.

  Returns:
    The compiled ``tf.keras.Sequential`` model.
  """
  # NOTE(review): Conv2D is created without data_format, so Keras' configured
  # default applies. If that default is channels_last, an input of
  # (22, 33, 33) is read as a 22x33 image with 33 channels -- confirm this is
  # the intended layout.
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.Conv2D(64, (3, 3), activation = 'relu', input_shape = inputSize))
  model.add(tf.keras.layers.MaxPooling2D((3, 3)))
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
  # The learningRate argument used to be ignored (Adam() was built with its
  # library default); it is now actually applied.
  # Alternative optimizer tried earlier: tf.keras.optimizers.experimental.SGD()
  model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = learningRate),
                loss = tf.keras.losses.BinaryCrossentropy(),
                metrics = evalMetrics)
  return model

# Load Data

In [None]:
# allCircuits = ['/data/CSV/'+x+'/' for x in os.listdir('/data/CSV/')]
# allPkls = []
# for circuit in allCircuits:
#   for pkl in os.listdir(circuit):
#     if '.pkl' not in pkl:
#       continue
#     density = int(pkl[pkl.find('.')-2:pkl.find('.')])
#     if density == 79 or density == 81:
#       allPkls.append(circuit+pkl)

In [None]:
# Keep only the completely routed circuits (rows whose FDRTotal is zero).
df2 = df.loc[df['FDRTotal'] == 0]
# Designs to train on: routed designs whose name does not contain '80',
# listed in ascending order of design name.
withoutEighty = df2['Design'].str.contains('80') == False
sortedDesigns = df2.loc[withoutEighty].sort_values(by=['Design'])
circuitsToTrain = list(sortedDesigns['Design'])

In [None]:
# Keep only the completely routed circuits (rows whose FDRTotal is zero).
df2 = df.loc[df['FDRTotal'] == 0]
# Designs to train on: routed designs whose name does not contain '80',
# listed in ascending order of design name.
circuitsToTrain = [x for x in df2.loc[df2['Design'].str.contains('80') == False].sort_values(by=['Design'])['Design']]

# A design is named '<circuit>_<NN>'; its pickle lives at
# /data/CSV/<circuit>/cts_<design>.pkl. Designs without a pickle are dropped.
allPkls = ['/data/CSV/' + x[0:-3] + '/cts_' + x + '.pkl' for x in circuitsToTrain]
allPkls = [x for x in allPkls if os.path.exists(x)]

# Keep only designs whose trailing number is within 75..85 (80 was already
# excluded above). The number is taken from the LAST digit group of the file
# name: the first digit group of the full path can belong to the circuit
# name itself (e.g. a circuit called 'sha3'), which made the old filter drop
# such designs by mistake.
allPkls = [x for x in allPkls
           if 74 < int(re.findall(r'\d+', os.path.basename(x))[-1]) < 86]

# Training

In [None]:
# Batch size. Original author's note: with 64 samples there is almost a 10%
# chance of having a violation; the batch must be large enough that each one
# has a decent chance of containing a few positive samples.
sizeBatch = 64
numEpochs = 100
# Per-class loss weights passed to model.fit(class_weight=...).
weights = {0: 0.5, 1: 50}
learningRate = 0.001
evalMetrics = [tf.keras.metrics.TruePositives(name='tp'),
               tf.keras.metrics.FalsePositives(name='fp'),
               tf.keras.metrics.TrueNegatives(name='tn'),
               tf.keras.metrics.FalseNegatives(name='fn'),
               tf.keras.metrics.BinaryAccuracy(name='accuracy'),
               tf.keras.metrics.Precision(name='precision'),
               tf.keras.metrics.Recall(name='recall'),
               tf.keras.metrics.AUC(name='auc')]


# Checkpoints are stored as models/model_<epoch>.pkl.
os.makedirs('models/', exist_ok=True)

models = os.listdir('models/')
# Epoch numbers of the checkpoints found on disk. Files that do not follow
# the model_<epoch>.pkl pattern (e.g. editor/notebook artifacts) are ignored.
savedEpochs = [int(name[len('model_'):-len('.pkl')]) for name in models
               if name.startswith('model_') and name.endswith('.pkl')
               and name[len('model_'):-len('.pkl')].isdigit()]
lastRunEpoch = 0
inputSize = (22, 33, 33)
model = None
trainResultDF = pd.DataFrame()
if savedEpochs:
  # Resume from the numerically highest epoch. A plain string sort would rank
  # 'model_9.pkl' after 'model_10.pkl' and resume from the wrong checkpoint.
  lastRunEpoch = max(savedEpochs)
  # NOTE: pickle can execute arbitrary code on load; only load checkpoints
  # that were written by this notebook.
  with open('models/model_'+str(lastRunEpoch)+'.pkl', 'rb') as checkpointFile:
    model = pickle.load(checkpointFile)
  with open('trainResultDF.pkl', 'rb') as historyFile:
    trainResultDF = pickle.load(historyFile)
else:
  model = makeCNNModel(evalMetrics, learningRate, inputSize)

# Train design by design, one fit() call per pickle, and checkpoint the model
# and the accumulated history after every epoch so a run can be resumed.
# NOTE(review): on a fresh start (lastRunEpoch == 0) this range covers epochs
# 1..numEpochs-1, i.e. one epoch fewer than numEpochs -- confirm intended.
for epoch in range(lastRunEpoch+1, numEpochs):
  random.shuffle(allPkls)
  for pkl in allPkls:
    trainDf = pd.read_pickle(pkl, compression='zip')
    trainDf = trainDf.reset_index(drop=True)
    # NOTE(review): the 20% validation split is re-drawn on every visit, so
    # rows used for validation in one epoch can be trained on in another.
    valDf = trainDf.sample(frac=0.2)
    trainDf = trainDf.drop(valDf.index)

    # The last column holds the label; the remaining columns are reshaped
    # into one sample of shape inputSize per row.
    labels = trainDf.pop(trainDf.columns.values[-1])
    valLabels = valDf.pop(valDf.columns.values[-1])
    trainHyperImages = np.array(trainDf).reshape((len(trainDf),) + tuple(inputSize))
    valHyperImages = np.array(valDf).reshape((len(valDf),) + tuple(inputSize))
    print('Epoch: ',epoch,' Training with:', pkl)
    train_history = model.fit(x=trainHyperImages,
                             y=labels,
                             verbose=2, #0 = silent, 1 = progress bar, 2 = one line per epoch
                             batch_size=sizeBatch,
                             validation_data=(valHyperImages, valLabels),
                             class_weight=weights)
    historyDf = pd.DataFrame(train_history.history)
    historyDf['epoch'] = epoch
    # File names look like 'cts_<design>.pkl'; strip the prefix and extension.
    historyDf['design'] = os.path.splitext(os.path.basename(pkl))[0][len('cts_'):]
    trainResultDF = pd.concat([trainResultDF, historyDf])
  # Close the files explicitly so each checkpoint is fully written to disk
  # before the next epoch starts.
  with open('models/model_'+str(epoch)+'.pkl', 'wb') as checkpointFile:
    pickle.dump(model, checkpointFile)
  with open('trainResultDF.pkl', 'wb') as historyFile:
    pickle.dump(trainResultDF, historyFile)

# Save Model

In [None]:
# Persist the current model; the context manager guarantees the file is
# flushed and closed once the dump finishes.
with open('model.pkl', 'wb') as modelFile:
  pickle.dump(model, modelFile)

# Load Model

In [None]:
# Load a previously saved model checkpoint.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open('exp9Models/model_4.pkl', 'rb') as modelFile:
  model = pickle.load(modelFile)

In [None]:
# Load the saved training history and show its (rows, columns) shape.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open('EXP9trainResultDF.pkl', 'rb') as historyFile:
  trainResultDF = pickle.load(historyFile)
trainResultDF.shape
# plt.plot(train_history.history['loss'][0:50])
# plt.plot(train_history.history['val_loss'][0:50])

In [None]:
# For every epoch between the smallest and largest recorded one, print the
# training and validation losses summed over all history rows of that epoch.
firstEpoch = min(trainResultDF['epoch'])
lastEpoch = max(trainResultDF['epoch'])
for epoch in range(firstEpoch, lastEpoch + 1):
  epochRows = trainResultDF.loc[trainResultDF['epoch'] == epoch]
  loss = sum(epochRows['loss'])
  valLoss = sum(epochRows['val_loss'])
  print('epoch:', epoch, ' loss: ', loss, ' valLoss: ', valLoss)

In [None]:
# Show the first rows of the training history. head is a method, so it must
# be called; the bare attribute only displays the bound-method object.
trainResultDF.head()

In [None]:
# Plot training vs. validation loss across epochs for a single design.
design = 'jpeg_79'
designRows = trainResultDF.loc[trainResultDF['design'] == design]
sortedDF = designRows.sort_values(by=['epoch'])
ytrain = list(sortedDF['loss'])
yval = list(sortedDF['val_loss'])
for curve, curveName in ((ytrain, "ytrain"), (yval, "yval")):
  plt.plot(curve, label = curveName)
plt.legend()
plt.title(design)
plt.show()

# Compress All CSVs

# Evaluate

In [None]:
def calculate_test_metrics(model, results):
  """Map evaluation results to metric names and add F-score and MCC.

  Args:
    model: Object exposing ``metrics_names``; the names are paired
      positionally with ``results``.
    results: Metric values in the same order as ``model.metrics_names``.
      Must include 'precision', 'recall', 'tp', 'fp', 'tn' and 'fn'.

  Returns:
    A dict of name -> value. 'F-score' is added only when
    precision + recall is non-zero, and 'MCC' only when its denominator
    is non-zero.

  Raises:
    KeyError: If one of the required metric names is missing.
  """
  metrics = dict(zip(model.metrics_names, results))

  precision, recall = metrics['precision'], metrics['recall']
  if precision + recall != 0:
    # Harmonic mean of precision and recall.
    metrics['F-score'] = 2 * ((precision * recall)/(precision + recall))

  tp, fp, tn, fn = (metrics[key] for key in ('tp', 'fp', 'tn', 'fn'))
  denominator = math.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
  if denominator != 0:
    # Matthews correlation coefficient.
    metrics['MCC'] = ((tp * tn) - (fp * fn))/denominator
  return metrics

In [None]:
# Evaluate the model on every pickle of every circuit under /data/CSVWhole/
# and collect one row of metrics per pickle.
sizeBatch = 64 # is important to ensure that each batch has a decent chance of containing a few positive samples
# testPath = '/data/CSVWhole/jpeg/'
# testPkls = [testPath+x for x in  os.listdir(testPath)]
# The result table is created once, before the loops: creating it inside the
# circuit loop discarded every circuit except the last one.
resultDF = pd.DataFrame()
for circuit in os.listdir('/data/CSVWhole/'):
  circuitDir = '/data/CSVWhole/'+circuit+'/'
  # i counts the pickles within the current circuit (used for logging only).
  for i, pklName in enumerate(os.listdir('/data/CSVWhole/'+circuit)):
    pkl = circuitDir+pklName
    print(circuit, i, pkl)
    testDF = pd.read_pickle(pkl, compression='zip')
    testDF = testDF.sample(frac=1).reset_index(drop=True) #Shuffle all rows
    # The last column holds the label; the rest is reshaped into samples.
    testLabels = testDF.pop(testDF.columns.values[-1])
    testHyperImages = np.array(testDF).reshape(len(testDF),22,33,33)
    baseline_results = model.evaluate(x=testHyperImages,
                                      y=testLabels,
                                      batch_size=sizeBatch)
    test_metrics = calculate_test_metrics(model, baseline_results)
    metricsRow = pd.DataFrame(test_metrics, index=[i])
    metricsRow['design'] = circuit
    resultDF = pd.concat([resultDF, metricsRow], ignore_index=True)
resultDF

# Benchmark Info

In [None]:
def numDRVs(file):
  """Count design-rule violations listed in a report file.

  Args:
    file: Path of the report to read.

  Returns:
    ``[shortViol, totalViol]`` where ``shortViol`` is the number of lines
    containing 'Metal Short' and ``totalViol`` is the integer found in the
    sixth space-separated field of the last line containing
    'Total Violations' (0 when there is no such line).

  Raises:
    IndexError, ValueError: If a 'Total Violations' line does not have an
      integer in its sixth space-separated field.
  """
  shortViol = 0
  totalViol = 0
  # Stream the file line by line and close it deterministically.
  with open(file, 'r') as report:
    for line in report:
      if 'Metal Short' in line:
        shortViol += 1
      if 'Total Violations' in line:
        totalViol = int(line.split(' ')[5])
  return [shortViol, totalViol]

def getDefInfo(file):
  """Read section counts from a DEF-like text file.

  For each line whose first space-separated token is one of COMPONENTS,
  NETS, SPECIALNETS, PINS or BLOCKAGES, the second token is parsed as the
  count for that section.

  Args:
    file: Path of the file to read.

  Returns:
    Dict mapping each of the five section names to its count; sections that
    never appear stay at 0.

  Raises:
    IndexError, ValueError: If a matching line has no integer second token.
  """
  info = {x:0 for x in ['COMPONENTS', 'NETS', 'SPECIALNETS', 'PINS', 'BLOCKAGES']}
  # Stream the file line by line and close it deterministically.
  with open(file, 'r') as defFile:
    for line in defFile:
      tokens = line.split(' ')
      if tokens[0] in info:
        info[tokens[0]] = int(tokens[1])
  return info

def getRuntime(file):
  """Read the per-stage runtimes from a runtime log.

  The first space-separated token of each non-blank line is parsed as an
  integer; the first three values are assigned, in order, to the 'GR',
  'IDR' and 'FDR' stages.

  Args:
    file: Path of the runtime file to read.

  Returns:
    Dict with keys 'GR', 'IDR' and 'FDR'. A stage with no corresponding
    line keeps the sentinel value -1 (previously a short file raised
    IndexError, which made the -1 defaults unreachable).

  Raises:
    ValueError: If a non-blank line does not start with an integer.
  """
  stages = ('GR', 'IDR', 'FDR')
  runtime = {x:-1 for x in stages}
  # Blank lines (e.g. a trailing newline at end of file) are skipped.
  with open(file, "r") as runtimeFile:
    val = [int(line.split(' ')[0]) for line in runtimeFile if line.strip()]
  # zip stops at the shorter sequence, so missing stages stay at -1.
  for stage, value in zip(stages, val):
    runtime[stage] = value
  return runtime


# Build one table row per routed design found under the benchmark directory
# and save the table to benchmark.pkl.
benchmarkPath = '/home/sheiny/workspace/Benchmarks/RoutedOpenCores/'
# Benchmarks left out on purpose:
#   bp  - pad problem (too congested at the bottom left)
#   gcd - too small
# Filtering by name (instead of list.remove) does not fail when one of these
# benchmarks is absent from the directory.
excludedCircuits = {'bp', 'gcd'}
allCircuits = [benchmarkPath+circuit+'/base/' for circuit in os.listdir(benchmarkPath)
               if circuit not in excludedCircuits
               and os.path.exists(benchmarkPath+circuit+'/base/')]

# Rows are collected in a list and turned into a DataFrame once; growing a
# DataFrame with pd.concat inside the loop copies it on every iteration.
benchmarkRows = []
for circuit in allCircuits:
  for fileName in os.listdir(circuit):
    if 'Runtime' not in fileName:
      continue
    # Runtime files are named '<prefix>_<design>Runtime...'; the design name
    # is the text between the first '_' and 'Runtime'.
    design = fileName[fileName.find('_')+1:fileName.find('Runtime')]

    IDRShort, IDRTotal = numDRVs(circuit+'cts_'+design+'FirstDR.rpt')
    FDRShort, FDRTotal = numDRVs(circuit+'cts_'+design+'FinalDR.rpt')

    info = getDefInfo(circuit+'cts_'+design+'.def')
    drvs = {'IDRShort':IDRShort, 'IDRTotal':IDRTotal, 'FDRShort':FDRShort, 'FDRTotal':FDRTotal}
    runtime = getRuntime(circuit+'cts_'+design+'Runtime.out')
    dfVals = {'Design':design}
    dfVals.update(info)
    dfVals.update(drvs)
    dfVals.update(runtime)
    benchmarkRows.append(dfVals)
df = pd.DataFrame(benchmarkRows)
df.to_pickle('benchmark.pkl', compression='zip')

In [None]:
# Show the benchmark rows of one circuit, ordered by design name.
# TODO: probably use the ratio instead (IDRShort/IDRTotal)
isJpegDesign = df['Design'].str.contains("jpeg")
result = df.loc[isJpegDesign].sort_values(by=['Design'])
result

# Backup

In [None]:
# scaler = sklearn.preprocessing.StandardScaler()
# trainHyperImages = scaler.fit_transform(trainHyperImages)
# valHyperImages = scaler.transform(valHyperImages)

# scaler = sklearn.preprocessing.StandardScaler()
# labels = df[33*33*22]
# df[0:33*33*22-1] = scaler.fit_transform(df[0:33*33*22-1]).round(decimals=2)
# df[33*33*22] = labels

In [None]:
# df = df.sample(frac=1).reset_index(drop=True) #Shuffle all rows
# dfVal = df.sample(frac=0.2)
# df = df.drop(dfVal.index)

# labels = df.pop(df.columns.values[-1])
# valLabels = dfVal.pop(dfVal.columns.values[-1])
# trainHyperImages = np.array(df).reshape(len(df),22,33,33)
# valHyperImages = np.array(dfVal).reshape(len(dfVal),22,33,33)