# Imports

In [None]:
import math
import numpy as np
import pandas as pd
import pickle
import sklearn
import tensorflow as tf
import random
import shutil
import os
import time
import subprocess

# Learning Models

In [None]:
# Metrics passed to every model.compile() call in this notebook: the four raw
# confusion-matrix counts plus binary accuracy, precision, recall and AUC.
# The `name` of each metric becomes its key in the Keras training history.
# NOTE(review): the same metric objects are handed to every compile() call —
# confirm that sharing them between models is intended.
evalMetrics = [tf.keras.metrics.TruePositives(name='tp'),
               tf.keras.metrics.FalsePositives(name='fp'),
               tf.keras.metrics.TrueNegatives(name='tn'),
               tf.keras.metrics.FalseNegatives(name='fn'),
               tf.keras.metrics.BinaryAccuracy(name='accuracy'),
               tf.keras.metrics.Precision(name='precision'),
               tf.keras.metrics.Recall(name='recall'),
               tf.keras.metrics.AUC(name='auc')]

# FCN Model
def makeFCNModel():
  """Build and compile the single-convolution binary classifier.

  Architecture: Conv2D(64, 3x3, relu) -> MaxPooling2D(3x3) -> Flatten ->
  Dense(1, sigmoid). The model is compiled with Adam, binary cross-entropy
  and the module-level ``evalMetrics``.

  NOTE(review): with Keras' default channels_last layout the input shape
  (22, 33, 33) is read as height 22, width 33 and 33 channels — confirm this
  matches the layout of the hyper-images fed to the model.

  Returns:
    The compiled ``tf.keras.Sequential`` model.
  """
  print('Making FCN model.')
  layers = tf.keras.layers
  model = tf.keras.Sequential([
      layers.Conv2D(64, (3, 3), activation='relu', input_shape=(22, 33, 33)),
      layers.MaxPooling2D((3, 3)),
      layers.Flatten(),
      layers.Dense(1, activation='sigmoid'),
  ])
  model.compile(
      optimizer=tf.keras.optimizers.Adam(),
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=evalMetrics,
  )
  return model

# FCN Model
# This model doesn't work
# def makeNewFCNModel():
#   print('Making New FCN model.')
#   model = tf.keras.Sequential()
#   model.add(tf.keras.layers.Conv2D(64, (3, 3), activation = 'relu', input_shape = (22, 33, 33)))
#   model.add(tf.keras.layers.MaxPooling2D((3, 3)))
#   model.add(tf.keras.layers.Flatten())
#   model.add(tf.keras.layers.Dense(128, activation='relu'))
#   model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # Output layer for binary classification
#   model.compile(optimizer = tf.keras.optimizers.Adam(),
#                 loss = tf.keras.losses.BinaryCrossentropy(),
#                 metrics = evalMetrics)
#   return model

# CNN Model
def makeCNNModel():
  """Build and compile the convolution + two-hidden-layer binary classifier.

  Architecture: Conv2D(64, 3x3, relu) -> MaxPooling2D(3x3) -> Flatten ->
  Dense(128, relu) -> Dense(128, relu) -> Dense(1, sigmoid). The model is
  compiled with Adam, binary cross-entropy and the module-level
  ``evalMetrics``.

  NOTE(review): with Keras' default channels_last layout the input shape
  (22, 33, 33) is read as height 22, width 33 and 33 channels — confirm this
  matches the layout of the hyper-images fed to the model.

  Returns:
    The compiled ``tf.keras.Sequential`` model.
  """
  print('Making CNN model.')
  layers = tf.keras.layers
  model = tf.keras.Sequential([
      layers.Conv2D(64, (3, 3), activation='relu', input_shape=(22, 33, 33)),
      layers.MaxPooling2D((3, 3)),
      layers.Flatten(),
      layers.Dense(128, activation='relu'),
      layers.Dense(128, activation='relu'),
      layers.Dense(1, activation='sigmoid'),
  ])
  model.compile(
      optimizer=tf.keras.optimizers.Adam(),
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=evalMetrics,
  )
  return model

def create_fcn_model():
  """Build and compile the deeper, mostly convolutional binary classifier.

  Three Conv2D/MaxPooling2D stages (32, 64, 128 filters, 'same' padding) are
  followed by three further Conv2D layers (128, 64, 32 filters), a 1x1
  sigmoid convolution, a Flatten and a final Dense(1, sigmoid). The model is
  compiled with the 'adam' optimizer, 'binary_crossentropy' loss and the
  module-level ``evalMetrics``.

  NOTE(review): a sigmoid is applied twice (1x1 convolution, then the Dense
  output) — confirm this is intended.
  NOTE(review): with Keras' default channels_last layout the input shape
  (22, 33, 33) is read as height 22, width 33 and 33 channels — confirm.

  Returns:
    The compiled ``tf.keras.Sequential`` model.
  """
  print('Making a ChatGPT model.')
  layers = tf.keras.layers
  model = tf.keras.Sequential([
      # Convolution + pooling stages
      layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(22, 33, 33)),
      layers.MaxPooling2D((2, 2), padding='same'),
      layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
      layers.MaxPooling2D((2, 2), padding='same'),
      layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
      layers.MaxPooling2D((2, 2), padding='same'),
      # Convolutions without pooling
      layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
      layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
      layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
      # Single-filter 1x1 convolution, then collapse to one scalar output
      layers.Conv2D(1, (1, 1), activation='sigmoid'),
      layers.Flatten(),
      layers.Dense(1, activation='sigmoid'),
  ])
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=evalMetrics)
  return model

def create_cnn_model():
  """Build and compile the three-stage CNN binary classifier.

  Three Conv2D(3x3, relu)/MaxPooling2D(2x2) stages with 32, 64 and 128
  filters (default padding) feed a Flatten, a Dense(128, relu) and a
  Dense(1, sigmoid) output. The model is compiled with the 'adam' optimizer,
  'binary_crossentropy' loss and the module-level ``evalMetrics``.

  NOTE(review): with Keras' default channels_last layout the input shape
  (22, 33, 33) is read as height 22, width 33 and 33 channels — confirm.

  Returns:
    The compiled ``tf.keras.Sequential`` model.
  """
  print('Making a ChatGPT CNN model.')
  layers = tf.keras.layers
  model = tf.keras.Sequential([
      # Feature extraction
      layers.Conv2D(32, (3, 3), activation='relu', input_shape=(22, 33, 33)),
      layers.MaxPooling2D((2, 2)),
      layers.Conv2D(64, (3, 3), activation='relu'),
      layers.MaxPooling2D((2, 2)),
      layers.Conv2D(128, (3, 3), activation='relu'),
      layers.MaxPooling2D((2, 2)),
      # Classification head
      layers.Flatten(),
      layers.Dense(128, activation='relu'),
      layers.Dense(1, activation='sigmoid'),
  ])
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=evalMetrics)
  return model

# Select Training Data (requires benchmark.pkl)

In [None]:
# Root directory holding one sub-directory of per-circuit pickles per design.
csvPath = '/home/sheiny/workspace/data/CSVS/'
df = pd.read_pickle('benchmarkInfo/ufscbenchmark.pkl', compression='zip')
kFold = 4      # number of circuits grouped into each entry of testRuns
testSize = 20  # number of circuits reserved for testing

if (testSize % kFold != 0):
  print('Warning: testSize % kFold != 0')

# Keep only the circuits whose FDRVTotal is zero.
df = df.loc[df['FDRVTotal'] == 0]
df.reset_index(inplace=True, drop=True)

# Drop the circuits whose pickle is missing under csvPath.
inexistingFiles = []
for index in range(len(df)):
  design = df['Design'][index]
  density = df['Density'][index]
  if not os.path.exists(csvPath+design+'/cts_'+design+'_'+str(density)+'.pkl'):
    inexistingFiles.append(index)
df.drop(inexistingFiles, inplace=True)
df.reset_index(inplace=True, drop=True)

# After sorting, the first testSize rows (largest IDRVShort) are the test
# circuits and every remaining row is a training circuit.
df.sort_values('IDRVShort', ascending=False, inplace=True, ignore_index=True)
totalSize = len(df)
testCircuits = [(df['Design'][x], df['Density'][x]) for x in range(testSize)]

# The shuffled test set is persisted so that later executions reuse the exact
# same order instead of reshuffling.
if not os.path.exists('testCircuits.pkl'):
  print('creating test set')
  random.shuffle(testCircuits)
  with open('testCircuits.pkl', 'wb') as testFile:
    pickle.dump(testCircuits, testFile)
else:
  print('found existing test set, loading')
  with open('testCircuits.pkl', 'rb') as testFile:
    testCircuits = pickle.load(testFile)

# Consecutive chunks of kFold test circuits.
testRuns = [testCircuits[x:x+kFold] for x in range(0, len(testCircuits), kFold)]
trainingCircuits = [(df['Design'][x], df['Density'][x]) for x in range(testSize, len(df))]
if totalSize != (len(testCircuits) + len(trainingCircuits)):
  print('Error: len(df) should be equals len(trainingCircuits) + len(testCircuits)')
trainingPkls = [csvPath+x[0]+'/cts_'+x[0]+'_'+str(x[1])+'.pkl' for x in trainingCircuits]

In [None]:
# Don't consider data augmentation: keep only the pickles whose path contains '70'.
# NOTE(review): this is a substring test on the whole path, so a '70' anywhere
# in it (e.g. inside a design name) also matches — presumably '70' is the
# density of the non-augmented circuits; confirm.
trainingPklsNoAugmentation = [x for x in trainingPkls if '70' in x]
trainingPkls = trainingPklsNoAugmentation
# Bare expression: shows the filtered list as the notebook cell output.
trainingPkls

# Compute Class Weights

# Train for K-Fold Cross Validation

In [None]:
# Hard-coded sample counts from which the class weights are derived.
# NOTE(review): presumably the number of positive / negative samples in the
# training data — confirm, and recompute if the training set changes.
pos = 7661
neg = 1782339
total = pos+neg
# Each class is weighted by total / (2 * class count), so the rarer class
# gets the larger weight.
w0 = total/(2*neg)
w1 = total/(2*pos)
weights = {0: w0, 1: w1}  # passed to fit() as class_weight inside train()
sizeBatch = 64  # passed to fit() as batch_size inside train()

def train(pklsForTraining, learningModel, modelPath, epochStart, epochEnd, trainResultDF = pd.DataFrame()):
  """Train ``learningModel`` pickle by pickle and checkpoint it after every epoch.

  For each epoch in ``range(epochStart, epochEnd)`` a copy of the pickle list
  is shuffled and, for each pickle, the frame is loaded, a random 20% of its
  rows is held out for validation, and ``fit`` is called once on the rest
  using the module-level ``sizeBatch`` and ``weights``. ``fit`` is called
  without an ``epochs`` argument, so Keras' default applies.

  In every frame the last column is used as the label, the first column
  (node ids) is discarded, and the remaining values are reshaped to
  (rows, 22, 33, 33).

  After each epoch the model is pickled to ``modelPath + 'model_<epoch>.pkl'``
  and the accumulated history to ``modelPath + 'trainResultDF.pkl'``.

  Args:
    pklsForTraining: list of paths to zip-compressed pickled DataFrames; the
      list itself is not modified.
    learningModel: compiled model exposing ``fit``.
    modelPath: path prefix (including the trailing '/') for the checkpoints.
    epochStart: first epoch number to run.
    epochEnd: epoch number at which to stop (exclusive).
    trainResultDF: history accumulated by earlier runs; new rows are
      concatenated onto a new frame, so the argument (and the shared default)
      is never modified in place.

  Returns:
    None.
  """
  pkls = pklsForTraining.copy()
  for epoch in range(epochStart, epochEnd):
    random.shuffle(pkls)
    for pkl in pkls:
      trainDf = pd.read_pickle(pkl, compression='zip')
      trainDf.reset_index(inplace=True, drop=True)
      valDf = trainDf.sample(frac=0.2)
      trainDf = trainDf.drop(valDf.index)

      # Last column holds the labels; reshape them into column vectors.
      labels = trainDf.pop(trainDf.columns.values[-1]).to_numpy()
      labels = labels.reshape((len(labels), 1))
      valLabels = valDf.pop(valDf.columns.values[-1]).to_numpy()
      valLabels = valLabels.reshape((len(valLabels), 1))

      trainDf.pop(trainDf.columns.values[0])#drop first column which contains the nodeIds
      valDf.pop(valDf.columns.values[0])#drop first column which contains the nodeIds
      trainHyperImages = np.array(trainDf).reshape(len(trainDf),22,33,33)
      valHyperImages = np.array(valDf).reshape(len(valDf),22,33,33)

      print('Epoch: ',epoch,' Training with:', pkl)
      train_history = learningModel.fit(x=trainHyperImages,
                                       y=labels,
                                       verbose=2, #0 = silent, 1 = progress bar, 2 = one line per epoch
                                       batch_size=sizeBatch,
                                       validation_data=(valHyperImages, valLabels),
                                       class_weight=weights)
      historyDf = pd.DataFrame(train_history.history)
      historyDf['epoch'] = epoch
      # Label the rows with the file name minus its 4-character 'cts_' prefix
      # and its extension. Slicing the file name rather than the whole path
      # keeps the label correct when a directory name contains a dot.
      fileName = pkl[pkl.rfind('/')+1:]
      historyDf['design'] = fileName[4:fileName.find('.')]
      trainResultDF = pd.concat([trainResultDF, historyDf])
    # Checkpoint once per epoch; the context managers close the files promptly.
    with open(modelPath+'model_'+str(epoch)+'.pkl', 'wb') as modelFile:
      pickle.dump(learningModel, modelFile)
    with open(modelPath+'trainResultDF.pkl', 'wb') as resultFile:
      pickle.dump(trainResultDF, resultFile)

In [None]:
from datetime import datetime

# Stamp the moment this cell runs (the start of training) into a text file.
file_path = 'results/start_training_time_fcn4NoAugment.txt'

# Current local time, rendered as "YYYY-MM-DD HH:MM:SS".
now = datetime.now()
date_time_str = now.strftime("%Y-%m-%d %H:%M:%S")

# Overwrite any previous stamp with the new one.
with open(file_path, 'w') as file:
    file.write(date_time_str)

In [None]:
numEpochs = 15
numTestRuns = len(testRuns)
useFCN = True
# useFCN = False
modelName = 'fcnNoAugment'
# modelName = 'fcn'
# modelName = 'cnn'

# Training driver. It resumes from the newest checkpoint found in the run's
# result directory, or starts from a fresh model when the directory is empty.
for run in range(numTestRuns):
  # The loop variable is overridden and the loop is left after one pass, so
  # only run number 4 is ever trained.
  run = 4
  modelPath = 'results/'+modelName+str(run)+'/'
  # Also creates missing parent directories; a no-op when the path exists.
  os.makedirs(modelPath, exist_ok=True)

  models = [x for x in os.listdir(modelPath)]
  lastRunEpoch = 0
  learningModel = None
  trainResultDF = pd.DataFrame()
  if len(models) > 0:
    if 'trainResultDF.pkl' in models:
      models.remove('trainResultDF.pkl')
    # Checkpoints are named model_<epoch>.pkl; order them by epoch number.
    models.sort(key = lambda x : int(x[x.find('_')+1:x.find('.')]))
    lastModel = models[-1]
    lastRunEpoch = int(lastModel[lastModel.find('_')+1:lastModel.find('.')])
    # NOTE: pickle.load can execute arbitrary code; only load checkpoints
    # produced by this notebook.
    with open(modelPath+'model_'+str(lastRunEpoch)+'.pkl', 'rb') as modelFile:
      learningModel = pickle.load(modelFile)
    lastRunEpoch += 1  # resume at the epoch after the last saved one
    if lastRunEpoch == numEpochs:
      continue  # every epoch of this run is already on disk
    with open(modelPath+'trainResultDF.pkl', 'rb') as resultFile:
      trainResultDF = pickle.load(resultFile)
  else:
    learningModel = makeFCNModel() if useFCN else makeCNNModel()
    # learningModel = create_fcn_model() if useFCN else makeCNNModel() #FCN chat gpt

  allPkls = trainingPkls.copy()
  # print('len(allPkls)', len(allPkls))
  # allPkls += ['/home/sheiny/workspace/data/CSVS/'+y[0]+'/cts_'+y[0]+'_'+str(y[1])+'.pkl'
  #             for x in range(numTestRuns) if x != run
  #             for y in testRuns[x]]

  train(allPkls, learningModel, modelPath, lastRunEpoch, numEpochs, trainResultDF)
  break #Only run (run number 4)

In [None]:
from datetime import datetime

# Stamp the moment this cell runs (the end of training) into a text file.
file_path = 'results/end_training_time_fcn4NoAugment.txt'

# Current local time, rendered as "YYYY-MM-DD HH:MM:SS".
now = datetime.now()
date_time_str = now.strftime("%Y-%m-%d %H:%M:%S")

# Overwrite any previous stamp with the new one.
with open(file_path, 'w') as file:
    file.write(date_time_str)

# Cross Validation

In [None]:
def predict(model, pkl):
  """Evaluate ``model`` on one zip-compressed pickled DataFrame.

  The last column of the frame is used as the label, the first column (node
  ids) is discarded, and the remaining values are reshaped to
  (rows, 22, 33, 33) before being passed to ``model.evaluate``.

  Args:
    model: object exposing ``evaluate`` and ``metrics_names``.
    pkl: path to the zip-compressed pickle.

  Returns:
    Dict mapping each entry of ``model.metrics_names`` to the matching value
    returned by ``model.evaluate``.
  """
  frame = pd.read_pickle(pkl, compression='zip')
  labelColumn = frame.columns.values[-1]
  labels = frame.pop(labelColumn)
  idColumn = frame.columns.values[0]
  frame.pop(idColumn)  # node ids are not model inputs
  rows = len(frame)
  hyperImages = np.array(frame).reshape(rows, 22, 33, 33)
  scores = model.evaluate(hyperImages, labels)
  return dict(zip(model.metrics_names, scores))

def predictPkls(design, density, modelPath, pkls):
  """Evaluate one pickled model on several pickles of the same circuit.

  Args:
    design: design name recorded under the 'Design' key of every result.
    density: density recorded under the 'Density' key of every result.
    modelPath: path to the pickled model.
    pkls: iterable of paths handed one by one to ``predict``.

  Returns:
    List with one result dict per entry of ``pkls``, in the same order.
  """
  # NOTE: pickle.load can execute arbitrary code; only load model files that
  # were produced by this notebook. The context manager closes the file as
  # soon as the model is loaded.
  with open(modelPath, 'rb') as modelFile:
    model = pickle.load(modelFile)
  results = []
  for pkl in pkls:
    result = predict(model, pkl)
    result['Design'] = design
    result['Density'] = density
    results.append(result)
  return results

def getCircuitPkls(path, design, density):
  """List the pickles of one circuit, ordered by their numeric suffix.

  Looks inside ``path + design + '/'`` for file names containing
  ``'_<density>_'`` and sorts them by the integer found between the last
  underscore and the first dot of the file name.

  The sort key is taken from the file name only, so directory names that
  contain dots or underscores do not disturb the ordering.

  Args:
    path: root directory, including the trailing '/'.
    design: name of the design sub-directory.
    density: density used to select the files.

  Returns:
    List of full paths (``path + design + '/' + file name``) in ascending
    order of the numeric suffix.
  """
  designDir = path+design+'/'
  marker = '_'+str(density)+'_'
  names = [name for name in os.listdir(designDir) if marker in name]
  names.sort(key = lambda name : int(name[name.rfind('_')+1:name.find('.')]))
  return [designDir+name for name in names]

# Training Performance

In [None]:
# 0 here means that this was using first CV run, test = [('bp_multi', 82), ('swerv', 84), ('bp_multi', 87), ('bp_multi', 86)]
testPath = '/home/sheiny/workspace/data/WholeCSV/'
modelPaths = '/home/sheiny/workspace/Predictor/results/fcn4/'
outputPath = '/home/sheiny/workspace/Predictor/results/trainingPerformance_fcn.pkl'
# modelPaths = '/home/sheiny/workspace/Predictor/results/cnn0/'
# outputPath = '/home/sheiny/workspace/Predictor/results/trainingPerformance_cnn0.pkl'

# NOTE: this rebinds testRuns to a flat list of (design, density) pairs,
# replacing the list of chunks built in the data-selection cell.
testRuns = [('bp_multi', 82), ('swerv', 84), ('bp_multi', 87), ('bp_multi', 86)]

# Checkpoints are ordered by the number between the last '_' and the first '.'.
models = [x for x in os.listdir(modelPaths) if 'model' in x]
models.sort(key=lambda x: int(x[x.rfind('_')+1:x.find('.')]))

# Resume from previously saved results when they exist.
resultDf = None
if not os.path.exists(outputPath):
  print('creating resultDf')
  resultDf = pd.DataFrame()
else:
  print('loading existent resultDf')
  with open(outputPath, 'rb') as resultFile:
    resultDf = pickle.load(resultFile)

for model in models:
  modelLabel = model[:model.find('.')]  # file name without its extension
  if len(resultDf) != 0 and modelLabel in set(resultDf['Model']):
    print('skipping already infered', model)
    continue
  modelPath = modelPaths + model
  for design, density in testRuns:
    pklsToTest = getCircuitPkls(testPath, design, density)
    resultDicts = predictPkls(design, density, modelPath, pklsToTest)
    for result in resultDicts:
      result['Model'] = modelLabel
    # NOTE: this rebinds the notebook-level name df.
    df = pd.DataFrame.from_dict(resultDicts)
    resultDf = pd.concat([resultDf, df], axis=0, ignore_index=True)
  # Save after every model so an interrupted run can be resumed.
  with open(outputPath, 'wb') as resultFile:
    pickle.dump(resultDf, resultFile)

# Trained Model Experiment

# Benchmark Info (benchmark.pkl)

In [None]:
def numDRVs(file):
  """Count the violations listed in a report file.

  Args:
    file: path to the report.

  Returns:
    ``[shortViol, totalViol]``. ``shortViol`` is the number of lines that
    contain 'Metal Short'. ``totalViol`` is the integer in the sixth
    space-separated field of the last line containing 'Total Violations',
    or 0 when there is no such line.
  """
  shortViol = 0
  totalViol = 0
  # The context manager closes the report even if parsing raises.
  with open(file, 'r') as report:
    for line in report:
      if 'Metal Short' in line:
        shortViol += 1
      if 'Total Violations' in line:
        totalViol = int(line.split(' ')[5])
  return [shortViol, totalViol]

def componentCount(lines):
  """Count the non-FILL entries listed between 'COMPONENTS' and 'END' lines.

  Counting starts on the line after one whose first space-separated token is
  'COMPONENTS' and stops at a line whose first token is 'END'. Inside that
  region a line is counted when its first token is '-' and its third token
  does not contain 'FILL'.

  Args:
    lines: iterable of text lines.

  Returns:
    The number of counted lines.
  """
  total = 0
  insideSection = False
  for rawLine in lines:
    fields = rawLine.split(' ')
    head = fields[0]
    if insideSection:
      if head == 'END':
        insideSection = False
      elif head == '-' and 'FILL' not in fields[2]:
        total += 1
    # Checked last so the 'COMPONENTS' header line itself is never counted.
    if head == 'COMPONENTS':
      insideSection = True
  return total

def getDefInfo(file):
  """Collect the section counts declared in a DEF file.

  For every line whose first space-separated token is 'NETS', 'SPECIALNETS',
  'PINS' or 'BLOCKAGES', the second token is parsed as that section's count.
  Sections that never appear keep a count of 0. 'COMPONENTS' is filled in
  from ``componentCount`` on the file's lines.

  Args:
    file: path to the DEF file.

  Returns:
    Dict with the keys 'NETS', 'SPECIALNETS', 'PINS', 'BLOCKAGES' and
    'COMPONENTS'.
  """
  info = {x:0 for x in ['NETS', 'SPECIALNETS', 'PINS', 'BLOCKAGES']}
  # Read everything up front and close the file before parsing.
  with open(file, 'r') as defFile:
    lines = defFile.readlines()
  for line in lines:
    tokens = line.split(' ')
    if tokens[0] in info:
      info[tokens[0]] = int(tokens[1])
  info['COMPONENTS'] = componentCount(lines)
  return info

def getRuntime(file):
  """Read the three runtimes stored in a runtime file.

  The first space-separated token of every line is parsed as an integer; the
  first three lines supply 'RuntimeGR', 'RuntimeIDR' and 'RuntimeFDR' in that
  order.

  Args:
    file: path to the runtime file.

  Returns:
    Dict with the keys 'RuntimeGR', 'RuntimeIDR' and 'RuntimeFDR'.

  Raises:
    IndexError: if the file has fewer than three lines.
    ValueError: if a line does not start with an integer.
  """
  # The context manager closes the file even if parsing raises.
  with open(file, 'r') as runtimeFile:
    val = [int(line.split(' ')[0]) for line in runtimeFile]
  return {'RuntimeGR': val[0], 'RuntimeIDR': val[1], 'RuntimeFDR': val[2]}

def getTotalGridSize(circuit, density):
  """Run the OpenROAD binary on one circuit and parse two counts from its log.

  A copy of 'TemplateRunner.tcl' is extended with commands that read the
  circuit's FirstDR .def and .rpt files, then executed with
  'OpenCoresUFSC/openroad'. Its standard output is captured in 'log.txt' and
  the two values are taken from the last line of that log. Both temporary
  files ('run.tcl' and 'log.txt') are written to the current working
  directory and removed afterwards.

  Args:
    circuit: design name used to build the .def/.rpt paths.
    density: density used to build the .def/.rpt paths.

  Returns:
    Tuple ``(totalSize, positives)``: the 7th and 5th whitespace-separated
    fields of the last log line, as integers.
  """
  #TemplateRunner.tcl contains the library read part
  shutil.copyfile('TemplateRunner.tcl', 'run.tcl')
  basePath = '/home/sheiny/workspace/Predictor/OpenCoresUFSC/'+circuit+'/base/cts_'+circuit+'_'+str(density)+'FirstDR'
  with open('run.tcl', 'a') as script:
    script.write('read_def -continue_on_errors '+basePath+'.def\n')
    script.write('ftx::initGraphFromDef 16\n')
    script.write('ftx::readRPT '+basePath+'.rpt\n')
  # Close the log before reading it back and before deleting it.
  with open('log.txt', 'w') as log:
    subprocess.call(['OpenCoresUFSC/openroad',
                     'run.tcl',
                     '-exit'], stdout=log)
  with open('log.txt', 'r') as log:
    lastLine = log.readlines()[-1]
  os.remove('run.tcl')
  os.remove('log.txt')
  fields = lastLine.split()
  totalSize = int(fields[6])
  positives = int(fields[4])
  return totalSize, positives

# Build one row per (circuit, density) found under benchmarkPath and save the
# resulting table to outFile.
includeGridSize = False  # when True, also run getTotalGridSize for every circuit
benchmarkPath = '/home/sheiny/workspace/Benchmarks/OpenCoresUFSC/'
# benchmarkPath = '/home/sheiny/workspace/Benchmarks/CMC/'
outFile = 'benchmarkInfo/NEWufscbenchmark.pkl'
# outFile = 'benchmarkInfo/cmcbenchmark.pkl'
allCircuitsPaths = [benchmarkPath+circuit+'/base/' for circuit in os.listdir(benchmarkPath) if os.path.exists(benchmarkPath+circuit+'/base/')]
allCircuitsPaths.sort()

# Rows are collected in a list and turned into a DataFrame once at the end,
# instead of concatenating a one-row frame per circuit.
rows = []
for circuitPath in allCircuitsPaths:
  # '<circuit>_<density>' stems, taken from the cts_<stem>FinalDR.rpt file names.
  files = [x[x.find('_')+1:x.find('FinalDR.rpt')] for x in os.listdir(circuitPath) if 'FinalDR.rpt' in x]
  files.sort()

  for file in files:
    circuit = file[:file.rfind('_')]
    density = int(file[file.rfind('_')+1:])
    print('extracting: ', circuit, density)
    IDRShort, IDRTotal = numDRVs(circuitPath+'cts_'+file+'FirstDR.rpt')
    FDRShort, FDRTotal = numDRVs(circuitPath+'cts_'+file+'FinalDR.rpt')
    info = getDefInfo(circuitPath+'cts_'+file+'FinalDR.def')

    totalSize = None
    positives = None
    if (includeGridSize):
      totalSize, positives = getTotalGridSize(circuit, density)

    drvs = {'IDRVShort':IDRShort, 'IDRVTotal':IDRTotal, 'FDRVShort':FDRShort, 'FDRVTotal':FDRTotal}
    runtime = getRuntime(circuitPath+'cts_'+file+'Runtime.out')

    # Column order: identity, optional grid info, DEF counts, DRVs, runtimes.
    dfVals = {'Design':circuit, 'Density':density}
    if (includeGridSize):
      dfVals.update({'TotalSizeGrid':totalSize, 'Positives':positives})
    dfVals.update(info)
    dfVals.update(drvs)
    dfVals.update(runtime)
    rows.append(dfVals)

df = pd.DataFrame(rows)
df.to_pickle(outFile, compression='zip')

# Save Unrouted Circuits (FDRVTotal != 0)

In [None]:
# Reload the benchmark table and export the circuits whose FDRVTotal is not
# zero to a CSV file, without the index column.
df = pd.read_pickle('benchmarkInfo/NEWufscbenchmark.pkl', compression='zip')
df = df.loc[df['FDRVTotal'] != 0]
df.to_csv('UnroutedCircuits.csv', index=False)

# Compress and merge CSVs into .pkls