# Imports

In [None]:
import json
import math
import matplotlib
import numpy as np
import pandas as pd
import pickle
import seaborn
import sklearn
import sklearn.model_selection
import sklearn.preprocessing
import tensorflow as tf
import random
import shutil
from keras.callbacks import CSVLogger
import os
from enum import Enum
import imblearn
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

# Generate Train and Test CSVs

In [None]:
# ---- Configuration -------------------------------------------------------
# Root directory that holds one sub-directory per routed circuit.
circuitsDir = '/home/sheiny/workspace/RoutedDesigns/'
# NOTE(review): not referenced by the visible code; the CSV suffixes used when
# reading the circuits hard-code "_1_" -- confirm whether they should follow it.
neighborDistance = 1
testSize = 0.2 #20%
useRandomOversample = False

# Always start from an empty output directory so appended CSVs never mix runs.
if os.path.exists('data/'):
  shutil.rmtree('data/')
os.mkdir('data/')

typesOfDRVs = ["AdjacentCutSpacing", "SameLayerCutSpacing", "EndOfLine", "FloatingPatch", "MinArea", "MinWidth",
  "NonSuficientMetalOverlap", "CutShort", "MetalShort", "OutOfDieShort", "CornerSpacing", "ParallelRunLength"]
SelectedDRVTypes = ["CutShort", "MetalShort"]
label_name = "HasDetailedRoutingViolation"

# ---- Circuit discovery ---------------------------------------------------
# Each entry is a path prefix "<circuitsDir>/<circuit>/base/<name>", where
# <name> is the part of a "*_viol.csv" file name before its first underscore.
# os.path.join keeps the prefix valid whether or not circuitsDir ends in '/'.
circuits = []
for circuit in os.listdir(circuitsDir):
  circuitPath = os.path.join(circuitsDir, circuit)
  if circuit == 'nangate45' or not os.path.isdir(circuitPath):
    continue
  basePath = os.path.join(circuitPath, 'base')
  for file in os.listdir(basePath):
    if '_viol.csv' in file:
      prefix = os.path.join(basePath, file.split('_')[0])
      # Several matching files can share one prefix; keep it once so the same
      # circuit can never end up in both the train and the test split.
      if prefix not in circuits:
        circuits.append(prefix)

# ---- Train / test split (by circuit, not by row) -------------------------
testIdx = set(random.sample(range(len(circuits)), int(testSize*len(circuits))))
testCircuits = [circuits[x] for x in testIdx]
circuits = [n for i, n in enumerate(circuits) if i not in testIdx]

# Record which circuits went where; the file is closed by the with-block.
with open('data/CSVInfo.txt', 'w') as fp:
  fp.write('Train Circuits:\n')
  fp.writelines(name + '\n' for name in circuits)
  fp.write('\nTest Circuits:\n')
  fp.writelines(name + '\n' for name in testCircuits)

def processCSV(csvPath):
  """Load one circuit CSV and collapse its DRV columns into a single label.

  Every column is read as float32. A boolean column
  "HasDetailedRoutingViolation" is added that is True for rows where any of
  the selected DRV columns (CutShort, MetalShort) is non-zero; missing values
  count as "no violation". All per-type DRV columns are then dropped.

  Args:
    csvPath: path of the CSV file to read. It must contain every column
      listed in typesOfDRVs, otherwise the final drop raises KeyError.

  Returns:
    A DataFrame with the remaining columns plus the boolean label column.
  """
  typesOfDRVs = ["AdjacentCutSpacing", "SameLayerCutSpacing", "EndOfLine", "FloatingPatch", "MinArea", "MinWidth",
  "NonSuficientMetalOverlap", "CutShort", "MetalShort", "OutOfDieShort", "CornerSpacing", "ParallelRunLength"]
  SelectedDRVTypes = ["CutShort", "MetalShort"]
  # Kept local so the function does not depend on a module-level variable.
  labelColumn = "HasDetailedRoutingViolation"

  # NOTE(review): float32 is applied to every column, including any ID
  # column; integers above 2**24 are not exactly representable -- confirm.
  df = pd.read_csv(csvPath, dtype=np.float32)
  # One vectorised pass instead of OR-ing column by column; NaN -> False.
  df[labelColumn] = df[SelectedDRVTypes].fillna(0).ne(0).any(axis=1)
  return df.drop(columns=typesOfDRVs)

In [None]:
# Build data/train.csv from the training circuits and fit a StandardScaler
# incrementally on the feature columns (everything except NodeID and label).
scaler = sklearn.preprocessing.StandardScaler()
saveHeader = True  # the CSV header is written once, with the first chunk
for i, circuit in enumerate(circuits):
  if i % 10 == 0:
    print('reading: ',i,' of ', len(circuits), ' circuits.')

  # The three per-circuit files are handled identically, except that the
  # non-violating rows may be down-sampled first.
  for suffix in ('_1_nonViol.csv', '_1_surround.csv', '_1_viol.csv'):
    tempDF = processCSV(circuit+suffix)
    if useRandomOversample and suffix == '_1_nonViol.csv':
      # Keep 10% of the non-violating rows (sampling without replacement).
      tempDF = tempDF.sample(frac=0.1, replace=False)
    # Empty frames are skipped for fitting but still appended to the CSV.
    if len(tempDF) != 0:
      scaler.partial_fit(tempDF.drop(columns=['NodeID', 'HasDetailedRoutingViolation']))
    tempDF.to_csv('data/train.csv', mode='a', index=False, header=saveHeader)
    saveHeader = False

# Persist the fitted scaler; the with-block guarantees the file is closed.
with open('data/scaler.pkl', 'wb') as scalerFile:
  pickle.dump(scaler, scalerFile)

In [None]:
# Append every test circuit's three CSVs to data/test.csv; only the very
# first chunk written carries the header row.
saveHeader = True
numTestCircuits = len(testCircuits)
for idx, circuit in enumerate(testCircuits):
  if idx % 10 == 0:
    print('reading: ',idx,' of ', numTestCircuits, ' circuits.')
  for suffix in ('_1_nonViol.csv', '_1_surround.csv', '_1_viol.csv'):
    chunk = processCSV(circuit+suffix)
    chunk.to_csv('data/test.csv', mode='a', index=False, header=saveHeader)
    saveHeader = False

# Helper Functions

In [None]:
def print_positive_ratio(train_labels):
    """Print the number of examples and the share of positive (label 1) ones.

    Args:
        train_labels: 1-D array of binary labels (0/1 or bool).

    Raises:
        ValueError: if the labels contain values other than 0 and 1.
    """
    # minlength=2 keeps the unpacking valid when one class is absent.
    neg, pos = np.bincount(train_labels, minlength=2)
    total = neg + pos
    # Avoid 0/0 on an empty label array.
    ratio = 100 * pos / total if total else 0.0
    print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(total, pos, ratio))

# Calculate weight for classes
# Scaling by total/2 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
def calculate_class_weights(train_labels):
    """Compute inverse-frequency class weights for binary labels.

    Each class gets weight (1 / count) * total / 2, and both weights are
    printed.

    Args:
        train_labels: 1-D array of binary labels (0/1 or bool).

    Returns:
        A tuple (class_weight, neg, pos): the {0: w0, 1: w1} dictionary and
        the number of negative and positive examples.

    Raises:
        ValueError: if either class has no examples (its weight would be
            infinite) or the labels contain values other than 0 and 1.
    """
    # minlength=2 keeps the unpacking valid when the positive class is absent.
    neg, pos = np.bincount(train_labels, minlength=2)
    if neg == 0 or pos == 0:
        raise ValueError(
            'Cannot compute class weights: need at least one example of each '
            'class (negatives: {}, positives: {}).'.format(neg, pos))
    total = neg + pos
    weight_for_0 = (1 / neg)*(total)/2.0
    weight_for_1 = (1 / pos)*(total)/2.0
    class_weight = {0: weight_for_0, 1: weight_for_1}
    print('Weight for class 0: {:.2f}'.format(weight_for_0))
    print('Weight for class 1: {:.2f}'.format(weight_for_1))
    return class_weight, neg, pos

def oversample(train_array, train_labels):
    """Resample the data with a default-configured RandomOverSampler.

    Returns the (features, labels) pair produced by fit_resample.
    """
    sampler = RandomOverSampler()
    resampled_array, resampled_labels = sampler.fit_resample(train_array, train_labels)
    return resampled_array, resampled_labels

def undersample(train_array, train_labels):
    """Resample the data with a default-configured RandomUnderSampler.

    Returns the (features, labels) pair produced by fit_resample.
    """
    sampler = RandomUnderSampler()
    resampled_array, resampled_labels = sampler.fit_resample(train_array, train_labels)
    return resampled_array, resampled_labels

########## Learning Model ##########
def make_model(evalMetrics, dropOut, learningRate, inputSize, numNodes, numLayers, output_bias=None):
    """Build and compile a fully connected binary classifier.

    The network is an Input layer of shape inputSize, followed by numLayers
    repetitions of Dense(numNodes, relu) + Dropout(dropOut), and a single
    sigmoid output unit. It is compiled with Adam(learningRate), binary
    cross-entropy loss and the given evalMetrics.

    When output_bias is given, it is used as a constant initial bias for the
    output unit; otherwise None is passed as the bias initializer.
    """
    bias_init = None if output_bias is None else tf.keras.initializers.Constant(output_bias)
    layers = tf.keras.layers

    model = tf.keras.Sequential()
    model.add(layers.Input(shape=inputSize))
    for _ in range(numLayers):
        model.add(layers.Dense(numNodes, activation='relu'))
        model.add(layers.Dropout(dropOut))
    model.add(layers.Dense(1, activation='sigmoid', bias_initializer=bias_init))

    optimizer = tf.keras.optimizers.Adam(learning_rate=learningRate)
    loss = tf.keras.losses.BinaryCrossentropy()
    model.compile(optimizer=optimizer, loss=loss, metrics=evalMetrics)
    return model

########## Test and check Performance ##########
def calculate_test_metrics(model, results):
    """Map evaluation results to metric names and add F-score and MCC.

    The values in results are paired with model.metrics_names. 'F-score' is
    added only when precision + recall is non-zero, and 'MCC' only when its
    denominator is non-zero. The metrics 'precision', 'recall', 'tp', 'fp',
    'tn' and 'fn' must be present, otherwise a KeyError is raised.
    """
    m = dict(zip(model.metrics_names, results))

    precision, recall = m['precision'], m['recall']
    if precision + recall != 0:
        m['F-score'] = 2 * ((precision * recall)/(precision + recall))

    tp, fp, fn, tn = m['tp'], m['fp'], m['fn'], m['tn']
    denominator = math.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
    if denominator != 0:
        m['MCC'] = ((tp * tn) - (fp * fn))/denominator
    return m

# Learning Model Selection

# Training

In [None]:
# NOTE(review): this cell is meant for training but loads data/test.csv; the
# train.csv variant is kept commented out below -- confirm which is intended.
df = pd.read_csv('data/test.csv')
# df = pd.read_csv('data/train.csv')
# Remove NodeIDs (debug info)
df = df.drop(columns=["NodeID"])

# ---- Hyper-parameters ----------------------------------------------------
batch_size = 32 # is important to ensure that each batch has a decent chance of containing a few positive samples
epochs = 10
learningRate = 0.001 #Eh?Predictor=0.05, default=0.001
dropOut = 0.05 #Eh?Predictor=0.05
evalMetrics = [tf.keras.metrics.TruePositives(name='tp'),
               tf.keras.metrics.FalsePositives(name='fp'),
               tf.keras.metrics.TrueNegatives(name='tn'),
               tf.keras.metrics.FalseNegatives(name='fn'),
               tf.keras.metrics.BinaryAccuracy(name='accuracy'),
               tf.keras.metrics.Precision(name='precision'),
               tf.keras.metrics.Recall(name='recall'),
               tf.keras.metrics.AUC(name='auc')]

# Split 80/20 (train 80% validation 20%)
train_df, val_df = sklearn.model_selection.train_test_split(df, test_size=0.2)

# Build np arrays of labels and features; pop() removes the label column so
# the frames hold only features afterwards.
train_labels = np.array(train_df.pop("HasDetailedRoutingViolation"))
val_labels = np.array(val_df.pop("HasDetailedRoutingViolation"))

print_positive_ratio(train_labels)

train_array = np.array(train_df)
val_array = np.array(val_df)

# Save some memory
del train_df
del val_df

# Load the previously saved scaler and apply it to both splits; the
# with-block closes the file handle.
with open('data/scaler.pkl', 'rb') as scalerFile:
    scaler = pickle.load(scalerFile)
train_array = scaler.transform(train_array)
val_array = scaler.transform(val_array)

# Create Model


# Train




# Test

In [None]:
# Read CSVs
# Drop node IDS
# Scale