# Imports

In [1]:
import json
import math
import matplotlib
import numpy as np
import pandas as pd
import pickle
import seaborn
import sklearn
import sklearn.model_selection
import sklearn.preprocessing
import tensorflow as tf
import random
import shutil
from keras.callbacks import CSVLogger
import os
from enum import Enum
import imblearn
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
import dask.dataframe as dd

2022-11-18 11:13:33.417530: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-18 11:13:34.004890: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-18 11:13:35.065209: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-11-18 11:13:35.065397: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or 

# Generate Train and Test CSVs

In [None]:
# If this does not work now, it is probably because the dtype is not float.
# A possible workaround is to save and load the data as float, but cast the labels to integers.

In [3]:
# ---- Configuration ----
# Root directory with one sub-directory per routed circuit.
# NOTE(review): absolute, machine-specific path - adjust for your environment.
circuitsDir = '/home/sheiny/workspace/RoutedDesigns/'
neighborDistance = 1
testSize = 0.2 #20%
useRandomOversample = False
memoryTarget="128MB" # dask block size; actual memory use may be somewhat higher than this
splitSeed = 42 # fixed seed so the train/test circuit split is reproducible

# Start from an empty output directory.
# WARNING: any previous contents of data/ are deleted.
if os.path.exists('data/'):
  shutil.rmtree('data/')
os.mkdir('data/')

typesOfDRVs = ["AdjacentCutSpacing", "SameLayerCutSpacing", "EndOfLine", "FloatingPatch", "MinArea", "MinWidth",
  "NonSuficientMetalOverlap", "CutShort", "MetalShort", "OutOfDieShort", "CornerSpacing", "ParallelRunLength"]
SelectedDRVTypes = ["CutShort", "MetalShort"]
label_name = "HasDetailedRoutingViolation"

# Collect one path prefix "<circuitsDir>/<circuit>/base/<design>" per design that
# has a "<design>_<neighborDistance>_viol.csv" file. Matching the full suffix
# (instead of any name containing '_viol.csv') keeps each design from being
# listed more than once and keeps design names containing '_' intact.
violSuffix = '_'+str(neighborDistance)+'_viol.csv'
circuits = []
# sorted() makes the order (and therefore the seeded split) independent of the
# arbitrary order returned by os.listdir.
for circuit in sorted(os.listdir(circuitsDir)):
  if circuit == 'nangate45' or not os.path.isdir(os.path.join(circuitsDir, circuit)):
    continue
  baseDir = os.path.join(circuitsDir, circuit, 'base')
  for file in sorted(os.listdir(baseDir)):
    if file.endswith(violSuffix):
      circuits.append(os.path.join(baseDir, file[:-len(violSuffix)]))

# Hold out int(testSize * N) circuits for testing; the rest are used for training.
splitRng = random.Random(splitSeed)
testIdx = set(splitRng.sample(range(len(circuits)), int(testSize*len(circuits))))
testCircuits = [n for i, n in enumerate(circuits) if i in testIdx]
circuits = [n for i, n in enumerate(circuits) if i not in testIdx]

# Record which circuits ended up in each split.
with open('data/CSVInfo.txt', 'w') as fp:
  fp.write('Train Circuits:\n')
  for circuit in circuits:
    fp.write(circuit+'\n')
  fp.write('\nTest Circuits:\n')
  for circuit in testCircuits:
    fp.write(circuit+'\n')

In [4]:
# Build data/train.csv from the per-circuit CSVs of the training circuits.
# NOTE(review): the original comment here read "IF CORE ABOVE FAILS"; its intent is
# unclear. This cell uses circuits, typesOfDRVs, label_name, memoryTarget,
# neighborDistance and useRandomOversample from the configuration cell.

def _load_train_part(circuit, kind, sampleFrac=None):
  """Lazily read '<circuit>_<neighborDistance>_<kind>.csv' as a dask DataFrame.

  All columns are parsed as float32. If sampleFrac is given, that fraction of
  rows is sampled without replacement. The DRV columns and the label column are
  then cast to bool.
  """
  path = circuit+'_'+str(neighborDistance)+'_'+kind+'.csv'
  frame = dd.read_csv(path, dtype=np.float32, blocksize=memoryTarget)
  if sampleFrac is not None:
    frame = frame.sample(frac=sampleFrac, replace=False)
  for col in typesOfDRVs + [label_name]:
    frame[col] = frame[col].astype(bool)
  return frame

# NOTE(review): despite its name, useRandomOversample keeps only 10% of the
# non-violating rows (i.e. it subsamples them) - confirm this is intended.
nonViolFrac = 0.1 if useRandomOversample else None

trainDFs = []
for i, circuit in enumerate(circuits):
  if i % 10 == 0:
    print('reading: ',i,' of ', len(circuits), ' circuits.')
  trainDFs.append(_load_train_part(circuit, 'nonViol', nonViolFrac))
  trainDFs.append(_load_train_part(circuit, 'surround'))
  trainDFs.append(_load_train_part(circuit, 'viol'))

print('Concatenating data frames.')
df = dd.concat(trainDFs, ignore_index=True)
# del trainDFs #save some memory

print('Writing CSVs.')
df.to_csv('data/train.csv', index=False, single_file=True)

reading:  0  of  178  circuits.
reading:  10  of  178  circuits.
reading:  20  of  178  circuits.
reading:  30  of  178  circuits.
reading:  40  of  178  circuits.
reading:  50  of  178  circuits.
reading:  60  of  178  circuits.
reading:  70  of  178  circuits.
reading:  80  of  178  circuits.
reading:  90  of  178  circuits.
reading:  100  of  178  circuits.
reading:  110  of  178  circuits.
reading:  120  of  178  circuits.
reading:  130  of  178  circuits.
reading:  140  of  178  circuits.
reading:  150  of  178  circuits.
reading:  160  of  178  circuits.
reading:  170  of  178  circuits.
Concatenating data frames.
Writing CSVs.


TypeError: NDFrame.to_csv() got an unexpected keyword argument 'dtype'

In [None]:
# Write scaler.
# Post-process data/train.csv: rebuild the label from the selected DRV types,
# drop the helper columns, write data/train2.csv, then fit a StandardScaler on
# the feature columns and pickle it to data/scaler.pkl.
df = dd.read_csv('data/train.csv', blocksize=memoryTarget)
df = df.drop(columns=["NodeID"])

# Reset the label, then set it wherever any of the selected DRV types is present.
print('Applying DRV filter.')
df[label_name] = False
for drv in SelectedDRVTypes:
  df[label_name] = df[label_name] | df[drv]

# Drop all DRV columns because they are no longer necessary.
print('Dropping unnecessary DRV collumns.')
df = df.drop(columns=typesOfDRVs)

# single_file=True writes one CSV file; without it dask creates a directory of
# part files at this path (the other CSV outputs in this notebook use it too).
print('Writing CSVs.')
df.to_csv('data/train2.csv', index=False, single_file=True)

print('Calculating mean and stdv.')
scaler = sklearn.preprocessing.StandardScaler()
# Fit on the features only: the training cell pops the label column before
# calling scaler.transform, so the scaler must not see the label either.
feature_df = df.drop(columns=[label_name])
# Fit one partition at a time so the full dataset is never held in memory at once.
for part_idx in range(feature_df.npartitions):
  chunk = feature_df.get_partition(part_idx).compute().to_numpy()
  if len(chunk) > 0: # partial_fit rejects empty batches
    scaler.partial_fit(chunk)
del df, feature_df #save some memory

with open('data/scaler.pkl','wb') as scaler_file:
  pickle.dump(scaler, scaler_file)

In [None]:
# Build data/test.csv from the per-circuit CSVs of the held-out test circuits.
# NOTE(review): unlike the train cell, these reads let dask infer the dtypes
# (no dtype=np.float32) - confirm whether both should parse the CSVs the same way.
testDFs = []
for i, circuit in enumerate(testCircuits):
  if i % 10 == 0:
    print('reading: ',i,' of ', len(testCircuits), ' circuits.')
  for kind in ('nonViol', 'surround', 'viol'):
    testDFs.append(dd.read_csv(circuit+'_'+str(neighborDistance)+'_'+kind+'.csv', blocksize=memoryTarget))
print('Concatenating data frames.')
test_df = dd.concat(testDFs, ignore_index=True)
del testDFs #save some memory

print('Applying DRV filter.')
# Reset the label, then set it wherever any of the selected DRV types is present.
test_df[label_name] = False
for drv in SelectedDRVTypes:
  # Cast to bool first (as the train cell does) so the OR is a logical OR
  # whatever numeric dtype was inferred for the DRV column.
  test_df[label_name] = test_df[label_name] | test_df[drv].astype(bool)

print('Dropping unnecessary DRV collumns.')
# Drop all DRV columns because they are no longer necessary.
test_df = test_df.drop(columns=typesOfDRVs)

print('Writing CSVs.')
test_df.to_csv('data/test.csv', index=False, single_file=True)
del test_df

# Helper Functions

In [None]:
def print_positive_ratio(train_labels):
    """Print the number of examples and the share of positive (label 1) examples.

    Args:
        train_labels: 1-D array-like of 0/1 labels accepted by ``np.bincount``.

    Raises:
        ValueError: if the labels contain values greater than 1.
    """
    # minlength=2 keeps the unpacking valid when no positive label is present
    # (np.bincount would otherwise return a single count).
    neg, pos = (int(count) for count in np.bincount(train_labels, minlength=2))
    total = neg + pos
    # Avoid a division by zero for an empty label array.
    ratio = 100 * pos / total if total else 0.0
    print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(total, pos, ratio))

# Calculate weights for the classes.
# Scaling by total/2 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
def calculate_class_weights(train_labels):
    """Compute balanced class weights for binary 0/1 labels.

    Args:
        train_labels: 1-D array-like of 0/1 labels accepted by ``np.bincount``.

    Returns:
        Tuple ``(class_weight, neg, pos)``: ``class_weight`` maps 0 and 1 to
        ``total / (2 * count)`` for that class, and ``neg``/``pos`` are the
        numbers of negative and positive examples.

    Raises:
        ValueError: if either class has no examples (its weight would be
            infinite) or the labels contain values greater than 1.
    """
    # minlength=2 guarantees two counts even when label 1 never occurs.
    neg, pos = np.bincount(train_labels, minlength=2)
    if neg == 0 or pos == 0:
        raise ValueError(
            'Both classes must be present to compute class weights '
            '(negatives: {}, positives: {}).'.format(neg, pos))
    total = neg + pos
    weight_for_0 = (1 / neg)*(total)/2.0
    weight_for_1 = (1 / pos)*(total)/2.0
    class_weight = {0: weight_for_0, 1: weight_for_1}
    print('Weight for class 0: {:.2f}'.format(weight_for_0))
    print('Weight for class 1: {:.2f}'.format(weight_for_1))
    return class_weight, neg, pos

def oversample(train_array, train_labels):
    """Resample features and labels with a default ``RandomOverSampler``.

    Returns the resampled ``(features, labels)`` pair produced by
    ``fit_resample``.
    """
    sampler = RandomOverSampler()
    resampled_array, resampled_labels = sampler.fit_resample(train_array, train_labels)
    return resampled_array, resampled_labels

def undersample(train_array, train_labels):
    """Resample features and labels with a default ``RandomUnderSampler``.

    Returns the resampled ``(features, labels)`` pair produced by
    ``fit_resample``.
    """
    sampler = RandomUnderSampler()
    resampled_array, resampled_labels = sampler.fit_resample(train_array, train_labels)
    return resampled_array, resampled_labels

########## Learning Model ##########
def make_model(evalMetrics, dropOut, learningRate, inputSize, numNodes, numLayers, output_bias=None):
    """Build and compile a fully connected binary classifier.

    The network is an input layer, ``numLayers`` repetitions of
    (Dense(numNodes, relu) -> Dropout(dropOut)), and a single sigmoid output unit.
    It is compiled with Adam and binary cross-entropy.

    Args:
        evalMetrics: metrics passed to ``model.compile``.
        dropOut: rate given to every Dropout layer.
        learningRate: learning rate of the Adam optimizer.
        inputSize: value passed as ``shape`` to the input layer.
        numNodes: number of units in each hidden Dense layer.
        numLayers: number of hidden Dense/Dropout pairs.
        output_bias: optional constant used to initialise the output layer's bias.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    keras_layers = tf.keras.layers
    # Wrap the constant only when one was supplied; otherwise None is passed through.
    bias_init = None if output_bias is None else tf.keras.initializers.Constant(output_bias)

    model = tf.keras.Sequential()
    model.add(keras_layers.Input(shape=inputSize))
    for _ in range(numLayers):
        model.add(keras_layers.Dense(numNodes, activation='relu'))
        model.add(keras_layers.Dropout(dropOut))
    model.add(keras_layers.Dense(1, activation='sigmoid', bias_initializer=bias_init))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learningRate),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=evalMetrics,
    )
    return model

########## Test and check Performance ##########
def calculate_test_metrics(model, results):
    """Pair metric names with their values and add the derived F-score and MCC.

    Args:
        model: object whose ``metrics_names`` lists the metric names.
        results: metric values in the same order as ``model.metrics_names``.

    Returns:
        Dict mapping each metric name to its value. 'F-score' is added when
        precision + recall is non-zero; 'MCC' is added when its denominator is
        non-zero.

    Raises:
        KeyError: if 'precision', 'recall', 'tp', 'fp', 'fn' or 'tn' is missing.
    """
    metrics = dict(zip(model.metrics_names, results))

    precision = metrics['precision']
    recall = metrics['recall']
    if precision + recall != 0:
        metrics['F-score'] = 2 * ((precision * recall)/(precision + recall))

    tp, fp, fn, tn = (metrics[key] for key in ('tp', 'fp', 'fn', 'tn'))
    denominator = math.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
    if denominator != 0:
        metrics['MCC'] = ((tp * tn) - (fp * fn))/denominator
    return metrics

# Learning Model Selection

# Training

In [None]:
# Load the data set, split it into train/validation parts and scale the features.
# NOTE(review): this currently loads data/test.csv as the training input (the
# train.csv line is commented out) - presumably a quick-iteration shortcut; confirm.
df = pd.read_csv('data/test.csv')
# df = pd.read_csv('data/train.csv')
# Remove NodeIDs (debug info)
df = df.drop(columns=["NodeID"])

# READ scaler
# Transform

# Training hyper-parameters.
batch_size = 32 # is important to ensure that each batch has a decent chance of containing a few positive samples
epochs = 10
learningRate = 0.001 #Eh?Predictor=0.05, default=0.001
dropOut = 0.05 #Eh?Predictor=0.05
evalMetrics = [tf.keras.metrics.TruePositives(name='tp'),
               tf.keras.metrics.FalsePositives(name='fp'),
               tf.keras.metrics.TrueNegatives(name='tn'),
               tf.keras.metrics.FalseNegatives(name='fn'),
               tf.keras.metrics.BinaryAccuracy(name='accuracy'),
               tf.keras.metrics.Precision(name='precision'),
               tf.keras.metrics.Recall(name='recall'),
               tf.keras.metrics.AUC(name='auc')]

# Split 80/20 (train 80%, validation 20%).
# NOTE(review): no random_state is set, so the split differs between runs.
train_df, val_df = sklearn.model_selection.train_test_split(df, test_size=0.2)

# Build np arrays of labels and features.
train_labels = np.array(train_df.pop("HasDetailedRoutingViolation"))
val_labels = np.array(val_df.pop("HasDetailedRoutingViolation"))

print_positive_ratio(train_labels)

train_array = np.array(train_df)
val_array = np.array(val_df)

# Save some memory
del train_df
del val_df

# Load the fitted scaler; the context manager closes the file handle afterwards.
# NOTE: pickle.load executes arbitrary code - only load files produced by this notebook.
with open('data/scaler.pkl','rb') as scaler_file:
  scaler = pickle.load(scaler_file)
train_array = scaler.transform(train_array)
val_array = scaler.transform(val_array)

# Create Model


# Train




# Test

In [None]:
# Read CSVs
# Drop node IDS
# Scale