In [None]:
########## Imports ##########
import json
import math
import matplotlib
import numpy as np
import pandas as pd
import seaborn
import sklearn
import sklearn.model_selection
import sklearn.preprocessing
import pickle
import tensorflow as tf
import random
import shutil
from keras.callbacks import CSVLogger
import os
from enum import Enum
import imblearn
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

In [None]:
########## Hyper parameters ##########
class BalanceStrategy(Enum):
    """Available ways of handling class imbalance in the training split.

    The data-loading code in this file dispatches on these members.
    """

    # Leave the training data as it is.
    NONE = 0
    # Keep the data and compute per-class weights instead.
    WEIGHTS = 1
    # Resample the training data with RandomOverSampler.
    OVERSAMPLE = 2
    # Resample the training data with RandomUnderSampler.
    UNDERSAMPLE = 3

########## Data Preprocessing Algorithms ##########
# The features will be rescaled so that they’ll have the properties of a standard normal distribution.
# mean (μ) = 0
# standard deviation (σ) = 1
def standardize(train_array, val_array, test_array=None):
    """Standardize the splits with statistics learned from the training split.

    A ``StandardScaler`` is fitted on ``train_array`` only and then applied to
    the training array, the validation array and, when given, the test array.

    Returns:
        ``(train, val)`` when ``test_array`` is ``None``, otherwise
        ``(train, val, test)``.
    """
    fitted_scaler = sklearn.preprocessing.StandardScaler().fit(train_array)
    scaled = [fitted_scaler.transform(train_array), fitted_scaler.transform(val_array)]
    if test_array is not None:
        scaled.append(fitted_scaler.transform(test_array))
    return tuple(scaled)

def print_positive_ratio(train_labels):
    """Print the number of examples and the share of positive ones.

    Args:
        train_labels: 1-D array of binary labels (0/False = negative,
            1/True = positive) accepted by ``np.bincount``.

    Raises:
        ValueError: if the labels contain a value greater than 1.
    """
    # minlength=2 guarantees that both bins exist even when class 1 never occurs.
    counts = np.bincount(train_labels, minlength=2)
    if len(counts) > 2:
        raise ValueError(
            'Expected binary labels (0/1), found values up to {}'.format(len(counts) - 1))
    neg, pos = counts[0], counts[1]
    total = neg + pos
    # Avoid a division by zero when there are no examples at all.
    percent = 100 * pos / total if total else 0.0
    print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(total, pos, percent))

# Calculate the weight for each class.
# Scaling by total/2 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
def calculate_class_weights(df, train_labels):
    """Compute class weights for a binary label array.

    Each class receives ``(1 / count) * total / 2``, so the rarer class gets
    the larger weight. The two weights are also printed.

    Args:
        df: Not used by the computation; kept so existing callers keep working.
        train_labels: 1-D array of binary labels (0/False = negative,
            1/True = positive) accepted by ``np.bincount``.

    Returns:
        ``(class_weight, neg, pos)`` where ``class_weight`` is
        ``{0: weight_for_0, 1: weight_for_1}`` and ``neg``/``pos`` are the
        per-class example counts.

    Raises:
        ValueError: if the labels are not binary, or if either class has no
            example (its weight would be infinite).
    """
    # minlength=2 guarantees that both bins exist even when class 1 never occurs.
    counts = np.bincount(train_labels, minlength=2)
    if len(counts) > 2:
        raise ValueError(
            'Expected binary labels (0/1), found values up to {}'.format(len(counts) - 1))
    neg, pos = counts[0], counts[1]
    if neg == 0 or pos == 0:
        raise ValueError(
            'Cannot compute class weights: both classes need at least one example '
            '(negatives: {}, positives: {})'.format(neg, pos))
    total = neg + pos
    weight_for_0 = (1 / neg) * (total) / 2.0
    weight_for_1 = (1 / pos) * (total) / 2.0
    class_weight = {0: weight_for_0, 1: weight_for_1}
    print('Weight for class 0: {:.2f}'.format(weight_for_0))
    print('Weight for class 1: {:.2f}'.format(weight_for_1))
    return class_weight, neg, pos

def oversample(train_array, train_labels):
    """Resample the training data with imblearn's ``RandomOverSampler``.

    Returns:
        The resampled ``(features, labels)`` pair.
    """
    sampler = RandomOverSampler()
    resampled_features, resampled_labels = sampler.fit_resample(train_array, train_labels)
    return resampled_features, resampled_labels

def undersample(train_array, train_labels):
    """Resample the training data with imblearn's ``RandomUnderSampler``.

    Returns:
        The resampled ``(features, labels)`` pair.
    """
    sampler = RandomUnderSampler()
    resampled_features, resampled_labels = sampler.fit_resample(train_array, train_labels)
    return resampled_features, resampled_labels


########## Load Training Data ##########
def loadDataFrames(training_csvs, test_csvs, strategy, allDRVTypes):
    """Load the CSVs and turn them into scaled train/validation/test arrays.

    The binary label is the logical OR of the columns named in the
    module-level ``SelectedDRVTypes`` and is stored under the module-level
    ``label_name``; both globals must be defined before this function runs.

    Args:
        training_csvs: Paths of the CSV files merged into the pool that is
            split into training and validation data.
        test_csvs: Paths of the CSV files merged into the test set.
        strategy: ``BalanceStrategy`` member selecting how the training split
            is balanced.
        allDRVTypes: Names of every DRV column; all of them are dropped from
            the features once the label has been built.

    Returns:
        ``(train_array, val_array, test_array, train_labels, val_labels,
        test_labels, weight)``. ``weight`` is the class-weight dict when
        ``strategy`` is ``BalanceStrategy.WEIGHTS`` and ``None`` otherwise.
    """
    # Read every column as float32.
    dataframes = [pd.read_csv(csv, dtype=np.float32) for csv in training_csvs]
    test_dataframes = [pd.read_csv(test_csv, dtype=np.float32) for test_csv in test_csvs]

    # Merge all DataFrames into a single one per group.
    df = pd.concat(dataframes, ignore_index=True)
    test_df = pd.concat(test_dataframes, ignore_index=True)
    del dataframes  # save some memory
    del test_dataframes  # save some memory

    # Remove NodeIDs (debug info).
    df = df.drop(columns=["NodeID"])
    test_df = test_df.drop(columns=["NodeID"])

    # Start from an all-False label column...
    df[label_name] = False
    test_df[label_name] = False
    # ...and OR in every selected DRV column.
    for drv in SelectedDRVTypes:
        df[label_name] = df[label_name] | df[drv]
        test_df[label_name] = test_df[label_name] | test_df[drv]

    # Drop all DRV columns because they are no longer necessary.
    df = df.drop(columns=allDRVTypes)
    test_df = test_df.drop(columns=allDRVTypes)

    # Split 80/20 (train 80%, validation 20%); the test set comes from test_csvs.
    train_df, val_df = sklearn.model_selection.train_test_split(df, test_size=0.2)

    # Build np arrays of labels and features.
    train_labels = np.array(train_df.pop(label_name))
    val_labels = np.array(val_df.pop(label_name))
    test_labels = np.array(test_df.pop(label_name))
    train_array = np.array(train_df)
    val_array = np.array(val_df)
    test_array = np.array(test_df)

    # Save some memory.
    del train_df
    del test_df
    del val_df

    # Apply the selected strategy to handle unbalanced data.
    # Only the training split is rebalanced.
    weight = None
    if strategy == BalanceStrategy.OVERSAMPLE:
        train_array, train_labels = oversample(train_array, train_labels)
    elif strategy == BalanceStrategy.UNDERSAMPLE:
        train_array, train_labels = undersample(train_array, train_labels)
    elif strategy == BalanceStrategy.WEIGHTS:
        # calculate_class_weights returns (class_weight, neg, pos); keep the dict only.
        weight = calculate_class_weights(df, train_labels)
        weight = weight[0]

    print_positive_ratio(train_labels)

    del df  # save some memory

    # Scale: the scaler is fitted on the (possibly resampled) training split.
    train_array, val_array, test_array = standardize(train_array, val_array, test_array)
    return train_array, val_array, test_array, train_labels, val_labels, test_labels, weight


########## Learning Model ##########
def make_model(evalMetrics, dropOut, learningRate, inputSize, numNodes, numLayers, output_bias=None):
    """Build and compile a fully connected binary classifier.

    Architecture: input -> ``numLayers`` x (Dense(``numNodes``, relu) ->
    Dropout(``dropOut``)) -> Dense(1, sigmoid). The model is compiled with
    Adam and binary cross-entropy.

    Args:
        evalMetrics: Metrics passed to ``model.compile``.
        dropOut: Rate used by every Dropout layer.
        learningRate: Learning rate of the Adam optimizer.
        inputSize: Number of input features (an int) or a shape tuple.
        numNodes: Units in each hidden Dense layer.
        numLayers: Number of hidden Dense/Dropout pairs.
        output_bias: Optional constant used to initialize the bias of the
            output layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    # Keras documents ``shape`` as a tuple; wrap a bare feature count so the
    # call does not depend on a version that also tolerates an int.
    input_shape = (inputSize,) if isinstance(inputSize, (int, np.integer)) else inputSize

    # NOTE(review): passing bias_initializer=None is not the same as omitting
    # the argument; use Dense's documented default ('zeros') explicitly when
    # no output bias was requested. Confirm against the installed Keras.
    if output_bias is None:
        bias_initializer = 'zeros'
    else:
        bias_initializer = tf.keras.initializers.Constant(output_bias)

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=input_shape))
    for _ in range(numLayers):
        model.add(tf.keras.layers.Dense(numNodes, activation='relu'))
        model.add(tf.keras.layers.Dropout(dropOut))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid', bias_initializer=bias_initializer))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learningRate),
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=evalMetrics)
    return model


########## Test and check Performance ##########
def calculate_test_metrics(model, results):
    """Pair metric names with evaluation results and derive F-score and MCC.

    Args:
        model: Object exposing ``metrics_names``; the names must include
            'precision', 'recall', 'tp', 'fp', 'tn' and 'fn'.
        results: Values in the same order as ``model.metrics_names``.

    Returns:
        A dict mapping every metric name to its value. 'F-score' is added
        only when precision + recall is non-zero, and 'MCC' only when its
        denominator is non-zero.
    """
    metrics = dict(zip(model.metrics_names, results))

    # Harmonic mean of precision and recall.
    precision, recall = metrics['precision'], metrics['recall']
    if precision + recall != 0:
        metrics['F-score'] = 2 * ((precision * recall) / (precision + recall))

    # Matthews correlation coefficient from the confusion-matrix counts.
    tp, fp, fn, tn = (metrics[key] for key in ('tp', 'fp', 'fn', 'tn'))
    denominator = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    if denominator != 0:
        metrics['MCC'] = ((tp * tn) - (fp * fn)) / denominator
    return metrics

In [None]:
########## Hyper Parameters ##########
# Every DRV column name; all of these are dropped from the features after the label is built.
typesOfDRVs = [
    "AdjacentCutSpacing",
    "SameLayerCutSpacing",
    "EndOfLine",
    "FloatingPatch",
    "MinArea",
    "MinWidth",
    "NonSuficientMetalOverlap",
    "CutShort",
    "MetalShort",
    "OutOfDieShort",
    "CornerSpacing",
    "ParallelRunLength",
]
# DRV columns that are OR-ed together to form the binary label.
SelectedDRVTypes = ["CutShort", "MetalShort"]
# Name of the label column created from the selected DRV columns.
label_name = "HasDetailedRoutingViolation"
# Root directory that holds one sub-directory per circuit.
designs_path = '/home/sheiny/workspace/RoutedDesigns/'
# Used (as text) to pick the CSV files whose name contains '<neighborhoodSize>.csv'.
neighborhoodSize = str(0)
# Directory entries without a '.' in their name, excluding anything containing 'nangate45'.
circuits = [
    entry
    for entry in os.listdir(designs_path)
    if not ('.' in entry or 'nangate45' in entry)
]

# For every circuit, hold out one CSV for testing (the middle one in sorted
# order, the lower middle for an even count) and train on the remaining ones.
test_csv_list = []
csv_list = []
for circuit in circuits:
    base_dir = designs_path+circuit+'/base/'
    # NOTE(review): this is a substring match, so '0.csv' also matches names
    # such as '...10.csv' — confirm against the actual file naming scheme.
    files = [file for file in os.listdir(base_dir) if neighborhoodSize+'.csv' in file]
    if not files:
        # Nothing to hold out or train on; indexing an empty list would raise IndexError.
        print('No CSV for neighborhood size {} in {}; skipping'.format(neighborhoodSize, base_dir))
        continue
    csvs = sorted(files)
    middle = (len(csvs) - 1) // 2
    test_csv = base_dir+csvs[middle]
    test_csv_list.append(test_csv)
    # Keep the directory-listing order for the training files.
    csv_list.extend(base_dir+file for file in files if base_dir+file != test_csv)


# is important to ensure that each batch has a decent chance of containing a few positive samples
batch_size = 32
epochs = 10
# Eh?Predictor=0.05, default=0.001
learningRate = 0.001
# Eh?Predictor=0.05
dropOut = 0.05
# Metrics handed to model.compile; the names are the keys later read from the evaluation results.
evalMetrics = [
    metric_class(name=metric_name)
    for metric_class, metric_name in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.BinaryAccuracy, 'accuracy'),
        (tf.keras.metrics.Precision, 'precision'),
        (tf.keras.metrics.Recall, 'recall'),
        (tf.keras.metrics.AUC, 'auc'),
    )
]



# Train and evaluate one model per (balance strategy, layer width, layer count) combination.
# Each run writes its checkpoints, training log and test metrics into a directory named after the model.
for strategy in [BalanceStrategy.NONE]:
    strat = strategy.name
    for numNodes in [50]:
        #for numLayers in range(1,5):
        for numLayers in [1]:
            model_name = strat+'_'+str(numLayers)+'_'+str(numNodes)+'_Epochs'+str(epochs)
            print(model_name)

            ########## Load Data ##########
            train_array, val_array, test_array, train_labels, val_labels, test_labels, weight = loadDataFrames(csv_list, test_csv_list, strategy, typesOfDRVs)


            ########## Training ##########
            # Start from an empty output directory. The directory must be
            # recreated after removing a previous run, because the files
            # below are written into it.
            if os.path.exists(model_name):
                shutil.rmtree(model_name)
            os.mkdir(model_name)
            checkpoint_path = model_name+"/cp.ckpt"

            # Create a callback that saves the model's weights at the end of each epoch
            cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True)
            # Create a callback that saves model history at the end of each epoch
            csv_logger = CSVLogger(model_name+"/model_history_log.csv", append=True)

            # Initialize the output bias with the log-odds of the positive class.
            neg, pos = np.bincount(train_labels)
            initial_bias = np.log([pos/neg])

            inputSize = len(train_array[0])
            model = make_model(evalMetrics, dropOut, learningRate, inputSize, numNodes, numLayers, initial_bias)

            dataset = tf.data.Dataset.from_tensor_slices((train_array, train_labels))
            train_dataset = dataset.shuffle(len(train_array)).batch(batch_size)

            # NOTE(review): train_dataset is already batched; the Keras docs say
            # not to pass batch_size together with a dataset — confirm that the
            # installed version accepts it before relying on this call.
            train_history = model.fit(train_dataset,
                                      batch_size=batch_size,
                                      validation_data=(val_array, val_labels),
                                      class_weight=weight, #default class_weight = None
                                      epochs=epochs,
                                      callbacks=[cp_callback, csv_logger])


            ########## Test ##########
            baseline_results = model.evaluate(test_array,
                                              test_labels,
                                              batch_size=batch_size,
                                              verbose=0)
            test_metrics = calculate_test_metrics(model, baseline_results)
            print(test_metrics)
            with open(model_name+'/test_metrics.pkl', 'wb') as f:
                pickle.dump(test_metrics, f)

print('The END')