# Imports

In [None]:
import math

import matplotlib
import matplotlib.pyplot
import numpy as np
import pandas as pd
import seaborn
import sklearn
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
import tensorflow as tf

# Hyperparameters

In [None]:
# Fix the random seeds so that repeated runs give the same results.
tf.random.set_seed(1234)
# scikit-learn has no global random state of its own; it uses NumPy's instead.
np.random.seed(1234)

# Training hyperparameters.
batch_size = 256  # large enough that each batch has a decent chance of containing a few positive samples
epochs = 10
learning_rate = 0.005
beta = 0.001  # L2 regularization factor applied to the hidden layer's kernel
drop_out = 0.05  # dropout rate

# Plot defaults. Both settings are read from matplotlib.rcParams directly, so
# this cell does not depend on the pyplot submodule having been imported.
matplotlib.rcParams['figure.figsize'] = (12, 10)
colors = matplotlib.rcParams['axes.prop_cycle'].by_key()['color']

# Load Data

In [None]:
# ATTENTION: if the model is ever deployed, it is critical to preserve the preprocessing done here.
# The easiest way is to implement it as layers and attach them to the model before export.

# File naming convention: circuit name + ABU + DR iteration + ".csv".
# Note: an ABU of zero means that the placement was not changed from the contest benchmark.
# csvs = ["ispd19_test6.0.0.csv", "ispd19_test6.0.95.0.csv", "ispd19_test6.1.0.csv", "ispd19_test6.0.1.csv", "ispd19_test6.0.95.1.csv", "ispd19_test6.1.1.csv"]
csvs = ["ispd19_test6.0.1.csv", "ispd19_test6.0.95.1.csv", "ispd19_test6.1.1.csv"]
path = "data/ICCAD2019/"

# Read every CSV and stack the rows into a single frame with a fresh index.
dataframes = [pd.read_csv(path + csv_name) for csv_name in csvs]
df = pd.concat(dataframes, ignore_index=True)

# Shuffle and split with the sklearn utility: 80% training, 20% validation.
train_df, val_df = sklearn.model_selection.train_test_split(df, test_size=0.2)

# Pop the label column out of each split; the remaining columns are the features.
train_labels = np.array(train_df.pop('HasDetailedRoutingViolation'))
val_labels = np.array(val_df.pop('HasDetailedRoutingViolation'))
train_features = np.array(train_df)
val_features = np.array(val_df)

# Standardize using statistics fitted on the training split only,
# then clip the scaled values to [-5, 5].
scaler = sklearn.preprocessing.StandardScaler()
train_features = np.clip(scaler.fit_transform(train_features), -5, 5)
val_features = np.clip(scaler.transform(val_features), -5, 5)

# Calculate the class weights.
# Scaling by total/2 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
neg, pos = np.bincount(df['HasDetailedRoutingViolation'])
total = neg + pos
print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(total, pos, 100 * pos / total))
weight_for_0 = (1 / neg) * total / 2.0
weight_for_1 = (1 / pos) * total / 2.0
class_weight = {0: weight_for_0, 1: weight_for_1}
print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))

# Load EhPredictor's dataset

In [None]:
df = pd.read_csv("data/ISDP14/EhPredictorISPD14.csv")

# Drop l53 because it is always zero; 'normal' is removed from the features as well.
df = df.drop(columns=['l53', 'normal'])

# Instead of keeping the number of shorts, use it as a boolean label (0 / 1).
df.loc[df['short'] > 0, 'short'] = 1

# Convert selected columns to log-space: l9 l43 l45 l52 l51.
log_cols = ['l9', 'l43', 'l45', 'l52', 'l51']
eps = 0.001  # small offset so that zero entries map to log(0.001) instead of -inf
for col in log_cols:
    df[col] = np.log(df[col] + eps)

# CSV organization:
# des_perf_1_dataset=all_dataset[0:5476,:]
# des_perf_a_dataset=all_dataset[5476:16928,:]
# des_perf_b_dataset=all_dataset[16928:26928,:]
# fft_1_dataset=all_dataset[26928:28864,:]
# fft_2_dataset=all_dataset[28864:32113,:]
# fft_a_dataset=all_dataset[32113:38604,:]
# fft_b_dataset=all_dataset[38604:44375,:]
# matrix_mult_1_dataset=all_dataset[44375:52656,:]
# matrix_mult_a_dataset=all_dataset[52656:69168,:]
# matrix_mult_b_dataset=all_dataset[69168:90601,:]
# pci_bridge32_a_dataset=all_dataset[90601:94170,:]
# pci_bridge32_b_dataset=all_dataset[94170:103961,:]
# superblue11_a_dataset=all_dataset[103961:175113,:]
# superblue12_dataset=all_dataset[175113:241123,:]

# Test circuit: mgc fft_2. Its row range (see the table above) is named once
# so that the held-out slice and the two remaining pieces cannot drift apart.
test_start, test_end = 28864, 32113
# Copy the held-out rows: the label column is popped from this frame below,
# and that mutation should not act on a slice of the full frame.
test_df = df.iloc[test_start:test_end].copy()
df2 = df.iloc[:test_start]   # rows before the test circuit
df3 = df.iloc[test_end:]     # rows after the test circuit
df = pd.concat([df2, df3])

# Use a utility from sklearn to split and shuffle our dataset (80% / 20%).
train_df, val_df = sklearn.model_selection.train_test_split(df, test_size=0.2)

# Form np arrays of labels and features.
train_labels = np.array(train_df.pop('short'))
val_labels = np.array(val_df.pop('short'))
test_labels = np.array(test_df.pop('short'))

train_features = np.array(train_df)
val_features = np.array(val_df)
test_features = np.array(test_df)

# Scale with statistics fitted on the training split only, then clip to [-5, 5].
scaler = sklearn.preprocessing.StandardScaler()
train_features = scaler.fit_transform(train_features)
val_features = scaler.transform(val_features)
test_features = scaler.transform(test_features)
train_features = np.clip(train_features, -5, 5)
val_features = np.clip(val_features, -5, 5)
test_features = np.clip(test_features, -5, 5)

# Calculate the class weights (over the train + validation rows; the test circuit is excluded).
# Scaling by total/2 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
neg, pos = np.bincount(df['short'])
total = neg + pos
print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(total, pos, 100 * pos / total))
weight_for_0 = (1 / neg)*(total)/2.0
weight_for_1 = (1 / pos)*(total)/2.0
class_weight = {0: weight_for_0, 1: weight_for_1}
print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))

# Learning Model and Metrics

In [None]:
# Metrics tracked during training and evaluation. The short names given here
# ('tp', 'fp', 'tn', 'fn', 'precision', 'recall', ...) are the keys that
# calculate_metrics() looks up, so they must stay in sync with it.
METRICS = [
    # Raw confusion-matrix counts.
    tf.keras.metrics.TruePositives(name='tp'),
    tf.keras.metrics.FalsePositives(name='fp'),
    tf.keras.metrics.TrueNegatives(name='tn'),
    tf.keras.metrics.FalseNegatives(name='fn'),
    # Aggregate scores.
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
]

def make_model(metrics = METRICS, output_bias=None, lr=learning_rate):
    """Build and compile the binary classifier.

    Architecture: Dense(20, ReLU, L2-regularized kernel) -> Dropout -> Dense(1, sigmoid).
    The model is compiled with Adam and binary cross-entropy.

    Args:
        metrics: Keras metrics to track; defaults to the module-level METRICS.
        output_bias: optional initial value for the bias of the output layer.
            When None, the bias uses the Dense layer's default 'zeros' initializer.
        lr: learning rate for the Adam optimizer.

    Returns:
        The compiled tf.keras.Sequential model.
    """
    if output_bias is None:
        # Do not forward None to Dense: use its documented default instead.
        bias_initializer = 'zeros'
    else:
        bias_initializer = tf.keras.initializers.Constant(output_bias)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(20,
                              activation='relu',  # ReLU zeroes out negative pre-activations
                              kernel_regularizer=tf.keras.regularizers.l2(beta)),
        tf.keras.layers.Dropout(drop_out),
        tf.keras.layers.Dense(1, activation='sigmoid', bias_initializer=bias_initializer),
    ])
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases no longer accept.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=metrics)
    return model

# Training input pipeline: one (features, label) pair per example, shuffled
# with a buffer covering the whole training set, then grouped into batches.
dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
train_dataset = (
    dataset
    .shuffle(buffer_size=len(train_features))
    .batch(batch_size)
)

# Load Model

In [None]:
# initial_bias = np.log([pos/neg])
# model = make_model(output_bias = initial_bias)
# model.load_weights('model_weights')

# Train The Model

In [None]:
# Start the output bias at log(pos/neg) so that the untrained sigmoid output
# begins near the positive-class rate instead of near 0.5.
initial_bias = np.log([pos/neg])

model = make_model(output_bias = initial_bias)
# train_dataset is already batched, so `batch_size` must not be passed to fit()
# for it. The validation arrays are still evaluated in batches of `batch_size`.
train_history = model.fit(train_dataset,
                          validation_data=(val_features, val_labels),
                          validation_batch_size=batch_size,
                          class_weight=class_weight,
                          epochs=epochs)

# Save Model

In [None]:
# model.save_weights('model_weights')

# Confusion Matrix

In [None]:
def plot_cm(labels, predictions, p=0.5):
    """Plot the confusion matrix obtained by thresholding `predictions` at `p`.

    Args:
        labels: true binary labels (0 / 1).
        predictions: predicted scores; any shape is accepted and flattened,
            e.g. the (n, 1) array returned by model.predict().
        p: decision threshold; a score strictly greater than `p` counts as class 1.
    """
    # Flatten the scores and turn them into hard 0/1 predictions.
    predicted = (np.ravel(predictions) > p).astype(int)
    # Pin the label set so the matrix is always 2x2, even when one of the
    # classes appears in neither the labels nor the predictions.
    cm = sklearn.metrics.confusion_matrix(labels, predicted, labels=[0, 1])
    matplotlib.pyplot.figure(figsize=(5,5))
    seaborn.heatmap(cm, annot=True, fmt="d")
    matplotlib.pyplot.title('Confusion matrix @{:.2f}'.format(p))
    matplotlib.pyplot.ylabel('Actual label')
    matplotlib.pyplot.xlabel('Predicted label')
    matplotlib.pyplot.show()
    
def calculate_metrics(model, results):
    """Collect evaluation results by name and add the derived F-score and MCC.

    Args:
        model: object exposing `metrics_names`; used to name the entries of
            `results` when `results` is a sequence.
        results: either a sequence of values aligned with `model.metrics_names`
            (as returned by `model.evaluate(...)`), or a dict mapping metric
            name to value (as returned by `model.evaluate(..., return_dict=True)`).

    Returns:
        A new dict of metric name -> value. 'F-score' is added only when
        precision + recall is non-zero, and 'MCC' only when its denominator is
        non-zero; otherwise the respective key is left out.

    Raises:
        KeyError: if 'precision', 'recall', 'tp', 'fp', 'tn' or 'fn' is missing.
    """
    if isinstance(results, dict):
        # Already keyed by metric name; copy so the caller's dict is not modified.
        m = dict(results)
    else:
        m = dict(zip(model.metrics_names, results))

    # F-score: harmonic mean of precision and recall.
    pr_sum = m['precision'] + m['recall']
    if pr_sum != 0:
        m['F-score'] = (2 * m['precision'] * m['recall']) / pr_sum

    # Matthews correlation coefficient, computed from the confusion-matrix counts.
    denominator = math.sqrt((m['tp'] + m['fp']) * (m['tp'] + m['fn'])
                            * (m['tn'] + m['fp']) * (m['tn'] + m['fn']))
    if denominator != 0:
        m['MCC'] = (m['tp'] * m['tn'] - m['fp'] * m['fn']) / denominator
    return m

def print_metrics(metrics):
    """Print each entry of `metrics` as 'name : value', rounded to two decimals."""
    for name in metrics:
        print(name, ':', round(metrics[name], 2))

# Training performance

In [None]:
# Score the trained model on the data it was fitted on and print every metric,
# including the derived F-score and MCC.
baseline_results = model.evaluate(
    train_features,
    train_labels,
    batch_size=batch_size,
    verbose=0,
)
metrics = calculate_metrics(model, baseline_results)
print_metrics(metrics)

# Confusion matrix of the training predictions at the default 0.5 threshold.
train_predictions_baseline = model.predict(
    train_features,
    batch_size=batch_size,
)
plot_cm(train_labels, train_predictions_baseline)

# Test performance on ISPD14-mgc_fft_2

In [None]:
# Score the trained model on the held-out test circuit and print every metric,
# including the derived F-score and MCC.
baseline_results = model.evaluate(
    test_features,
    test_labels,
    batch_size=batch_size,
    verbose=0,
)
metrics = calculate_metrics(model, baseline_results)
print_metrics(metrics)

# Confusion matrix of the test predictions at the default 0.5 threshold.
test_predictions_baseline = model.predict(
    test_features,
    batch_size=batch_size,
)
plot_cm(test_labels, test_predictions_baseline)