In [1]:
import numpy as np
import pickle

In [2]:
# Load the raw dataset from the pickled file in the working directory.
# `data` is later iterated with `.items()` by prepare_dataset.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open this file if it comes from a trusted source.
with open('epfl_arithmetic.pkl', 'rb') as f:
    data = pickle.load(f)

In [3]:
def prepare_dataset(data):
    """Split the raw records into parallel feature, label and sequence lists.

    Parameters
    ----------
    data : mapping
        Only the values are used; every value must be a mapping that provides
        the label keys, the feature keys listed below and 'Sequence'.

    Returns
    -------
    (features, labels, sequences) : tuple of lists
        One entry per record, in the iteration order of ``data``.
        ``features[i]`` and ``labels[i]`` are new dicts; ``sequences[i]`` is
        the record's 'Sequence' value unchanged.

    Raises
    ------
    KeyError
        If a record lacks any of the required keys.
    """
    label_keys = ('Path_Delay', 'Slice_LUTs')
    # 'fanin_max' and 'fanin_avg' are not collected (disabled in this list).
    feature_keys = (
        'CI', 'CO', 'level', 'level_avg',
        'cut', 'xor', 'xor_ratio', 'mux', 'mux_ratio', 'and', 'and_ratio',
        'obj', 'power', 'slack', 'LUT', 'fanin', 'fanout', 'mffc',
        'fanout_max', 'fanout_avg', 'mffc_max', 'mffc_avg',
    )

    features, labels, sequences = [], [], []
    for record in data.values():
        labels.append({key: record[key] for key in label_keys})
        # Insertion order here becomes the column order after flattening.
        features.append({key: record[key] for key in feature_keys})
        sequences.append(record['Sequence'])
    return features, labels, sequences

In [4]:
def preprocess_faninout(data):
    """Replace the 'fanin' and 'fanout' histograms with scalar features, in place.

    For every dict in ``data`` the 'fanin' and 'fanout' entries are removed and
    these keys are added, in this order:

    * 'fanin_all'            -- the value stored under fanin['2']
    * 'fanout_ratio_0' .. 'fanout_ratio_9'
                             -- share of each small fanout bucket in the total
    * 'fanout_ratio_large'   -- share of every bucket whose key is not '0'..'9'
    * 'fanout_all'           -- sum of all fanout histogram values

    A bucket that is absent from the histogram counts as 0, and all ratios are
    0.0 when the histogram sums to zero, so sparse or empty histograms no
    longer raise KeyError / ZeroDivisionError.

    Parameters
    ----------
    data : iterable of dict
        Each dict must contain 'fanin' (with a '2' key) and 'fanout', both
        mappings keyed by strings.
        # NOTE(review): values are presumably node counts -- confirm.

    Raises
    ------
    KeyError
        If 'fanin', 'fanout' or fanin['2'] is missing.
    """
    fo_threshold = 10
    # Built once: the original rebuilt this list for every histogram entry.
    small_keys = [str(i) for i in range(fo_threshold)]
    small_key_set = set(small_keys)

    for d in data:
        fi = d.pop('fanin')
        fo = d.pop('fanout')
        d['fanin_all'] = fi['2']  # assume all fanin's are 2

        fo_all = sum(fo.values())
        fo_large = sum(v for k, v in fo.items() if k not in small_key_set)

        for i, key in enumerate(small_keys):
            d['fanout_ratio_%d' % i] = 0. if fo_all == 0 else fo.get(key, 0) / fo_all
        d['fanout_ratio_large'] = 0. if fo_all == 0 else fo_large / fo_all
        d['fanout_all'] = fo_all
        
def preprocess_slack(data):
    """Replace the 'slack' histogram with ratio features, in place.

    For every dict in ``data`` the 'slack' entry is removed and these keys are
    added, in this order:

    * 'slack_ratio_0_10' .. 'slack_ratio_200_210'
                           -- histogram value of that bucket divided by
                              slack['total_nodes'] (0.0 when the bucket is absent)
    * 'slack_ratio_large'  -- sum of all remaining histogram values (every key
                              other than the 21 buckets and 'total_nodes')
                              divided by slack['total_nodes']
    * 'slack_all'          -- slack['total_nodes']

    All ratios are 0.0 when 'total_nodes' is zero, instead of raising
    ZeroDivisionError.

    Parameters
    ----------
    data : iterable of dict
        Each dict must contain a 'slack' mapping with a 'total_nodes' entry
        and bucket keys of the form '<lower>_<upper>'.

    Raises
    ------
    KeyError
        If 'slack' or slack['total_nodes'] is missing.
    """
    slack_threshold = 21  # x10: buckets 0_10, 10_20, ..., 200_210
    bucket_keys = [f"{10 * i}_{10 * i + 10}" for i in range(slack_threshold)]
    # Keys that must not be counted towards the "large" remainder.
    keys_skip = set(bucket_keys)
    keys_skip.add('total_nodes')

    for d in data:
        slack = d.pop('slack')
        slack_all = slack['total_nodes']

        for key in bucket_keys:
            count = slack.get(key, 0)
            d[f"slack_ratio_{key}"] = 0. if slack_all == 0 else count / slack_all

        slack_large = sum(v for k, v in slack.items() if k not in keys_skip)
        d['slack_ratio_large'] = 0. if slack_all == 0 else slack_large / slack_all
        d['slack_all'] = slack_all
                
def preprocess_mffc(data):
    """Replace the 'mffc' histogram with ratio features, in place.

    For every dict in ``data`` the 'mffc' entry is removed and these keys are
    added, in this order:

    * 'mffc_ratio_0' .. 'mffc_ratio_8' -- share of each small bucket in the total
    * 'mffc_ratio_large'               -- share of every bucket whose key is
                                          not '0'..'8'
    * 'mffc_all'                       -- sum of all histogram values

    A bucket that is absent from the histogram counts as 0 (previously a
    KeyError), and all ratios are 0.0 when the histogram sums to zero.

    Parameters
    ----------
    data : iterable of dict
        Each dict must contain an 'mffc' mapping keyed by strings.

    Raises
    ------
    KeyError
        If 'mffc' is missing.
    """
    mffc_threshold = 9
    # Built once: the original rebuilt this list for every histogram entry.
    small_keys = [str(i) for i in range(mffc_threshold)]
    small_key_set = set(small_keys)

    for d in data:
        mffc = d.pop('mffc')

        mffc_all = sum(mffc.values())
        mffc_large = sum(v for k, v in mffc.items() if k not in small_key_set)

        for i, key in enumerate(small_keys):
            d['mffc_ratio_%d' % i] = 0. if mffc_all == 0 else mffc.get(key, 0) / mffc_all
        d['mffc_ratio_large'] = 0. if mffc_all == 0 else mffc_large / mffc_all
        d['mffc_all'] = mffc_all
        
def preprocess_LUT(data):
    """Replace the 'LUT' entry of every dict in ``data`` with flat features, in place.

    Added keys, in order: 'LUT_ratio_2' .. 'LUT_ratio_6' (the '<i>_LUT_ratio'
    value divided by 100, or 0.0 when that key is absent), then 'LUT_level',
    'LUT_level_avg', 'LUT_size_avg' and 'LUT_total' copied from the 'level',
    'level_avg', 'size_avg' and 'total' entries.

    Raises
    ------
    KeyError
        If 'LUT' or one of 'level', 'level_avg', 'size_avg', 'total' is missing.
    """
    copied_fields = (
        ("level", "LUT_level"),
        ("level_avg", "LUT_level_avg"),
        ("size_avg", "LUT_size_avg"),
        ("total", "LUT_total"),
    )
    for entry in data:
        lut_stats = entry.pop('LUT')
        for size in range(2, 7):
            percent = lut_stats.get(f"{size}_LUT_ratio", 0.)
            entry[f"LUT_ratio_{size}"] = percent / 100  # convert percentage
        for source_key, target_key in copied_fields:
            entry[target_key] = lut_stats[source_key]

In [5]:
# Vocabulary: synthesis command string -> integer token id.
seq_to_token = {
    '&if -W 300 -K 6 -v': 0,
    '&st': 1,
    '&synch2': 2,
    '&dc2': 3,
    '&if -W 300 -y -K 6': 4,
    '&syn2': 5,
    '&sweep': 6,
    '&mfs': 7,
    '&scorr': 8,
    '&if -W 300 -g -K 6': 9,
    '&b -d': 10,
    '&if -W 300 -x -K 6': 11,
    '&dch': 12,
    '&b': 13,
    '&syn4': 14,
    '&dch -f': 15,
    '&syn3': 16,
}

# Inverse lookup: integer token id -> command string.
token_to_seq = {token: command for command, token in seq_to_token.items()}

def preprocess_sequence(sequences):
    """Convert ';'-separated command strings into lists of integer tokens.

    Each string is split on ';', the first two and last three pieces are
    dropped, and every remaining piece that starts with '&' is looked up in
    the module-level ``seq_to_token`` table. Pieces that do not start with
    '&' are ignored.

    Returns a list with one token list per input string.

    Raises
    ------
    KeyError
        If a piece starting with '&' is not a key of ``seq_to_token``.
    """
    tokenized = []
    for raw in sequences:
        commands = raw.split(';')[2:-3]  # remove the redundant parts
        tokenized.append(
            [seq_to_token[cmd] for cmd in commands if cmd.startswith('&')]
        )
    return tokenized

In [6]:
def flatten_all(data):
    """Stack the values of each dict in ``data`` into one numpy array.

    Row ``i`` holds ``list(data[i].values())``, so the column order is the
    insertion order of each dict's keys.
    """
    return np.array([list(record.values()) for record in data])

def normalize(data):
    """Standardize every column of a 2-D array to zero mean and unit variance.

    Each column is transformed as ``(x - mean) / (std + eps)`` with
    ``eps = 1e-5``, so constant columns become 0 instead of dividing by zero.

    The input is converted to float and a new array is returned; ``data``
    itself is never modified. (Writing the result back through a transposed
    view, as was done before, overwrote the caller's array and truncated the
    result to integers for integer-typed input.)

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)

    Returns
    -------
    numpy.ndarray of float, same shape as ``data``.
    """
    eps = 1e-5
    values = np.asarray(data, dtype=float)
    column_mean = values.mean(axis=0)
    column_std = values.std(axis=0)
    return (values - column_mean) / (column_std + eps)

In [7]:
# Build the per-record feature dicts, then expand the nested histogram
# entries in place. The call order is significant: each step appends its new
# keys to the dicts, and flatten_all() reads values in insertion order.
features, labels, sequences = prepare_dataset(data)
for expand in (preprocess_mffc, preprocess_faninout, preprocess_slack, preprocess_LUT):
    expand(features)

In [8]:
# Turn the list of feature dicts into one array (one row per record),
# then standardize it column by column.
features_flatted = flatten_all(features)
features_normalized = normalize(features_flatted)

In [9]:
# Tokenize the raw command strings into lists of integer ids (see seq_to_token).
sequences_list = preprocess_sequence(sequences)

In [10]:
# Sanity check: dumps every tokenized sequence.
# NOTE(review): this prints the whole list; a slice would keep the output short.
print(sequences_list)

[[16, 12, 0, 1], [0, 7, 1, 3, 9], [2, 15, 4], [10, 12, 10], [15, 4], [0, 1, 14], [0, 1, 5, 3], [4, 16, 11], [4, 14, 14], [14, 12, 10], [3, 4, 13], [5, 0, 7, 1, 9], [14, 5, 15], [9, 5, 4], [10, 3], [4, 13, 14], [13, 14, 14], [13, 2, 14], [9, 4, 12], [4, 11], [12, 12], [15, 10, 5], [11, 14, 11], [15, 2, 3], [12, 9, 9], [0, 1, 0, 7, 1, 2], [12, 16, 14], [12, 0, 7, 1, 0, 7, 1], [13, 9, 16], [15, 10, 5], [11, 10, 9], [0, 7, 1, 9, 10], [10, 10, 0, 7, 1], [4, 13, 5], [9, 2, 16], [9, 2, 3], [9, 0, 1, 4], [4, 9, 0, 7, 1], [3, 0, 7, 1, 5], [4, 16, 4], [5, 3, 11], [10, 9, 14], [16, 9, 14], [5, 5, 10], [2, 16, 11], [12, 13, 3], [5, 11, 2], [14, 2, 0, 1], [10, 5], [5, 3, 16], [9, 4, 3], [14, 16, 9], [5, 16, 12], [12, 0, 7, 1, 2], [9, 4], [0, 7, 1, 0, 7, 1, 5], [4, 0, 7, 1, 12], [5, 0, 1, 11], [3, 9, 9], [3, 11], [4, 14, 2], [14, 0, 1, 14], [4, 16, 15], [4, 5, 10], [9, 14, 0, 7, 1], [11, 0, 1, 12], [14, 0, 7, 1, 2], [5, 0, 7, 1, 4], [0, 1, 10, 0, 1], [5, 13, 10], [9, 5, 12], [5, 10, 14], [14, 4, 0, 

In [12]:
# Sanity check: show the normalized feature vector of the first record.
print(features_normalized[0])

[-0.19999673  0.67649886  1.78264676  2.48436868 -0.34359386  0.46091353
  0.68660272 -0.77326058 -0.59965381  0.23384121  0.06179993  0.24288077
  0.30109648 -0.34518441  0.03769996 -0.78669432  0.          0.
 -0.13611833 -0.3051325  -0.97348628  1.93114758 -0.5305673  -0.830435
  0.75229103  1.54030808 -0.19491412  0.38717519  0.24288077 -0.17290607
 -0.60877532  0.41377227 -0.94090941  0.56529387 -0.39385522  2.34213298
 -0.54126379  0.16594892 -0.4765915   0.          0.25312888 -0.56564741
 -0.20824581  0.04854573  0.32150767  0.31344758  0.62587999  0.43083035
  0.60588747  0.91934729  0.8644727   0.68436988  0.75135341  0.58655978
  0.97235558  0.53847484  1.01762044  0.93981323  0.9626633   0.51469304
  0.73230045  1.03424211 -0.32582945  0.37521716  1.91060817 -0.91369989
  0.87790107 -0.83553891 -0.30479476  1.77319677  2.50645232 -0.91868749
  0.37521716]
