# This code implements the training pipeline for various models

In [1]:
import torch
import torch.nn as nn
from torch.optim import Adam
from dataformatter import *
from models import *
import matplotlib.pyplot as plt
import os
import pdb
import random
from collections import defaultdict

In [2]:
def set_random_seed(seed):
	# Esp important for ensuring deterministic behavior with CNNs
	torch.backends.cudnn.deterministic = True
	np.random.seed(seed)
	random.seed(seed)
	torch.manual_seed(seed)
	cuda_available = torch.cuda.is_available()
	if cuda_available:
		torch.cuda.manual_seed_all(seed)
	return cuda_available

In [3]:
def run_epoch(model, optimizer, data_iterator, mode='train', eval_frac=-1):
    model.train()
    if mode != 'train':
        model.eval()
    stats = []
    all_outs = []
    max_tr_batch = -1
    num_egs, batch_idx, n_ones = 0, 1, 0
    for batch in data_iterator:
        # we get the loss from passing the batch to the model
        # each model will have it's own way of deadling with the data [we can jointly figure this out]
        if eval_frac > 0:
            batch, num_batches = batch
            max_tr_batch = int(eval_frac * num_batches)
            n_ones = (np.array(batch)[:, -1]).sum()
        loss, acc, bsz, outs = model(np.array(batch))
        all_outs.append(outs)
        stats.append([loss.item(), acc.item(), bsz, n_ones, len(batch)])
        if mode == 'train' and ((batch_idx < max_tr_batch) or (max_tr_batch < 0)):
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), MAX_GRAD_NORM)
            optimizer.step()
        batch_idx += 1
    stats = np.array(stats)
    avg_loss = np.mean(stats[:, 0])
    avg_acc = (stats[:, 1]).sum() / (stats[:, 2].sum() * 1.0)
    return (avg_loss, avg_acc), stats[max_tr_batch:, :], all_outs

In [4]:
def trainer(model, optimizer, dataset, num_epochs=20, desc='Description of model', eval_frac=0.8, shuffle=True):
    # Todo [all]
    # Figure out how to split the data into a train-val-test regime
    stats = []
    for epoch_ in range(num_epochs):
        # get a data iterator for this epoch
        data_iter = get_batch_iterator(dataset, BATCH_SZ, shuffle=shuffle, batch_info=True)
        epoch_stats, e_stats = run_epoch(model, optimizer, data_iter, mode='train', eval_frac=eval_frac)
        stats.append(epoch_stats)
        print('Epoch {} : Avrg Loss = {}, Avrg Acc = {} '.format(epoch_, stats[-1][0], stats[-1][1]))
        major_acc = (e_stats[:, 3].sum()) / (1.0 * e_stats[:, 4].sum())
        major_acc = max(major_acc, 1.0 - major_acc)
        print('Epoch {} : Eval Loss = {}, Eval Acc = {}, Eval Majority Acc = {}'.format(epoch_, (e_stats[:, 0]).mean(), (e_stats[:, 1].sum())/(1.0 * e_stats[:, 2].sum()), major_acc))
        print('-'*50)

    stats = np.array(stats)
#     graph_results(stats, desc)
    return model

def set_wise_trainer(model, optimizer, setwise_dataset, num_epochs=20, desc='Set-Wise Model', shuffle=False):
    model.train()
    stats = defaultdict(list)
    for epoch_ in range(num_epochs):
        # get a data iterator for this epoch
        accs = []
        setwise_keys = list(setwise_dataset.keys())
        perm = np.random.permutation(len(setwise_keys))
        setwise_keys = np.array(setwise_keys)[perm]
        for set_id  in setwise_keys:
            dataset = setwise_dataset[set_id]
            model.remap_embedders(dataset, set_id)
            data_iter = get_batch_iterator(dataset, BATCH_SZ, shuffle=shuffle)
            this_stats, _, all_outs = run_epoch(model, optimizer, data_iter, mode='train')
            all_preds = torch.cat([x[0] for x in all_outs])
            all_ys = torch.cat([x[1] for x in all_outs])
#             print(set_id, (all_preds.argmax(dim=-1).eq(all_ys)), all_preds.argmax(dim=-1).float().mean(), all_ys.float().mean())
            stats[set_id].append(this_stats)
            accs.append(this_stats[-1])
        acc_stats = np.min(accs), np.mean(accs), np.median(accs), np.max(accs)
        print('Min Acc {} | Mean Acc : {} | Median Acc {} | Max Acc {} '.format(*acc_stats))
    return model, stats, all_outs

def evaluate(model, dataset, epoch_=-1, print_res=True, shuffle=False):
    # get a data iterator for this epoch
    model.eval()
    data_iter = get_batch_iterator(dataset, BATCH_SZ, shuffle=shuffle, batch_info=False)
    epoch_stats, _, all_outs = run_epoch(model, None, data_iter, mode='val')
    if print_res:
        print('Epoch {} : Avg Loss = {} Avg Acc = {}'.format(epoch_, epoch_stats[0], epoch_stats[1]))
    return epoch_stats, all_outs

In [5]:
def eval_setwise(eval_setwise_dataset, model, MAX_TR_KEY=0, desc='graph', epoch=-1):
    all_stats = []
    for id_, (set_id, this_dataset) in enumerate(eval_setwise_dataset.items()):
        set_id = MAX_TR_KEY + int(set_id)
        model.remap_embedders(this_dataset, set_id)
        result, all_outs = evaluate(model, this_dataset, print_res=False)
        average_pred = np.mean(this_dataset[:, -1])
        all_stats.append([*result, 1.0 - average_pred, average_pred])
    av_res = np.mean(all_stats, axis=0)
    print('[{}] Epoch[{}] : Loss {}, Acc {}, Major [0] Acc {}, Marjor [1] Acc {}'.format(desc, epoch, *av_res))
    all_preds = torch.cat([x[0] for x in all_outs])
    all_ys = torch.cat([x[1] for x in all_outs])
#     print((all_preds.argmax(dim=-1).eq(all_ys).float()).mean().item(), all_preds.argmax(dim=-1).float().mean().item(), all_ys.float().mean().item())

In [6]:
def model_main(join=False):
    if not os.path.exists(SAVE_FLDR):
        os.makedirs(SAVE_FLDR)

    set_random_seed(RANDOM_SEED)
    print('Creating Model of type : {}, Batchsz = {}, Learning Rate = {}'.format(MODEL_TYPE, BATCH_SZ, LR))
    model = get_model(MODEL_TYPE)
    chosen_columns = model.get_data_columns()
    chosen_columns.append('Set')
    train_dataset = csv_to_data(DATA_PATH, chosen_columns)
    average_pred = np.mean(train_dataset[:, -2])
    if torch.cuda.is_available():
        model.cuda()
    model.use_cuda = torch.cuda.is_available()
    eval_dataset = csv_to_data(EVAL_DATA_PATH, chosen_columns)
    print(model.feat_idx_map, torch.cuda.is_available())
    print('This is the average accuracy : ', 1.0 - average_pred, ' From predicting all zeros')
    
    if not SET_WISE:
        optimizer = Adam(model.parameters(), lr=LR)
        model.prep_for_data(train_dataset, temp_order=True)
        model = trainer(model, optimizer, train_dataset, num_epochs=N_EPOCHS, desc=MODEL_DESC, shuffle=False)
    else:
        train_setwise_dataset = group_by_set(train_dataset, set_idx=3)
        eval_setwise_dataset = group_by_set(eval_dataset, set_idx=3)
        print('Total keys before : ', len(train_setwise_dataset.keys()))
        if join:
            MAX_TR_KEY = max([int(x) for x in train_setwise_dataset.keys()]) + 1
            new_eval_set = {}
            for k, v in eval_setwise_dataset.items():
                new_id = MAX_TR_KEY + int(k)
                train_setwise_dataset[new_id] = v
                new_eval_set[new_id] = v
            print('Total keys After : ', len(train_setwise_dataset.keys()))
            eval_setwise_dataset = new_eval_set
            for k, v in train_setwise_dataset.items():
                if v.shape[0] < 2:
                    print(k)
                    
        all_tr_keys = list(train_setwise_dataset.keys())
        val_keys = np.random.choice(all_tr_keys, size=int(0.2 * len(all_tr_keys)))
        tr_keys = set(all_tr_keys) - set(val_keys)

        vals = [len(x) for x in list(train_setwise_dataset.values())]
        max_key = all_tr_keys[np.argmax(vals)]
        # Logging-pre-training performance
        model.prep_for_data(train_setwise_dataset[max_key], temp_order=True)
        for set_id, this_dataset in train_setwise_dataset.items():
            model.remap_embedders(this_dataset, set_id)

        tr_val_setwise_dataset = {k: train_setwise_dataset[k] for k in val_keys}
        train_setwise_dataset = {k: train_setwise_dataset[k] for k in tr_keys}

        print('Logging Pre-Training Performance')
        MAX_TR_KEY = max([int(x) for x in train_setwise_dataset.keys()]) + 1
        if not join:
            eval_setwise(eval_setwise_dataset, model, MAX_TR_KEY=MAX_TR_KEY, desc=EVAL_DESC)
        eval_setwise(tr_val_setwise_dataset, model, desc=TR_DESC)
        optimizer = Adam(model.parameters(), lr=LR) # Now we can add all the model parameters to the optimizer
        for i in range(N_EPOCHS):
            model, stats, _ = set_wise_trainer(model, optimizer, train_setwise_dataset, num_epochs=1)
            if not join:
                eval_setwise(eval_setwise_dataset, model, MAX_TR_KEY=MAX_TR_KEY, desc=EVAL_DESC, epoch=i)
            eval_setwise(tr_val_setwise_dataset, model, desc=TR_DESC, epoch=i)
            torch.save(model.state_dict(), '{}/{}_saved_model.pth'.format(SAVE_FLDR, MODEL_DESC))
    # Need to return the eval performance here

In [7]:
DATA_PATH = 'hawkeye_trace_belady_bzip.csv' # This is the CSV FILE WE ARE TRYING TO ANALYZE
TR_DESC = 'JOINT_DATA_BZIP'
EVAL_DATA_PATH = 'lru_trace_belady_bzip.csv' # This is the CSV FILE WE ARE TRYING TO ANALYZE
EVAL_DESC = 'JOINT_DATA_BZIP'
JOIN = True

# DATA_PATH = 'lru_trace_belady_xalancbmk.csv' # This is the CSV FILE WE ARE TRYING TO ANALYZE
# TR_DESC = 'LRU_XALANCBMK'
# EVAL_DATA_PATH = 'lru_trace_belady_xalancbmk.csv' # This is the CSV FILE WE ARE TRYING TO ANALYZE
# EVAL_DESC = 'LRU_GRAPH'

SAVE_FLDR = 'results'
N_EPOCHS = 4
MAX_GRAD_NORM = 0.1
SET_WISE = True
RANDOM_SEED = 140982301

In [8]:
batch_szs = [32]
lrs = [1e-4] #, 3e-4]
model_types = ['TRANSFORMER'] #, "TRANSFORMER_1"]
SAVE_FLDR = "pytorch_c++"


for BATCH_SZ in batch_szs:
    for LR in lrs:
        for MODEL_TYPE in model_types:
            MODEL_DESC = "{}-{}_BSZ.{}_LR.{}".format(TR_DESC, MODEL_TYPE, BATCH_SZ, LR)
            model_main(join=JOIN)

Creating Model of type : TRANSFORMER, Batchsz = 32, Learning Rate = 0.0001
{'Program Counter': 0, 'Set Occupancy': 1, 'Belady Friendly': 2} True
This is the average accuracy :  0.7336359237840702  From predicting all zeros
Total keys before :  2048
Total keys After :  4096
0.0 (53, 4)
1.0 (57, 4)
2.0 (39, 4)
3.0 (47, 4)
4.0 (48, 4)
5.0 (48, 4)
6.0 (45, 4)
7.0 (45, 4)
8.0 (52, 4)
9.0 (50, 4)
10.0 (64, 4)
11.0 (45, 4)
12.0 (46, 4)
13.0 (42, 4)
14.0 (61, 4)
15.0 (47, 4)
16.0 (54, 4)
17.0 (34, 4)
18.0 (57, 4)
19.0 (52, 4)
20.0 (47, 4)
21.0 (43, 4)
22.0 (45, 4)
23.0 (64, 4)
24.0 (62, 4)
25.0 (65, 4)
26.0 (52, 4)
27.0 (44, 4)
28.0 (60, 4)
29.0 (39, 4)
30.0 (43, 4)
31.0 (48, 4)
32.0 (53, 4)
33.0 (44, 4)
34.0 (43, 4)
35.0 (54, 4)
36.0 (46, 4)
37.0 (48, 4)
38.0 (43, 4)
39.0 (48, 4)
40.0 (54, 4)
41.0 (45, 4)
42.0 (43, 4)
43.0 (44, 4)
44.0 (45, 4)
45.0 (47, 4)
46.0 (48, 4)
47.0 (41, 4)
48.0 (57, 4)
49.0 (54, 4)
50.0 (39, 4)
51.0 (37, 4)
52.0 (48, 4)
53.0 (46, 4)
54.0 (47, 4)
55.0 (58, 4)
56.0 (61

(Pdb)  mask


tensor(0., device='cuda:0')


(Pdb)  x


tensor([[[-0.2606,  0.5461,  1.1548,  ...,  1.5437,  2.2557,  0.5184],
         [-0.2606,  0.5461,  1.1548,  ..., -0.0226,  1.2805,  0.9925],
         [-0.9434,  0.1992,  0.3588,  ..., -0.0903,  0.1641,  1.0003],
         ...,
         [ 0.0663, -0.4144,  1.2725,  ...,  1.9608,  1.9180,  0.6004],
         [ 1.9506, -0.0669, -0.8323,  ...,  1.9608,  1.9180,  0.6004],
         [-0.2606,  0.5461,  1.1548,  ...,  1.9608,  1.9180,  0.6004]]],
       device='cuda:0', grad_fn=<AddBackward0>)


(Pdb)  x.shape


torch.Size([1, 10, 128])


(Pdb)  exit()


BdbQuit: 

In [None]:
chosen_columns = ['Program Counter', 'Physical Address', 'Set', 'Cache Friendly']
dataset = csv_to_data(DATA_PATH, chosen_columns)
pcs = dataset[:, 0]
dict_ = defaultdict(int)
for id_ in pcs:
    dict_[id_] += 1
values = np.array(list(dict_.values()))
bc = np.bincount(values)
print(len(values), dataset.shape, values.mean(), values.max(), values.min(), np.median(values))
print(bc)

In [10]:
temp = csv_to_data(DATA_PATH, ['Hit', 'Belady Hit'])
temp = np.array(temp)
print(temp.mean(axis=0))

[0.33296782 0.33755649]
