In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split, WeightedRandomSampler, Subset
import pandas as pd
from collections import Counter
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import matplotlib.pyplot as plt
import pyarrow.parquet as pq
import numpy as np
import json
from datetime import datetime
from CustomAccuracyMetrics import get_cycle_penalty_max_offpath, get_cycle_penalty
from sklearn import metrics
import os
from itertools import product
from CustDataset import CustDataset
import utils
import torch.nn as nn
import torch.nn.functional as F
import CustomAccuracyMetrics

In [None]:
# Timestamped output directory for this run; each configuration later gets its
# own sub-directory underneath it.
# The strftime format is single-quoted on purpose: reusing the enclosing double
# quote inside an f-string replacement field is a SyntaxError before Python 3.12.
test_root = f"/home/narehman_l/test_{datetime.now().strftime('%Y.%m.%d_%H.%M.%S')}"
os.mkdir(test_root)

In [None]:
# Lookup table of one float per name, passed to CustomAccuracyMetrics.get_metrics
# when the predictions are scored.
# NOTE(review): the keys look like workload names and the values like average
# off-path figures — confirm meaning and units against CustomAccuracyMetrics.
# FIXME(review): the key 'postgres' appears twice below. Python keeps only the
# last occurrence, so 20.575 is silently discarded and the dict ends up with
# 6 entries while `cycles` has 7. The final entry was presumably meant for a
# different name — confirm and rename it.
avg_off_path = {
    'clang': 49.544,
    'gcc' : 35.391,
    'mysql' : 11.846,
    'mongodb' : 13.896,
    'postgres' : 20.575,
    'verilator' : 19.353,
    'postgres' : 139.567
}

# Seven cycle counts (stored as floats), passed to
# CustomAccuracyMetrics.get_metrics together with avg_off_path.
# NOTE(review): presumably one entry per workload, in the order the metric code
# expects — confirm, since avg_off_path only holds six distinct keys.
cycles = [
    float(count)
    for count in (
        9_566_632,
        11_505_414,
        10_004_884,
        12_326_291,
        7_771_614,
        35_282_687,
        497_644_239,
    )
]

In [None]:
# Hyperparameter search space: every list holds the candidate values for one
# setting. All lists except `num_epochs` are combined into the configuration
# grid built with itertools.product.

# Batch size handed to the data loaders.
batch_sizes = [512]

# Candidate values for `n`; the model input width is len(feature_list) * n.
ns = [48]

# (sic) the misspelled name is kept because the grid construction refers to it.
pentalty_weights = [0.001]

# Widths of the three hidden layers of Feed_Forward.
hidden_size_0s = [512]
hidden_size_1s = [256]
hidden_size_2s = [64]

downsample_factor = [10]

# Each inner list is one candidate set of feature names given to CustDataset.
feature_lists = [
    [
        'ft_start_addr', 'ft_length', 'ft_ended_by',
        'cycles_since_btb_rec', 'cycles_since_ibtb_rec',
        'cycles_since_misfetch_rec', 'cycles_since_mispred_rec',
        'btb_miss_rate', 'ibtb_miss_rate', 'misfetch_rate', 'mispred_rate',
        'cf_mask',
        'tage_comp_base', 'tage_comp_short', 'tage_comp_long',
        'tage_comp_loop', 'tage_comp_sc',
    ],
]

strides = [2, 4, 8, 12]

# Defined here but not included in the configuration grid.
num_epochs = [5]

In [None]:
# Cartesian product of the hyperparameter lists. Each element is one
# configuration tuple in the order
#   (batch_size, n, penalty_weight, hidden_size_0, hidden_size_1,
#    hidden_size_2, downsample_factor, feature_list, stride).
# Materialised as a list because itertools.product returns a single-pass
# iterator: left lazy, a second run of any cell that loops over `configs`
# would silently iterate over nothing.
configs = list(product(
    batch_sizes,
    ns,
    pentalty_weights,
    hidden_size_0s,
    hidden_size_1s,
    hidden_size_2s,
    downsample_factor,
    feature_lists,
    strides,
))

In [None]:

class Feed_Forward(nn.Module):
    """Fully connected network with three ReLU hidden layers and a sigmoid output.

    Args:
        input_size: Number of input features of the first linear layer.
        hidden_size_0: Output width of the first hidden layer.
        hidden_size_1: Output width of the second hidden layer.
        hidden_size_2: Output width of the third hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size_0, hidden_size_1, hidden_size_2, output_size):
        super().__init__()

        # Hidden widths are kept as plain attributes on the module.
        self.hidden_size_0, self.hidden_size_1, self.hidden_size_2 = (
            hidden_size_0,
            hidden_size_1,
            hidden_size_2,
        )

        # Linear layers, created from input to output. The attribute names are
        # also the state_dict key prefixes.
        self.i2h0 = nn.Linear(input_size, hidden_size_0)
        self.h02h1 = nn.Linear(hidden_size_0, hidden_size_1)
        self.h12h2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.h22o = nn.Linear(hidden_size_2, output_size)

    def forward(self, input):
        """Run ``input`` through the network.

        Each hidden layer is followed by ReLU; the output layer is followed by
        a sigmoid, so every returned element lies between 0 and 1.
        """
        activations = input
        for hidden_layer in (self.i2h0, self.h02h1, self.h12h2):
            activations = F.relu(hidden_layer(activations))
        return F.sigmoid(self.h22o(activations))

In [None]:
# Create one output directory per configuration and record its settings there.
# NOTE: the unpacked names stay bound at module level after the loop finishes,
# so the cells that follow only ever see the values of the LAST configuration.
# NOTE(review): unpacking rebinds the module-level `downsample_factor` list to a
# scalar; re-run the hyperparameter cell before rebuilding `configs`.
for i, config in enumerate(configs):
    print(config)
    (batch_size, n, penalty_weight,
     hidden_size_0, hidden_size_1, hidden_size_2,
     downsample_factor, feature_list, stride) = config

    # write out configuration
    test_path = test_root + f"/config_{i}"
    print(test_path)
    # exist_ok keeps the cell re-runnable within the same test_root.
    os.makedirs(test_path, exist_ok=True)

    settings = {
        'batch_size': batch_size,
        'n': n,
        'penalty_weight': penalty_weight,
        'hidden_size_0': hidden_size_0,
        'hidden_size_1': hidden_size_1,
        'hidden_size_2': hidden_size_2,
        'downsample_factor': downsample_factor,
        'feature_list': feature_list,
        'stride': stride,
    }
    with open(test_path + '/config.txt', 'w') as config_file:
        # One "name: value" line per setting. Generating the lines from the
        # dict guarantees every entry ends with a newline and none is left out.
        config_file.writelines(
            f'{name}: {value}\n' for name, value in settings.items()
        )



In [None]:
# Build the dataset from the chunked feather files with ids 0..4182, using the
# n / stride / feature_list / penalty_weight values currently bound at module
# level.
train_feather_files = [
    f'/home/narehman_l/10_21_2024_ml/icache_consumed_chunked_data/{file_id}.feather'
    for file_id in range(4183)
]
dataset = CustDataset(train_feather_files, n, stride, feature_list, penalty_weight)

# utils.get_train_loader receives the dataset, the path of the train/test index
# JSON and the batch size; it returns the training loader and the test sets
# that are later handed to utils.test_model.
train_loader, workload_test_sets = utils.get_train_loader(
    dataset,
    '/home/narehman_l/10_21_2024_ml/icache_consumed_chunked_data/train_test_idxs.json',
    batch_size,
    num_workers=0,
)

# The first linear layer takes n * len(feature_list) inputs and the network has
# a single output unit.
feed_forward_model = Feed_Forward(
    input_size=n * len(feature_list),
    hidden_size_0=hidden_size_0,
    hidden_size_1=hidden_size_1,
    hidden_size_2=hidden_size_2,
    output_size=1,
)

In [None]:
# Sanity check: display the shape of element 0 of dataset item 800.
# NOTE(review): presumably element 0 is the model input, so this can be compared
# with the model's input_size (len(feature_list) * n) — confirm against CustDataset.
dataset[800][0].shape

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# reduction='none' makes the criterion return one loss value per element
# instead of a reduced scalar.
criterion = nn.BCELoss(reduction='none')
optimizer = torch.optim.Adam(feed_forward_model.parameters(), lr=5e-5)

# NOTE(review): training runs for a single epoch here; the `num_epochs` list
# from the hyperparameter cell is not consulted — confirm this is intended.
loss_values = utils.train_model(
    feed_forward_model,
    criterion,
    optimizer,
    test_path,
    device,
    train_loader,
    num_epochs=1,
)

In [None]:
# Plot the loss values returned by utils.train_model.
# NOTE(review): test_path is presumably where the figure is saved — confirm in utils.
utils.plot_losses(loss_values, test_path)

In [None]:
# Run the trained model over workload_test_sets; the result is what gets scored
# by CustomAccuracyMetrics.get_metrics afterwards.
predictions_df = utils.test_model(feed_forward_model, dataset, workload_test_sets, test_path, device, batch_size, num_workers=4)

In [None]:
# Score the predictions with the project's custom metrics, using the
# avg_off_path table and the cycles list defined near the top of the notebook.
# NOTE(review): predictions_df is passed as both the first and the second
# argument — confirm get_metrics does not expect a different frame in one slot.
eval_df = CustomAccuracyMetrics.get_metrics(predictions_df, predictions_df, avg_off_path, cycles)

In [None]:
# Show the 'custom_accuracy' entry of the evaluation results.
print(eval_df['custom_accuracy'])