In [261]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator
import numpy as np
import spacy
import random
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader
import datetime
import time
import matplotlib.pyplot as plt

In [262]:
torch.cuda.is_available()

True

In [263]:
class Encoder(nn.Module):
    """Embed a sequence of token ids and encode it with an LSTM.

    Args:
        input_size: Number of distinct token ids (rows of the embedding table).
        embedding_size: Width of each embedding vector.
        hidden_size: Width of the LSTM hidden and cell states.
        num_layer: Number of stacked LSTM layers.
        p: Dropout probability for ``self.dropout`` and for the LSTM's
            inter-layer dropout.

    Shape summary (sequence-first layout, as used by ``nn.LSTM`` by default):
        input   : (seq_length, batch_size) token ids
        hidden  : (num_layers, batch_size, hidden_size)
        cell    : (num_layers, batch_size, hidden_size)
        output  : (seq_length, batch_size, hidden_size)
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layer,
                 p):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        # The parameter is spelled `num_layer`; the original body read the
        # undefined name `num_layers` and only worked when a notebook global
        # of that name happened to exist.
        self.num_layers = num_layer

        # NOTE(review): self.dropout is created but never applied in forward().
        self.dropout = nn.Dropout(p)
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, self.num_layers, dropout=p)

    def forward(self, x):
        """Encode ``x`` and return ``(encoder_states, hidden, cell)``.

        Args:
            x: Token ids of shape (seq_length, batch_size); one sequence per
                batch column.

        Returns:
            encoder_states: LSTM output for every time step,
                (seq_length, batch_size, hidden_size).
            hidden: Final hidden state h_n, (num_layers, batch_size, hidden_size).
            cell: Final cell state c_n, (num_layers, batch_size, hidden_size).
        """
        embedding = self.embedding(x)
        # embedding: (seq_length, batch_size, embedding_size)

        # No (h_0, c_0) is passed, so the LSTM starts from zero states.
        encoder_states, (hidden, cell) = self.rnn(embedding)

        return encoder_states, hidden, cell

In [264]:
class Decoder(nn.Module):
    """Attention read-out that turns encoder states into a hit probability.

    For each call, the top-layer hidden state is used as a query over
    ``encoder_states`` (scaled dot-product, softmax over the sequence axis);
    the attention-weighted sum is passed through a linear layer and a sigmoid.

    Args:
        input_size: Number of distinct token ids (rows of the embedding table).
        embedding_size: Width of each embedding vector.
        hidden_size: Width of the hidden states attended over.
        output_size: Accepted for interface compatibility; not used by this
            class (the output is always a single probability).
        num_layers: Number of layers of ``self.rnn``.
        p: Dropout probability for ``self.dropout`` and ``self.rnn``.

    NOTE(review): ``self.rnn``, ``self.dropout`` and ``self.relu`` are created
    but never used in ``forward``, and the embedded input does not feed into
    the prediction. As written, the output depends only on ``encoder_states``,
    ``hidden`` and ``scaling_factor``, and ``hidden``/``cell`` are returned
    unchanged. Confirm whether the LSTM step was meant to be wired in.
    """

    def __init__(self, input_size, embedding_size, hidden_size, output_size,
                 num_layers, p): # how to choose output_size?
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.dropout = nn.Dropout(p)
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(hidden_size + embedding_size, hidden_size, num_layers, dropout=p)

        self.softmax = nn.Softmax(dim=2)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_size, 1) # binary (0 or 1) classification
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, encoder_states, hidden, cell, scaling_factor):
        """Run one attention read-out step.

        Args:
            x: Token ids for the current step, shape (N,).
            encoder_states: States to attend over, (seq_length, N, hidden_size).
            hidden: Hidden state, (num_layers, N, hidden_size).
            cell: Cell state; passed through untouched.
            scaling_factor: Multiplier applied to the raw attention scores
                before the softmax.

        Returns:
            predictions: Sigmoid probabilities, (N, 1).
            hidden: The ``hidden`` argument, unchanged.
            cell: The ``cell`` argument, unchanged.
            attention: Attention weights, (N, 1, seq_length).
        """
        # x: (N,) -> (1, N). The original used unsqueeze(1), producing (N, 1),
        # which contradicts the sequence-first layout used everywhere else.
        x = x.unsqueeze(0)

        # embedding: (1, N, embedding_size). Kept so out-of-range ids still
        # fail here; see the class-level NOTE about it being otherwise unused.
        embedding = self.embedding(x)

        # Query = top-layer hidden state, (N, 1, hidden_size). Indexing the
        # last layer (instead of squeeze(0)) gives the same tensor for a
        # single-layer state and also works when num_layers > 1.
        query = hidden[-1].unsqueeze(1)

        # keys: (N, hidden_size, seq_length)
        keys = encoder_states.permute(1, 2, 0)

        # energy: (N, 1, seq_length), scaled before normalisation.
        energy = torch.mul(torch.bmm(query, keys), scaling_factor)

        # attention: (N, 1, seq_length); softmax runs over the sequence axis.
        attention = self.softmax(energy)

        # values: (N, seq_length, hidden_size)
        values = keys.transpose(1, 2)

        # (N, 1, hidden_size) -> (1, N, hidden_size)
        weighted_sum = torch.bmm(attention, values).permute(1, 0, 2)

        # (1, N, 1) -> (N, 1)
        predictions = self.fc(weighted_sum).squeeze(0)

        # Probabilities in (0, 1); pair with a probability-based loss such as
        # nn.BCELoss rather than a logits-based one.
        predictions = self.sigmoid(predictions)

        return predictions, hidden, cell, attention

In [265]:
class AttnDecoder(nn.Module):
    """Encoder/decoder wrapper that emits one prediction per test-sequence step.

    The encoder consumes the warm-up sequence once. The decoder is then called
    once per step of the test sequence; after every step the current hidden
    state is appended to the states the decoder attends over.

    Args:
        encoder: Module returning ``(encoder_states, hidden, cell)``.
        decoder: Module called as
            ``decoder(x_t, encoder_states, hidden, cell, scaling_factor)`` and
            returning ``(output, hidden, cell, attention)``.
    """

    def __init__(self, encoder, decoder):
        super(AttnDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source_warmup, source_test, scaling_factor):
        """Predict one value per step of ``source_test``.

        Args:
            source_warmup: Token ids fed to the encoder, (warmup_len, batch_size).
            source_test: Token ids fed to the decoder step by step,
                (target_len, batch_size).
            scaling_factor: Forwarded to the decoder's attention scaling.

        Returns:
            outputs: (target_len, batch_size, 1) decoder outputs.
            attention: Attention returned by the LAST decoder step only (its
                sequence axis covers the encoder states plus the hidden states
                appended in earlier steps), or ``None`` if ``target_len`` is 0.
        """
        batch_size = source_warmup.shape[1]
        # The loop indexes source_test, so its length defines the number of
        # steps (the original read it from source_warmup, which is only
        # correct when both halves have the same length).
        target_len = source_test.shape[0]

        encoder_states, hidden, cell = self.encoder(source_warmup)

        # Allocate on the same device as the encoder output instead of relying
        # on a module-level `device` global.
        outputs = torch.zeros(target_len, batch_size, 1, device=encoder_states.device)

        attention = None
        for t in range(target_len):
            output, hidden, cell, attention = self.decoder(
                source_test[t], encoder_states, hidden, cell, scaling_factor)

            # Grow the attended sequence by the current hidden state(s).
            encoder_states = torch.cat((encoder_states, hidden), dim=0)

            outputs[t] = output

        return outputs, attention

In [361]:
#old - Non-overlapping trace sequence
class TraceDataset(Dataset):
    """Dataset over pre-chunked, non-overlapping PC traces.

    Args:
        n: Number of labelled steps per sample; a full ``pc`` chunk holds
            ``2*n`` entries.
        unique_pc_length: Modulus applied to every PC value in ``__getitem__``.
        pc: Sequence of 1-D tensors (e.g. the result of ``torch.split``), each
            with up to ``2*n`` PC values.
        y: Sequence of label chunks; sample ``idx`` uses ``y[2*idx + 1]``.
    """

    def __init__(self, n, unique_pc_length, pc, y):
        self.num_seq = n
        self.pc = pc
        self.y = y
        self.unique_pc_length = unique_pc_length

    def get_y(self):
        """Return the label chunks passed to the constructor."""
        return self.y

    def get_pc(self):
        """Return the PC chunks passed to the constructor."""
        return self.pc

    def __len__(self):
        """Number of full-length samples; a short trailing chunk is excluded."""
        if len(self.pc) == 0:
            return 0
        if len(self.pc[-1]) == 2*self.num_seq:
            return len(self.pc)
        return len(self.pc) - 1

    def __getitem__(self, idx):
        """Return ``(X, y)`` for sample ``idx``.

        X is the ``idx``-th PC chunk reduced modulo ``unique_pc_length``;
        y is label chunk ``2*idx + 1``.
        """
        X = torch.fmod(self.pc[idx], self.unique_pc_length)
        y = self.y[2*idx+1]

        return X, y

In [None]:
#new - Overlapping trace sequence
class TraceDataset(Dataset):
    """Dataset of overlapping (sliding-window) PC trace samples.

    Sample ``idx`` is the window ``pc[idx : idx + 2*n]``; its labels are the
    ``y`` values aligned with the second half of that window.

    Args:
        n: Half-window length; every sample has ``2*n`` PCs and ``n`` labels.
        unique_pc_length: Modulus applied to every PC value in ``__getitem__``.
        pc: 1-D tensor of PC values.
        y: 1-D sequence of labels, indexable by slice and aligned with ``pc``.
    """

    def __init__(self, n, unique_pc_length, pc, y):
        self.num_seq = n
        self.pc = pc
        self.y = y
        self.unique_pc_length = unique_pc_length

    def get_y(self):
        """Return the labels passed to the constructor."""
        return self.y

    def get_pc(self):
        """Return the PC tensor passed to the constructor."""
        return self.pc

    def __len__(self):
        """Number of complete windows (0 when the trace is shorter than 2*n)."""
        # Use the instance's window size rather than a module-level `n`, and
        # clamp at 0 because len() must not be negative.
        return max(0, len(self.pc) - 2*self.num_seq + 1)

    def __getitem__(self, idx):
        """Return ``(X, y)``: the window's PCs modulo ``unique_pc_length`` and
        the labels of its second half."""
        n = self.num_seq
        X = torch.fmod(self.pc[idx:idx+2*n], self.unique_pc_length)
        y = self.y[idx+n:idx+2*n]

        return X, y

In [267]:
class OPTgen:
    """Occupancy-vector bookkeeping used to decide whether a reuse would hit.

    ``liveness_history`` is a circular buffer of 128 counters, one per time
    quantum. A reuse interval is accepted as a hit only if every quantum in
    the interval is still below ``CACHE_SIZE``; accepting it increments the
    counters across that interval.

    Args:
        size: Capacity compared against each quantum's counter.
    """

    def __init__(self, size):
        self.num_hit = 0
        self.num_miss = 0
        self.demand_access = 0
        self.prefetch_access = 0
        self.CACHE_SIZE = size
        self.liveness_history = [0 for i in range(128)]

    def add_access(self, curr_quanta):
        """Count a demand access and reset the counter of ``curr_quanta``."""
        self.demand_access += 1
        self.liveness_history[curr_quanta] = 0

    def add_prefetch(self, curr_quanta):
        """Count a prefetch access and reset the counter of ``curr_quanta``."""
        # The original assigned 0 here, so the counter never advanced.
        self.prefetch_access += 1
        self.liveness_history[curr_quanta] = 0

    def should_cache(self, curr_quanta, last_quanta):
        """Decide whether the interval [last_quanta, curr_quanta) fits.

        Walks the circular buffer from ``last_quanta`` up to (not including)
        ``curr_quanta``. If any counter in that range has reached
        ``CACHE_SIZE`` the reuse is a miss; otherwise every counter in the
        range is incremented and the reuse is a hit.

        Returns:
            1 for a hit, 0 for a miss (ints, so results can be stored directly
            as labels). ``num_hit`` / ``num_miss`` are updated accordingly.
        """
        history_len = len(self.liveness_history)

        i = last_quanta
        while i != curr_quanta:
            if self.liveness_history[i] >= self.CACHE_SIZE:
                self.num_miss += 1
                return 0
            i = (i + 1) % history_len

        # The whole interval has room: occupy it.
        i = last_quanta
        while i != curr_quanta:
            self.liveness_history[i] += 1
            i = (i + 1) % history_len

        self.num_hit += 1
        return 1

    def get_num_opt_hits(self):
        """Return the number of reuses accepted as hits so far."""
        return self.num_hit

In [268]:
def update_addr_history_lru(addr_history, curr_set, curr_lru):
    """Age every entry of one set that is more recent than ``curr_lru``.

    Each entry in ``addr_history[curr_set]`` whose ``'lru'`` value is strictly
    below ``curr_lru`` has that value incremented by one, in place.
    """
    for entry in addr_history[curr_set].values():
        if entry['lru'] < curr_lru:
            entry['lru'] += 1

In [269]:
def delete_lru_addr_history_element(addr_history, curr_set):
    """Evict the least-recently-used entry of ``addr_history[curr_set]``.

    The entry with the largest ``'lru'`` value is removed in place (the first
    such entry in iteration order if several tie). An empty set is left
    untouched.

    The original only looked for an entry whose ``'lru'`` was exactly 127 and
    otherwise fell back to deleting key ``0`` -- which either raised
    ``KeyError`` or silently evicted whatever entry happened to have tag 0.
    When an entry with lru 127 exists and is the maximum, the result is the
    same as before.
    """
    entries = addr_history[curr_set]
    if not entries:
        return

    lru_addr = max(entries, key=lambda tag: entries[tag]['lru'])
    del entries[lru_addr]

In [362]:
def makeOPT(pc, mem_addr):
    """Label every access of a trace with an OPTgen hit/miss decision.

    The trace is replayed access by access. Each address is mapped to one of
    2048 sets; every set has its own OPTgen instance, its own access counter
    and its own history of recently seen tags. When a tag is seen again, the
    PREVIOUS access to that tag is labelled with the value returned by
    ``should_cache`` for the interval between the two accesses. Accesses that
    are never reused keep the initial label 0.

    Args:
        pc: Sequence whose length defines the number of accesses; its values
            are not read here.
        mem_addr: Sequence of integer addresses, indexed in step with ``pc``.

    Returns:
        A list of ``len(pc)`` labels (0, or the value returned by
        ``OPTgen.should_cache``).
    """

    # One OPTgen (constructed with size 16) per set, for 2048 sets.
    perset_optgen = [ OPTgen(16) for i in range(2048)]
    # One dict per set: tag -> {'lru', 'prev_time', 'idx'}; capped at 128 tags per set below.
    addr_history = [ {} for i in range(2048) ]
    hit_miss_history = [ 0 for i in range(len(pc))]
    # Number of accesses seen so far by each set.
    perset_time = [ 0 for i in range(2048) ]

    for i in range(len(pc)): # put your sample set # of PCs
        # Tag: address bits from bit 17 upward, masked to 31 bits.
        curr_tag = (mem_addr[i] >> 17) & (2**(48 - 11 - 6) -1) # 6 -> bo+g, 11 -> idx
        # Set index: the 11 address bits starting at bit 6.
        curr_set = (mem_addr[i] >> 6) & (2**11 - 1)
        # Position of this access in the set's 128-entry circular timeline.
        curr_time = perset_time[curr_set] % 128

        if curr_tag in addr_history[curr_set]:
            # Reuse: decide whether the interval since the previous access to
            # this tag fits, and store the decision at that previous access.
            # NOTE(review): 'prev_time' is stored already reduced modulo 128,
            # so a reuse more than 128 set-accesses apart looks like a shorter
            # interval -- confirm whether that wrap-around is intended.
            prev_time = addr_history[curr_set][curr_tag]['prev_time'] % 128
            hit_miss_history[addr_history[curr_set][curr_tag]['idx']] = perset_optgen[curr_set].should_cache(curr_time, prev_time)
            perset_optgen[curr_set].add_access(curr_time)
            # Age only the entries that were more recent than this one.
            update_addr_history_lru(addr_history, curr_set, addr_history[curr_set][curr_tag]['lru'])
        else:
            # First sighting of this tag (or it was evicted from the history).
            if len(addr_history[curr_set]) >= 128:
                # History for this set is full: drop its oldest entry first.
                delete_lru_addr_history_element(addr_history, curr_set)

            addr_history[curr_set][curr_tag] = {}
            addr_history[curr_set][curr_tag]['lru'] = 0
            perset_optgen[curr_set].add_access(curr_time)
            # Age every entry below 127 (including the new one, which is reset to 0 below).
            update_addr_history_lru(addr_history,curr_set, 128 -1)

        # Advance the set's clock and record this access as the tag's latest.
        perset_time[curr_set] += 1
        addr_history[curr_set][curr_tag]['prev_time'] = curr_time
        addr_history[curr_set][curr_tag]['idx'] = i
        addr_history[curr_set][curr_tag]['lru'] = 0

    return hit_miss_history


In [363]:
def extract_trace(file, pc_seq, addr_seq, max_lines=49661289//4):
    """Parse a ``PC : <int>, addr : <int>`` trace file into two lists.

    Args:
        file: Path of the text trace; one record per line.
        pc_seq: List that receives every parsed PC, in file order (appended
            in place).
        addr_seq: List that receives every parsed address, in file order
            (appended in place).
        max_lines: Maximum number of lines to read from the file; ``None``
            reads the whole file. The default keeps the limit that was
            previously hard-coded.

    Returns:
        ``(number of distinct PCs, number of distinct addresses)`` among the
        records that were read.

    Raises:
        ValueError: If a non-blank line does not contain exactly one comma
            or its fields are not integers.
    """
    unique_pcs = set()
    unique_addrs = set()

    # `with` guarantees the handle is closed even if parsing fails; the
    # original never closed the file.
    with open(file, 'r') as trace:
        for line_no, line in enumerate(trace):
            if max_lines is not None and line_no >= max_lines:
                break

            record = line.replace('PC : ', '').replace(' addr : ', '').strip()
            if not record:
                # Blank lines carry no record; skip them instead of failing.
                continue

            pc_text, addr_text = record.split(',')
            pc = int(pc_text)
            addr = int(addr_text)

            unique_pcs.add(pc)
            unique_addrs.add(addr)
            pc_seq.append(pc)
            addr_seq.append(addr)

    return len(unique_pcs), len(unique_addrs)
    

In [377]:
#old

# hyperparameters
file = "../ChampSim/ChampSim/big_llc_trace/mcf_46B.txt"
# unique_pc_length = 2143 # need to change by programs!
train_ratio, val_ratio, test_ratio = 0.6, 0.2, 0.2
n = 30

# Filled in place by extract_trace.
total_pc, total_addr = [], []
train_pc, val_pc, test_pc = [], [], []

unique_pc_length, unique_addr_length = extract_trace(file, total_pc, total_addr)

y_true = []
train_true, val_true, test_true = [], [], []

# One label per trace entry.
y_true = makeOPT(total_pc, total_addr)

# Chronological train / val / test boundaries (need to change for other
# trace files). The validation slice runs one element past its boundary;
# that extra element is not needed, it only evens out the count.
pc_train_end = int(len(total_pc)*train_ratio)
pc_val_end = int(len(total_pc)*(train_ratio+val_ratio))
train_pc = total_pc[:pc_train_end]
val_pc = total_pc[pc_train_end:pc_val_end+1]
test_pc = total_pc[pc_val_end:]

y_train_end = int(len(y_true)*train_ratio)
y_val_end = int(len(y_true)*(train_ratio+val_ratio))
train_y = y_true[:y_train_end]
val_y = y_true[y_train_end:y_val_end+1]
test_y = y_true[y_val_end:]

# PCs are cut into chunks of 2*n and labels into chunks of n, so label chunk
# 2*k + 1 lines up with the second half of PC chunk k.
train_pc = torch.split(torch.tensor(train_pc), 2*n)
train_y = torch.split(torch.tensor(train_y), n)

val_pc = torch.split(torch.tensor(val_pc), 2*n)
val_y = torch.split(torch.tensor(val_y), n)

test_pc = torch.split(torch.tensor(test_pc), 2*n)
test_y = torch.split(torch.tensor(test_y), n)



In [None]:
#new

# hyperparameters
file = "../ChampSim/ChampSim/big_llc_trace/mcf_46B.txt"
# unique_pc_length = 2143 # need to change by programs!
train_ratio, val_ratio, test_ratio = 0.6, 0.2, 0.2
n = 30

# Filled in place by extract_trace.
total_pc, total_addr = [], []
train_pc, val_pc, test_pc = [], [], []

unique_pc_length, unique_addr_length = extract_trace(file, total_pc, total_addr)

y_true = []
train_true, val_true, test_true = [], [], []

# One label per trace entry.
y_true = makeOPT(total_pc, total_addr)

# Chronological train / val / test boundaries (need to change for other
# trace files). The validation slice runs one element past its boundary;
# that extra element is not needed, it only evens out the count.
pc_train_end = int(len(total_pc)*train_ratio)
pc_val_end = int(len(total_pc)*(train_ratio+val_ratio))
train_pc = total_pc[:pc_train_end]
val_pc = total_pc[pc_train_end:pc_val_end+1]
test_pc = total_pc[pc_val_end:]

y_train_end = int(len(y_true)*train_ratio)
y_val_end = int(len(y_true)*(train_ratio+val_ratio))
train_y = y_true[:y_train_end]
val_y = y_true[y_train_end:y_val_end+1]
test_y = y_true[y_val_end:]

# Kept as flat 1-D tensors; windows are cut later, per sample.
train_pc = torch.tensor(train_pc)
train_y = torch.tensor(train_y)

val_pc = torch.tensor(val_pc)
val_y = torch.tensor(val_y)

test_pc = torch.tensor(test_pc)
test_y = torch.tensor(test_y)


In [378]:
# Percentage of trace entries whose label is truthy, then the label count.
correct = sum(1 for label in y_true if label)
print('OPTgen hit rate:', correct / len(y_true) * 100)
print(len(y_true))

OPTgen hit rate: 63.108206134323375
12415322


In [379]:
# Wrap each split in a dataset and expose all three under their split names.
trainset, valset, testset = (
    TraceDataset(n, unique_pc_length, split_pc, split_y)
    for split_pc, split_y in ((train_pc, train_y), (val_pc, val_y), (test_pc, test_y))
)

partition = dict(train=trainset, val=valset, test=testset)

In [380]:
#attn_train = torch.zeros(batch_size, 1, 2*n)
#attn_val = torch.zeros(batch_size, 1, 2*n)
#attn_test = torch.zeros(batch_size, 1, 2*n)

In [381]:
# Sanity figures, one per line: vocabulary sizes and their ratio, then the
# sizes of the validation/training containers and of the last training chunk.
for value in (
    unique_pc_length,
    unique_addr_length,
    unique_addr_length/unique_pc_length,
    len(val_pc),
    len(val_y),
    len(train_pc),
    len(trainset),
    len(train_pc[-1]),
):
    print(value)
print('pc length', format(len(total_pc)))

168
2243467
13353.970238095239
41385
82769
124154
124153
13
pc length 12415322


In [382]:
# Training hyperparameters
num_epochs = 1
learning_rate = 0.0001
l2 = 0.00001
batch_size = 128

# Model hyperparameters
load_model = False
# is_available must be CALLED: the bare function object is always truthy, so
# the original selected 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

input_size_encoder = unique_pc_length # need to change as various program! (2143=600.perlbench)
input_size_decoder = unique_pc_length
output_size = 10
encoder_embedding_size = 128
decoder_embedding_size = 128
hidden_size = 1024
num_layers = 1
enc_dropout = 0.0
dec_dropout = 0.0

# Multiplier applied to the raw attention scores before the softmax.
scaling_factor = 5

# Name of the optimizer to build: 'SGD', 'RMSprop' or 'Adam'.
optimizer = 'Adam'

encoder_net = Encoder(input_size_encoder, encoder_embedding_size, hidden_size,
                     num_layers, enc_dropout).to(device)

decoder_net = Decoder(input_size_decoder, decoder_embedding_size, hidden_size,
                     output_size, num_layers, dec_dropout).to(device)

model = AttnDecoder(encoder_net, decoder_net).to(device) # *******

In [383]:
!pwd

/home/junseo/cache-replacement


In [371]:
def train(model, input_data, optimizer, loss_fn):
    """Run one training epoch over ``input_data['train']``.

    Relies on the module-level ``batch_size``, ``device``, ``scaling_factor``
    and ``accuracy``.

    Args:
        model: Module called as ``model(X_warmup, X_test, scaling_factor)``
            and returning ``(y_pred, attention)``.
        input_data: Mapping with a ``'train'`` dataset yielding ``(X, y)``
            where X holds the warm-up half followed by the test half.
        optimizer: Optimizer stepping the model's parameters.
        loss_fn: Loss taking flat float predictions and flat float targets.

    Returns:
        ``(model, mean batch loss, mean batch accuracy, attention of the last
        batch)``.

    Raises:
        ValueError: If the training split yields no full batch.
    """
    # Use the dataset that was passed in; the original ignored `input_data`
    # and read the global `partition` instead.
    trainloader = DataLoader(input_data['train'],
                             batch_size=batch_size,
                             shuffle=True, drop_last=True)

    num_batches = len(trainloader)
    if num_batches == 0:
        raise ValueError('training split yields no full batch; '
                         'reduce batch_size or provide more data')

    model.train()

    train_acc = 0.0
    train_loss = 0.0
    attn_train = None
    for X, y in trainloader:
        # X: (batch_size, 2*seq_length). The first half warms up the encoder,
        # the second half is decoded; split at the midpoint rather than at a
        # hard-coded 30 so the sequence length is defined in one place only.
        warmup_len = X.shape[1] // 2
        X_warmup = X[:, :warmup_len].transpose(0, 1).to(device)
        X_test = X[:, warmup_len:].transpose(0, 1).to(device)
        y_true = y.to(device).reshape(-1)  # (batch_size * seq_length,)

        model.zero_grad()
        optimizer.zero_grad()

        y_pred, attn_train = model(X_warmup, X_test, scaling_factor)

        # (seq_length, batch_size, 1) -> (batch_size, seq_length) -> flat
        y_pred = y_pred.transpose(0, 1).squeeze(2).reshape(-1)

        loss = loss_fn(y_pred.float(), y_true.float())
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # Detach so the metric never holds on to the autograd graph.
        train_acc += accuracy(y_pred.detach(), y_true)

    train_loss = train_loss / num_batches
    train_acc = train_acc / num_batches
    print('train_loss:', format(train_loss))
    print('train_acc:', format(train_acc))
    return model, train_loss, train_acc, attn_train


In [372]:
def validate(model, partition, loss_fn):
    """Evaluate ``model`` on ``partition['val']`` without gradient tracking.

    Relies on the module-level ``batch_size``, ``device``, ``scaling_factor``
    and ``accuracy``.

    Args:
        model: Module called as ``model(X_warmup, X_test, scaling_factor)``
            and returning ``(y_pred, attention)``.
        partition: Mapping with a ``'val'`` dataset yielding ``(X, y)`` where
            X holds the warm-up half followed by the test half.
        loss_fn: Loss taking flat float predictions and flat float targets.

    Returns:
        ``(mean batch loss, mean batch accuracy, attention of the last batch)``.

    Raises:
        ValueError: If the validation split yields no full batch.
    """
    valloader = DataLoader(partition['val'],
                           batch_size=batch_size,
                           shuffle=True, drop_last=True)

    num_batches = len(valloader)
    if num_batches == 0:
        raise ValueError('validation split yields no full batch; '
                         'reduce batch_size or provide more data')

    model.eval()

    val_acc = 0.0
    val_loss = 0.0
    attn_val = None
    with torch.no_grad():
        for X, y in valloader:
            # X: (batch_size, 2*seq_length); split at the midpoint instead of
            # a hard-coded 30.
            warmup_len = X.shape[1] // 2
            X_warmup = X[:, :warmup_len].transpose(0, 1).to(device)
            X_test = X[:, warmup_len:].transpose(0, 1).to(device)
            y_true = y.to(device).reshape(-1)  # (batch_size * seq_length,)

            y_pred, attn_val = model(X_warmup, X_test, scaling_factor)
            # (seq_length, batch_size, 1) -> (batch_size, seq_length) -> flat
            y_pred = y_pred.transpose(0, 1).squeeze(2).reshape(-1)

            loss = loss_fn(y_pred.float(), y_true.float())

            val_loss += loss.item()
            val_acc += accuracy(y_pred, y_true)

    val_loss = val_loss / num_batches
    val_acc = val_acc / num_batches
    print('val_loss:', format(val_loss))
    print('val_acc:', format(val_acc))

    return val_loss, val_acc, attn_val

In [373]:
def test(model, partition):
    """Measure accuracy of ``model`` on ``partition['test']``.

    Relies on the module-level ``batch_size``, ``device``, ``scaling_factor``
    and ``accuracy``.

    Args:
        model: Module called as ``model(X_warmup, X_test, scaling_factor)``
            and returning ``(y_pred, attention)``.
        partition: Mapping with a ``'test'`` dataset yielding ``(X, y)`` where
            X holds the warm-up half followed by the test half.

    Returns:
        ``(mean batch accuracy, attention of the last batch)``.

    Raises:
        ValueError: If the test split yields no full batch.
    """
    testloader = DataLoader(partition['test'],
                           batch_size=batch_size,
                           shuffle=True, drop_last=True)

    num_batches = len(testloader)
    if num_batches == 0:
        raise ValueError('test split yields no full batch; '
                         'reduce batch_size or provide more data')

    model.eval()

    test_acc = 0.0
    attn_test = None
    with torch.no_grad():
        for X, y in testloader:
            # X: (batch_size, 2*seq_length); split at the midpoint instead of
            # a hard-coded 30.
            warmup_len = X.shape[1] // 2
            X_warmup = X[:, :warmup_len].transpose(0, 1).to(device)
            X_test = X[:, warmup_len:].transpose(0, 1).to(device)
            y_true = y.to(device).reshape(-1)  # (batch_size * seq_length,)

            y_pred, attn_test = model(X_warmup, X_test, scaling_factor)
            # (seq_length, batch_size, 1) -> (batch_size, seq_length) -> flat
            y_pred = y_pred.transpose(0, 1).squeeze(2).reshape(-1)

            test_acc += accuracy(y_pred, y_true)

    test_acc = test_acc / num_batches
    print('test_acc:' ,format(test_acc))
    return test_acc, attn_test

In [374]:
def experiment(partition):
    """Build a fresh model, train it for ``num_epochs`` and evaluate it.

    Reads its configuration from module-level names (model sizes, dropout,
    ``device``, ``optimizer`` name, ``learning_rate``, ``l2``, ``num_epochs``).

    Args:
        partition: Mapping with ``'train'``, ``'val'`` and ``'test'`` datasets.

    Returns:
        ``(result, attn_train, attn_val, attn_test)`` where ``result`` is a
        dict holding per-epoch losses, accuracies and attentions plus the
        test accuracy and attention, and the three attention values are those
        of the last epoch / the test run (``None`` for train/val when
        ``num_epochs`` is 0).

    Raises:
        ValueError: If the configured optimizer name is not recognised.
    """
    encoder_net = Encoder(input_size_encoder, encoder_embedding_size, hidden_size,
                     num_layers, enc_dropout).to(device)

    decoder_net = Decoder(input_size_decoder, decoder_embedding_size, hidden_size,
                     output_size, num_layers, dec_dropout).to(device)

    model = AttnDecoder(encoder_net, decoder_net).to(device)

    loss_fn = nn.BCELoss()

    if optimizer == 'SGD':
        optimize = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=l2)
    # The original compared the `optim` MODULE to the string here, which is
    # never true, so 'RMSprop' always ended in the ValueError below.
    elif optimizer == 'RMSprop':
        optimize = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=l2)
    elif optimizer == 'Adam':
        optimize = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=l2)
    else:
        raise ValueError('in-valid optimizer choice')

    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []
    train_attns = []
    val_attns = []

    # Defined up front so the return statement is valid even with 0 epochs.
    attn_train = None
    attn_val = None

    result = {}
    for epoch in range(num_epochs):
        ts = time.time()
        model, train_loss, train_acc, attn_train = train(model, partition, optimize, loss_fn)
        val_loss, val_acc, attn_val = validate(model, partition, loss_fn)
        te = time.time()
        train_attns.append(attn_train)
        val_attns.append(attn_val)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        print('Epoch {}, ACC(train/val): {:2.2f}/{:2.2f}, Loss(train/val) {:2.5f}/{:2.5f}. Took {:2.2f} sec'.format(epoch, train_acc, val_acc, train_loss, val_loss, te-ts))

    test_acc, attn_test = test(model, partition)
    result['test_attn'] = attn_test

    result['train_losses'] = train_losses
    result['val_losses'] = val_losses
    result['train_accs'] = train_accs
    result['val_accs'] = val_accs
    result['train_attns'] = train_attns
    result['val_attns'] = val_attns
    result['test_acc'] = test_acc
    return result, attn_train, attn_val, attn_test
    

In [375]:
def accuracy(y_pred, y_true):
    """Fraction of predictions whose thresholded label equals the target.

    A prediction counts as label 1 when it is strictly greater than 0.5 and
    as label 0 otherwise. The comparison is done with tensor operations
    instead of a per-element Python loop, which matters when the inputs live
    on a GPU.

    Args:
        y_pred: Predicted probabilities; tensor or sequence of numbers. It is
            flattened before comparison.
        y_true: Target labels (0/1) with the same number of elements.

    Returns:
        A Python float in [0, 1]; 0.0 when there are no predictions.

    Raises:
        ValueError: If the two inputs do not have the same number of elements.
    """
    pred = torch.as_tensor(y_pred).detach().reshape(-1)
    true = torch.as_tensor(y_true).detach().reshape(-1).to(pred.device)

    if pred.numel() != true.numel():
        raise ValueError('y_pred and y_true must have the same number of elements')
    if pred.numel() == 0:
        return 0.0

    pred_labels = (pred > 0.5).long()
    correct = (pred_labels == true).sum().item()
    return correct / pred.numel()

In [317]:
import hashlib
import json
from os import listdir
from os.path import isfile, join
import pandas as pd

#!mkdir results
'''
def save_exp_result(result):
    exp_name = 'exp_1'
    # del setting['epoch']
    
    # hash_key = hashlib.sha1(str(setting).encode()).hexdigest()[:6]
    filename = './results/{}-{}.json'.format(exp_name, 1)
    # result.update(setting) ????
    with(filename, 'w') as f:
        json.dump(result, f)
'''
'''        
def load_exp_result(exp_name):
    dir_path = './results'
    filenames = [f for f in listdir(dir_path) if isfile(join(dir_path, f)) if '.json' in f]
    list_result = []
    for filename in filenames:
        if exp_name in filename:
            with open(join(dir_path, filename), 'r') as infile:
                results = json.load(infile)
                list_result.append(results)
    df = pd.DataFrame(list_result)
    return df
'''

"        \ndef load_exp_result(exp_name):\n    dir_path = './results'\n    filenames = [f for f in listdir(dir_path) if isfile(join(dir_path, f)) if '.json' in f]\n    list_result = []\n    for filename in filenames:\n        if exp_name in filename:\n            with open(join(dir_path, filename), 'r') as infile:\n                results = json.load(infile)\n                list_result.append(results)\n    df = pd.DataFrame(list_result)\n    return df\n"

In [384]:
result, attn_train, attn_val, attn_test = experiment(partition)
#save_exp_result(result)
correct = 0
for i in range(len(y_true)):
    if y_true[i]:
        correct += 1
print('OPTgen hit rate:', correct / len(y_true) * 100)

train_loss: 0.6354837332716667
train_acc: 0.6416997226522201
val_loss: 0.6460896189736877
val_acc: 0.6250137061403513
Epoch 0, ACC(train/val): 0.64/0.63, Loss(train/val) 0.63548/0.64609. Took 19756.23 sec
test_acc: 0.6047552244582043
OPTgen hit rate: 63.108206134323375


In [None]:
%matplotlib inline
from mpl_toolkits import mplot3d
import numpy as np
import matplotlib.pyplot as plt

def f(x, y, attn):
    """Rearrange attention weights into a (len(y), len(x)) grid for plotting.

    Entry ``[j][i]`` of the result is ``attn[i][0][j]``: the first axis of
    ``attn`` becomes the column index and its last axis the row index.
    """
    n_cols, n_rows = len(x), len(y)
    z = np.zeros((n_rows, n_cols))

    for col in range(n_cols):
        z[:, col] = [attn[col][0][row] for row in range(n_rows)]

    return z

# Grid axes for the attention surface: x indexes 64 batch entries, y indexes
# 59 sequence positions. These names are reused by the later plotting cells.
# NOTE(review): both sizes are hard-coded; only the first 64 entries along the
# first axis of the attention tensor are read by f -- confirm this matches the
# configured batch size and sequence length.
x = np.linspace(0, 63, 64)
y = np.linspace(0, 58, 59)

X, Y = np.meshgrid(x, y)
# Move the last training batch's attention to host memory as a NumPy array.
attn_train_np = attn_train.cpu().detach().numpy()

# Z[j][i] = attention of batch entry i at sequence position j.
Z = f(x, y, attn_train_np)

print(X.shape)
print(Y.shape)
print(Z.shape)
print(attn_train_np.shape)

# 3-D contour plot of the attention weights.
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.contour3D(X, Y, Z, 50, cmap='binary')
ax.set_xlabel('batch')
ax.set_ylabel('sequence')
ax.set_zlabel('attn_train');

In [None]:
# Drop the singleton middle axis so each row is one batch entry and each
# column one sequence position, then move the data to a NumPy array.
z = attn_train.squeeze(1).cpu().detach().numpy()

print(z.shape)
# Grayscale heat map of the training attention weights.
plt.figure(figsize = (5.5, 5.5))
plt.gray()
plt.pcolor(z)
plt.colorbar()
plt.xlabel('sequence')
plt.ylabel('batch')
plt.show()

In [None]:
# Move the last validation batch's attention to host memory as a NumPy array.
attn_val_np = attn_val.cpu().detach().numpy()

# Reuses the x, y, X, Y grid defined by the training-attention plot cell.
Z = f(x, y, attn_val_np)

print(X.shape)
print(Y.shape)
print(Z.shape)
print(attn_val_np.shape)

# 3-D contour plot of the attention weights.
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.contour3D(X, Y, Z, 50, cmap='binary')
ax.set_xlabel('batch')
ax.set_ylabel('sequence')
ax.set_zlabel('attn_val');

In [None]:
# Drop the singleton middle axis so each row is one batch entry and each
# column one sequence position, then move the data to a NumPy array.
z = attn_val.squeeze(1).cpu().detach().numpy()

print(z.shape)
# Grayscale heat map of the validation attention weights.
plt.figure(figsize = (5.5, 5.5))
plt.gray()
plt.pcolor(z)
plt.colorbar()
plt.xlabel('sequence')
plt.ylabel('batch')
plt.show()

In [None]:
# Move the last test batch's attention to host memory as a NumPy array.
attn_test_np = attn_test.cpu().detach().numpy()

# Reuses the x, y, X, Y grid defined by the training-attention plot cell.
Z = f(x, y, attn_test_np)

print(X.shape)
print(Y.shape)
print(Z.shape)
print(attn_test_np.shape)

# 3-D contour plot of the attention weights.
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.contour3D(X, Y, Z, 50, cmap='binary')
ax.set_xlabel('batch')
ax.set_ylabel('sequence')
ax.set_zlabel('attn_test');

In [None]:
# Drop the singleton middle axis so each row is one batch entry and each
# column one sequence position, then move the data to a NumPy array.
z = attn_test.squeeze(1).cpu().detach().numpy()

print(z.shape)
# Grayscale heat map of the test attention weights.
plt.figure(figsize = (5.5, 5.5))
plt.gray()
plt.pcolor(z)
plt.colorbar()
plt.xlabel('sequence')
plt.ylabel('batch')
plt.show()