In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from ipynb.fs.full.utility_functions import gen_data_dict, get_data, get_cumsum, error_count, write_to_csv, opart, SquaredHingeLoss

np.random.seed(4)
torch.manual_seed(4)

<torch._C.Generator at 0x266712afcd0>

In [2]:
# sequences
seqs = gen_data_dict('sequence_label_data/signals.gz')

# target 
target_df_1 = pd.read_csv('learning_data/target_lambda_fold1.csv')
target_df_2 = pd.read_csv('learning_data/target_lambda_fold2.csv')

targets_low_1  = target_df_1.iloc[:, 1:2].to_numpy()
targets_high_1 = target_df_1.iloc[:, 2:3].to_numpy()
targets_low_2  = target_df_2.iloc[:, 1:2].to_numpy()
targets_high_2 = target_df_2.iloc[:, 2:3].to_numpy()

targets_low_1  = torch.FloatTensor(targets_low_1)
targets_high_1 = torch.FloatTensor(targets_high_1)
targets_low_2  = torch.FloatTensor(targets_low_2)
targets_high_2 = torch.FloatTensor(targets_high_2)

In [3]:
# Define the RNN model
class RNNModel(nn.Module):
    def __init__(self):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_size=1, hidden_size=8, num_layers=1, nonlinearity='relu', bias=True, batch_first=False)
        self.fc1 = nn.Linear(8, 1)

    def forward(self, x):
        _, x = self.rnn(x)
        x    = self.fc1(x)
        return x

In [4]:
# Instantiate the model, define custom loss function, and optimizer
model1 = RNNModel()
model2 = RNNModel()

squared_hinge_loss = SquaredHingeLoss()
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)

# Training loop
record_loss1_test = []
record_loss2_test = []
min_loss_1_test = float('inf')
min_loss_2_test = float('inf')
for epoch in range(1000):
    # Forward pass
    outputs1 = torch.tensor([[0.0]])
    outputs2 = torch.tensor([[0.0]])
    for i in range(len(seqs)):
        seq = torch.tensor(seqs[i][1]['logratio'].to_numpy(), dtype=torch.float32).reshape(-1,1)
        outputs1 += model1(seq)
        outputs2 += model2(seq)
    
    # Compute the custom loss
    loss_1 = squared_hinge_loss(outputs1, targets_low_1, targets_high_1)
    loss_2 = squared_hinge_loss(outputs2, targets_low_2, targets_high_2)
    loss_1_test = squared_hinge_loss(outputs1, targets_low_2, targets_high_2)
    loss_2_test = squared_hinge_loss(outputs2, targets_low_1, targets_high_1)

    # Print the loss every epochs
    if (epoch) % 1 == 0:
        print(f'Epoch {epoch:5d}, Loss_1: {loss_1.item():8.4f}, Loss_1_test: {loss_1_test.item():8.4f}, Loss_2: {loss_2.item():8.4f}, Loss_2_test: {loss_2_test.item():8.4f}')
    
    # Backward pass and optimization
    optimizer1.zero_grad()
    loss_1.backward()
    optimizer1.step()

    optimizer2.zero_grad()
    loss_2.backward()
    optimizer2.step()

    # record
    record_loss1_test.append(loss_1_test.item())
    record_loss2_test.append(loss_2_test.item())

    # save models
    if loss_1_test < min_loss_1_test:
        min_loss_1_test = loss_1_test
        torch.save(model1.state_dict(), 'saved_models/model1_rnn_relu_best.pth')
    
    if loss_2_test < min_loss_2_test:
        min_loss_2_test = loss_2_test
        torch.save(model2.state_dict(), 'saved_models/model2_rnn_relu_best.pth')

Epoch     0, Loss_1: 9654.5010, Loss_1_test: 9578.5010, Loss_2: 9652.5215, Loss_2_test: 9728.9961
Epoch     1, Loss_1: 9225.3545, Loss_1_test: 9152.0947, Loss_2: 9351.2129, Loss_2_test: 9425.7529
Epoch     2, Loss_1: 8806.4150, Loss_1_test: 8735.8477, Loss_2: 9052.2520, Loss_2_test: 9124.8672
Epoch     3, Loss_1: 8396.0195, Loss_1_test: 8328.1025, Loss_2: 8755.9150, Loss_2_test: 8826.6133
Epoch     4, Loss_1: 7995.4492, Loss_1_test: 7930.1367, Loss_2: 8462.1855, Loss_2_test: 8530.9766
Epoch     5, Loss_1: 7603.5156, Loss_1_test: 7540.7656, Loss_2: 8171.3125, Loss_2_test: 8238.2061
Epoch     6, Loss_1: 7219.9746, Loss_1_test: 7159.7495, Loss_2: 7883.3311, Loss_2_test: 7948.3379
Epoch     7, Loss_1: 6845.1875, Loss_1_test: 6787.4473, Loss_2: 7598.5225, Loss_2_test: 7661.6538
Epoch     8, Loss_1: 6478.4014, Loss_1_test: 6423.1089, Loss_2: 7317.1768, Loss_2_test: 7378.4468
Epoch     9, Loss_1: 6118.4492, Loss_1_test: 6065.5801, Loss_2: 7039.2290, Loss_2_test: 7098.6519
Epoch    10, Loss_1:

In [None]:
# Load model1
model1 = RNNModel()
model1.load_state_dict(torch.load('saved_models/model1_rnn_relu_best.pth'))
model1.eval()  # Set the model to evaluation mode

# Load model2
model2 = RNNModel()
model2.load_state_dict(torch.load('saved_models/model2_rnn_relu_best.pth'))
model2.eval()  # Set the model to evaluation mode

In [None]:
ldas1 = np.zeros(len(seqs))
ldas2 = np.zeros(len(seqs))
with torch.no_grad():
    for i in range(len(seqs)):
        seq = torch.tensor(seqs[i][1]['logratio'].to_numpy(), dtype=torch.float32).reshape(-1,1)
        ldas1[i] = model1(seq).numpy()[0][0]
        ldas2[i] = model2(seq).numpy()[0][0]

In [None]:
seqs   = gen_data_dict('sequence_label_data/signals.gz')
labels = gen_data_dict('sequence_label_data/labels.gz')

header = ['sequenceID', 'fold_1_total_labels', 'fold_2_total_labels', 'fold_1_errs', 'fold_2_errs']

for i in range(len(seqs)):
    # generate data
    sequence, neg_start_1, neg_end_1, pos_start_1, pos_end_1, neg_start_2, neg_end_2, pos_start_2, pos_end_2 = get_data(i, seqs=seqs, labels=labels)
    sequence_length = len(sequence)-1

    # vectors of cumulative sums
    y, z = get_cumsum(sequence)

    # get total labels
    fold1_total_labels = len(neg_start_1) + len(pos_start_1)
    fold2_total_labels = len(neg_start_2) + len(pos_start_2)

    # run each lambda and record it into csv file
    row  = [i, fold1_total_labels, fold2_total_labels]

    chpnt_fold1 = opart(10**ldas2[i], sequence)
    chpnt_fold2 = opart(10**ldas1[i], sequence)

    err_1 = error_count(chpnt_fold1, neg_start_1, neg_end_1, pos_start_1, pos_end_1)
    err_2 = error_count(chpnt_fold2, neg_start_2, neg_end_2, pos_start_2, pos_end_2)
    
    row.append(sum(err_1))
    row.append(sum(err_2))

    write_to_csv('learning_output/rnn_relu.csv', header, row)