In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from ipynb.fs.full.OPART import gen_data_dict, get_data, get_cumsum, L, trace_back, error_count, write_to_csv, opart

# Seed numpy's and torch's global generators with the same value so a fresh
# "Restart & Run All" reproduces the same initial weights and training trace.
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x24d781bb2f0>

In [2]:
# Custom loss function
# Default margin; kept as a module-level name so existing references still resolve.
margin = 1


class SquaredHingeLoss(nn.Module):
    """Squared hinge loss against an interval target [low, high].

    The interval is shrunk by ``margin`` on both sides; a prediction is
    penalised by the squared distance by which it falls outside the shrunk
    interval, and the penalties are summed over all elements.

    Args:
        margin: amount added to ``low`` and subtracted from ``high`` before
            the hinge is applied. Defaults to the module-level ``margin``
            as it is when the class is defined, so ``SquaredHingeLoss()``
            behaves as before. The value is stored on the instance, so
            re-binding the global later no longer changes an existing loss.
    """

    def __init__(self, margin=margin):
        super().__init__()
        self.margin = margin

    def forward(self, predicted, low, high):
        """Return the summed squared hinge penalty as a scalar tensor.

        Args:
            predicted: tensor of predictions.
            low: tensor of lower bounds, broadcastable against ``predicted``.
            high: tensor of upper bounds, broadcastable against ``predicted``.
        """
        low = low + self.margin
        high = high - self.margin
        # Each relu term is non-zero only on its own side of the interval.
        loss = torch.relu(low - predicted) + torch.relu(predicted - high)
        return torch.sum(loss**2)

In [3]:
# data
# Feature: log10(log10(count)) of the 'count' column, reshaped to a single
# column and converted to a float tensor.
data = pd.read_csv('learning_data/seq_features.csv')['count'].to_numpy()
data = torch.FloatTensor(np.log10(np.log10(data)).reshape(-1, 1))

# Interval targets, one file per fold.
target_df_1 = pd.read_csv('learning_data/target_lambda_fold1.csv')
target_df_2 = pd.read_csv('learning_data/target_lambda_fold2.csv')

# Column position 1 is used as the lower bound and position 2 as the upper
# bound; slicing with col:col + 1 keeps each as a 2-D single-column array.
targets_low_1, targets_high_1 = (
    torch.FloatTensor(target_df_1.iloc[:, col:col + 1].to_numpy()) for col in (1, 2)
)
targets_low_2, targets_high_2 = (
    torch.FloatTensor(target_df_2.iloc[:, col:col + 1].to_numpy()) for col in (1, 2)
)

In [4]:
# Define the linear model
class MyModel(nn.Module):
    """Linear model: one fully connected layer from `input_size` features to one output."""

    def __init__(self, input_size):
        """Create the single linear layer `fc1` with `input_size` inputs and 1 output."""
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=1)

    def forward(self, x):
        """Apply `fc1` to `x` and return the result."""
        prediction = self.fc1(x)
        return prediction

In [5]:
# Instantiate the models, the custom loss function, and the optimizers.
# One independent linear model (and its own Adam optimizer) per fold.
# model1 is constructed before model2 so the seeded initialisation order is unchanged.
model1 = MyModel(input_size=1)
model2 = MyModel(input_size=1)

squared_hinge_loss = SquaredHingeLoss()
optimizer1 = optim.Adam(model1.parameters(), lr=0.01)
optimizer2 = optim.Adam(model2.parameters(), lr=0.01)

# Training loop: every iteration is one full-batch update of each model.
# range(epochs + 1) makes the last iteration (epoch == epochs) hit the print below.
epochs = 5000
for epoch in range(epochs + 1):
    # Fold 1: forward pass, loss, gradient step
    outputs1 = model1(data)
    loss_1 = squared_hinge_loss(outputs1, targets_low_1, targets_high_1)
    optimizer1.zero_grad()
    loss_1.backward()
    optimizer1.step()

    # Fold 2: same steps; the two models share no parameters
    outputs2 = model2(data)
    loss_2 = squared_hinge_loss(outputs2, targets_low_2, targets_high_2)
    optimizer2.zero_grad()
    loss_2.backward()
    optimizer2.step()

    # Print both losses every 500 epochs (values are from before this epoch's step)
    if not epoch % 500:
        print(f'Epoch [{epoch:5d}/{epochs}], Loss_1: {loss_1.item():8.4f}, Loss_2: {loss_2.item():8.4f}')

Epoch [    0/5000], Loss_1: 622.5753, Loss_2: 650.6815
Epoch [  500/5000], Loss_1: 462.6325, Loss_2: 552.7117
Epoch [ 1000/5000], Loss_1: 443.0288, Loss_2: 550.0253
Epoch [ 1500/5000], Loss_1: 440.2346, Loss_2: 550.0333
Epoch [ 2000/5000], Loss_1: 439.9647, Loss_2: 550.0263
Epoch [ 2500/5000], Loss_1: 439.9647, Loss_2: 550.0080
Epoch [ 3000/5000], Loss_1: 439.9647, Loss_2: 550.0074
Epoch [ 3500/5000], Loss_1: 439.9647, Loss_2: 550.0189
Epoch [ 4000/5000], Loss_1: 439.9647, Loss_2: 550.0118
Epoch [ 4500/5000], Loss_1: 439.9647, Loss_2: 550.0013
Epoch [ 5000/5000], Loss_1: 439.9647, Loss_2: 550.0230


In [6]:
# Run both trained models on the full feature tensor without tracking
# gradients, and flatten each prediction to a 1-D numpy array.
with torch.no_grad():
    ldas1, ldas2 = (net(data).numpy().reshape(-1) for net in (model1, model2))

In [7]:
seqs = gen_data_dict('sequence_label_data/signals.gz')
labels = gen_data_dict('sequence_label_data/labels.gz')

header = ['sequenceID', 'fold_1_total_labels', 'fold_2_total_labels', 'fold_1_errs', 'fold_2_errs']

for i in range(len(seqs)):
    # Sequence i plus the negative/positive label start and end lists for both folds
    (sequence,
     neg_start_1, neg_end_1, pos_start_1, pos_end_1,
     neg_start_2, neg_end_2, pos_start_2, pos_end_2) = get_data(i, seqs=seqs, labels=labels)
    sequence_length = len(sequence) - 1  # not read anywhere in this cell

    # vectors of cumulative sums
    y, z = get_cumsum(sequence)

    # Label count per fold = negative labels + positive labels
    fold1_total_labels = len(neg_start_1) + len(pos_start_1)
    fold2_total_labels = len(neg_start_2) + len(pos_start_2)

    # 10**prediction is passed as opart's first argument.
    # NOTE(review): fold-1 labels are scored with model2's prediction and
    # fold-2 labels with model1's, presumably cross-validation. Confirm.
    chpnt_fold1 = opart(10**ldas2[i], sequence, y, z)
    chpnt_fold2 = opart(10**ldas1[i], sequence, y, z)

    err_1 = error_count(chpnt_fold1, neg_start_1, neg_end_1, pos_start_1, pos_end_1)
    err_2 = error_count(chpnt_fold2, neg_start_2, neg_end_2, pos_start_2, pos_end_2)

    # One CSV row per sequence, in the column order given by `header`
    row = [i, fold1_total_labels, fold2_total_labels, sum(err_1), sum(err_2)]
    write_to_csv('learning_output/linear_GD.csv', header, row)