In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from ipynb.fs.full.utility_functions import gen_data_dict, get_data, error_count, write_to_csv, opart, SquaredHingeLoss

# One seed shared by NumPy and PyTorch so the run is repeatable.
SEED = 123

np.set_printoptions(precision=3)  # print NumPy floats with 3 decimals
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x25441abb330>

In [12]:
# Input feature: the 'count' column, passed through log twice and shaped
# as a single-column float tensor (one row per sequence).
# NOTE(review): log(log(count)) is -inf / NaN for count <= 1 -- confirm
# that every count in seq_features.csv is greater than 1.
data = torch.FloatTensor(
    np.log(np.log(
        pd.read_csv('1.genome_learning_data/seq_features.csv')['count'].to_numpy()
    )).reshape(-1, 1)
)

# Target tables, one CSV per fold.
target_df_1 = pd.read_csv('1.genome_learning_data/target_lambda_fold1_base_e.csv')
target_df_2 = pd.read_csv('1.genome_learning_data/target_lambda_fold2_base_e.csv')

# Column 1 holds the lower target and column 2 the upper target; the
# 1:2 / 2:3 slices keep each of them two-dimensional (n_rows, 1).
targets_low_1 = torch.FloatTensor(target_df_1.iloc[:, 1:2].to_numpy())
targets_high_1 = torch.FloatTensor(target_df_1.iloc[:, 2:3].to_numpy())
targets_low_2 = torch.FloatTensor(target_df_2.iloc[:, 1:2].to_numpy())
targets_high_2 = torch.FloatTensor(target_df_2.iloc[:, 2:3].to_numpy())

# Per-fold target matrix: [low, high] side by side, shape (n_rows, 2).
y1 = torch.cat((targets_low_1, targets_high_1), dim=1)
y2 = torch.cat((targets_low_2, targets_high_2), dim=1)

In [13]:
# Linear model: a single affine layer producing one output per row
class LinearModel(nn.Module):
    """Affine map from ``input_size`` features to one scalar prediction."""

    def __init__(self, input_size):
        """Create the single linear layer.

        Args:
            input_size: number of input features per sample.
        """
        super().__init__()
        # The attribute name `fc1` is part of the state_dict keys
        # ('fc1.weight', 'fc1.bias') used by the saved checkpoints.
        self.fc1 = nn.Linear(in_features=input_size, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.fc1(x)
        return prediction

In [14]:
# Instantiate the models, the custom loss function, and the optimizers.
# model1 is fit on the fold-1 targets (y1) and scored on y2; model2 is fit
# on the fold-2 targets (y2) and scored on y1.
model1 = LinearModel(input_size = 1)
model2 = LinearModel(input_size = 1)

squared_hinge_loss = SquaredHingeLoss(margin=1.5, low_lim=-8, high_lim=8)
optimizer1 = optim.Adam(model1.parameters(), lr=0.0001)
optimizer2 = optim.Adam(model2.parameters(), lr=0.0001)

# Training loop
min_loss_1_test = float('inf')
min_loss_2_test = float('inf')
for epoch in range(10001):
    # Forward pass
    outputs1 = model1(data)
    outputs2 = model2(data)

    # Compute the custom loss on each model's own fold
    loss_1 = squared_hinge_loss(outputs1, y1)
    loss_2 = squared_hinge_loss(outputs2, y2)

    # Held-out loss on the opposite fold. It is only used for model
    # selection and logging, so it is computed without building an autograd
    # graph (otherwise the stored minimum would keep that graph alive).
    with torch.no_grad():
        loss_1_test = squared_hinge_loss(outputs1, y2)
        loss_2_test = squared_hinge_loss(outputs2, y1)

    # Save models BEFORE the optimizer step: the held-out loss above was
    # produced by the current weights, so these are the weights that must be
    # written to disk. Saving after step() would store weights that have
    # already moved on from the ones that achieved the recorded minimum.
    if loss_1_test < min_loss_1_test:
        min_loss_1_test = loss_1_test
        torch.save(model1.state_dict(), '1.genome_saved_models/model1_linear_best.pth')

    if loss_2_test < min_loss_2_test:
        min_loss_2_test = loss_2_test
        torch.save(model2.state_dict(), '1.genome_saved_models/model2_linear_best.pth')

    # Backward pass and optimization
    optimizer1.zero_grad()
    loss_1.backward()
    optimizer1.step()

    optimizer2.zero_grad()
    loss_2.backward()
    optimizer2.step()

    # Print the loss every 1000 epochs
    if (epoch) % 1000 == 0:
        print(f'Epoch {epoch:5d}, Loss_1: {loss_1.item():8.4f}, Loss_1_test: {loss_1_test.item():8.4f}, Loss_2: {loss_2.item():8.4f}, Loss_2_test: {loss_2_test.item():8.4f}')

Epoch     0, Loss_1:   3.3376, Loss_1_test:   3.0308, Loss_2:   2.8394, Loss_2_test:   3.0363
Epoch  1000, Loss_1:   2.9092, Loss_1_test:   2.7616, Loss_2:   2.6496, Loss_2_test:   2.6954
Epoch  2000, Loss_1:   2.6138, Loss_1_test:   2.6095, Loss_2:   2.5712, Loss_2_test:   2.5055
Epoch  3000, Loss_1:   2.4283, Loss_1_test:   2.5496, Loss_2:   2.5524, Loss_2_test:   2.4204
Epoch  4000, Loss_1:   2.3185, Loss_1_test:   2.5586, Loss_2:   2.5501, Loss_2_test:   2.3899
Epoch  5000, Loss_1:   2.2533, Loss_1_test:   2.6162, Loss_2:   2.5500, Loss_2_test:   2.3858
Epoch  6000, Loss_1:   2.2210, Loss_1_test:   2.6971, Loss_2:   2.5498, Loss_2_test:   2.3859
Epoch  7000, Loss_1:   2.2198, Loss_1_test:   2.7202, Loss_2:   2.5495, Loss_2_test:   2.3858
Epoch  8000, Loss_1:   2.2196, Loss_1_test:   2.7316, Loss_2:   2.5490, Loss_2_test:   2.3863
Epoch  9000, Loss_1:   2.2196, Loss_1_test:   2.7347, Loss_2:   2.5485, Loss_2_test:   2.3878
Epoch 10000, Loss_1:   2.2196, Loss_1_test:   2.7349, Loss_2

In [15]:
# torch.save(model1.state_dict(), '1.genome_saved_models/model1_linear_best.pth')
# torch.save(model2.state_dict(), '1.genome_saved_models/model2_linear_best.pth')

In [16]:
# Rebuild both networks from their best checkpoints and switch each one to
# evaluation mode.
restored_models = []
for checkpoint_path in (
    '1.genome_saved_models/model1_linear_best.pth',
    '1.genome_saved_models/model2_linear_best.pth',
):
    restored = LinearModel(input_size=1)
    restored.load_state_dict(torch.load(checkpoint_path))
    restored_models.append(restored.eval())  # eval() returns the module itself

model1, model2 = restored_models
model2  # displayed as the cell output

LinearModel(
  (fc1): Linear(in_features=1, out_features=1, bias=True)
)

In [17]:
# Show the restored weight and bias of model1.
for param_name, tensor in model1.named_parameters():
    print(f"Parameter: {param_name}, Value: {tensor.data}")

Parameter: fc1.weight, Value: tensor([[-0.1333]])
Parameter: fc1.bias, Value: tensor([0.3075])


In [18]:
# Show the restored weight and bias of model2.
for param_name, tensor in model2.named_parameters():
    print(f"Parameter: {param_name}, Value: {tensor.data}")

Parameter: fc1.weight, Value: tensor([[-0.2770]])
Parameter: fc1.bias, Value: tensor([0.5791])


In [19]:
# Run both models on every sequence without tracking gradients, flatten the
# predictions to 1-D, then raise e to them.
# NOTE(review): presumably the models predict log(lambda) (the target files
# are named "..._base_e"), so this maps back to lambda -- confirm.
with torch.no_grad():
    raw_pred_1 = model1(data).numpy().reshape(-1)
    raw_pred_2 = model2(data).numpy().reshape(-1)

ldas1 = np.e ** raw_pred_1
ldas2 = np.e ** raw_pred_2

In [20]:
seqs   = gen_data_dict('sequence_label_data/genome/signals.gz')
labels = gen_data_dict('sequence_label_data/genome/labels.gz')

# Column names for the output CSV; each row below follows this order.
header = ['sequenceID', 'fold_1_total_labels', 'fold_2_total_labels', 'fold_1_fp_errs', 'fold_1_fn_errs', 'fold_1_tp', 'fold_1_tn', 'fold_2_fp_errs', 'fold_2_fn_errs', 'fold_2_tp', 'fold_2_tn',]

for i in range(len(seqs)):
    # Signal plus the negative/positive label intervals of each fold.
    (sequence,
     neg_start_1, neg_end_1, pos_start_1, pos_end_1,
     neg_start_2, neg_end_2, pos_start_2, pos_end_2) = get_data(i, seqs=seqs, labels=labels)

    # Number of labels available in each fold for this sequence.
    fold1_total_labels = len(neg_start_1) + len(pos_start_1)
    fold2_total_labels = len(neg_start_2) + len(pos_start_2)

    # Cross-fold evaluation: fold-1 labels are scored with the lambda from
    # model2, fold-2 labels with the lambda from model1.
    chpnt_fold1 = opart(ldas2[i], sequence)
    chpnt_fold2 = opart(ldas1[i], sequence)

    err_1 = error_count(chpnt_fold1, neg_start_1, neg_end_1, pos_start_1, pos_end_1)
    err_2 = error_count(chpnt_fold2, neg_start_2, neg_end_2, pos_start_2, pos_end_2)

    # Assemble the record: id, label totals, then the first four entries of
    # each error tuple, in the order given by `header`.
    row = [
        seqs[i][0], fold1_total_labels, fold2_total_labels,
        err_1[0], err_1[1], err_1[2], err_1[3],
        err_2[0], err_2[1], err_2[2], err_2[3],
    ]

    write_to_csv('1.genome_learning_output/linear.csv', header, row)