In [1]:
# PyTorch, plus the TensorBoard writer that is handed to MaskedTraining further down.
import torch
from torch.utils.tensorboard import SummaryWriter
# No log_dir is passed, so the writer uses its default log location.
writer = SummaryWriter()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory that is passed to the Rfam dataset classes as their data root.
rfam_dir = "../rfam/data/raw/processed/release-14.8"

# Family identifiers for the combined-dataset variant (currently only referenced
# by the commented-out CombinedRfamDataset call).
rfams = [
    "RF00001",
    "RF00174",
    "RF00169",
    "RF00050",
]

In [3]:
from RNARepLearn.datasets import CombinedRfamDataset, SingleRfamDataset
from torch_geometric.loader import DataLoader

# Alternative kept for reference: build one dataset from all families in `rfams`.
#dataset = CombinedRfamDataset(rfam_dir, rfams, "Under300", 15, 300)
# NOTE(review): the trailing 15 is presumably a masking percentage (the same value
# is passed to MaskedTraining) -- confirm against RNARepLearn.datasets.
dataset = SingleRfamDataset(rfam_dir, "RF00001", 15)

# 80/20 train/test split; the test size is the remainder so both sizes always
# sum to len(dataset).
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Seed the split explicitly so the same samples land in train/test after every
# "Restart Kernel -> Run All"; without a generator the partition changes per run.
SPLIT_SEED = 0
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Mini-batches of 32 graphs, reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

Processing...
Done!


In [4]:
##Model
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, Linear

class Encoder(torch.nn.Module):
    """Graph-convolutional encoder that maps node features to node embeddings.

    Args:
        input_channels: Feature size expected by the first GCN layer.
        output_channels: Feature size produced by both GCN layers.

    NOTE(review): ``conv2`` is applied twice in ``forward`` (shared weights).
    If a third, independent layer was intended this is a copy-paste slip --
    confirm before changing, since it alters the parameter set.
    """

    def __init__(self, input_channels, output_channels):
        super().__init__()
        self.conv1 = GCNConv(input_channels, output_channels)
        self.conv2 = GCNConv(output_channels, output_channels)

    def forward(self, data):
        """Encode ``data.x`` using the connectivity in ``data.edge_index``.

        Returns the activations after the last ReLU.
        """
        node_features, edges = data.x, data.edge_index

        # First convolution, followed by dropout that is only active in
        # training mode.
        hidden = F.relu(self.conv1(node_features, edges))
        hidden = F.dropout(hidden, training=self.training)

        # Two further passes through the *same* conv2 layer, each with ReLU.
        for _ in range(2):
            hidden = F.relu(self.conv2(hidden, edges))

        return hidden
    
class AttentionDecoder(torch.nn.Module):
    """Decode node embeddings into per-node class probabilities and pairwise attention.

    Args:
        input_channels: Size of the incoming embedding; also the size of the
            key and query projections.
        output_channels: Number of classes predicted per node.

    NOTE(review): attention is computed between *all* rows of ``x``. If ``x``
    is the concatenated node matrix of a mini-batch of several graphs, rows
    from different graphs attend to each other -- confirm this is intended.
    """

    def __init__(self, input_channels, output_channels):
        super().__init__()
        self.key_projection = Linear(input_channels, input_channels)
        self.query_projection = Linear(input_channels, input_channels)
        self.nuc_projection = Linear(input_channels, output_channels)

    def forward(self, x):
        """Return ``(class_probabilities, attention_probabilities)``.

        Args:
            x: Tensor whose last dimension is ``input_channels`` and whose
                second-to-last dimension indexes nodes, e.g. ``(N, C)`` or
                ``(B, N, C)``.

        Returns:
            A tuple of two tensors, both normalised with softmax over their
            last dimension: the projected class scores ``(..., N, output_channels)``
            and the query-key dot products ``(..., N, N)``.
        """
        keys = self.key_projection(x)
        queries = self.query_projection(x)

        nucleotides = self.nuc_projection(x)

        # transpose(-2, -1) equals `.T` for 2-D input but, unlike `.T`, stays
        # correct when leading batch dimensions are present.
        dotprod = torch.matmul(queries, keys.transpose(-2, -1))

        # dim=-1 equals dim=1 for 2-D input and keeps the normalisation on the
        # class / key axis for batched input.
        return F.softmax(nucleotides, dim=-1), F.softmax(dotprod, dim=-1)

In [5]:
# Encoder (4 input channels -> 64) feeding the decoder (64 -> 4 output channels),
# chained so the encoder output is passed straight into the decoder.
layers = [
    Encoder(4, 64),
    AttentionDecoder(64, 4),
]
model = torch.nn.Sequential(*layers)

In [6]:
from RNARepLearn.train import MaskedTraining
# NOTE(review): the positional 10 and 15 are not named here. The training log
# prints "Epoch 1/10", so 10 is presumably the epoch count; 15 presumably is a
# masking percentage (the dataset is built with the same value) -- TODO confirm
# against MaskedTraining's signature. `writer` is the TensorBoard SummaryWriter.
training = MaskedTraining(model, 10, 15, writer)

In [7]:
# Run the masked training on the training loader only (test_dataset is not used here).
training.run(train_loader)

[Epoch    1/  10] [Batch    1/ 125] Loss:  9.13e+00 Nucleotide-Loss:  1.39e+00 Edge-Loss:  7.75e+00
[Epoch    1/  10] [Batch   11/ 125] Loss:  8.87e+00 Nucleotide-Loss:  1.20e+00 Edge-Loss:  7.67e+00
[Epoch    1/  10] [Batch   21/ 125] Loss:  8.63e+00 Nucleotide-Loss:  1.09e+00 Edge-Loss:  7.54e+00
[Epoch    1/  10] [Batch   31/ 125] Loss:  8.25e+00 Nucleotide-Loss:  1.01e+00 Edge-Loss:  7.24e+00
[Epoch    1/  10] [Batch   41/ 125] Loss:  8.09e+00 Nucleotide-Loss:  8.73e-01 Edge-Loss:  7.22e+00
[Epoch    1/  10] [Batch   51/ 125] Loss:  7.81e+00 Nucleotide-Loss:  8.06e-01 Edge-Loss:  7.00e+00
[Epoch    1/  10] [Batch   61/ 125] Loss:  7.87e+00 Nucleotide-Loss:  7.89e-01 Edge-Loss:  7.08e+00
[Epoch    1/  10] [Batch   71/ 125] Loss:  7.73e+00 Nucleotide-Loss:  8.02e-01 Edge-Loss:  6.92e+00
[Epoch    1/  10] [Batch   81/ 125] Loss:  8.11e+00 Nucleotide-Loss:  7.85e-01 Edge-Loss:  7.32e+00
[Epoch    1/  10] [Batch   91/ 125] Loss:  7.69e+00 Nucleotide-Loss:  8.08e-01 Edge-Loss:  6.88e+00


KeyboardInterrupt: 