In [1]:
import numpy as np
import torch
import os
import pandas as pd
import torch_geometric
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Identifier handed to the dataset loader as the Rfam family to use.
rfam_id = "RF00008"
# Directory holding the processed release-14.8 files, relative to the notebook.
rfam_dir = "../rfam/data/raw/processed/release-14.8"

In [3]:
from RNARepLearn.datasets import SingleMaskedRfamDataset

In [4]:
# Build the dataset for the configured directory and family id.
rfam_dataset = SingleMaskedRfamDataset(rfam_dir, rfam_id)

Processing...
Done!


In [5]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network returning per-node log-probabilities.

    Args:
        in_channels: Size of each input node feature vector. When ``None``
            (the default) it is read from the notebook-level ``rfam_dataset``
            via ``num_node_features``, so a bare ``GCN()`` behaves as before.
        hidden_channels: Output width of the first convolution (default 16).
        out_channels: Output width of the second convolution, i.e. the number
            of classes the log-softmax is taken over (default 4).
        dropout: Dropout probability applied after the ReLU while the module
            is in training mode (default 0.5, the ``F.dropout`` default).
    """

    def __init__(self, in_channels=None, hidden_channels=16, out_channels=4, dropout=0.5):
        super().__init__()
        if in_channels is None:
            # Backward-compatible fallback: infer the width from the notebook's dataset.
            in_channels = rfam_dataset.num_node_features
        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both convolutions on ``data.x`` / ``data.edge_index``.

        Returns:
            Tensor of log-probabilities, normalised over dimension 1.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is active only while self.training is True (i.e. after model.train()).
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [6]:
## Set up mask
import random

# Fixed seed and a private generator: the split is the same on every
# Restart & Run All and does not disturb the global `random` state.
MASK_SEED = 0
mask_rng = random.Random(MASK_SEED)

# One draw per node decides its role: 0-29 -> train, 30-59 -> test, 60-99 -> unused.
# Each node still has a 30% chance of landing in either mask, but the two masks
# can no longer overlap, so no training node leaks into the evaluation set.
num_nodes = rfam_dataset[0].num_nodes
node_draws = [mask_rng.randrange(100) for _ in range(num_nodes)]

# Explicit bool dtype keeps the tensors valid as masks even for an empty graph.
train_mask = torch.tensor([draw < 30 for draw in node_draws], dtype=torch.bool)
test_mask = torch.tensor([30 <= draw < 60 for draw in node_draws], dtype=torch.bool)

In [7]:
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move the network to the device, then cast its parameters to float64.
# NOTE(review): presumably needed because the dataset features are float64 - confirm.
model = GCN().to(device).double()
data = rfam_dataset[0].to(device)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Full-batch training: every epoch is one forward/backward pass over the
# single graph, with the loss restricted to the nodes selected by train_mask.
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(
        out[train_mask],
        data["y"][train_mask].long(),
    )
    loss.backward()
    optimizer.step()

In [8]:
# Switch to evaluation mode so dropout is disabled during the forward pass.
model.eval()

# Inference needs no gradients; no_grad avoids building the autograd graph.
with torch.no_grad():
    pred = model(data).argmax(dim=1)

# Compare predictions with labels on the held-out nodes only.
correct = (pred[test_mask] == data.y[test_mask]).sum()
num_test = int(test_mask.sum())

# An empty test mask would otherwise raise ZeroDivisionError; report NaN instead.
acc = int(correct) / num_test if num_test else float('nan')
print(f'Accuracy: {acc:.4f}')

Accuracy: 1.0000


In [11]:
# Number of entries directly inside the relative "test" directory.
test_dir_entries = os.listdir("test")
len(test_dir_entries)

0