In [11]:
import torch
from torch import nn
import torchvision.models as models

# data can be reshaped to rectangular matrices:
#     62 × 173 = 10,726
#     31 × 346 = 10,726
#     2 x 31 x 173 = 10,726

# change shape like this?
#     x = x.view(x.size(0), 1, x.size(2)*16, x.size(3)*16) # [batch_size, 1, 32, 32]
# or like this?
#     a.resize_((1,5))
# or like this?
#     y = x.reshape(1, 2, 1, 3)

# rebuild first conv layer like this?
#     model.conv1 = nn.Conv2d(1, 64, 7, 2, 3, bias=False) # For torchvision.models.resnet18
# or like this?
#     model = models.resnet50()
#     conv1 = model.conv1
#     model.conv1 = nn.Conv2d(
#         1024, conv1.out_channels, conv1.kernel_size, conv1.stride, conv1.padding,
#         conv1.dilation, conv1.groups, conv1.bias)

# Stock torchvision ResNet-18 with a 51-way classification head.
model = models.resnet18(num_classes=51)

# Replace the stem convolution so the network accepts 2-channel input, while
# copying every other hyperparameter from the original layer.
conv1 = model.conv1
model.conv1 = nn.Conv2d(
  in_channels=2,
  out_channels=conv1.out_channels,
  kernel_size=conv1.kernel_size,
  stride=conv1.stride,
  padding=conv1.padding,
  dilation=conv1.dilation,
  groups=conv1.groups,
  # `bias` must be a bool. Passing the original layer's bias tensor only works
  # while that tensor is None; a real bias tensor would fail the truthiness test.
  bias=conv1.bias is not None)
print(model)

ResNet(
  (conv1): Conv2d(2, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [12]:
from google.colab import drive
# Mount Google Drive at /content/gdrive so the input files below are reachable.
drive.mount('/content/gdrive')

# Sanity check: list the two input files used by the later cells (targets, then
# node features); `ls` prints an error for a path that does not exist.
!ls '/content/gdrive/My Drive/Academia/OHSU/Proposal/Aim 2/Target Sets/resnet_graph_targets.txt'
!ls '/content/gdrive/My Drive/Academia/OHSU/Proposal/Aim 2/Node Features/node_features.txt'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
'/content/gdrive/My Drive/Academia/OHSU/Proposal/Aim 2/Target Sets/resnet_graph_targets.txt'
'/content/gdrive/My Drive/Academia/OHSU/Proposal/Aim 2/Node Features/node_features.txt'


In [13]:
import random
# Device handles. Constructing a torch.device does not touch the GPU, so both
# handles can always be created; `device` falls back to the CPU when CUDA is
# unavailable so the notebook still runs on CPU-only runtimes.
cuda0 = torch.device('cuda:0')
cpu = torch.device('cpu')
device = cuda0 if torch.cuda.is_available() else cpu

# from https://discuss.pytorch.org/t/how-to-define-train-mask-val-mask-test-mask-in-my-own-dataset/56289
features_fn = '/content/gdrive/My Drive/Academia/OHSU/Proposal/Aim 2/Node Features/node_features.txt'
graph_targets_fn = '/content/gdrive/My Drive/Academia/OHSU/Proposal/Aim 2/Target Sets/resnet_graph_targets.txt'

# magic numbers
# NOTE(review): INPUT_CHANNELS is not referenced by the visible cells, and the
# model/data elsewhere in this notebook use 2 input channels -- confirm the value.
INPUT_CHANNELS = 1
OUTPUT_CHANNELS = 51
BATCH_SIZE = 64
EPOCHS = 500 #set this to 200 - 2000
BENCHMARKING = False

# Seed the RNGs. `random.seed` is a function and must be *called*; assigning to
# it (`random.seed = ...`) replaces the function and seeds nothing. If this
# kernel already ran the old assignment, restart it before running this cell.
SEED = 88888888
random.seed(SEED)
# Also seed torch, whose global RNG drives DataLoader shuffling.
torch.manual_seed(SEED)

In [14]:
# we'll need the target encoder from this code block... not sure if we need anything else
def build_resnet_datalist(node_features_fn, graph_targets_fn,
                          sample_shape=(2, 31, 173), return_encoder=False):
    """Load features and class labels and pair them up as ResNet samples.

    Args:
        node_features_fn: Path to a whitespace-delimited numeric text file with
            one sample per row (parsed by ``numpy.loadtxt``).
        graph_targets_fn: Path to a text file with one class label per line,
            in the same order as the rows of the feature file.
        sample_shape: Shape each feature row is reshaped to. Its element count
            must equal the number of columns in the feature file.
        return_encoder: When true, also return the fitted ``LabelEncoder`` so
            integer predictions can be mapped back to the original labels.

    Returns:
        A list of ``{'x': float tensor of shape sample_shape,
        'y': integer tensor of shape (1,)}`` dicts, one per sample. With
        ``return_encoder=True`` the result is ``(data_list, target_encoder)``.

    Raises:
        ValueError: If the number of labels differs from the number of rows.
    """
    # ndmin=2 keeps a single-row file two-dimensional, so rows iterate correctly.
    feature_v = numpy.loadtxt(node_features_fn, ndmin=2)

    # Read labels line by line instead of numpy.loadtxt(delimiter="\n"), which
    # recent NumPy releases reject. Blank lines are skipped.
    # NOTE(review): unlike loadtxt, '#' is not treated as a comment marker here.
    with open(graph_targets_fn) as targets_file:
        target_labels = [line.rstrip("\r\n") for line in targets_file]
    target_labels = [label for label in target_labels if label.strip()]

    if len(target_labels) != len(feature_v):
        raise ValueError(
            f"{len(feature_v)} feature rows but {len(target_labels)} target labels")

    # Map the string labels to integer class ids.
    target_encoder = sklearn.preprocessing.LabelEncoder()
    target_v = target_encoder.fit_transform(target_labels)

    data_list = []
    for row, label in zip(feature_v, target_v):
        x = torch.tensor(row, dtype=torch.float).reshape(sample_shape)
        # Shape (1,) so the default collate function stacks labels to (batch, 1).
        y = torch.tensor([int(label)])
        data_list.append({'x': x, 'y': y})

    if return_encoder:
        return data_list, target_encoder
    return data_list

def build_reactome_graph_loader(data_list,batch_size):
    """Wrap ``data_list`` in a shuffling ``DataLoader``.

    Args:
        data_list: Sequence of samples to batch.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` over ``data_list`` with ``shuffle=True``.
    """
    return DataLoader(dataset=data_list, shuffle=True, batch_size=batch_size)

In [15]:
import numpy
import sklearn
from sklearn import preprocessing
from torch.utils.data import Dataset, DataLoader
# Parse both input files into the list of {'x', 'y'} samples.
data_list = build_resnet_datalist(
    node_features_fn=features_fn,
    graph_targets_fn=graph_targets_fn,
)
# Shuffle the samples in place.
random.shuffle(data_list)

In [16]:
# Classification loss; takes the raw model outputs and integer class ids.
criterion = torch.nn.CrossEntropyLoss()
# Move the network to the selected device; Module.to returns the module itself.
model = model.to(device)
# Adam with its default hyperparameters over all model parameters.
optimizer = torch.optim.Adam(model.parameters())

def train(loader,device):
  """Run one optimization pass over every batch in ``loader``.

  Uses the notebook-level ``model``, ``optimizer`` and ``criterion``.

  Args:
    loader: Iterable of batches; each batch is a dict with an 'x' input tensor
      and a 'y' tensor of integer class labels.
    device: Device the batch tensors are moved to before the forward pass.
  """
  model.train()

  for batch in loader:  # Iterate in batches over the training dataset.
    x = batch['x'].to(device)
    # Flatten labels to shape (batch,). A bare squeeze() would collapse a
    # one-sample batch to a 0-d tensor, which the loss rejects against
    # (1, num_classes) outputs.
    y = batch['y'].to(device).reshape(-1)
    optimizer.zero_grad()  # Clear gradients left over from the previous step.
    out = model(x)  # Perform a single forward pass.
    loss = criterion(out, y)  # Compute the loss.
    loss.backward()  # Derive gradients.
    optimizer.step()  # Update parameters based on gradients.

def test(loader,device):
  model.eval()

  correct = 0
  for batch in loader:  # Iterate in batches over the training/test dataset.
    x = batch['x'].to(device)
    y = batch['y'].to(device)
    out = model(x)  # Perform a single forward pass.
    y = torch.squeeze(y)
    loss = criterion(out, y)  # Compute the loss.
    pred = out.argmax(dim=1)  # Use the class with highest probability.
    correct += int((pred == y).sum())  # Check against ground-truth labels.
  return correct / len(loader.dataset)  # Derive ratio of correct predictions.

In [17]:
acc_str = ''
fold = 'full_dataset'
fold_size = 911

# k-fold split, currently disabled. The slices below need an integer, 1-based
# `fold`; with fold = 'full_dataset' the model is trained on every sample.
#>>> train =              z[:fold_size * (fold - 1)] +         z[fold_size * fold:]
#train_data_list = data_list[:fold_size * (fold - 1)] + data_list[fold_size * fold:]
#>>> test =              z[fold_size * (fold - 1):fold_size * fold]
#test_data_list = data_list[fold_size * (fold - 1):fold_size * fold]
train_data_list = data_list

print(f'Number of training examples: {len(train_data_list)}')
#print(f'Number of test examples: {len(test_data_list)}')
train_data_loader = build_reactome_graph_loader(train_data_list,BATCH_SIZE)
#test_data_loader = build_reactome_graph_loader(test_data_list,BATCH_SIZE)
for epoch in range(EPOCHS):
  train(train_data_loader,device)
  train_acc = test(train_data_loader,device)
  #test_acc = test(test_data_loader,device)
  # One line per epoch. Without the newline the log is one unbroken run of
  # digits. When a test fold is enabled, append f',{test_acc:.4f}' before it.
  acc_str += f'{train_acc:.4f}\n'
  print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}')#', Test Acc: {test_acc:.4f}')

# Persist the per-epoch accuracy log.
# NOTE(review): both file names below already start with their own prefix, so
# the saved files are named 'resnet_resnet_...' and 'resnet_trained_...'; the
# paths are left unchanged so existing outputs keep their names.
training_acc_fn = F"resnet_classification_acc_fold_{fold}.txt"
path = F"/content/gdrive/My Drive/Academia/OHSU/Proposal/resnet_{training_acc_fn}"
with open(path, 'w') as writefile:
    writefile.write(acc_str)

# Persist the trained weights (state_dict only, not the full module).
model_save_name = F"trained_pytorch_model_fold_{fold}.pt"
path = F"/content/gdrive/My Drive/Academia/OHSU/Proposal/resnet_{model_save_name}" 
torch.save(model.state_dict(), path)
print(F"model saved as {path}")

Number of training examples: 9115
Epoch: 000, Train Acc: 0.8018
Epoch: 001, Train Acc: 0.8919
Epoch: 002, Train Acc: 0.9358
Epoch: 003, Train Acc: 0.9401
Epoch: 004, Train Acc: 0.9533
Epoch: 005, Train Acc: 0.9536
Epoch: 006, Train Acc: 0.9521
Epoch: 007, Train Acc: 0.9560
Epoch: 008, Train Acc: 0.9738
Epoch: 009, Train Acc: 0.9706
Epoch: 010, Train Acc: 0.9650
Epoch: 011, Train Acc: 0.9646
Epoch: 012, Train Acc: 0.9681
Epoch: 013, Train Acc: 0.9930
Epoch: 014, Train Acc: 0.9876
Epoch: 015, Train Acc: 0.9941
Epoch: 016, Train Acc: 0.9850
Epoch: 017, Train Acc: 0.9865
Epoch: 018, Train Acc: 0.9867
Epoch: 019, Train Acc: 0.9897
Epoch: 020, Train Acc: 0.9841
Epoch: 021, Train Acc: 0.9810
Epoch: 022, Train Acc: 0.9929
Epoch: 023, Train Acc: 0.9937
Epoch: 024, Train Acc: 0.9799
Epoch: 025, Train Acc: 0.9948
Epoch: 026, Train Acc: 0.9955
Epoch: 027, Train Acc: 0.9979
Epoch: 028, Train Acc: 0.9925
Epoch: 029, Train Acc: 0.9940
Epoch: 030, Train Acc: 0.9498
Epoch: 031, Train Acc: 0.9696
Epoch: