In [22]:
%matplotlib inline
import os
import os.path
from tqdm import tqdm
from sklearn.neighbors import KDTree
from sklearn.decomposition import PCA
import time
import copy
import pandas as pd
import numpy as np
from random import seed
from random import randint
import random
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from PIL import Image
from matplotlib import pyplot as plt
from sklearn.metrics import fbeta_score, precision_recall_fscore_support, multilabel_confusion_matrix
from tqdm import tqdm_notebook as tqdm
import warnings
warnings.filterwarnings("ignore")
import csv
def read_dict(path):
    'Reads Python dictionary stored in a csv file'
    dictionary = {}
    for key, val in csv.reader(open(path)):
        dictionary[key] = val
    return dictionary

torch.set_num_threads(5)

scaling_weights = True
p = 0
v_size = 32
flips = (0.2, 0.2, 0.2)
max_radius = 40
shuffle = False
noise_treatment = False
period_checkpoint = 50
# current_file_name = os.path.basename(__file__)[:-3]
PERCENTILE = 99.7
DISABLE_TQDM = True
precomputed_path = '../files/precomputed/'
PDB_path = '../files/PDB/'
train_dir = '../pictures/train/'


In [23]:
"""-----------------------Parameters Change--------------------------------"""

device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
lr = 0.00007
output_dim = 0
# 'hydropathy', 'charge', 'isoelectric'
weights = []
batch_size = 50
max_epochs = 20
type_of_dataset = '1'

"""-----------------------Parameters Changed-------------------------------"""

if type_of_dataset == '1':
    output_dim = 932
    d = 'bp'
if type_of_dataset == '2':
    output_dim = 439
    d = 'cc'
if type_of_dataset == '3':
    output_dim = 589
    d = 'mf'

if len(weights) == 0:
    input_channels = 1
    name = 'None'
else:
    input_channels = 3
    name = 'hci'
    
features_save = 'best_features/3d/features_{}_{}.pkl'.format(d, name)
labels_csv = '../labels/struct/new_{}.csv'.format(type_of_dataset)
weights_path = 'Weights/3d/weights_3d_{}_{}.pth'.format(d, name)
input_shape = (input_channels, 32, 32, 32)
num_features = input_channels * 256
print(device)
n_channels = 1 + len(weights)
stddev_conv3d = np.sqrt(2.0/(n_channels))

cuda:2


In [24]:
acc_pdb = read_dict('../datasets/new_dataset_pdb_{}.csv'.format(type_of_dataset))

df = pd.read_csv('../datasets/new_{}.csv'.format(type_of_dataset))
acc_labels = []
for idx,row in df.iterrows():
    temp = list(int(i) for i in row[1].strip().split(','))
    acc_labels.append([row[0],np.array(temp)])
    
# acc_labels = np.array(acc_labels)
    
train_acc_labels = acc_labels[:8500]
val_acc_labels = acc_labels[8500:]
train_df = pd.DataFrame(train_acc_labels)
val_df = pd.DataFrame(val_acc_labels)
print(f"Validation_Data Length: {len(val_df)}\nTrain_Data Length: {len(train_df)}")
print(f"Label type: {type(train_df.iloc[0,1])}\tEach Label type: {type(train_df.iloc[0,1][0])}")
print(f"Example of accession_no: {train_df.iloc[0,0]}")

Validation_Data Length: 2598
Train_Data Length: 8500
Label type: <class 'numpy.ndarray'>	Each Label type: <class 'numpy.int64'>
Example of accession_no: A0A0H2VDN9


In [25]:
data_transforms = transforms.Compose([
        transforms.ToTensor(),
    ])

In [26]:
from torch.utils import data
class VolumeData(data.Dataset):
    def __init__(self,df,dirpath,transform,test = False):
        
        self.df = df
        self.directory_precomputed = precomputed_path
        self.directory_pdb = PDB_path
        self.flips = flips
        self.acc_labels = df.values.tolist()
        self.acc_pdb = acc_pdb
        self.max_radius = max_radius
        self.noise_treatment = noise_treatment
        self.n_channels = max(1, len(weights))
        self.p = p
        self.scaling_weights = scaling_weights
        self.shuffle = shuffle
        self.v_size = v_size
        self.weights = weights
        self.on_epoch_end()

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
#         acc = list(self.acc_pdb)
#         pdb_id = self.acc_pdb[acc[idx]]
        X, y = self.__data_augmentation(idx)
        
        label_tensor = torch.zeros((1, output_dim))
        z = torch.from_numpy(y)
        for j,ele in enumerate(z):
            label_tensor[0, j] = ele
        image_label = torch.tensor(label_tensor,dtype= torch.float32)
        
        #   convert X to tensor
        
        return (X, image_label.squeeze())
    
    def __data_augmentation(self, idx):
        'Returns augmented data with batch_size enzymes' # X : (v_size, v_size, v_size, n_channels)
        # Initialization
        X = np.empty((self.n_channels,
                      self.v_size,
                      self.v_size,
                      self.v_size))
        try:
                   
            y = np.empty(len(self.acc_labels[idx][1]), dtype=int)

            # Computations
            if len(self.acc_labels[idx]) != 2:
                print(len(self.labels[idx]), end = ' ')
            y = self.acc_labels[idx][1]
            pdb_id = self.acc_pdb[self.acc_labels[idx][0]]

            # Load precomputed coordinates
            coords = load_coords(pdb_id, self.p, self.directory_precomputed)
            coords = coords_center_to_zero(coords)
            coords = adjust_size(coords, v_size=self.v_size, max_radius=self.max_radius)

            # Get weights
            local_weights = []
            for weight in self.weights:
                local_weight = load_weights(pdb_id, weight, self.p,
                                            self.scaling_weights, self.directory_precomputed) # Compute extended weights
                local_weights += [local_weight] # Store
                

            # PCA
            coords = PCA(n_components=3).fit_transform(coords)

            # Do flip
            coords_temp = flip_around_axis(coords, axis=self.flips)

            if len(self.weights) == 0:
                # Convert to volume and store
                X[0, :, :, :] = coords_to_volume(coords_temp, self.v_size,
                                                    noise_treatment=self.noise_treatment)

            else:
                # Compute to weights of volume and store
                for k in range(self.n_channels):
                    X[k, :, :, :] = weights_to_volume(coords_temp, local_weights[k],
                                                         self.v_size, noise_treatment=self.noise_treatment)

            return X, np.array(y)
        except:
            print(idx)
    
    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.acc_pdb))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)
    
def load_coords(pdb_id, desired_p, source_path):
    'Loads precomputed coordinates'
    return np.load(precomputed_name(pdb_id, source_path, 'coords', desired_p))
    
def coords_center_to_zero(coords):
    'Centering coordinates on [0,0,0]'
    barycenter = get_barycenter(coords)
    return coords - np.full((coords.shape[0], 3), barycenter)

def adjust_size(coords, v_size=32, max_radius=40):
    return np.multiply((v_size/2-1)/max_radius, coords)

def load_weights(pdb_id, weights_name, desired_p, scaling, source_path):
    'Loads precomputed weights'
    return np.load(precomputed_name(pdb_id, source_path, 'weights', desired_p, weights_name, scaling))

def flip_around_axis(coords, axis=(0.2, 0.2, 0.2)):
    'Flips coordinates randomly w.r.t. each axis with its associated probability'
    for col in range(3):
        if np.random.binomial(1, axis[col]):
            coords[:,col] = np.negative(coords[:,col])
    return coords

def coords_to_volume(coords, v_size, noise_treatment=False):
    'Converts coordinates to binary voxels' # Input is centered on [0,0,0]
    return weights_to_volume(coords=coords, weights=1, v_size=v_size, noise_treatment=noise_treatment)

def weights_to_volume(coords, weights, v_size, noise_treatment=False):
    'Converts coordinates to voxels with weights' # Input is centered on [0,0,0]
    # Initialization
    volume = np.zeros((v_size, v_size, v_size))

    # Translate center
    coords = coords + np.full((coords.shape[0], 3), (v_size-1)/2)

    # Round components
    coords = coords.astype(int)

    # Filter rows with values that are out of the grid
    mask = ((coords >= 0) & (coords < v_size)).all(axis=1)

    # Convert to volume
    volume[tuple(coords[mask].T)] = weights[mask] if type(weights) != int else weights

    # Remove noise
    if noise_treatment == True:
        volume = remove_noise(coords, volume)

    return volume

def precomputed_name(pdb_id, path, type_file, desired_p, weights_name=None, scaling=True):
    'Returns path in string of precomputed file'
    if type_file == 'coords':
        return os.path.join(path, pdb_id.lower() + '_coords_p' + str(desired_p) + '.npy')
    elif type_file == 'weights':
        return os.path.join(path, pdb_id.lower() + '_' + weights_name + '_p' + str(desired_p) + '_scaling' + str(scaling) + '.npy')
    
def get_barycenter(coords):
    'Gets barycenter point of a Nx3 matrix'
    return np.array([np.mean(coords, axis=0)])
            


In [27]:
# Train dataset
train_dataset = VolumeData(train_df,train_dir,data_transforms)
train_loader = data.DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=False)

# validation dataset
val_dataset = VolumeData(val_df,train_dir,data_transforms)
val_loader = data.DataLoader(dataset=val_dataset,batch_size=batch_size,shuffle=False)

dataloaders_dict = {'train':train_loader, 'val':val_loader}

In [28]:
features, labels = next(iter(train_loader))
print(f'Train Features: {features.shape}\nTrain Labels: {labels.shape}')
print()
features, labels = next(iter(val_loader))
print(f'Validation Features: {features.shape}\nValidation Labels: {labels.shape}')
print()

Train Features: torch.Size([50, 1, 32, 32, 32])
Train Labels: torch.Size([50, 932])

Validation Features: torch.Size([50, 1, 32, 32, 32])
Validation Labels: torch.Size([50, 932])



In [29]:
print("TRAINING")
print("training examples: ",len(train_dataset))
print("batch size: ",batch_size)
print("batches available: ",len(train_loader))
print()
print("VALIDATION")
print("validation examples: ",len(val_dataset))
print("batch size: ",batch_size)
print("batches available: ",len(val_loader))
print()

TRAINING
training examples:  8500
batch size:  50
batches available:  170

VALIDATION
validation examples:  2598
batch size:  50
batches available:  52



In [30]:
#DEFINE NETWORK

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.init as init

class _3d_cnn(nn.Module):
    def __init__(self, input_shape, output_dim):
        """
        :param input_shape: input image shape, (h, w, c)
        """
        super(_3d_cnn, self).__init__()

        self.features = nn.Sequential(
            nn.Conv3d(input_channels,  16, (5, 1, 3), stride=(1, 1, 1)),
            nn.PReLU(),
            nn.Conv3d(16, 16, (1, 9, 3), stride=(1, 2, 1)),
            nn.PReLU(),
            nn.MaxPool3d((2, 1, 1), stride=(2, 1, 1)),

            nn.Conv3d(16, 32, kernel_size=(4, 1, 3), stride=(1, 1, 1)),
            nn.PReLU(),
            nn.Conv3d(32, 32, kernel_size=(1, 8, 3), stride=(1, 2, 1)),
            nn.PReLU(),
            nn.MaxPool3d((2, 1, 1), stride=(2, 1, 1)),
            
            nn.AvgPool3d(2)
        )

        # Compute number of input features for the last fully-connected layer
        input_shape = (1,) + input_shape
        x = Variable(torch.rand(input_shape), requires_grad=False)
        x = self.features(x)
        x = Flatten()(x)
        self.n = x.size()[1]
        
        print(num_features)
        self.fc1 = nn.Linear(self.n, num_features)
        self.fc2 = nn.Linear(num_features, output_dim)
        self.bn = nn.BatchNorm1d(self.n)
        self.bn2 = nn.BatchNorm1d(num_features)

    def forward(self, x):
        x = self.features(x)
        x = Flatten()(x)
        x = self.bn(x)
        x = self.bn2(F.relu(self.fc1(x)))
        x = F.sigmoid(self.fc2(x))
        return x


class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)
    
class AvgPool(nn.Module):
    def forward(self, x):
        return F.avg_pool3d(x, x.shape[2:])

In [31]:
temp = _3d_cnn(input_shape, output_dim)

temp(torch.zeros(4,input_channels,32,32,32))

256


tensor([[0.5119, 0.4894, 0.5012,  ..., 0.5068, 0.5109, 0.5142],
        [0.5119, 0.4894, 0.5012,  ..., 0.5068, 0.5109, 0.5142],
        [0.5119, 0.4894, 0.5012,  ..., 0.5068, 0.5109, 0.5142],
        [0.5119, 0.4894, 0.5012,  ..., 0.5068, 0.5109, 0.5142]],
       grad_fn=<SigmoidBackward>)

In [32]:
NeuralNet = _3d_cnn(input_shape, output_dim)
NeuralNet

256


_3d_cnn(
  (features): Sequential(
    (0): Conv3d(1, 16, kernel_size=(5, 1, 3), stride=(1, 1, 1))
    (1): PReLU(num_parameters=1)
    (2): Conv3d(16, 16, kernel_size=(1, 9, 3), stride=(1, 2, 1))
    (3): PReLU(num_parameters=1)
    (4): MaxPool3d(kernel_size=(2, 1, 1), stride=(2, 1, 1), padding=0, dilation=1, ceil_mode=False)
    (5): Conv3d(16, 32, kernel_size=(4, 1, 3), stride=(1, 1, 1))
    (6): PReLU(num_parameters=1)
    (7): Conv3d(32, 32, kernel_size=(1, 8, 3), stride=(1, 2, 1))
    (8): PReLU(num_parameters=1)
    (9): MaxPool3d(kernel_size=(2, 1, 1), stride=(2, 1, 1), padding=0, dilation=1, ceil_mode=False)
    (10): AvgPool3d(kernel_size=2, stride=2, padding=0)
  )
  (fc1): Linear(in_features=768, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=932, bias=True)
  (bn): BatchNorm1d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [33]:
total_params = sum(p.numel() for p in NeuralNet.parameters())
print(f'{total_params:,} total parameters.')
total_trainable_params = sum(p.numel() for p in NeuralNet.parameters() if p.requires_grad)
print(f'{total_trainable_params:,} training parameters.')

476,408 total parameters.
476,408 training parameters.


In [34]:
NeuralNet = NeuralNet.to(device)
optimizer = optim.Adam(NeuralNet.parameters(),lr = lr)
loss_func = torch.nn.BCEWithLogitsLoss()
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,patience = 2)
best_loss = np.inf
best_f_score = np.inf
best_precision = np.inf
best_recall = np.inf

In [35]:
prec = []
recall = []
f = []
losss = []
val_f = []
val_loss = []
val_prec = []
val_recall = []

def store(phase,p,r,fs,l):
    if phase == 'train':
        prec.append(p)
        recall.append(r)
        f.append(fs)
        losss.append(l)
    else:
        val_prec.append(p)
        val_recall.append(r)
        val_f.append(fs)
        val_loss.append(l)
        
def calc(l,f,p,r,length):
    loss = l/length
    pre = p/length
    fs = f/length
    re = r/length
    
    return loss,fs,pre,re

def result(epoch, NUM_EPOCHS,phase,epoch_loss,epoch_f_loss,epoch_precision,epoch_recall,elapsed_time):
    print("\tPhase: {}\n\t\t Epoch: {}/{} | {}_loss:{:.8f} | f_score:{:.8f} | precision:{:.8f} | recall:{:.8f} | Time: {:.4f}s".format(phase,
                                                                              epoch+1,
                                                                              NUM_EPOCHS,
                                                                              phase,
                                                                              epoch_loss,
                                                                              epoch_f_score,
                                                                              epoch_precision,
                                                                              epoch_recall,
                                                                              elapsed_time))

In [None]:
for epoch in range(max_epochs):
    for phase in ['train', 'val']:
        start_time = time.time()
        if phase == 'train':
            NeuralNet.train()
        else:
            NeuralNet.eval()

        running_loss = 0.0
        running_f_score = 0.0
        running_precision = 0.0
        running_recall = 0.0

        for images_batch, labels_batch in tqdm(dataloaders_dict[phase],disable = DISABLE_TQDM):
            images_batch = images_batch.to(device, dtype=torch.float)
            labels_batch = labels_batch.to(device, dtype=torch.float)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                pred_batch = NeuralNet(images_batch)
                _, preds = torch.max(pred_batch.data, 1)
                loss = loss_func(pred_batch,labels_batch)
            if phase == 'train':
                loss.backward()
                optimizer.step()

            labels_cpu = labels_batch.cpu().detach().numpy()
            pred_cpu = pred_batch.cpu().detach().numpy()

#             print(metrics.multilabel_confusion_matrix(labels_cpu, pred_cpu>0.5, samplewise = True))

            temp_precision, temp_recall, temp_f_score, _ = precision_recall_fscore_support(
                                                                labels_cpu, pred_cpu > 0.1, beta=0.5, average='samples')

            running_loss += loss.item() * images_batch.size(0)
            running_precision += (temp_precision * len(images_batch))
            running_recall += (temp_recall * len(images_batch))
            running_f_score += (temp_f_score * len(images_batch))

        epoch_loss = running_loss / len(dataloaders_dict[phase].dataset)
        epoch_f_score = running_f_score / len(dataloaders_dict[phase].dataset)
        epoch_precision = running_precision / len(dataloaders_dict[phase].dataset)
        epoch_recall = running_recall / len(dataloaders_dict[phase].dataset)
        
        store(phase,epoch_precision,epoch_recall,epoch_f_score,epoch_loss)

        if phase == 'val' and epoch_f_score < best_f_score:
#             print("model val_loss Improved from {:.8f} to {:.8f}".format(best_loss,epoch_loss))
            best_f_score = epoch_f_score
            best_precision = epoch_precision
            best_recall = epoch_recall
            best_loss = epoch_loss
            best_model_wts = copy.deepcopy(NeuralNet.state_dict())
            torch.save(NeuralNet.state_dict(), weights_path)

        if phase == 'val':
            scheduler.step(epoch_loss)

        elapsed_time = time.time()-start_time
        result(epoch, max_epochs,phase,epoch_loss,epoch_f_score,epoch_precision,epoch_recall,elapsed_time)


	Phase: train
		 Epoch: 1/20 | train_loss:0.95076383 | f_score:0.05269638 | precision:0.04299893 | recall:0.99987604 | Time: 173.0518s
	Phase: val
		 Epoch: 1/20 | val_loss:0.94571218 | f_score:0.04712442 | precision:0.03833212 | recall:0.99972771 | Time: 52.9640s
	Phase: train
		 Epoch: 2/20 | train_loss:0.93145226 | f_score:0.05267794 | precision:0.04298398 | recall:0.99936603 | Time: 177.3520s
	Phase: val
		 Epoch: 2/20 | val_loss:0.89155553 | f_score:0.04711088 | precision:0.03832121 | recall:0.99916946 | Time: 49.9496s
	Phase: train
		 Epoch: 3/20 | train_loss:0.86301625 | f_score:0.05532030 | precision:0.04523402 | recall:0.98762165 | Time: 173.0790s
	Phase: val
		 Epoch: 3/20 | val_loss:0.83541274 | f_score:0.04773537 | precision:0.03885347 | recall:0.99578422 | Time: 48.0856s
	Phase: train
		 Epoch: 4/20 | train_loss:0.80467238 | f_score:0.09877851 | precision:0.08544646 | recall:0.85028039 | Time: 175.6294s
	Phase: val
		 Epoch: 4/20 | val_loss:0.77576311 | f_score:0.11116892 

In [36]:
NeuralNet.load_state_dict(torch.load(weights_path))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [37]:
#DEFINE NETWORK

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.init as init

class _3d_cnn(nn.Module):
    def __init__(self, input_shape, output_dim):
        """
        :param input_shape: input image shape, (h, w, c)
        """
        super(_3d_cnn, self).__init__()

        self.features = nn.Sequential(
            nn.Conv3d(input_channels,  16, (5, 1, 3), stride=(1, 1, 1)),
            nn.PReLU(),
            nn.Conv3d(16, 16, (1, 9, 3), stride=(1, 2, 1)),
            nn.PReLU(),
            nn.MaxPool3d((2, 1, 1), stride=(2, 1, 1)),

            nn.Conv3d(16, 32, kernel_size=(4, 1, 3), stride=(1, 1, 1)),
            nn.PReLU(),
            nn.Conv3d(32, 32, kernel_size=(1, 8, 3), stride=(1, 2, 1)),
            nn.PReLU(),
            nn.MaxPool3d((2, 1, 1), stride=(2, 1, 1)),
            
            nn.AvgPool3d(2)
            
        )
        
        input_shape = (1,) + input_shape
        x = Variable(torch.rand(input_shape), requires_grad=False)
        x = self.features(x)
        x = Flatten()(x)
        self.n = x.size()[1]
        print(num_features)
        
        
        self.fc1 = nn.Linear(self.n, num_features)
        self.fc2 = nn.Linear(num_features, output_dim)
        self.bn = nn.BatchNorm1d(self.n)
        self.bn2 = nn.BatchNorm1d(num_features)

    def forward(self, x):
        x = self.features(x)
        x = Flatten()(x)
        x = self.fc1(x)
        return x

newmodel = _3d_cnn(input_shape, output_dim)
newmodel.load_state_dict(torch.load(weights_path))

256


IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [38]:
newmodel

_3d_cnn(
  (features): Sequential(
    (0): Conv3d(1, 16, kernel_size=(5, 1, 3), stride=(1, 1, 1))
    (1): PReLU(num_parameters=1)
    (2): Conv3d(16, 16, kernel_size=(1, 9, 3), stride=(1, 2, 1))
    (3): PReLU(num_parameters=1)
    (4): MaxPool3d(kernel_size=(2, 1, 1), stride=(2, 1, 1), padding=0, dilation=1, ceil_mode=False)
    (5): Conv3d(16, 32, kernel_size=(4, 1, 3), stride=(1, 1, 1))
    (6): PReLU(num_parameters=1)
    (7): Conv3d(32, 32, kernel_size=(1, 8, 3), stride=(1, 2, 1))
    (8): PReLU(num_parameters=1)
    (9): MaxPool3d(kernel_size=(2, 1, 1), stride=(2, 1, 1), padding=0, dilation=1, ceil_mode=False)
    (10): AvgPool3d(kernel_size=2, stride=2, padding=0)
  )
  (fc1): Linear(in_features=768, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=932, bias=True)
  (bn): BatchNorm1d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [39]:
def get_image_tensor(pdb_id):
    X = np.empty((max(1,len(weights)),
                      v_size,
                      v_size,
                      v_size))

    coords = load_coords(pdb_id, p, precomputed_path)
    coords = coords_center_to_zero(coords)
    coords = adjust_size(coords, v_size=v_size, max_radius=max_radius)

    # Get weights
    local_weights = []
    for weight in weights:
        local_weight = load_weights(pdb_id, weight, p,
                                    scaling_weights, precomputed_path) # Compute extended weights
        local_weights += [local_weight] # Store

    # PCA
    coords = PCA(n_components=3).fit_transform(coords)

    # Do flip
    coords_temp = flip_around_axis(coords, axis=flips)

    if len(weights) == 0:
        # Convert to volume and store
        X[0, :, :, :] = coords_to_volume(coords_temp, v_size,
                                            noise_treatment=noise_treatment)

    else:
        # Compute to weights of volume and store
        for k in range(max(1,len(weights))):
            X[k, :, :, :] = weights_to_volume(coords_temp, local_weights[k],
                                                 v_size, noise_treatment=noise_treatment)

    return X

In [40]:
features = []
accession = []
temp = list(acc_pdb)
newmodel.to(device)

for acc in temp[1:]:
    pdb = acc_pdb[acc]
    img_numpy = get_image_tensor(pdb)
    img_tensor = torch.tensor(img_numpy)
    img_tensor = img_tensor.unsqueeze(0)
    img_tensor = img_tensor.to(device, dtype=torch.float)
    pred = newmodel(img_tensor)
    temp = pred.cpu().detach().numpy()
    features.append(temp)
    accession.append(acc)
res_df = pd.DataFrame({'accession': acc,'features':features})

res_df.to_pickle(features_save)

In [41]:
print(len(features[1][0]))

256
