### Imports

In [1]:
from tape import ProteinBertForValuePredictionFragmentationProsit
import numpy as np
from tape import TAPETokenizer
from prosittransformer.DataHandler import pad_sequences
import torch

In [2]:
from prosittransformer.utils import SequenceConverter

### Data pre-processors

##### Charge pre-processors

In [3]:
CHARGES = [1, 2, 3, 4, 5, 6]
def get_precursor_charge_onehot(charges):
    """One-hot encode precursor charge states.

    Parameters
    ----------
    charges : sequence of int
        Precursor charges; each must lie in ``1..max(CHARGES)``.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(charges), max(CHARGES))`` in which row
        ``i`` holds a single 1 in column ``charges[i] - 1``.

    Raises
    ------
    IndexError
        If any charge is outside ``1..max(CHARGES)``.
    """
    n_states = max(CHARGES)
    array = np.zeros([len(charges), n_states], dtype=int)
    for i, precursor_charge in enumerate(charges):
        # Check the range explicitly: `precursor_charge - 1` becomes a negative
        # index for charges <= 0 and would silently wrap around (charge 0 would
        # be encoded as the highest charge) instead of failing.
        if not 1 <= precursor_charge <= n_states:
            raise IndexError(
                "precursor charge {} is outside the supported range 1..{}".format(
                    precursor_charge, n_states
                )
            )
        array[i, precursor_charge - 1] = 1
    return array



##### Peptide pre-processors

In [4]:
tokenizer = TAPETokenizer(vocab='iupac')
def TokenizePeptides(peptides):
    """Tokenize peptide strings and build the matching attention masks.

    Parameters
    ----------
    peptides : iterable of str
        Peptide sequences to encode with the module-level ``tokenizer``.

    Returns
    -------
    tuple
        ``(input_ids, input_mask)`` as returned by ``pad_sequences``; the mask
        is 1 at every real token position and 0 at padded positions.
    """
    input_ids = [tokenizer.encode(p) for p in peptides]
    # Build one mask per sequence. Calling np.ones_like on the whole list only
    # works when every peptide tokenizes to the same length; peptides of
    # different lengths form a ragged list that NumPy cannot turn into a
    # regular array.
    input_mask = [np.ones_like(ids) for ids in input_ids]

    # Pad ids and mask with 0.
    # NOTE(review): presumably pad_sequences pads to a common length — confirm.
    input_ids = pad_sequences(input_ids, 0)
    input_mask = pad_sequences(input_mask, 0)

    return input_ids, input_mask

### Toy data

In [5]:
# Toy example with a single peptide: one collision-energy value, the one-hot
# encoding of precursor charge 3, and the peptide sequence itself.
collision_energy = np.hstack([0.3170])
charge = get_precursor_charge_onehot([3])
peptide_sequences = ["INIDHKFHRHL"]

In [6]:
# Tokenize the toy peptide(s) and get the padded token ids plus the matching mask.
input_ids, input_mask = TokenizePeptides(peptide_sequences)

In [7]:
# Bundle the model inputs as tensors: float32 for the collision energy and the
# charge one-hot, int64 for the token ids and the mask.
toy_data = dict(
    collision_energy=torch.FloatTensor(collision_energy.astype(np.float32)),
    charge=torch.FloatTensor(charge.astype(np.float32)),
    input_ids=torch.from_numpy(input_ids.astype(np.int64)),
    input_mask=torch.from_numpy(input_mask.astype(np.int64)),
)

### Load the pretrained model

In [283]:
# List the contents of the results directory.
# NOTE(review): absolute path is specific to the original machine.
!ls /sdd/berzelius/final_results/

delta_0.0   delta_0.1	delta_0.19  delta_0.24.zip  delta_0.34	delta_1.0
delta_0.05  delta_0.15	delta_0.24  delta_0.29	    delta_0.72


In [284]:
# List the files in the model directory that is loaded in the next cell.
# NOTE(review): absolute path is specific to the original machine.
!ls /sdd/berzelius/torch_model

args.json  checkpoint.bin  config.json	log  pytorch_model.bin


In [8]:
# Directory holding the saved model files; machine-specific, adjust as needed.
MODEL_DIR = "/sdd/berzelius/torch_model"
model = ProteinBertForValuePredictionFragmentationProsit.from_pretrained(MODEL_DIR)


### Move model and inputs to the GPU

In [9]:
# Use the first CUDA device when one is available and fall back to the CPU
# otherwise, so the notebook still runs on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
# Move every input tensor to the same device as the model.
toy_data = {name: tensor.to(device, non_blocking=True)
            for name, tensor in toy_data.items()}

### Predict

In [10]:
# Inspect the tensors that are passed to the model in the next cell.
toy_data

{'collision_energy': tensor([0.3170], device='cuda:0'),
 'charge': tensor([[0., 0., 1., 0., 0., 0.]], device='cuda:0'),
 'input_ids': tensor([[ 2, 13, 17, 13,  8, 12, 14, 10, 12, 21, 12, 15,  3,  0,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]],
        device='cuda:0'),
 'input_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0')}

In [11]:
# Inference only: put the model in evaluation mode (a no-op if it already is)
# and disable autograd so no computation graph is built for the forward pass.
model.eval()
with torch.no_grad():
    # Take the first element of the model output and bring it back to the host
    # as a NumPy array.
    x = model(**toy_data)[0].detach().cpu().numpy()

torch.Size([1, 32, 768])


In [13]:
# NOTE(review): `cleanTapeOutput` is not imported in any cell visible here, so
# this cell raises NameError on a fresh kernel — presumably it comes from
# prosittransformer.utils; confirm and add it to the import cell.
# The raw prediction `x` is passed as both of the first two arguments and only
# the second return value is kept, overwriting `x`. Re-running this cell
# therefore feeds the already-processed array back in.
_, x = cleanTapeOutput().getIntensitiesAndSpectralAngle(x, x, charge, 
                                                        input_ids, start_stop_token=True)

In [15]:
# Display the post-processed array (full dump; consider slicing for readability).
x

array([[ 5.9691880e-02,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  9.8905250e-02,  0.0000000e+00,
         0.0000000e+00,  2.1635844e-01,  0.0000000e+00,  0.0000000e+00,
         9.4094940e-02,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  2.0559675e-01,  2.6962804e-04,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         3.7001500e-01,  5.3603617e-03,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  1.6010933e-01,  5.4949373e-02,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         3.8084365e-02,  5.2488464e-01,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  4.2967413e-02,  3.3757457e-01,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00,
         0.0000000e+00,  3.7672037e-01,  6.2140974e-04,  0.0000000e+00,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  1.00000