# Ref: https://github.com/mathcom/rapppid/blob/main/docs/infer.md

In [1]:
import os
from rapppid import infer

# Random Seed

In [2]:
# Fixed seed, set before the model is loaded; the intent is repeatable results
# across runs of this notebook.
seed = 8675309
# `pl_seed` is reached through `rapppid.infer`; the "Global seed set to ..."
# log line suggests it is PyTorch Lightning's seeding helper -- TODO confirm.
# Because this call is the last expression in the cell, its return value is
# displayed, which is why the seed is echoed back in the cell output.
infer.pl_seed.seed_everything(seed, workers=True)

Global seed set to 8675309


8675309

# Pretrained Model

In [3]:
# Pretrained RAPPPID checkpoint. The path is assembled from relative
# components, so it resolves against the current working directory.
ckpt_path = os.path.join(
    'data',
    'pretrained_weights',
    '1690837077.519848_red-dreamy',
    '1690837077.519848_red-dreamy.ckpt',
)

# Restore the model from the checkpoint file.
model = infer.load_chkpt(ckpt_path)

Applying weight drop of 0.3 to weight_hh_l0
Applying weight drop of 0.2 to weight
Applying weight drop of 0.2 to weight


# Tokenizer

In [4]:
# SentencePiece model file, addressed relative to the current working
# directory.
model_file = os.path.join(
    'data',
    'pretrained_weights',
    '1690837077.519848_red-dreamy',
    'spm.model',
)

# Tokenizer built from that model file.
spp = infer.sp.SentencePieceProcessor(model_file=model_file)

# Example

In [5]:
# Two example input sequences (presumably proteins written in one-letter
# amino-acid code -- TODO confirm). The cells below tokenize both, embed
# them, and score the first against the second.
seqs = [
    'LVYTDCTESGQNLCLCEGSNVCGQGNKCILGSDGEKNQCVTGEGTPKPQSHNDGDFEEIPEEYLQ',
    'QVQLKQSGPGLVQPSQSLSITCTVSGFSLTNYGVHWVRQSPGKGLEWLGVIWSGGNTDYNTPFTSRLSINKDNSKSQVFFKMNSLQSNDTAIYYCARALTYYDYEFAYWGQGTLVTVSAASTKGPSVFPLAPSSKSTSGGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSSLGTQTYICNVNHKPSNTKVDKRVEPKSPKSCDKTHTCPPCPAPELLGGP'
]

In [6]:
# Convert the raw sequences into a token tensor using the SentencePiece
# processor. The literal 1500 equals the second dimension of the result
# (torch.Size([2, 1500])), so it is presumably the fixed token length each
# sequence is padded/truncated to -- TODO confirm against infer.process_seqs.
toks = infer.process_seqs(spp, seqs, 1500)

In [7]:
# Sanity check on the token tensor: expect one row per input sequence.
toks.shape

torch.Size([2, 1500])

In [8]:
# Ask the loaded model for embeddings of the whole token batch; the result
# is indexed per sequence (out[0], out[1]) further down.
out = infer.get_embeddings(model, toks)

In [9]:
# Pull the two per-sequence embeddings out of the batch result, giving each
# one a new leading dimension via unsqueeze(0).
embedding_one, embedding_two = (out[idx].unsqueeze(0) for idx in (0, 1))

In [10]:
# Score the pair of embeddings with the model. `.item()` unwraps the
# single-element result into a plain Python number. Judging by the variable
# name this is presumably the predicted interaction probability for the two
# sequences -- TODO confirm what infer.predict returns.
prob = infer.predict(model, embedding_one, embedding_two).item()
print(prob)

0.07035128772258759
