In [1]:
# How to work with msgpack outputs
from anarcii import Anarcii

# Force batch mode: with max_seqs_len=2 any input holding more than two
# sequences is serialised to a msgpack file instead of being returned directly.
model = Anarcii(seq_type="antibody", mode="accuracy", max_seqs_len=2, ncpu=1)

seq = "./example_data/monoclonals_clean.fasta"

# In batch mode `results` is the path to the msgpack file that was written.
results = model.number(seq)

Using device CUDA with 1 CPUs

 Serialising output to anarcii-7a357575-3f3b-45d7-82c9-abe13ceb4c98-imgt.msgpack as the number of sequences exceeds the serialisation limit of 2.



In [2]:
# A new msgpack file will be created in the current directory.
# It can be opened with the from_msgpack_map function, which takes the path
# to the file (here `results`, as returned by model.number in batch mode).
from anarcii.utils import from_msgpack_map

gen_object = from_msgpack_map(results)

# This is a generator object that can be iterated over.
# next() pulls the first item; as the printed output shows, it is a dict keyed by
# sequence name whose values hold the result (including a 'numbering' entry).
dt = next(gen_object)
print(dt)

{'sp|P01629|KV2A4_MOUSE Ig kappa chain V-II region 2S1.3 OS=Mus musculus OX=10090 PE=1 SV=1': {'numbering': (((1, ' '), 'D'), ((2, ' '), 'I'), ((3, ' '), 'V'), ((4, ' '), 'M'), ((5, ' '), 'T'), ((6, ' '), 'Q'), ((7, ' '), 'A'), ((8, ' '), 'A'), ((9, ' '), 'F'), ((10, ' '), 'S'), ((11, ' '), 'N'), ((12, ' '), 'P'), ((13, ' '), 'V'), ((14, ' '), 'T'), ((15, ' '), 'L'), ((16, ' '), 'G'), ((17, ' '), 'T'), ((18, ' '), 'S'), ((19, ' '), 'A'), ((20, ' '), 'S'), ((21, ' '), 'F'), ((22, ' '), 'S'), ((23, ' '), 'C'), ((24, ' '), 'R'), ((25, ' '), 'S'), ((26, ' '), 'S'), ((27, ' '), 'K'), ((28, ' '), 'S'), ((29, ' '), 'L'), ((30, ' '), 'Q'), ((31, ' '), 'Q'), ((32, ' '), 'S'), ((33, ' '), '-'), ((34, ' '), 'K'), ((35, ' '), 'G'), ((36, ' '), 'I'), ((37, ' '), 'T'), ((38, ' '), 'Y'), ((39, ' '), 'L'), ((40, ' '), 'Y'), ((41, ' '), 'W'), ((42, ' '), 'Y'), ((43, ' '), 'L'), ((44, ' '), 'Q'), ((45, ' '), 'K'), ((46, ' '), 'P'), ((47, ' '), 'G'), ((48, ' '), 'Q'), ((49, ' '), 'S'), ((50, ' '), 'P'), 

In [3]:
# Or you can iterate over the generator object with a for loop.

# The generator must be re-created first: a generator is single-pass, and the
# previous one was already advanced by next(), so looping over it would not
# start from the first item.
gen_object = from_msgpack_map(results)
for i in gen_object:
    print(i)

{'sp|P01629|KV2A4_MOUSE Ig kappa chain V-II region 2S1.3 OS=Mus musculus OX=10090 PE=1 SV=1': {'numbering': (((1, ' '), 'D'), ((2, ' '), 'I'), ((3, ' '), 'V'), ((4, ' '), 'M'), ((5, ' '), 'T'), ((6, ' '), 'Q'), ((7, ' '), 'A'), ((8, ' '), 'A'), ((9, ' '), 'F'), ((10, ' '), 'S'), ((11, ' '), 'N'), ((12, ' '), 'P'), ((13, ' '), 'V'), ((14, ' '), 'T'), ((15, ' '), 'L'), ((16, ' '), 'G'), ((17, ' '), 'T'), ((18, ' '), 'S'), ((19, ' '), 'A'), ((20, ' '), 'S'), ((21, ' '), 'F'), ((22, ' '), 'S'), ((23, ' '), 'C'), ((24, ' '), 'R'), ((25, ' '), 'S'), ((26, ' '), 'S'), ((27, ' '), 'K'), ((28, ' '), 'S'), ((29, ' '), 'L'), ((30, ' '), 'Q'), ((31, ' '), 'Q'), ((32, ' '), 'S'), ((33, ' '), '-'), ((34, ' '), 'K'), ((35, ' '), 'G'), ((36, ' '), 'I'), ((37, ' '), 'T'), ((38, ' '), 'Y'), ((39, ' '), 'L'), ((40, ' '), 'Y'), ((41, ' '), 'W'), ((42, ' '), 'Y'), ((43, ' '), 'L'), ((44, ' '), 'Q'), ((45, ' '), 'K'), ((46, ' '), 'P'), ((47, ' '), 'G'), ((48, ' '), 'Q'), ((49, ' '), 'S'), ((50, ' '), 'P'), 

In [4]:
# If the msgpack file is huge then only a predefined batch size will be loaded
# at a time, so the whole file never has to sit in memory at once.
# NOTE(review): chunk_size presumably sets how many entries each yielded item
# contains (1 here) - confirm against the from_msgpack_map documentation.

gen_object = from_msgpack_map(results, chunk_size=1)
for i in gen_object:
    print(i)

{'sp|P01629|KV2A4_MOUSE Ig kappa chain V-II region 2S1.3 OS=Mus musculus OX=10090 PE=1 SV=1': {'numbering': (((1, ' '), 'D'), ((2, ' '), 'I'), ((3, ' '), 'V'), ((4, ' '), 'M'), ((5, ' '), 'T'), ((6, ' '), 'Q'), ((7, ' '), 'A'), ((8, ' '), 'A'), ((9, ' '), 'F'), ((10, ' '), 'S'), ((11, ' '), 'N'), ((12, ' '), 'P'), ((13, ' '), 'V'), ((14, ' '), 'T'), ((15, ' '), 'L'), ((16, ' '), 'G'), ((17, ' '), 'T'), ((18, ' '), 'S'), ((19, ' '), 'A'), ((20, ' '), 'S'), ((21, ' '), 'F'), ((22, ' '), 'S'), ((23, ' '), 'C'), ((24, ' '), 'R'), ((25, ' '), 'S'), ((26, ' '), 'S'), ((27, ' '), 'K'), ((28, ' '), 'S'), ((29, ' '), 'L'), ((30, ' '), 'Q'), ((31, ' '), 'Q'), ((32, ' '), 'S'), ((33, ' '), '-'), ((34, ' '), 'K'), ((35, ' '), 'G'), ((36, ' '), 'I'), ((37, ' '), 'T'), ((38, ' '), 'Y'), ((39, ' '), 'L'), ((40, ' '), 'Y'), ((41, ' '), 'W'), ((42, ' '), 'Y'), ((43, ' '), 'L'), ((44, ' '), 'Q'), ((45, ' '), 'K'), ((46, ' '), 'P'), ((47, ' '), 'G'), ((48, ' '), 'Q'), ((49, ' '), 'S'), ((50, ' '), 'P'), 

In [5]:
# Even if the number of seqs does not exceed batch size - you can still output to msgpack
from pathlib import Path

from anarcii import Anarcii

# Default settings this time (no max_seqs_len override), so - per the recorded
# output - nothing is auto-serialised while numbering.
model = Anarcii()
seq = "./example_data/monoclonals_clean.fasta"
results = model.number(seq)

# Make sure the output directory exists so this cell also runs on a fresh
# checkout (Restart & Run All); mkdir is a no-op when tmp/ is already there.
Path("tmp").mkdir(parents=True, exist_ok=True)

# Explicitly write the last numbering output to a msgpack file.
model.to_msgpack("tmp/test.msgpack")

Using device CUDA with 12 CPUs
Last output saved to tmp/test.msgpack in scheme: None.


In [6]:
# You can convert the last output to an alternative numbering scheme.
# `kab` receives whatever to_scheme returns; the model also records the
# conversion, since the recorded output of the following to_msgpack call
# reports "scheme: kabat".
kab = model.to_scheme("kabat")
# NOTE(review): assumes the tmp/ directory already exists - confirm whether
# to_msgpack creates missing parent directories.
model.to_msgpack("tmp/test_kabat.msgpack")

Last output converted to kabat 

Last output saved to tmp/test_kabat.msgpack in scheme: kabat.
