In [25]:
from anarcii import Anarcii

# Settings for the numbering model, collected in one place so they are easy to
# tweak. With cpu=False the run below reported "Using device CUDA".
anarcii_options = {
    "seq_type": "antibody",
    "batch_size": 128,
    "cpu": False,
    "ncpu": 1,
    "mode": "accuracy",
    "verbose": False,
}
model = Anarcii(**anarcii_options)

# Single query sequence used as the worked example in the following cells.
seq = "SYVLTQPPSVSVAPGKTARITCGGNNIGSKSVHWYQQKPGQAPVLVVYDDSDRPSGIPERFSGSNSGNTATLTISRVEAGDEADYFCQVWDGSGDHPGYVFGTGTKVTVL"

# Number the sequence; the model keeps this result so that later
# `model.to_scheme(...)` calls can convert it.
results = model.number(seq)

Using device CUDA with 1 CPUs


In [26]:
# Convert the last numbering result to each scheme in turn and show the final
# 24 numbered positions so the schemes can be compared by eye.
# NOTE(review): "martin" appears twice - presumably to check converting back
# after another scheme; confirm this is intentional.
schemes = [
    "martin",
    "kabat",
    "martin",
    "chothia",
    "imgt",
    "aho",
]
last_24 = slice(-24, None)

for sch in schemes:
    res = model.to_scheme(sch)
    numbering = res['Sequence']['numbering']
    print(numbering[last_24])

Last output converted to martin
[((88, ' '), 'C'), ((89, ' '), 'Q'), ((90, ' '), 'V'), ((91, ' '), 'W'), ((92, ' '), 'D'), ((93, ' '), 'G'), ((94, ' '), 'S'), ((95, ' '), 'G'), ((95, 'A'), 'D'), ((95, 'B'), 'H'), ((95, 'C'), 'P'), ((95, 'D'), 'G'), ((96, ' '), 'Y'), ((97, ' '), 'V'), ((98, ' '), 'F'), ((99, ' '), 'G'), ((100, ' '), 'T'), ((101, ' '), 'G'), ((102, ' '), 'T'), ((103, ' '), 'K'), ((104, ' '), 'V'), ((105, ' '), 'T'), ((106, ' '), 'V'), ((107, ' '), 'L')]
Last output converted to kabat
[((88, ' '), 'C'), ((89, ' '), 'Q'), ((90, ' '), 'V'), ((91, ' '), 'W'), ((92, ' '), 'D'), ((93, ' '), 'G'), ((94, ' '), 'S'), ((95, ' '), 'G'), ((95, 'A'), 'D'), ((95, 'B'), 'H'), ((95, 'C'), 'P'), ((95, 'D'), 'G'), ((96, ' '), 'Y'), ((97, ' '), 'V'), ((98, ' '), 'F'), ((99, ' '), 'G'), ((100, ' '), 'T'), ((101, ' '), 'G'), ((102, ' '), 'T'), ((103, ' '), 'K'), ((104, ' '), 'V'), ((105, ' '), 'T'), ((106, ' '), 'V'), ((107, ' '), 'L')]
Last output converted to martin
[((88, ' '), 'C'), ((89

In [27]:
import string

# We can create vectors of every possible position label for each scheme.
# This is used to ensure each csv header has a ground truth.

# IMGT
# Insertion codes: all upper case letters, then all upper case letters doubled.
alphabet = (
    # All upper case letters.
    list(string.ascii_uppercase)
    # All upper case letters, doubled.
    + [2 * letter for letter in string.ascii_uppercase]
)

# Allowed and forbidden IMGT insertions.
# Positions inside these three CDR ranges take no insertions, except at the
# start/end positions listed below.
full_cdrs = list(range(27, 39)) + list(range(56, 66)) + list(range(105, 118))

# Insertions after a "start" position run forwards (32, 32A, 32B, ...).
cdr_instertion_starts = [32, 60, 111]
# Insertions belonging to an "end" position run backwards and come BEFORE the
# bare position (..., 33B, 33A, 33), following the IMGT convention
# 111, 111A, 111B, 112B, 112A, 112.
cdr_instertion_ends = [33, 61, 112]


allowed_non_cdr_instertions = [x for x in range(1, 129) if x not in full_cdrs]

# Sets make the membership tests in the loop below constant-time.
forward_insertion_positions = set(allowed_non_cdr_instertions) | set(cdr_instertion_starts)
reverse_insertion_positions = set(cdr_instertion_ends)

# Build the full label list directly in its final order. Bare positions stay
# ints and inserted positions are strings such as "32A".
all_imgt_nums = []
for num in range(1, 129):
    if num in reverse_insertion_positions:
        # Reversed codes precede the bare position: 33ZZ, ..., 33B, 33A, 33.
        all_imgt_nums.extend(str(num) + letter for letter in reversed(alphabet))

    all_imgt_nums.append(num)

    # 128 is the last entry, so no insertions are generated after it.
    if num < 128 and num in forward_insertion_positions:
        all_imgt_nums.extend(str(num) + letter for letter in alphabet)


# Show the labels between each pair of consecutive bare positions. The
# reversed insertions of 33/61/112 therefore appear at the end of the line
# for 32/60/111.
for num in range(1, 128):
    print(all_imgt_nums[all_imgt_nums.index(num):all_imgt_nums.index(num+1)])

print(all_imgt_nums[-1:])

[1, '1A', '1B', '1C', '1D', '1E', '1F', '1G', '1H', '1I', '1J', '1K', '1L', '1M', '1N', '1O', '1P', '1Q', '1R', '1S', '1T', '1U', '1V', '1W', '1X', '1Y', '1Z', '1AA', '1BB', '1CC', '1DD', '1EE', '1FF', '1GG', '1HH', '1II', '1JJ', '1KK', '1LL', '1MM', '1NN', '1OO', '1PP', '1QQ', '1RR', '1SS', '1TT', '1UU', '1VV', '1WW', '1XX', '1YY', '1ZZ']
[2, '2A', '2B', '2C', '2D', '2E', '2F', '2G', '2H', '2I', '2J', '2K', '2L', '2M', '2N', '2O', '2P', '2Q', '2R', '2S', '2T', '2U', '2V', '2W', '2X', '2Y', '2Z', '2AA', '2BB', '2CC', '2DD', '2EE', '2FF', '2GG', '2HH', '2II', '2JJ', '2KK', '2LL', '2MM', '2NN', '2OO', '2PP', '2QQ', '2RR', '2SS', '2TT', '2UU', '2VV', '2WW', '2XX', '2YY', '2ZZ']
[3, '3A', '3B', '3C', '3D', '3E', '3F', '3G', '3H', '3I', '3J', '3K', '3L', '3M', '3N', '3O', '3P', '3Q', '3R', '3S', '3T', '3U', '3V', '3W', '3X', '3Y', '3Z', '3AA', '3BB', '3CC', '3DD', '3EE', '3FF', '3GG', '3HH', '3II', '3JJ', '3KK', '3LL', '3MM', '3NN', '3OO', '3PP', '3QQ', '3RR', '3SS', '3TT', '3UU', '3VV', '3

In [28]:
# For the other schemes the numbering is sequential.
# AHo maxes at 149
# Kabat starts at 0
# Every position from 0 to 148 is followed by its full set of insertion
# codes; 149 closes the list with no insertions after it.
all_other_nums = []
for base in range(0, 149):
    all_other_nums.append(base)
    all_other_nums += [str(base) + code for code in alphabet]
all_other_nums.append(149)

# Print each bare position together with the insertion labels that follow it.
for num in range(0, 149):
    start = all_other_nums.index(num)
    stop = all_other_nums.index(num + 1)
    print(all_other_nums[start:stop])

[0, '0A', '0B', '0C', '0D', '0E', '0F', '0G', '0H', '0I', '0J', '0K', '0L', '0M', '0N', '0O', '0P', '0Q', '0R', '0S', '0T', '0U', '0V', '0W', '0X', '0Y', '0Z', '0AA', '0BB', '0CC', '0DD', '0EE', '0FF', '0GG', '0HH', '0II', '0JJ', '0KK', '0LL', '0MM', '0NN', '0OO', '0PP', '0QQ', '0RR', '0SS', '0TT', '0UU', '0VV', '0WW', '0XX', '0YY', '0ZZ']
[1, '1A', '1B', '1C', '1D', '1E', '1F', '1G', '1H', '1I', '1J', '1K', '1L', '1M', '1N', '1O', '1P', '1Q', '1R', '1S', '1T', '1U', '1V', '1W', '1X', '1Y', '1Z', '1AA', '1BB', '1CC', '1DD', '1EE', '1FF', '1GG', '1HH', '1II', '1JJ', '1KK', '1LL', '1MM', '1NN', '1OO', '1PP', '1QQ', '1RR', '1SS', '1TT', '1UU', '1VV', '1WW', '1XX', '1YY', '1ZZ']
[2, '2A', '2B', '2C', '2D', '2E', '2F', '2G', '2H', '2I', '2J', '2K', '2L', '2M', '2N', '2O', '2P', '2Q', '2R', '2S', '2T', '2U', '2V', '2W', '2X', '2Y', '2Z', '2AA', '2BB', '2CC', '2DD', '2EE', '2FF', '2GG', '2HH', '2II', '2JJ', '2KK', '2LL', '2MM', '2NN', '2OO', '2PP', '2QQ', '2RR', '2SS', '2TT', '2UU', '2VV', '2

In [29]:
# We can use this code to generate a fake antibody sequence, which we can then try to convert to the other numbering schemes.

import re

def split_numbers_and_letters(input_string):
    """Split a position label such as ``"111A"`` into its digits and letters.

    Args:
        input_string: Normally a string label (``"32"``, ``"32A"``, ``"32AA"``).
            Non-string values, such as a bare ``int`` position, are accepted
            and passed through unchanged.

    Returns:
        tuple: ``(numbers, letters)``. ``numbers`` is the first run of digits
        as a string, or ``input_string`` itself when it is not a string or
        contains no digits. ``letters`` is the first run of ASCII letters, or
        a single space ``" "`` when there is none.
    """
    # Bare positions are ints: nothing to split, and the insertion code is blank.
    # This replaces the previous reliance on a bare ``except`` catching the
    # TypeError that ``re`` raises for non-string input.
    if not isinstance(input_string, str):
        return input_string, " "

    digit_run = re.search(r'\d+', input_string)
    letter_run = re.search(r'[a-zA-Z]+', input_string)

    numbers = digit_run.group() if digit_run else input_string
    letters = letter_run.group() if letter_run else " "
    return numbers, letters

# Turn every IMGT label into a ((number, insertion_code), residue) entry - the
# same shape as the numbering printed by `model.to_scheme` - using "A" as a
# placeholder residue at every position.
# The final label (the bare int 128) goes through the same path as the other
# bare positions and becomes ((128, " "), "A").
fake_numbering = [
    ((int(number), insertion), "A")
    for number, insertion in map(split_numbers_and_letters, all_imgt_nums)
]


# Show each bare position together with the entries up to the next bare
# position. The range runs to 127 inclusive so that the last group is shown
# too (it previously stopped at 126).
for num in range(1, 128):
    group_start = fake_numbering.index(((num, " "), "A"))
    group_stop = fake_numbering.index(((num + 1, " "), "A"))
    print(fake_numbering[group_start:group_stop])

print(fake_numbering[-1:])


[((1, ' '), 'A'), ((1, 'A'), 'A'), ((1, 'B'), 'A'), ((1, 'C'), 'A'), ((1, 'D'), 'A'), ((1, 'E'), 'A'), ((1, 'F'), 'A'), ((1, 'G'), 'A'), ((1, 'H'), 'A'), ((1, 'I'), 'A'), ((1, 'J'), 'A'), ((1, 'K'), 'A'), ((1, 'L'), 'A'), ((1, 'M'), 'A'), ((1, 'N'), 'A'), ((1, 'O'), 'A'), ((1, 'P'), 'A'), ((1, 'Q'), 'A'), ((1, 'R'), 'A'), ((1, 'S'), 'A'), ((1, 'T'), 'A'), ((1, 'U'), 'A'), ((1, 'V'), 'A'), ((1, 'W'), 'A'), ((1, 'X'), 'A'), ((1, 'Y'), 'A'), ((1, 'Z'), 'A'), ((1, 'AA'), 'A'), ((1, 'BB'), 'A'), ((1, 'CC'), 'A'), ((1, 'DD'), 'A'), ((1, 'EE'), 'A'), ((1, 'FF'), 'A'), ((1, 'GG'), 'A'), ((1, 'HH'), 'A'), ((1, 'II'), 'A'), ((1, 'JJ'), 'A'), ((1, 'KK'), 'A'), ((1, 'LL'), 'A'), ((1, 'MM'), 'A'), ((1, 'NN'), 'A'), ((1, 'OO'), 'A'), ((1, 'PP'), 'A'), ((1, 'QQ'), 'A'), ((1, 'RR'), 'A'), ((1, 'SS'), 'A'), ((1, 'TT'), 'A'), ((1, 'UU'), 'A'), ((1, 'VV'), 'A'), ((1, 'WW'), 'A'), ((1, 'XX'), 'A'), ((1, 'YY'), 'A'), ((1, 'ZZ'), 'A')]
[((2, ' '), 'A'), ((2, 'A'), 'A'), ((2, 'B'), 'A'), ((2, 'C'), 'A'), ((2

In [30]:
# We can try to convert the fake numbering to alternate schemes, but this fails.

from anarcii.output_data_processing.schemes import convert_number_scheme

# One fake result entry per chain type (keys "fake_h", "fake_k", "fake_l"),
# all claiming to be IMGT-numbered.
# - Each entry gets its own copy of the numbering list, in case the converter
#   modifies the lists in place (not verified here).
# - "query_end" used to be the builtin `len` function itself rather than a
#   number. NOTE(review): this assumes query_end is the 0-based index of the
#   last residue, mirroring query_start = 0 - confirm against anarcii.
fake_dict = {
    f"fake_{chain_type.lower()}": {
        "numbering": list(fake_numbering),
        "chain_type": chain_type,
        "score": 99,
        "query_start": 0,
        "query_end": len(fake_numbering) - 1,
        "error": None,
        "scheme": "imgt",
    }
    for chain_type in ("H", "K", "L")
}

# Target schemes to try; uncomment entries to test more of them.
schemes = [
        "martin", 
        # "kabat", 
        # "chothia", 
        # "imgt", 
        # "aho"
           ]

# Convert all fake chains to each scheme and show the last 24 positions.
for sch in schemes:
    res = convert_number_scheme(fake_dict, sch)
    for fake_name in ("fake_h", "fake_k", "fake_l"):
        print(res[fake_name]['numbering'][-24:])

IndexError: list index out of range