## 1.3 Processing SignalP Output

### Overview
After generating initial predictions with SignalP, the output needs to be processed to extract and format useful data for further analysis. This step involves parsing the SignalP output file to retrieve detailed information about each predicted signal peptide.

### Install requirements
 


In [None]:
# Install Biopython, which provides tools for biological computation
!pip install biopython



# Criteria and functions

Source: [ProtParam module](https://biopython.org/docs/1.75/api/Bio.SeqUtils.ProtParam.html)

 Method used: **gravy(self)** --> Calculates the GRAVY (grand average of hydropathy) value according to Kyte and Doolittle.


In [None]:
def regions(dire):
    """
    Extract the signal-peptide regions from one tab-delimited result file.

    The file 'Results_toxin/' + dire is read with the csv module. The first
    two rows are treated as headers and skipped. For every remaining row the
    second column is taken as the residue and the third column as its region
    label ('N', 'H', 'C' or 'O'); residues with any other label are ignored.
    Rows with fewer than three columns (for example blank lines) are skipped
    instead of breaking the whole parse.

    Args:
    dire (str): File name (or relative path) under 'Results_toxin/'.

    Returns:
    list: Six strings, in this order: the N-region, the H-region, the
          C-region, the full signal peptide (N + H + C), the first 16
          characters of the 'O'-labelled (Pro) region, and the full signal
          peptide followed by the complete Pro region.

    Raises:
    OSError: If the file cannot be opened.
    """
    import csv

    # Path to the folder where results are stored
    # Change the path to the folder
    link = 'Results_toxin/' + dire

    # One bucket of residues per recognised region label
    residues_by_label = {'N': [], 'H': [], 'C': [], 'O': []}

    # newline='' lets the csv module handle line endings itself
    with open(link, 'r', newline='') as file:
        reader = csv.reader(file, delimiter='\t')
        for row_number, row in enumerate(reader):
            # Ignore the first two header lines
            if row_number < 2:
                continue
            # Blank or truncated rows carry no residue/label pair
            if len(row) < 3:
                continue
            bucket = residues_by_label.get(row[2])
            if bucket is not None:
                bucket.append(row[1])

    n_region = ''.join(residues_by_label['N'])
    h_region = ''.join(residues_by_label['H'])
    c_region = ''.join(residues_by_label['C'])
    pro_region = ''.join(residues_by_label['O'])

    # Concatenate N, H, C regions to form the full signal peptide
    full_sp = n_region + h_region + c_region
    # Concatenate full SP with Pro region
    complete_sp_pro = full_sp + pro_region

    return [n_region, h_region, c_region, full_sp, pro_region[0:16], complete_sp_pro]



def gravy(seq):
    """
    Calculate the hydrophobicity index of a sequence using the Kyte & Doolittle method.

    The index is the sum of the per-residue hydropathy values divided by the
    sequence length. Characters that are not one of the 20 upper-case
    residue codes in the table contribute nothing to the sum but still count
    towards the length.

    Args:
    seq (str): Amino acid sequence for which to calculate the hydrophobicity.

    Returns:
    float: Hydrophobicity index rounded to two decimal places; 0.0 for an
           empty sequence.
    """
    # Hydrophobicity scale from Kyte & Doolittle. J. Mol. Biol. 157:105-132(1982).
    g_kd = {
        "A": 1.8, "R": -4.5, "N": -3.5, "D": -3.5, "C": 2.5,
        "Q": -3.5, "E": -3.5, "G": -0.4, "H": -3.2, "I": 4.5,
        "L": 3.8, "K": -3.9, "M": 1.9, "F": 2.8, "P": -1.6,
        "S": -0.8, "T": -0.7, "W": -0.9, "Y": -1.3, "V": 4.2
    }

    # An empty sequence has no residues to average; avoid dividing by zero
    if not seq:
        return 0.0

    kd = 0
    for residue in seq:
        # Direct table lookup instead of scanning every table entry
        if residue in g_kd:
            kd = kd + g_kd[residue]
    kd = kd / len(seq)
    return round(kd, 2)


def name_select(name):
    """
    Return the text located between the first and second '|' of an entry name.

    Args:
    name (str): Entry name in which the identifier is delimited by pipes,
                e.g. 'sp|A0A1S4NYE3|CDIA_ECOST...'.

    Returns:
    str: The substring between the first two '|' characters.

    Raises:
    IndexError: If the name contains fewer than two '|' characters.
    """
    # Collect the index of every pipe character in the name
    pipe_positions = [index for index, char in enumerate(name) if char == '|']

    # The identifier sits strictly between the first two pipes
    start = pipe_positions[0] + 1
    end = pipe_positions[1]
    return name[start:end]



def charge(seq):
    """
    Estimate the net charge of a sequence from its charged residues.

    Each 'K' or 'R' adds 1, each 'H' adds 0.1 and each 'D' or 'E' subtracts 1;
    every other character is ignored.

    Args:
    seq (str): Amino acid sequence.

    Returns:
    int or float: Accumulated charge (an int unless at least one 'H' is present).
    """
    # Per-residue contribution to the running total
    contribution = {'K': 1, 'R': 1, 'H': 0.1, 'D': -1, 'E': -1}

    total = 0
    for residue in seq:
        delta = contribution.get(residue)
        if delta is not None:
            total = total + delta

    return total


### Parsing Output

This part processes the output file "prediction_results_toxin.txt" from SignalP to extract essential information such as peptide scores, cleavage site positions, and prediction confidence.

In [None]:
import os
import csv

# Counter kept for compatibility with the rest of the notebook
cont = 0

# Read every row of the tab-delimited SignalP prediction summary
with open('Results_toxin/prediction_results_toxin.txt', 'r') as file:
    reader = csv.reader(file, delimiter='\t')
    matrix = [row for row in reader]  # one list of fields per line

# Drop the two leading header lines
matrix = matrix[2:]
# Display the resulting matrix
matrix

[['sp|A0A0N9NCU6|YOPJ_YERPU_Serine/threonine-protein_acetyltransferase_YopJ_OS=Yer',
  'NO_SP',
  '1.000029',
  '0.000027',
  '0.000000',
  '0.000000',
  '0.000000',
  '0.000000',
  ''],
 ['sp|A0A1S4NYE3|CDIA_ECOST_tRNA_nuclease_CdiA_OS=Escherichia_coli_(strain_STEC_O3',
  'SP',
  '0.024348',
  '0.974034',
  '0.000681',
  '0.000438',
  '0.000238',
  '0.000230',
  'CS pos: 32-33. Pr: 0.8066'],
 ['sp|A0A2S3R7M0|MARTX_VIBVL_Multifunctional-autoprocessing_repeats-in-toxin_OS=Vi',
  'NO_SP',
  '1.000025',
  '0.000005',
  '0.000000',
  '0.000000',
  '0.000000',
  '0.000000',
  ''],
 ['sp|A0A482PDI9|NLEB_CITRO_Protein-arginine_N-acetylglucosaminyltransferase_NleB_',
  'NO_SP',
  '1.000047',
  '0.000000',
  '0.000000',
  '0.000000',
  '0.000000',
  '0.000000',
  ''],
 ['sp|A1YKW7|RTXA_KINKI_Cytolysin_RtxA_OS=Kingella_kingae_OX=504_GN=rtxA_PE=1_SV=1',
  'NO_SP',
  '1.000077',
  '0.000000',
  '0.000000',
  '0.000000',
  '0.000000',
  '0.000000',
  ''],
 ['sp|B3BM80|CDIA4_ECO5C_Deoxyribonuclease_

In [None]:
# To recognize name
def name_select(name):
    """
    Extract the identifier found between the first two '|' characters of a name.

    Raises IndexError when the name holds fewer than two '|' characters.
    """
    # Positions of all pipe separators, in order of appearance
    separators = []
    for position in range(len(name)):
        if name[position] == '|':
            separators.append(position)

    first_pipe = separators[0]
    second_pipe = separators[1]
    # Everything after the first pipe and before the second one is the ID
    return name[first_pipe + 1:second_pipe]

In [None]:
# Build one feature row per sequence predicted as a signal peptide ("SP").
# NOTE(review): this cell relies on `matrix`, `regions`, `name_select`,
# `charge`, `np` and `ProteinAnalysis` being defined/imported by earlier
# cells - confirm they are in scope before running.
import os
# List to store file names within the directory
dir_archivos = []

with os.scandir('Results_toxin') as ficheros:
    for fichero in ficheros:
        dir_archivos.append(fichero.name)
        # Optionally print each file name
        #print(fichero.name)

# List to store data related to signal peptides
data_sp = []

# Iterate over each row in the matrix processed earlier
i = 0
for fila in matrix:
    i = i + 1
    # Optionally print
    #print(fila)

    # Filter rows where the second column indicates a signal peptide ("SP")
    if str(fila[1]) == "SP":
        # The ID is wrapped in underscores before matching it against file names
        name = '_' + name_select(fila[0]) + '_'
        # First file whose name contains the ID; raises IndexError if none matches
        name_dir = [s for s in dir_archivos if name in s][0]

        # Parse the per-sequence region file
        pep_regions = regions(name_dir)

        # Keep only peptides without 'X' in the signal peptide and with no
        # empty element in the list returned by regions()
        if "X" not in pep_regions[3] and "" not in pep_regions:

            region_n = pep_regions[0]    # N-region
            region_h = pep_regions[1]    # H-region
            #print("EEEEEEEEEEE", name, pep_regions)
            region_c = pep_regions[2]    # C-region
            SP       = pep_regions[3]    # Full Signal Peptide
            len_sp = len(pep_regions[3]) # Length of the signal peptide
            region_pro = pep_regions[4]  # Pro region
            seq_70aa = pep_regions[5]    # Complete fragment (70 amino acids)

            # Calculate net charge of the N-region
            carga_nlit = charge(region_n)

            # Calculate hydrophobicity of the H-region and full signal peptide
            hidro_h = ProteinAnalysis(region_h).gravy()
            hidro_sp = ProteinAnalysis(SP).gravy()

            # Compile all extracted and computed data into a list
            sp_fila = [name_select(fila[0]),     # Uniprot ID

                          fila[1],    # ID  (SP-> signal peptide)
                          fila[3],    # SP-likelihood
                          region_n  , # N-region
                          region_h  , # H-region
                          region_c  , # C-region
                          SP,         # Full Signal Peptide
                          len_sp,     # Length of the signal peptide
                          region_pro, # Pro-region
                          seq_70aa,   # Complete fragment (70 amino acids)
                          #carga_n,
                          carga_nlit, # Net charge of N-region
                       hidro_h,       # Hydrophobicity of H-region
                       hidro_sp       # Hydrophobicity of full signal peptide
                      ]
        else:
            # Placeholder row for rejected peptides. np.empty would leave the
            # 13 cells uninitialised (arbitrary memory contents), so fill them
            # with NaN to make the placeholder deterministic and detectable.
            sp_fila = np.full((1, 13), np.nan)

        print(sp_fila)
        # Append the row to the main data list
        data_sp.append(sp_fila)


EEEEEEEEEEE _A0A1S4NYE3_ ['MHQPPVRFPYR', 'LLSYLISTIIAGQPLL', 'PAVGA', 'MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGA', 'VITPQNGAGMDKAANG', 'MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGAVITPQNGAGMDKAANGVPVVNIATPNGAGISHNRFTDY']
['A0A1S4NYE3', 'SP', '0.974034', 'MHQPPVRFPYR', 'LLSYLISTIIAGQPLL', 'PAVGA', 'MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGA', 32, 'VITPQNGAGMDKAANG', 'MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGAVITPQNGAGMDKAANGVPVVNIATPNGAGISHNRFTDY', 2.1, 1.5750000000000002, 0.5656250000000002]
EEEEEEEEEEE _D5CBA0_ ['MMKQDQVRFSQR', 'ALSALLSVLLATQPLL', 'PAVA', 'MMKQDQVRFSQRALSALLSVLLATQPLLPAVA', 'ASITPSGNTQMDKAAN', 'MMKQDQVRFSQRALSALLSVLLATQPLLPAVAASITPSGNTQMDKAANGVPVVNIATPNQSGISHNKYND']
['D5CBA0', 'SP', '0.998014', 'MMKQDQVRFSQR', 'ALSALLSVLLATQPLL', 'PAVA', 'MMKQDQVRFSQRALSALLSVLLATQPLLPAVA', 32, 'ASITPSGNTQMDKAAN', 'MMKQDQVRFSQRALSALLSVLLATQPLLPAVAASITPSGNTQMDKAANGVPVVNIATPNQSGISHNKYND', 2, 1.8, 0.5656250000000003]
EEEEEEEEEEE _P01555_ ['MVK', 'IIFVFFIFLSSF', 'SYA', 'MVKIIFVFFIFLSSFSYA', 'NDDKLYRADSRPPDEI', 'MVKIIFVFFIFLSSFSYA

EEEEEEEEEEE _Q48258_ ['MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHAAFFTTVIIPAIVGGIATGAAVGTVSGLLGWGLKQAEE']
['Q48258', 'SP', '0.935472', 'MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHAAFFTTVIIPAIVGGIATGAAVGTVSGLLGWGLKQAEE', 2.1, 2.1066666666666665, 0.027272727272727372]
EEEEEEEEEEE _Q57478_ ['MKC', 'ILLKWILCLLLGFSSV', 'SYS', 'MKCILLKWILCLLLGFSSVSYS', 'QEFTIDFSTQQSYVSS', 'MKCILLKWILCLLLGFSSVSYSQEFTIDFSTQQSYVSSLNSIRTAISTPLEHISQGATSVSVINHTPPGS']
['Q57478', 'SP', '0.999182', 'MKC', 'ILLKWILCLLLGFSSV', 'SYS', 'MKCILLKWILCLLLGFSSVSYS', 22, 'QEFTIDFSTQQSYVSS', 'MKCILLKWILCLLLGFSSVSYSQEFTIDFSTQQSYVSSLNSIRTAISTPLEHISQGATSVSVINHTPPGS', 1, 2.1562500000000004, 1.4590909090909099]
EEEEEEEEEEE _Q8KTU8_ ['MKC', 'ILFKWVLCLLLGFSSV', 'SYS', 'MKCILFKWVLCLLLGFSSVSYS', 'REFTIDFSTQQSYVSS', 'MKCILFKWVLCLLLGFSSVSYSREFTIDFSTQQSYVS

EEEEEEEEEEE _A0A0F4NLB3_ ['MNSHLILLNVRKNNMNNNK', 'VVNITLVSAAVSLGLF', 'STNAFA', 'MNSHLILLNVRKNNMNNNKVVNITLVSAAVSLGLFSTNAFA', 'AVDILDIKLQGQSCEM', 'MNSHLILLNVRKNNMNNNKVVNITLVSAAVSLGLFSTNAFAAVDILDIKLQGQSCEMGFSPIAADEVMPI']
['A0A0F4NLB3', 'SP', '0.998838', 'MNSHLILLNVRKNNMNNNK', 'VVNITLVSAAVSLGLF', 'STNAFA', 'MNSHLILLNVRKNNMNNNKVVNITLVSAAVSLGLFSTNAFA', 41, 'AVDILDIKLQGQSCEM', 'MNSHLILLNVRKNNMNNNKVVNITLVSAAVSLGLFSTNAFAAVDILDIKLQGQSCEMGFSPIAADEVMPI', 3.1, 2.05625, 0.42439024390243907]
EEEEEEEEEEE _A0A0F6WGF3_ ['MHQPPVRFPYR', 'LLSYLISTIIAGQPLL', 'PAVGA', 'MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGA', 'VITPQNGAGMDKAANG', 'MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGAVITPQNGAGMDKAANGVPVVNIATPNGAGISHNRFTDY']
['A0A0F6WGF3', 'SP', '0.974034', 'MHQPPVRFPYR', 'LLSYLISTIIAGQPLL', 'PAVGA', 'MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGA', 32, 'VITPQNGAGMDKAANG', 'MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGAVITPQNGAGMDKAANGVPVVNIATPNGAGISHNRFTDY', 2.1, 1.5750000000000002, 0.5656250000000002]
EEEEEEEEEEE _A0A0G4K079_ ['MKPIKTTQR', 'LMAYTLINLIAFQPVL', 'PA

EEEEEEEEEEE _A0A0T9QJY3_ ['MKQNKFKLSPAGK', 'LAAAVAIISVSVAT', 'CYA', 'MKQNKFKLSPAGKLAAAVAIISVSVATCYA', 'AGIVGAGDSAHRPEVN', 'MKQNKFKLSPAGKLAAAVAIISVSVATCYAAGIVGAGDSAHRPEVNSVNGVSVVDIVKPSASGLSHNQFD']
['A0A0T9QJY3', 'SP', '0.997368', 'MKQNKFKLSPAGK', 'LAAAVAIISVSVAT', 'CYA', 'MKQNKFKLSPAGKLAAAVAIISVSVATCYA', 30, 'AGIVGAGDSAHRPEVN', 'MKQNKFKLSPAGKLAAAVAIISVSVATCYAAGIVGAGDSAHRPEVNSVNGVSVVDIVKPSASGLSHNQFD', 4, 2.2928571428571423, 0.6666666666666665]
EEEEEEEEEEE _A0A0T9R5B6_ ['MKTHTFKLSPAGK', 'LAAAVTIISVSVAT', 'CYA', 'MKTHTFKLSPAGKLAAAVTIISVSVATCYA', 'AGIVGAGDSAHKPDVS', 'MKTHTFKLSPAGKLAAAVTIISVSVATCYAAGIVGAGDSAHKPDVSSVNGTSVINIVEPSASGLSHNKFQ']
['A0A0T9R5B6', 'SP', '0.996090', 'MKTHTFKLSPAGK', 'LAAAVTIISVSVAT', 'CYA', 'MKTHTFKLSPAGKLAAAVTIISVSVATCYA', 30, 'AGIVGAGDSAHKPDVS', 'MKTHTFKLSPAGKLAAAVTIISVSVATCYAAGIVGAGDSAHKPDVSSVNGTSVINIVEPSASGLSHNKFQ', 3.1, 2.1142857142857143, 0.7933333333333332]
EEEEEEEEEEE _A0A0T9SRS8_ ['MIPIYFRQK', 'LISYALIYLVAIQPIM', 'PVMA', 'MIPIYFRQKLISYALIYLVAIQPIMPVMA', 'AGIDV

EEEEEEEEEEE _A0A1I4XC96_ ['MDDQLTLSLPRR', 'LLSYLICSLIAFQPLL', 'PAFS', 'MDDQLTLSLPRRLLSYLICSLIAFQPLLPAFS', 'AAIAPVTPGTKVDAAG', 'MDDQLTLSLPRRLLSYLICSLIAFQPLLPAFSAAIAPVTPGTKVDAAGNGVPVINIATPNAAGLSHNQYQ']
['A0A1I4XC96', 'SP', '0.946707', 'MDDQLTLSLPRR', 'LLSYLICSLIAFQPLL', 'PAFS', 'MDDQLTLSLPRRLLSYLICSLIAFQPLLPAFS', 32, 'AAIAPVTPGTKVDAAG', 'MDDQLTLSLPRRLLSYLICSLIAFQPLLPAFSAAIAPVTPGTKVDAAGNGVPVINIATPNAAGLSHNQYQ', 0, 1.9312500000000001, 0.74375]
EEEEEEEEEEE _A0A1I6E059_ ['MKTIKKDKITESNLHLR', 'LAPLYLSLASIFSPL', 'AEA', 'MKTIKKDKITESNLHLRLAPLYLSLASIFSPLAEA', 'AGTVPDPRQPGGPTMG', 'MKTIKKDKITESNLHLRLAPLYLSLASIFSPLAEAAGTVPDPRQPGGPTMGKTANDTPMVNIVNPNAKGV']
['A0A1I6E059', 'SP', '0.998860', 'MKTIKKDKITESNLHLR', 'LAPLYLSLASIFSPL', 'AEA', 'MKTIKKDKITESNLHLRLAPLYLSLASIFSPLAEA', 35, 'AGTVPDPRQPGGPTMG', 'MKTIKKDKITESNLHLRLAPLYLSLASIFSPLAEAAGTVPDPRQPGGPTMGKTANDTPMVNIVNPNAKGV', 3.1, 1.533333333333333, 0.1600000000000001]
EEEEEEEEEEE _A0A1K0JBT9_ ['MNQNRYRLVFNKQRGMLMAVPECANGAHKAASGERAGDTGRSFLAT', 'LRPIAWALLLSTG

EEEEEEEEEEE _A0A209A5L7_ ['MKQNKFKLSPAGK', 'LAAAVAIISVSVAT', 'CYA', 'MKQNKFKLSPAGKLAAAVAIISVSVATCYA', 'AGIVGAGDPTHNPAIN', 'MKQNKFKLSPAGKLAAAVAIISVSVATCYAAGIVGAGDPTHNPAINSINGVPVIDIVKPSASGLSHNQYN']
['A0A209A5L7', 'SP', '0.998923', 'MKQNKFKLSPAGK', 'LAAAVAIISVSVAT', 'CYA', 'MKQNKFKLSPAGKLAAAVAIISVSVATCYA', 30, 'AGIVGAGDPTHNPAIN', 'MKQNKFKLSPAGKLAAAVAIISVSVATCYAAGIVGAGDPTHNPAINSINGVPVIDIVKPSASGLSHNQYN', 4, 2.2928571428571423, 0.6666666666666665]
EEEEEEEEEEE _A0A221JA87_ ['MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHAAFFTTVIIPAIVGGIASGAAVGTVSGLLGWGLKQAEE']
['A0A221JA87', 'SP', '0.954091', 'MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHAAFFTTVIIPAIVGGIASGAAVGTVSGLLGWGLKQAEE', 2.1, 2.1066666666666665, 0.027272727272727372]
EEEEEEEEEEE _A0A238GVV6_ ['MELQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MELQQTHRKMNRPLVSLVLA

EEEEEEEEEEE _A0A293RXB6_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQQSHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQQSHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQQSHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['A0A293RXB6', 'SP', '0.972509', 'MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQQSHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQQSHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQQSHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.293333333333334, 0.033333333333333305]
EEEEEEEEEEE _A0A293S1C8_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['A0A293S1C8', 'SP', '0.964691', 'MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.293333333333334, 0.033333333333333305]
EEEEEEEEEEE _A0A293S6G0_ ['MEIQQIHRKMNR', 'PLVSLALVGALVSIT', 'PQESHA', 'MEIQQI

EEEEEEEEEEE _A0A2A6W7J3_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['A0A2A6W7J3', 'SP', '0.964691', 'MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.293333333333334, 0.033333333333333305]
EEEEEEEEEEE _A0A2A6WCB5_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQQSHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQQSHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQQSHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['A0A2A6WCB5', 'SP', '0.972509', 'MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQQSHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQQSHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQQSHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.293333333333334, 0.033333333333333305]
EEEEEEEEEEE _A0A2A6WFC4_ ['MEIQQTHRKINR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQT

EEEEEEEEEEE _A0A2S5K769_ ['MDDRQPVSLARR', 'VLSYLICSLVAMQPVL', 'PAVA', 'MDDRQPVSLARRVLSYLICSLVAMQPVLPAVA', 'AQITPVTPGTQMDKAG', 'MDDRQPVSLARRVLSYLICSLVAMQPVLPAVAAQITPVTPGTQMDKAGNGVPVVNIATPNGAGISHNQYQ']
['A0A2S5K769', 'SP', '0.998840', 'MDDRQPVSLARR', 'VLSYLICSLVAMQPVL', 'PAVA', 'MDDRQPVSLARRVLSYLICSLVAMQPVLPAVA', 32, 'AQITPVTPGTQMDKAG', 'MDDRQPVSLARRVLSYLICSLVAMQPVLPAVAAQITPVTPGTQMDKAGNGVPVVNIATPNGAGISHNQYQ', 1, 1.9062499999999998, 0.6875]
EEEEEEEEEEE _A0A2T2XWR4_ ['MNQPPVRFTYR', 'LLSYLVSGLLATQPLL', 'PAVA', 'MNQPPVRFTYRLLSYLVSGLLATQPLLPAVA', 'ATLTPTGNTATDNAAN', 'MNQPPVRFTYRLLSYLVSGLLATQPLLPAVAATLTPTGNTATDNAANGVPIVNIATPNEAGISHNQFTDY']
['A0A2T2XWR4', 'SP', '0.997448', 'MNQPPVRFTYR', 'LLSYLVSGLLATQPLL', 'PAVA', 'MNQPPVRFTYRLLSYLVSGLLATQPLLPAVA', 31, 'ATLTPTGNTATDNAAN', 'MNQPPVRFTYRLLSYLVSGLLATQPLLPAVAATLTPTGNTATDNAANGVPIVNIATPNEAGISHNQFTDY', 2, 1.46875, 0.5612903225806454]
EEEEEEEEEEE _A0A2T3SSX2_ ['MKC', 'ILLKWILCLLLGFSSV', 'SYS', 'MKCILLKWILCLLLGFSSVSYS', 'REFTIDFSTQQSYVSS', 'MKCILLKWILCL

EEEEEEEEEEE _A0A2X2GB85_ ['MKNNNFRLSAAGK', 'LAASLAIILASL', 'GNGYA', 'MKNNNFRLSAAGKLAASLAIILASLGNGYA', 'GDIVAANGANGPGVST', 'MKNNNFRLSAAGKLAASLAIILASLGNGYAGDIVAANGANGPGVSTAGNGAQVVNIVTPNDHGLSHNQYQ']
['A0A2X2GB85', 'SP', '0.998875', 'MKNNNFRLSAAGK', 'LAASLAIILASL', 'GNGYA', 'MKNNNFRLSAAGKLAASLAIILASLGNGYA', 30, 'GDIVAANGANGPGVST', 'MKNNNFRLSAAGKLAASLAIILASLGNGYAGDIVAANGANGPGVSTAGNGAQVVNIVTPNDHGLSHNQYQ', 3, 2.4833333333333334, 0.4700000000000001]
EEEEEEEEEEE _A0A2X4Y701_ ['MLIVVAETTRSHRAGVSPQSGADARTGST', 'LTSILAPLAFGFLLAFSCL', 'TPAKA', 'MLIVVAETTRSHRAGVSPQSGADARTGSTLTSILAPLAFGFLLAFSCLTPAKA', 'AIVADNHAPGGQQPQI', 'MLIVVAETTRSHRAGVSPQSGADARTGSTLTSILAPLAFGFLLAFSCLTPAKAAIVADNHAPGGQQPQIA']
['A0A2X4Y701', 'SP', '0.534053', 'MLIVVAETTRSHRAGVSPQSGADARTGST', 'LTSILAPLAFGFLLAFSCL', 'TPAKA', 'MLIVVAETTRSHRAGVSPQSGADARTGSTLTSILAPLAFGFLLAFSCLTPAKA', 53, 'AIVADNHAPGGQQPQI', 'MLIVVAETTRSHRAGVSPQSGADARTGSTLTSILAPLAFGFLLAFSCLTPAKAAIVADNHAPGGQQPQIA', 1.1, 2.0684210526315794, 0.5792452830188678]
EEEEEEEEEEE _A

EEEEEEEEEEE _A0A387K3F7_ ['MELQQTHRKINR', 'PLVSLALAGALISIT', 'PQQSHA', 'MELQQTHRKINRPLVSLALAGALISITPQQSHA', 'AFFTTVIIPAIVGGIA', 'MELQQTHRKINRPLVSLALAGALISITPQQSHAAFFTTVIIPAIVGGIATGAAVGTVSGLLGWGLKQAEE']
['A0A387K3F7', 'SP', '0.942235', 'MELQQTHRKINR', 'PLVSLALAGALISIT', 'PQQSHA', 'MELQQTHRKINRPLVSLALAGALISITPQQSHA', 33, 'AFFTTVIIPAIVGGIA', 'MELQQTHRKINRPLVSLALAGALISITPQQSHAAFFTTVIIPAIVGGIATGAAVGTVSGLLGWGLKQAEE', 2.1, 1.966666666666667, -0.057575757575757606]
EEEEEEEEEEE _A0A387K3G6_ ['MELQQTHRKINR', 'PLVSLALVGLLVSIT', 'PQKSHA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHA', 'AFFTTVIIPAIVGGIA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['A0A387K3G6', 'SP', '0.966533', 'MELQQTHRKINR', 'PLVSLALVGLLVSIT', 'PQKSHA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHA', 33, 'AFFTTVIIPAIVGGIA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.2399999999999998, 0.05454545454545444]
EEEEEEEEEEE _A0A387K3G7_ ['MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQ

EEEEEEEEEEE _A0A3S4JB87_ ['MEKHPVGLAKR', 'LLSYLVIFLLAGQPVF', 'PAVA', 'MEKHPVGLAKRLLSYLVIFLLAGQPVFPAVA', 'ATINPVTPGAQMDQAG', 'MEKHPVGLAKRLLSYLVIFLLAGQPVFPAVAATINPVTPGAQMDQAGNGVPVLNIATPNQAGISHNQFQD']
['A0A3S4JB87', 'SP', '0.999044', 'MEKHPVGLAKR', 'LLSYLVIFLLAGQPVF', 'PAVA', 'MEKHPVGLAKRLLSYLVIFLLAGQPVFPAVA', 31, 'ATINPVTPGAQMDQAG', 'MEKHPVGLAKRLLSYLVIFLLAGQPVFPAVAATINPVTPGAQMDQAGNGVPVLNIATPNQAGISHNQFQD', 2.1, 1.9812500000000002, 0.9225806451612905]
EEEEEEEEEEE _A0A3S4JTX5_ ['MKNNNFRLSAAGK', 'LAAALAIILAA', 'SGNVCA', 'MKNNNFRLSAAGKLAAALAIILAASGNVCA', 'AEIVAANGANGPGVTT', 'MKNNNFRLSAAGKLAAALAIILAASGNVCAAEIVAANGANGPGVTTVANGAQVVDIVAPNGHGLSHNQYQ']
['A0A3S4JTX5', 'SP', '0.998964', 'MKNNNFRLSAAGK', 'LAAALAIILAA', 'SGNVCA', 'MKNNNFRLSAAGKLAAALAIILAASGNVCA', 30, 'AEIVAANGANGPGVTT', 'MKNNNFRLSAAGKLAAALAIILAASGNVCAAEIVAANGANGPGVTTVANGAQVVDIVAPNGHGLSHNQYQ', 3, 2.8363636363636364, 0.7700000000000001]
EEEEEEEEEEE _A0A3S4YDE6_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQE

EEEEEEEEEEE _A0A438UNR8_ ['MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHAAFFTTVIIPAIVGGIATGTAVGTVSGLLGWGLKQAEE']
['A0A438UNR8', 'SP', '0.952107', 'MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHAAFFTTVIIPAIVGGIATGTAVGTVSGLLGWGLKQAEE', 2.1, 2.1066666666666665, 0.027272727272727372]
EEEEEEEEEEE _A0A438UQY3_ ['MELQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MELQQTHRKINRPLVSLALVGALVSITPQQSHA', 'AFFTTVIIPAIVGGIA', 'MELQQTHRKINRPLVSLALVGALVSITPQQSHAAFFTTVIIPAIVGGIASGTAVGTVSGLLGWGLKQAEE']
['A0A438UQY3', 'SP', '0.973132', 'MELQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MELQQTHRKINRPLVSLALVGALVSITPQQSHA', 33, 'AFFTTVIIPAIVGGIA', 'MELQQTHRKINRPLVSLALVGALVSITPQQSHAAFFTTVIIPAIVGGIASGTAVGTVSGLLGWGLKQAEE', 2.1, 2.1066666666666665, 0.006060606060606066]
EEEEEEEEEEE _A0A438V0L6_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQ

EEEEEEEEEEE _A0A496EZL0_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISSI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISSIPQESHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISSIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['A0A496EZL0', 'SP', '0.965989', 'MEIQQTHRKMNR', 'PLVSLVLAGALISSI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISSIPQESHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISSIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.12, -0.045454545454545525]
EEEEEEEEEEE _A0A496F353_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['A0A496F353', 'SP', '0.964691', 'MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.293333333333334, 0.033333333333333305]
EEEEEEEEEEE _A0A496F981_ ['MEIQQTHRKINR', 'PIISLALVGVLMGT', 'ELGA', 'MEIQQTHRKINRPIISLALVG

EEEEEEEEEEE _A0A4U3IH25_ ['MRRSEKLKMDVRQFAFLARQPSATLKPRNAFFGLPKRG', 'LALILANALFWQPL', 'LAQA', 'MRRSEKLKMDVRQFAFLARQPSATLKPRNAFFGLPKRGLALILANALFWQPLLAQA', 'EGIVVSAPGTTVGA', 'MRRSEKLKMDVRQFAFLARQPSATLKPRNAFFGLPKRGLALILANALFWQPLLAQAEGIVVSAPGTTVGA']
['A0A4U3IH25', 'SP', '0.501375', 'MRRSEKLKMDVRQFAFLARQPSATLKPRNAFFGLPKRG', 'LALILANALFWQPL', 'LAQA', 'MRRSEKLKMDVRQFAFLARQPSATLKPRNAFFGLPKRGLALILANALFWQPLLAQA', 56, 'EGIVVSAPGTTVGA', 'MRRSEKLKMDVRQFAFLARQPSATLKPRNAFFGLPKRGLALILANALFWQPLLAQAEGIVVSAPGTTVGA', 8, 1.5857142857142859, -0.005357142857142644]
EEEEEEEEEEE _A0A4U8TNF7_ ['MR', 'YIFFLLMTFMLI', 'AYS', 'MRYIFFLLMTFMLIAYS', 'STPKTDDLGYLPMNKK', 'MRYIFFLLMTFMLIAYSSTPKTDDLGYLPMNKKDLGIGSTPTPPPNEPIPSEKSRKLRMSMEKTRVPIIS']
['A0A4U8TNF7', 'SP', '0.990794', 'MR', 'YIFFLLMTFMLI', 'AYS', 'MRYIFFLLMTFMLIAYS', 17, 'STPKTDDLGYLPMNKK', 'MRYIFFLLMTFMLIAYSSTPKTDDLGYLPMNKKDLGIGSTPTPPPNEPIPSEKSRKLRMSMEKTRVPIIS', 1, 2.5500000000000003, 1.6294117647058823]
EEEEEEEEEEE _A0A4Y4SGY8_ ['MEIQQTHRKINR', 'PIISLALVGVLMGT

EEEEEEEEEEE _A0A5C8VVS8_ ['MTR', 'FVLDALALLALLL', 'GERG', 'MTRFVLDALALLALLLGERG', 'ADTVKAGLDGSVMTTV', 'MTRFVLDALALLALLLGERGADTVKAGLDGSVMTTVNLAEVISYYAKLGAGRHDIEMLLRPLPIRLFPVD']
['A0A5C8VVS8', 'SP', '0.998878', 'MTR', 'FVLDALALLALLL', 'GERG', 'MTRFVLDALALLALLLGERG', 20, 'ADTVKAGLDGSVMTTV', 'MTRFVLDALALLALLLGERGADTVKAGLDGSVMTTVNLAEVISYYAKLGAGRHDIEMLLRPLPIRLFPVD', 1, 2.7307692307692313, 1.1700000000000004]
EEEEEEEEEEE _A0A5D8MUH9_ ['MHQPPVRFTYR', 'LLSYLISTIIAGQPLL', 'PAVGA', 'MHQPPVRFTYRLLSYLISTIIAGQPLLPAVGA', 'VITPQNGAGMDKAANG', 'MHQPPVRFTYRLLSYLISTIIAGQPLLPAVGAVITPQNGAGMDKAANGVPVVNIATPNGAGISHNRFTDY']
['A0A5D8MUH9', 'SP', '0.972485', 'MHQPPVRFTYR', 'LLSYLISTIIAGQPLL', 'PAVGA', 'MHQPPVRFTYRLLSYLISTIIAGQPLLPAVGA', 32, 'VITPQNGAGMDKAANG', 'MHQPPVRFTYRLLSYLISTIIAGQPLLPAVGAVITPQNGAGMDKAANGVPVVNIATPNGAGISHNRFTDY', 2.1, 1.5750000000000002, 0.5937500000000002]
EEEEEEEEEEE _A0A5E1AJ98_ ['MKQDQVRFSQR', 'ALSALLSVLLATQPLL', 'PAVA', 'MKQDQVRFSQRALSALLSVLLATQPLLPAVA', 'ASITPSGNTQMDKAAN', 'MKQDQVRFSQRAL

EEEEEEEEEEE _A0A6I4CI71_ ['MEIQQTNRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTNRKMNRPLVSLVLAGALISAIPQESHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTNRKMNRPLVSLVLAGALISAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['A0A6I4CI71', 'SP', '0.964736', 'MEIQQTNRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTNRKMNRPLVSLVLAGALISAIPQESHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTNRKMNRPLVSLVLAGALISAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2, 2.293333333333334, 0.024242424242424298]
EEEEEEEEEEE _A0A6I4CPX3_ ['MKQFKKKSKKIKRSQKIILKR', 'PLWLMPLLIGGFA', 'SGAYA', 'MKQFKKKSKKIKRSQKIILKRPLWLMPLLIGGFASGAYA', 'DGTDILGLSWGEKSQK', 'MKQFKKKSKKIKRSQKIILKRPLWLMPLLIGGFASGAYADGTDILGLSWGEKSQKVCVHHPWYALWSCDK']
['A0A6I4CPX3', 'SP', '0.997490', 'MKQFKKKSKKIKRSQKIILKR', 'PLWLMPLLIGGFA', 'SGAYA', 'MKQFKKKSKKIKRSQKIILKRPLWLMPLLIGGFASGAYA', 39, 'DGTDILGLSWGEKSQK', 'MKQFKKKSKKIKRSQKIILKRPLWLMPLLIGGFASGAYADGTDILGLSWGEKSQKVCVHHPWYALWSCDK', 11, 1.6384615384615389, -0.2128205128205127]
EEEEEEEEEEE _A0A6I4D2J5_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISS

EEEEEEEEEEE _A0A7G7WF01_ ['MKSKNFKLSPSGR', 'LAASLAIIFVSL', 'NAYA', 'MKSKNFKLSPSGRLAASLAIIFVSLNAYA', 'GGIVPDAGNQGPNVSS', 'MKSKNFKLSPSGRLAASLAIIFVSLNAYAGGIVPDAGNQGPNVSSVNGGTQVINIVTPNNEGISHNQYQD']
['A0A7G7WF01', 'SP', '0.999003', 'MKSKNFKLSPSGR', 'LAASLAIIFVSL', 'NAYA', 'MKSKNFKLSPSGRLAASLAIIFVSLNAYA', 29, 'GGIVPDAGNQGPNVSS', 'MKSKNFKLSPSGRLAASLAIIFVSLNAYAGGIVPDAGNQGPNVSSVNGGTQVINIVTPNNEGISHNQYQD', 4, 2.6, 0.49655172413793097]
EEEEEEEEEEE _A0A7H4ZCF0_ ['MR', 'INPILLISILTFISI', 'KTSHS', 'MRINPILLISILTFISIKTSHS', 'REAEPKHHFNKESFSQ', 'MRINPILLISILTFISIKTSHSREAEPKHHFNKESFSQYQARHCKWETGYTCKDVPKDQGMPPPRTSEEI']
['A0A7H4ZCF0', 'SP', '0.999110', 'MR', 'INPILLISILTFISI', 'KTSHS', 'MRINPILLISILTFISIKTSHS', 22, 'REAEPKHHFNKESFSQ', 'MRINPILLISILTFISIKTSHSREAEPKHHFNKESFSQYQARHCKWETGYTCKDVPKDQGMPPPRTSEEI', 1, 2.253333333333333, 0.990909090909091]
EEEEEEEEEEE _A0A7H8UCQ1_ ['MMKQDQVRFSQR', 'ALSALLSVLLATQPLL', 'PAMA', 'MMKQDQVRFSQRALSALLSVLLATQPLLPAMA', 'ATITPSGNTQMDRAAN', 'MMKQDQVRFSQRALSALLSVLLATQPLLPAMAA

EEEEEEEEEEE _B4F0Z9_ ['MKSKNFKLSPSGR', 'LAASLAIIFVSL', 'NAYG', 'MKSKNFKLSPSGRLAASLAIIFVSLNAYG', 'NGIVPDAGHQGPDVSA', 'MKSKNFKLSPSGRLAASLAIIFVSLNAYGNGIVPDAGHQGPDVSAVNGGTQVINIVTPNNEGISHNQYQD']
['B4F0Z9', 'SP', '0.998959', 'MKSKNFKLSPSGR', 'LAASLAIIFVSL', 'NAYG', 'MKSKNFKLSPSGRLAASLAIIFVSLNAYG', 29, 'NGIVPDAGHQGPDVSA', 'MKSKNFKLSPSGRLAASLAIIFVSLNAYGNGIVPDAGHQGPDVSAVNGGTQVINIVTPNNEGISHNQYQD', 4, 2.6, 0.4206896551724137]
EEEEEEEEEEE _B5Z7P6_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['B5Z7P6', 'SP', '0.964691', 'MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALISAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.293333333333334, 0.033333333333333305]
EEEEEEEEEEE _B6JMA8_ ['MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHA', 'AFFTTVIIPAIVGGIA', 'MEI

EEEEEEEEEEE _E3DGX9_ ['MNKLCYRIIFNRARGLLMVVADIARSRTGSSRTRRDKIPMRCCR', 'LSPLSCAMLAAFAFVT', 'SPGEAQA', 'MNKLCYRIIFNRARGLLMVVADIARSRTGSSRTRRDKIPMRCCRLSPLSCAMLAAFAFVTSPGEAQA', 'GIV', 'MNKLCYRIIFNRARGLLMVVADIARSRTGSSRTRRDKIPMRCCRLSPLSCAMLAAFAFVTSPGEAQAGIV']
['E3DGX9', 'SP', '0.559970', 'MNKLCYRIIFNRARGLLMVVADIARSRTGSSRTRRDKIPMRCCR', 'LSPLSCAMLAAFAFVT', 'SPGEAQA', 'MNKLCYRIIFNRARGLLMVVADIARSRTGSSRTRRDKIPMRCCRLSPLSCAMLAAFAFVTSPGEAQA', 67, 'GIV', 'MNKLCYRIIFNRARGLLMVVADIARSRTGSSRTRRDKIPMRCCRLSPLSCAMLAAFAFVTSPGEAQAGIV', 10, 1.8062500000000001, 0.11343283582089547]
EEEEEEEEEEE _E3DJX2_ ['MMNDCQPVSLARR', 'ALSYLICYLIAFQPLL', 'TAAA', 'MMNDCQPVSLARRALSYLICYLIAFQPLLTAAA', 'AEITPVTPGTQMDAAG', 'MMNDCQPVSLARRALSYLICYLIAFQPLLTAAAAEITPVTPGTQMDAAGNGVPVVNIAAPNQAGISYNQY']
['E3DJX2', 'SP', '0.998983', 'MMNDCQPVSLARR', 'ALSYLICYLIAFQPLL', 'TAAA', 'MMNDCQPVSLARRALSYLICYLIAFQPLLTAAA', 33, 'AEITPVTPGTQMDAAG', 'MMNDCQPVSLARRALSYLICYLIAFQPLLTAAAAEITPVTPGTQMDAAGNGVPVVNIAAPNQAGISYNQY', 1, 1.7750000000000001, 0.827272

EEEEEEEEEEE _I9UI64_ ['MEIQQTHRKMNR', 'PLVSLVLAGALVSAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALVSAIPQESHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALVSAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['I9UI64', 'SP', '0.962319', 'MEIQQTHRKMNR', 'PLVSLVLAGALVSAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALVSAIPQESHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALVSAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.273333333333334, 0.02424242424242419]
EEEEEEEEEEE _I9VD98_ ['MEIQQTHRKMNR', 'PLVSLVLAGALVSAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALVSAIPQESHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALVSAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['I9VD98', 'SP', '0.962319', 'MEIQQTHRKMNR', 'PLVSLVLAGALVSAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALVSAIPQESHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKMNRPLVSLVLAGALVSAIPQESHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.273333333333334, 0.02424242424242419]
EEEEEEEEEEE _I9VEN3_ ['MEIQQTHRKMNR', 'PLVSLVLAGALISAI', 'PQESHA', 'MEIQQTHRKMNRPLVSLVLAGALISAIP

EEEEEEEEEEE _K4XJD7_ ['MK', 'IIIFRVLTFFFVIFSV', 'NVVA', 'MKIIIFRVLTFFFVIFSVNVVA', 'KEFTLDFSTAKTYVDS', 'MKIIIFRVLTFFFVIFSVNVVAKEFTLDFSTAKTYVDSLNVIRSAIGTPLQTISSGGTSLLMIDSGTGDN']
['K4XJD7', 'SP', '0.999073', 'MK', 'IIIFRVLTFFFVIFSV', 'NVVA', 'MKIIIFRVLTFFFVIFSVNVVA', 22, 'KEFTLDFSTAKTYVDS', 'MKIIIFRVLTFFFVIFSVNVVAKEFTLDFSTAKTYVDSLNVIRSAIGTPLQTISSGGTSLLMIDSGTGDN', 1, 2.6500000000000004, 2.140909090909091]
EEEEEEEEEEE _K7Y329_ ['MEKTHRKINR', 'PLVSLVLAGALISAT', 'PQESKA', 'MEKTHRKINRPLVSLVLAGALISATPQESKA', 'AFFTTVIIPAIIGGIA', 'MEKTHRKINRPLVSLVLAGALISATPQESKAAFFTTVIIPAIIGGIASGVAAGTASGLLSWGLKQAEQAN']
['K7Y329', 'SP', '0.967850', 'MEKTHRKINR', 'PLVSLVLAGALISAT', 'PQESKA', 'MEKTHRKINRPLVSLVLAGALISATPQESKA', 31, 'AFFTTVIIPAIIGGIA', 'MEKTHRKINRPLVSLVLAGALISATPQESKAAFFTTVIIPAIIGGIASGVAAGTASGLLSWGLKQAEQAN', 3.1, 1.946666666666667, -0.11612903225806462]
EEEEEEEEEEE _K7YBY9_ ['MELQQTHRKINR', 'PIISLALVGALIGT', 'ELGA', 'MELQQTHRKINRPIISLALVGALIGTELGA', 'NTPNDPIHSESRAFFT', 'MELQQTHRKINRPIISLALVGALIGTELGAN

EEEEEEEEEEE _O34111_ ['MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE']
['O34111', 'SP', '0.952925', 'MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHA', 33, 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHAAFFTTVIIPAIVGGIATGTAVGTVSGLLSWGLKQAEE', 2.1, 2.1066666666666665, 0.027272727272727372]
EEEEEEEEEEE _O85370_ ['MK', 'ALKITGLSLIISATLA', 'AQAGA', 'MKALKITGLSLIISATLAAQAGA', 'AEPIYPDQLRLFSLGE', 'MKALKITGLSLIISATLAAQAGAAEPIYPDQLRLFSLGEDVCGADYRPINREEAQSVRNNIVAMMGQWQI']
['O85370', 'SP', '0.999108', 'MK', 'ALKITGLSLIISATLA', 'AQAGA', 'MKALKITGLSLIISATLAAQAGA', 23, 'AEPIYPDQLRLFSLGE', 'MKALKITGLSLIISATLAAQAGAAEPIYPDQLRLFSLGEDVCGADYRPINREEAQSVRNNIVAMMGQWQI', 1, 1.675, 1.1434782608695655]
EEEEEEEEEEE _O87018_ ['MEIQQTHRKINR', 'PLVSLALVGALVSIT', 'PQQSHA', 'MEIQQTHRKINRPLVSLALVGALVSITPQQSHA', 'AFFTTVIIPAIVGGIA', 'MEIQQTHRKINRPLVSLALVGALV

EEEEEEEEEEE _Q5WPW9_ ['MKC', 'ILFKWVLCLLLGFSSV', 'SYS', 'MKCILFKWVLCLLLGFSSVSYS', 'REFTIDFSTQQSYVSS', 'MKCILFKWVLCLLLGFSSVSYSREFTIDFSTQQSYVSSLNSIRTEISTPLEHISQGTTSVSVINHTPPGS']
['Q5WPW9', 'SP', '0.999204', 'MKC', 'ILFKWVLCLLLGFSSV', 'SYS', 'MKCILFKWVLCLLLGFSSVSYS', 22, 'REFTIDFSTQQSYVSS', 'MKCILFKWVLCLLLGFSSVSYSREFTIDFSTQQSYVSSLNSIRTEISTPLEHISQGTTSVSVINHTPPGS', 1, 2.075, 1.4000000000000001]
[[0.0e+000 0.0e+000 0.0e+000 0.0e+000 0.0e+000 0.0e+000 0.0e+000 4.9e-324
  9.9e-324 9.9e-324 1.5e-323 2.0e-323 2.0e-323]]
[[0.0e+000 0.0e+000 0.0e+000 0.0e+000 0.0e+000 0.0e+000 0.0e+000 4.9e-324
  9.9e-324 9.9e-324 1.5e-323 2.0e-323 2.0e-323]]
EEEEEEEEEEE _Q5WPY1_ ['MKC', 'ILFKWVLCLLLGF', 'SSVSYF', 'MKCILFKWVLCLLLGFSSVSYF', 'REFTIDFSTQQSYVSS', 'MKCILFKWVLCLLLGFSSVSYFREFTIDFSTQQSYVSSLNSIRTEISTPLEHISQGTTSVSVINHTPPGS']
['Q5WPY1', 'SP', '0.560646', 'MKC', 'ILFKWVLCLLLGF', 'SSVSYF', 'MKCILFKWVLCLLLGFSSVSYF', 22, 'REFTIDFSTQQSYVSS', 'MKCILFKWVLCLLLGFSSVSYFREFTIDFSTQQSYVSSLNSIRTEISTPLEHISQGTTSVSVINHTPPGS'

EEEEEEEEEEE _Q8GGK8_ ['MKC', 'ILFKWVLCLLLGFSSV', 'SYS', 'MKCILFKWVLCLLLGFSSVSYS', 'REFTIDFSTQQSYVSS', 'MKCILFKWVLCLLLGFSSVSYSREFTIDFSTQQSYVSSLNSIRTEISTPLEHISQGTTSVSVINHTPPGS']
['Q8GGK8', 'SP', '0.999204', 'MKC', 'ILFKWVLCLLLGFSSV', 'SYS', 'MKCILFKWVLCLLLGFSSVSYS', 22, 'REFTIDFSTQQSYVSS', 'MKCILFKWVLCLLLGFSSVSYSREFTIDFSTQQSYVSSLNSIRTEISTPLEHISQGTTSVSVINHTPPGS', 1, 2.075, 1.4000000000000001]
EEEEEEEEEEE _Q8GGK9_ ['MKC', 'ILFKWVLCLLLGFSSV', 'SYS', 'MKCILFKWVLCLLLGFSSVSYS', 'REFTIDFSTQQSYVSS', 'MKCILFKWVLCLLLGFSSVSYSREFTIDFSTQQSYVSSLNSIRTEISTPLEHISQGTTSVSVINHTPPGS']
['Q8GGK9', 'SP', '0.999204', 'MKC', 'ILFKWVLCLLLGFSSV', 'SYS', 'MKCILFKWVLCLLLGFSSVSYS', 22, 'REFTIDFSTQQSYVSS', 'MKCILFKWVLCLLLGFSSVSYSREFTIDFSTQQSYVSSLNSIRTEISTPLEHISQGTTSVSVINHTPPGS', 1, 2.075, 1.4000000000000001]
EEEEEEEEEEE _Q8GN63_ ['MKK', 'LKITGLSLIISGLLM', 'AQAHA', 'MKKLKITGLSLIISGLLMAQAHA', 'AEPVYPDQLRLFSLGQ', 'MKKLKITGLSLIISGLLMAQAHAAEPVYPDQLRLFSLGQEVCGDKYRPITREEAQSVKSNIVNMMGQWQI']
['Q8GN63', 'SP', '0.999103', 'MKK', 

EEEEEEEEEEE _Q9ZHU7_ ['MELQQTHRKINR', 'PLVSLALVGLLVSIT', 'PQKSHA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHA', 'AFFTTVIIPAIVGGIA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHAAFFTTVIIPAIVGGIATGAAAGTVSGFLAGGLKQAEE']
['Q9ZHU7', 'SP', '0.991455', 'MELQQTHRKINR', 'PLVSLALVGLLVSIT', 'PQKSHA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHA', 33, 'AFFTTVIIPAIVGGIA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHAAFFTTVIIPAIVGGIATGAAAGTVSGFLAGGLKQAEE', 2.1, 2.2399999999999998, 0.05454545454545444]
EEEEEEEEEEE _Q9ZHU8_ ['MELQQTHRKINR', 'PLVSLALVGLLVSIT', 'PQKSHA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHA', 'AFFTTVIIPAIVGGIA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHAAFFTTVIIPAIVGGIATGAAVGTVSGLLGWGLKQAEE']
['Q9ZHU8', 'SP', '0.952868', 'MELQQTHRKINR', 'PLVSLALVGLLVSIT', 'PQKSHA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHA', 33, 'AFFTTVIIPAIVGGIA', 'MELQQTHRKINRPLVSLALVGLLVSITPQKSHAAFFTTVIIPAIVGGIATGAAVGTVSGLLGWGLKQAEE', 2.1, 2.2399999999999998, 0.05454545454545444]
EEEEEEEEEEE _Q9ZHU9_ ['MELQQTHRKINR', 'PLVSLALVGLLVSIT', 'PQKSHA', 'MELQQTHRKINRPLVSLALVGLLVSI

EEEEEEEEEEE _A0A060AKM1_ ['MKK', 'IILALVLMLFSFC', 'TLG', 'MKKIILALVLMLFSFCTLG', 'QETASMHLDDTLSAPI', 'MKKIILALVLMLFSFCTLGQETASMHLDDTLSAPIAAEINRKACDTQTPSPSEENDDWCCEVCCNPACA']
['A0A060AKM1', 'SP', '0.999018', 'MKK', 'IILALVLMLFSFC', 'TLG', 'MKKIILALVLMLFSFCTLG', 19, 'QETASMHLDDTLSAPI', 'MKKIILALVLMLFSFCTLGQETASMHLDDTLSAPIAAEINRKACDTQTPSPSEENDDWCCEVCCNPACA', 2, 3.0307692307692307, 1.9052631578947365]
EEEEEEEEEEE _A0A068ADS1_ ['MVK', 'IIFVFFIFLSSF', 'SYA', 'MVKIIFVFFIFLSSFSYA', 'NDDKLYRADSRPPDEI', 'MVKIIFVFFIFLSSFSYANDDKLYRADSRPPDEIKQSGGLMP']
['A0A068ADS1', 'SP', '0.753461', 'MVK', 'IIFVFFIFLSSF', 'SYA', 'MVKIIFVFFIFLSSFSYA', 18, 'NDDKLYRADSRPPDEI', 'MVKIIFVFFIFLSSFSYANDDKLYRADSRPPDEIKQSGGLMP', 1, 2.8249999999999997, 1.9888888888888892]
EEEEEEEEEEE _A0A076F8D7_ ['MK', 'IFIKLLLVISFVI', 'PSFS', 'MKIFIKLLLVISFVIPSFS', 'AEDENVMPLVSLRSLT', 'MKIFIKLLLVISFVIPSFSAEDENVMPLVSLRSLTTGILIAYEDNAPNLIDRNWRLKDVILPFEISKHYP']
['A0A076F8D7', 'SP', '0.998037', 'MK', 'IFIKLLLVISFVI', 'PSFS', 'MKIFIKLLLVISFVIPSFS

EEEEEEEEEEE _A0A0T9Q4I3_ ['MKK', 'IVFVLTLMLFSF', 'GTLG', 'MKKIVFVLTLMLFSFGTLG', 'QETASGQVGDVSSSTI', 'MKKIVFVLTLMLFSFGTLGQETASGQVGDVSSSTIATEVSEAECGTQSATTQGENEWDWCCELCCNPACF']
['A0A0T9Q4I3', 'SP', '0.998919', 'MKK', 'IVFVLTLMLFSF', 'GTLG', 'MKKIVFVLTLMLFSFGTLG', 19, 'QETASGQVGDVSSSTI', 'MKKIVFVLTLMLFSFGTLGQETASGQVGDVSSSTIATEVSEAECGTQSATTQGENEWDWCCELCCNPACF', 2, 2.7583333333333333, 1.5526315789473688]
EEEEEEEEEEE _A0A0T9Q5J1_ ['MRK', 'IVFVLVLMLSSF', 'GTFG', 'MRKIVFVLVLMLSSFGTFG', 'QETASRALSDALSTPI', 'MRKIVFVLVLMLSSFGTFGQETASRALSDALSTPIAAEVNKKACDTQPQIPQGDIDWSVCCEICCIPACF']
['A0A0T9Q5J1', 'SP', '0.998843', 'MRK', 'IVFVLVLMLSSF', 'GTFG', 'MRKIVFVLVLMLSSFGTFG', 19, 'QETASRALSDALSTPI', 'MRKIVFVLVLMLSSFGTFGQETASRALSDALSTPIAAEVNKKACDTQPQIPQGDIDWSVCCEICCIPACF', 2, 2.8666666666666667, 1.536842105263158]
EEEEEEEEEEE _A0A0T9QS05_ ['MKKNKFKLSPAGK', 'LTVILSLIITPVTF', 'SYA', 'MKKNKFKLSPAGKLTVILSLIITPVTFSYA', 'SGIVATTPNDIPGYLV', 'MKKNKFKLSPAGKLTVILSLIITPVTFSYASGIVATTPNDIPGYLVSFSDRENMTPEVQQVPGAATEIKI']
[

EEEEEEEEEEE _A0A1J5UMI1_ ['MKK', 'YYLLIALLIWTF', 'ADASA', 'MKKYYLLIALLIWTFADASA', 'TIFPSEDKQREIFTNN', 'MKKYYLLIALLIWTFADASATIFPSEDKQREIFTNNSASERDKIKSVYSEKMRKQTEGLEITSEEKLYFD']
['A0A1J5UMI1', 'SP', '0.996024', 'MKK', 'YYLLIALLIWTF', 'ADASA', 'MKKYYLLIALLIWTFADASA', 20, 'TIFPSEDKQREIFTNN', 'MKKYYLLIALLIWTFADASATIFPSEDKQREIFTNNSASERDKIKSVYSEKMRKQTEGLEITSEEKLYFD', 2, 2.0500000000000003, 0.9900000000000002]
EEEEEEEEEEE _A0A1M4N4K8_ ['MSG', 'AVMFLMGLASLGILGGL', 'SGG', 'MSGAVMFLMGLASLGILGGLSGG', 'MGSEDSGESDDDRIVG', 'MSGAVMFLMGLASLGILGGLSGGMGSEDSGESDDDRIVGDAEDDTLEGADGNDLLLGEEGNDLLDGGQGN']
['A0A1M4N4K8', 'SP', '0.569459', 'MSG', 'AVMFLMGLASLGILGGL', 'SGG', 'MSGAVMFLMGLASLGILGGLSGG', 23, 'MGSEDSGESDDDRIVG', 'MSGAVMFLMGLASLGILGGLSGGMGSEDSGESDDDRIVGDAEDDTLEGADGNDLLLGEEGNDLLDGGQGN', 0, 2.088235294117647, 1.504347826086957]
EEEEEEEEEEE _A0A1N6LRN8_ ['MR', 'FLSFFVIIFLIFFGAA', 'RNTSA', 'MRFLSFFVIIFLIFFGAARNTSA', 'QPAQIPEFVYRADTRA', 'MRFLSFFVIIFLIFFGAARNTSAQPAQIPEFVYRADTRAPNEVRRTGGFIARGVDASRPGTIVDLSLYN

EEEEEEEEEEE _A0A2A9K9V9_ ['MKKFTPFPNIKK', 'LFSIFIMTLWV', 'SHSAIA', 'MKKFTPFPNIKKLFSIFIMTLWVSHSAIA', 'ASPESFTKISISNVWS', 'MKKFTPFPNIKKLFSIFIMTLWVSHSAIAASPESFTKISISNVWSGAIIVVWATGGNSWLWGYTPYDAQS']
['A0A2A9K9V9', 'SP', '0.998414', 'MKKFTPFPNIKK', 'LFSIFIMTLWV', 'SHSAIA', 'MKKFTPFPNIKKLFSIFIMTLWVSHSAIA', 29, 'ASPESFTKISISNVWS', 'MKKFTPFPNIKKLFSIFIMTLWVSHSAIAASPESFTKISISNVWSGAIIVVWATGGNSWLWGYTPYDAQS', 4, 2.354545454545455, 0.6275862068965516]
EEEEEEEEEEE _A0A2C9NZ91_ ['MTK', 'LIFIMAILL', 'SGSVFA', 'MTKLIFIMAILLSGSVFA', 'NNLEFNLSFKDMQSYV', 'MTKLIFIMAILLSGSVFANNLEFNLSFKDMQSYVQSLQRIREGLGHSMPNVVVGTTSVYQINAGATNDGV']
['A0A2C9NZ91', 'SP', '0.999007', 'MTK', 'LIFIMAILL', 'SGSVFA', 'MTKLIFIMAILLSGSVFA', 18, 'NNLEFNLSFKDMQSYV', 'MTKLIFIMAILLSGSVFANNLEFNLSFKDMQSYVQSLQRIREGLGHSMPNVVVGTTSVYQINAGATNDGV', 1, 3.488888888888889, 1.9722222222222219]
EEEEEEEEEEE _A0A2D0IXY7_ ['MSEQHSPKMK', 'YLIYLITYLTAVSPL', 'HPAIG', 'MSEQHSPKMKYLIYLITYLTAVSPLHPAIG', 'KTNPDAHGLQEGDIVK', 'MSEQHSPKMKYLIYLITYLTAVSPLHPAIGKTNPDAHGL

EEEEEEEEEEE _A0A2N7JWN6_ ['MNTR', 'FLLLLCCLSF', 'AGFS', 'MNTRFLLLLCCLSFAGFS', 'QPFDTLKQPNRSEEEI', 'MNTRFLLLLCCLSFAGFSQPFDTLKQPNRSEEEIIQLAEDFKDWSKASSGWRYSFITANEKEAVEDFSIS']
['A0A2N7JWN6', 'SP', '0.999122', 'MNTR', 'FLLLLCCLSF', 'AGFS', 'MNTRFLLLLCCLSFAGFS', 18, 'QPFDTLKQPNRSEEEI', 'MNTRFLLLLCCLSFAGFSQPFDTLKQPNRSEEEIIQLAEDFKDWSKASSGWRYSFITANEKEAVEDFSIS', 1, 2.88, 1.4111111111111112]
EEEEEEEEEEE _A0A2N7JWQ3_ ['MKK', 'FIWVLCLSLLTSF', 'SVQS', 'MKKFIWVLCLSLLTSFSVQS', 'EEFAEFKGRSLSHETQ', 'MKKFIWVLCLSLLTSFSVQSEEFAEFKGRSLSHETQQQIADNLMDWATTHHVNAEKKMVPEEYSAIKNYG']
['A0A2N7JWQ3', 'SP', '0.998780', 'MKK', 'FIWVLCLSLLTSF', 'SVQS', 'MKKFIWVLCLSLLTSFSVQS', 20, 'EEFAEFKGRSLSHETQ', 'MKKFIWVLCLSLLTSFSVQSEEFAEFKGRSLSHETQQQIADNLMDWATTHHVNAEKKMVPEEYSAIKNYG', 2, 2.2153846153846155, 1.1]
EEEEEEEEEEE _A0A2N7K5Q5_ ['MSYSH', 'IHYMLALCLTAIPF', 'TSLA', 'MSYSHIHYMLALCLTAIPFTSLA', 'ALKYNAQYYSADSIEA', 'MSYSHIHYMLALCLTAIPFTSLAALKYNAQYYSADSIEAVLEDMSDPGFESNYGNFAKIRSQLHQQAGMQ']
['A0A2N7K5Q5', 'SP', '0.998814', 'MSYSH', '

EEEEEEEEEEE _A0A329VXE3_ ['MSKRNNSVAR', 'GTSYLLIYLTAIQPL', 'HPAIA', 'MSKRNNSVARGTSYLLIYLTAIQPLHPAIA', 'AGITPDNNHTQVQNQG', 'MSKRNNSVARGTSYLLIYLTAIQPLHPAIAAGITPDNNHTQVQNQGNVPVVNIATPNNAGISHNTYKEFN']
['A0A329VXE3', 'SP', '0.998834', 'MSKRNNSVAR', 'GTSYLLIYLTAIQPL', 'HPAIA', 'MSKRNNSVARGTSYLLIYLTAIQPLHPAIA', 30, 'AGITPDNNHTQVQNQG', 'MSKRNNSVARGTSYLLIYLTAIQPLHPAIAAGITPDNNHTQVQNQGNVPVVNIATPNNAGISHNTYKEFN', 3, 1.0466666666666666, 0.18]
EEEEEEEEEEE _A0A329W4Q1_ ['MDRRNGPMAR', 'GACYLLIYLTAVYPL', 'HPAIA', 'MDRRNGPMARGACYLLIYLTAVYPLHPAIA', 'AGIAPDNNRTQVQNQG', 'MDRRNGPMARGACYLLIYLTAVYPLHPAIAAGIAPDNNRTQVQNQGNVPIVNIATPNGAGISHNTYKEFN']
['A0A329W4Q1', 'SP', '0.999085', 'MDRRNGPMAR', 'GACYLLIYLTAVYPL', 'HPAIA', 'MDRRNGPMARGACYLLIYLTAVYPLHPAIA', 30, 'AGIAPDNNRTQVQNQG', 'MDRRNGPMARGACYLLIYLTAVYPLHPAIAAGIAPDNNRTQVQNQGNVPIVNIATPNGAGISHNTYKEFN', 2, 1.5599999999999998, 0.3266666666666668]
EEEEEEEEEEE _A0A329W852_ ['MDRRNGPMAR', 'GACYLLIYLTAVYPL', 'HPAIA', 'MDRRNGPMARGACYLLIYLTAVYPLHPAIA', 'AGIAPDNNRTQVQNQG', 

EEEEEEEEEEE _A0A3T5LC12_ ['MIK', 'HVLLFFVFISF', 'SVSA', 'MIKHVLLFFVFISFSVSA', 'NDFFRADSRTPDEIRR', 'MIKHVLLFFVFISFSVSANDFFRADSRTPDEIRRAGGLLPRGQQEAYERGTPININLYEHARGTVTGNTR']
['A0A3T5LC12', 'SP', '0.998959', 'MIK', 'HVLLFFVFISF', 'SVSA', 'MIKHVLLFFVFISFSVSA', 18, 'NDFFRADSRTPDEIRR', 'MIKHVLLFFVFISFSVSANDFFRADSRTPDEIRRAGGLLPRGQQEAYERGTPININLYEHARGTVTGNTR', 1, 2.518181818181818, 1.9222222222222223]
EEEEEEEEEEE _A0A3W4GSP4_ ['MKK', 'LMLAIFISVLSF', 'PSFS', 'MKKLMLAIFISVLSFPSFS', 'QKAESVDSSKEKITLD', 'MKKLMLAIFISVLSFPSFSQKAESVDSSKEKITLDTKKCNVVKNNSEKKSENMNNTFYCCELCCNPACAG']
['A0A3W4GSP4', 'SP', '0.999033', 'MKK', 'LMLAIFISVLSF', 'PSFS', 'MKKLMLAIFISVLSFPSFS', 19, 'QKAESVDSSKEKITLD', 'MKKLMLAIFISVLSFPSFSQKAESVDSSKEKITLDTKKCNVVKNNSEKKSENMNNTFYCCELCCNPACAG', 2, 2.6916666666666664, 1.3684210526315788]
EEEEEEEEEEE _A0A3Z5DTE1_ ['MK', 'IIIFRVLTFFFVIFSV', 'NVVA', 'MKIIIFRVLTFFFVIFSVNVVA', 'KEFTLDFSTAKTYVDS', 'MKIIIFRVLTFFFVIFSVNVVAKEFTLDFSTAKTYVDSLNVIRSAIGIPLQTISSGGTSLLMIDSGTGDN']
['A0A3Z5DTE1', 'SP', 

EEEEEEEEEEE _A0A5C4RM01_ ['MSKHNNSVAR', 'GTSYLLIYLTAIQPL', 'HPAIA', 'MSKHNNSVARGTSYLLIYLTAIQPLHPAIA', 'AGITPDNNHTQVQNQS', 'MSKHNNSVARGTSYLLIYLTAIQPLHPAIAAGITPDNNHTQVQNQSNIPVVNIATPNNAGISHNTYKEFN']
['A0A5C4RM01', 'SP', '0.998530', 'MSKHNNSVAR', 'GTSYLLIYLTAIQPL', 'HPAIA', 'MSKHNNSVARGTSYLLIYLTAIQPLHPAIA', 30, 'AGITPDNNHTQVQNQS', 'MSKHNNSVARGTSYLLIYLTAIQPLHPAIAAGITPDNNHTQVQNQSNIPVVNIATPNNAGISHNTYKEFN', 2.1, 1.0466666666666666, 0.22333333333333322]
EEEEEEEEEEE _A0A5C5CFI6_ ['MNNQPPVRLTYR', 'LLSYLISALLAGQPLL', 'PAMA', 'MNNQPPVRLTYRLLSYLISALLAGQPLLPAMA', 'ATLTPQGKATTDKAAN', 'MNNQPPVRLTYRLLSYLISALLAGQPLLPAMAATLTPQGKATTDKAANGVPVVNINTPNGTGISHNQFKD']
['A0A5C5CFI6', 'SP', '0.998351', 'MNNQPPVRLTYR', 'LLSYLISALLAGQPLL', 'PAMA', 'MNNQPPVRLTYRLLSYLISALLAGQPLLPAMA', 32, 'ATLTPQGKATTDKAAN', 'MNNQPPVRLTYRLLSYLISALLAGQPLLPAMAATLTPQGKATTDKAANGVPVVNINTPNGTGISHNQFKD', 2, 1.6437500000000003, 0.48125000000000007]
EEEEEEEEEEE _A0A5C5H8E5_ ['MKHNNFRLSAAGK', 'LTAAQAIILAA', 'SSSAYA', 'MKHNNFRLSAAGKLTAAQAIILAASSS

EEEEEEEEEEE _A0A6M8M6V7_ ['MDVRQFAFLARQPSAALKPRDSFFGLPKRG', 'LVLILINALFWQPL', 'LAQA', 'MDVRQFAFLARQPSAALKPRDSFFGLPKRGLVLILINALFWQPLLAQA', 'EGIVVSAPGTTVGQAG', 'MDVRQFAFLARQPSAALKPRDSFFGLPKRGLVLILINALFWQPLLAQAEGIVVSAPGTTVGQAGNGVPVV']
['A0A6M8M6V7', 'SP', '0.818962', 'MDVRQFAFLARQPSAALKPRDSFFGLPKRG', 'LVLILINALFWQPL', 'LAQA', 'MDVRQFAFLARQPSAALKPRDSFFGLPKRGLVLILINALFWQPLLAQA', 48, 'EGIVVSAPGTTVGQAG', 'MDVRQFAFLARQPSAALKPRDSFFGLPKRGLVLILINALFWQPLLAQAEGIVVSAPGTTVGQAGNGVPVV', 4, 1.9500000000000004, 0.41875]
EEEEEEEEEEE _A0A6M8SYA4_ ['MKKN', 'IWMIVISTGLALGGA', 'PTEGIVA', 'MKKNIWMIVISTGLALGGAPTEGIVA', 'GTLSPYINTEIKKATE', 'MKKNIWMIVISTGLALGGAPTEGIVAGTLSPYINTEIKKATEGNDTANILAHGVWGAMEAASQGGSALGG']
['A0A6M8SYA4', 'SP', '0.998861', 'MKKN', 'IWMIVISTGLALGGA', 'PTEGIVA', 'MKKNIWMIVISTGLALGGAPTEGIVA', 26, 'GTLSPYINTEIKKATE', 'MKKNIWMIVISTGLALGGAPTEGIVAGTLSPYINTEIKKATEGNDTANILAHGVWGAMEAASQGGSALGG', 2, 1.8133333333333337, 0.8499999999999998]
EEEEEEEEEEE _A0A6M8T0G4_ ['MKK', 'NIWMIVISTGL', 'ALG', 'MKKNIWM

EEEEEEEEEEE _A0A7U9KKI1_ ['MKNNNFRLSGAGK', 'LAASLAIILASLG', 'STYA', 'MKNNNFRLSGAGKLAASLAIILASLGSTYA', 'GEIVAANGANGPGVST', 'MKNNNFRLSGAGKLAASLAIILASLGSTYAGEIVAANGANGPGVSTAGNGAQVVNIVTPNDQGLSHNQYQ']
['A0A7U9KKI1', 'SP', '0.998900', 'MKNNNFRLSGAGK', 'LAASLAIILASLG', 'STYA', 'MKNNNFRLSGAGKLAASLAIILASLGSTYA', 30, 'GEIVAANGANGPGVST', 'MKNNNFRLSGAGKLAASLAIILASLGSTYAGEIVAANGANGPGVSTAGNGAQVVNIVTPNDQGLSHNQYQ', 3, 2.2615384615384615, 0.4766666666666666]
EEEEEEEEEEE _A0A7U9PVJ6_ ['MKHNNNQNLNIPQR', 'LLSYTLCALLAGQPLL', 'PALA', 'MKHNNNQNLNIPQRLLSYTLCALLAGQPLLPALA', 'EGVNVAEGNTRVDQAA', 'MKHNNNQNLNIPQRLLSYTLCALLAGQPLLPALAEGVNVAEGNTRVDQAANGVPVINIATPNQAGISHNK']
['A0A7U9PVJ6', 'SP', '0.998867', 'MKHNNNQNLNIPQR', 'LLSYTLCALLAGQPLL', 'PALA', 'MKHNNNQNLNIPQRLLSYTLCALLAGQPLLPALA', 34, 'EGVNVAEGNTRVDQAA', 'MKHNNNQNLNIPQRLLSYTLCALLAGQPLLPALAEGVNVAEGNTRVDQAANGVPVINIATPNQAGISHNK', 2.1, 1.5250000000000001, 0.0794117647058824]
EEEEEEEEEEE _A0A7V7JCM6_ ['MMKQDQVRFSQR', 'ALSALLSVLLATQPLL', 'PAVA', 'MMKQDQVRFSQRALSALLS

EEEEEEEEEEE _A0A7Z8ZTD8_ ['MVVADIARSGRAGTSLSSRTGYPHRQRICR', 'VTPLAFSLWLASGMV', 'HSVSA', 'MVVADIARSGRAGTSLSSRTGYPHRQRICRVTPLAFSLWLASGMVHSVSA', 'AGIVADHGAPGHQQPT', 'MVVADIARSGRAGTSLSSRTGYPHRQRICRVTPLAFSLWLASGMVHSVSAAGIVADHGAPGHQQPTITQT']
['A0A7Z8ZTD8', 'SP', '0.577654', 'MVVADIARSGRAGTSLSSRTGYPHRQRICR', 'VTPLAFSLWLASGMV', 'HSVSA', 'MVVADIARSGRAGTSLSSRTGYPHRQRICRVTPLAFSLWLASGMVHSVSA', 50, 'AGIVADHGAPGHQQPT', 'MVVADIARSGRAGTSLSSRTGYPHRQRICRVTPLAFSLWLASGMVHSVSAAGIVADHGAPGHQQPTITQT', 5.1, 1.5266666666666664, 0.184]
EEEEEEEEEEE _A0A7Z9D171_ ['MVVADIARSGRAGTSLSSRTGYPHRQRICR', 'VTPLAFSLWLASGMV', 'HSVSA', 'MVVADIARSGRAGTSLSSRTGYPHRQRICRVTPLAFSLWLASGMVHSVSA', 'AGIVADHGAPGHQQPT', 'MVVADIARSGRAGTSLSSRTGYPHRQRICRVTPLAFSLWLASGMVHSVSAAGIVADHGAPGHQQPTITQT']
['A0A7Z9D171', 'SP', '0.577654', 'MVVADIARSGRAGTSLSSRTGYPHRQRICR', 'VTPLAFSLWLASGMV', 'HSVSA', 'MVVADIARSGRAGTSLSSRTGYPHRQRICRVTPLAFSLWLASGMVHSVSA', 50, 'AGIVADHGAPGHQQPT', 'MVVADIARSGRAGTSLSSRTGYPHRQRICRVTPLAFSLWLASGMVHSVSAAGIVADHGAPGHQQPTITQT', 5.

EEEEEEEEEEE _E3PPC0_ ['MKK', 'SILFIFLSVLSF', 'SPFA', 'MKKSILFIFLSVLSFSPFA', 'QDAKPAGSSKEKITLE', 'MKKSILFIFLSVLSFSPFAQDAKPAGSSKEKITLESKKCNIVKKNNESSPESMNSSNYCCELCCNPACTG']
['E3PPC0', 'SP', '0.999169', 'MKK', 'SILFIFLSVLSF', 'SPFA', 'MKKSILFIFLSVLSFSPFA', 19, 'QDAKPAGSSKEKITLE', 'MKKSILFIFLSVLSFSPFAQDAKPAGSSKEKITLESKKCNIVKKNNESSPESMNSSNYCCELCCNPACTG', 2, 2.5500000000000003, 1.4157894736842105]
EEEEEEEEEEE _E6YVQ4_ ['MFR', 'LFITLIIVIFSHF', 'VFA', 'MFRLFITLIIVIFSHFVFA', 'DDDRIVYRAAMDTPKD', 'MFRLFITLIIVIFSHFVFADDDRIVYRAAMDTPKDLKAAGGFFPRGMDRTRPNQPPPDISLWNHVNGTGT']
['E6YVQ4', 'SP', '0.999131', 'MFR', 'LFITLIIVIFSHF', 'VFA', 'MFRLFITLIIVIFSHFVFA', 19, 'DDDRIVYRAAMDTPKD', 'MFRLFITLIIVIFSHFVFADDDRIVYRAAMDTPKDLKAAGGFFPRGMDRTRPNQPPPDISLWNHVNGTGT', 1, 2.576923076923077, 2.2368421052631575]
EEEEEEEEEEE _E9CK44_ ['MGR', 'LAYSVALLFLI', 'RSSACFA', 'MGRLAYSVALLFLIRSSACFA', 'VLPLEVYRSVMEDPEL', 'MGRLAYSVALLFLIRSSACFAVLPLEVYRSVMEDPELVKADDGFLPKGMDGTRPNQPIPSVSLYNHAMGS']
['E9CK44', 'SP', '0.999149', 'MGR', 'LA

EEEEEEEEEEE _V5SKL9_ ['MVK', 'IIFVFFIFLSSF', 'SYA', 'MVKIIFVFFIFLSSFSYA', 'NDDKLYRADSRPPDEI', 'MVKIIFVFFIFLSSFSYANDDKLYRADSRPPDEIKQSGGLMPRGQSEYFDRGTQMNINLYDHARGTQTGF']
['V5SKL9', 'SP', '0.998963', 'MVK', 'IIFVFFIFLSSF', 'SYA', 'MVKIIFVFFIFLSSFSYA', 18, 'NDDKLYRADSRPPDEI', 'MVKIIFVFFIFLSSFSYANDDKLYRADSRPPDEIKQSGGLMPRGQSEYFDRGTQMNINLYDHARGTQTGF', 1, 2.8249999999999997, 1.9888888888888892]
EEEEEEEEEEE _V5YUX5_ ['MQDPTAIYGETWMKNNNFRLSAAGK', 'LAAALAIILAA', 'SAGAYA', 'MQDPTAIYGETWMKNNNFRLSAAGKLAAALAIILAASAGAYA', 'AEIVAANGANGPGVST', 'MQDPTAIYGETWMKNNNFRLSAAGKLAAALAIILAASAGAYAAEIVAANGANGPGVSTAATGAQVVDIVA']
['V5YUX5', 'SP', '0.992186', 'MQDPTAIYGETWMKNNNFRLSAAGK', 'LAAALAIILAA', 'SAGAYA', 'MQDPTAIYGETWMKNNNFRLSAAGKLAAALAIILAASAGAYA', 42, 'AEIVAANGANGPGVST', 'MQDPTAIYGETWMKNNNFRLSAAGKLAAALAIILAASAGAYAAEIVAANGANGPGVSTAATGAQVVDIVA', 1, 2.8363636363636364, 0.3404761904761906]
EEEEEEEEEEE _V8CLS4_ ['MR', 'LAVVVLVLLGV', 'CWA', 'MRLAVVVLVLLGVCWA', 'QEVGVSVDLGIGDSPT', 'MRLAVVVLVLLGVCWAQEVGVSVDLGIGDSPTP

## Output File Generation

The final processed data (`data_sp`) is loaded into a pandas DataFrame and then saved to an .xlsx (Excel) file, providing a clean and accessible format.

In [None]:
# pandas supplies the tabular DataFrame structure used for the final output
import pandas as pd

# Build a DataFrame from data_sp; no column names are supplied, so pandas
# labels the columns with integer positions (0, 1, 2, ...)
df = pd.DataFrame(data=data_sp)
# Show the resulting table as the cell output
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,A0A1S4NYE3,SP,0.974034,MHQPPVRFPYR,LLSYLISTIIAGQPLL,PAVGA,MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGA,32.0,VITPQNGAGMDKAANG,MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGAVITPQNGAGMDKAA...,2.1,1.575000,0.565625
1,D5CBA0,SP,0.998014,MMKQDQVRFSQR,ALSALLSVLLATQPLL,PAVA,MMKQDQVRFSQRALSALLSVLLATQPLLPAVA,32.0,ASITPSGNTQMDKAAN,MMKQDQVRFSQRALSALLSVLLATQPLLPAVAASITPSGNTQMDKA...,2.0,1.800000,0.565625
2,P01555,SP,0.998963,MVK,IIFVFFIFLSSF,SYA,MVKIIFVFFIFLSSFSYA,18.0,NDDKLYRADSRPPDEI,MVKIIFVFFIFLSSFSYANDDKLYRADSRPPDEIKQSGGLMPRGQS...,1.0,2.825000,1.988889
3,P01556,SP,0.998953,MIK,LKFGVFFTVLLSS,AYAHG,MIKLKFGVFFTVLLSSAYAHG,21.0,TPQNITDLCAEYHNTQ,MIKLKFGVFFTVLLSSAYAHGTPQNITDLCAEYHNTQIYTLNDKIF...,1.0,1.661538,1.085714
4,P04977,SP,0.998410,MRCTRAIRQTART,GWLTWLAILAVTAPV,TSPAWA,MRCTRAIRQTARTGWLTWLAILAVTAPVTSPAWA,34.0,DDPPATVYRYDSRPPE,MRCTRAIRQTARTGWLTWLAILAVTAPVTSPAWADDPPATVYRYDS...,4.0,1.633333,0.382353
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2197,W6HYF7,SP,0.998774,MTN,FIQTVFKIGLFSIFTFFSQL,SSA,MTNFIQTVFKIGLFSIFTFFSQLSSA,26.0,DIDAELIDDMVQVNPL,MTNFIQTVFKIGLFSIFTFFSQLSSADIDAELIDDMVQVNPLISLR...,0.0,1.390000,0.988462
2198,W6HZS4,SP,0.998695,MTN,FIQTVFKIGLFSIFTFF,SQFSSA,MTNFIQTVFKIGLFSIFTFFSQFSSA,26.0,DIDAELIDDMVQVNPL,MTNFIQTVFKIGLFSIFTFFSQFSSADIDAELIDDMVQVNPLISLR...,0.0,1.664706,0.950000
2199,W6HZU5,SP,0.998769,MRH,YIIYGFLLIACFY,STQA,MRHYIIYGFLLIACFYSTQA,20.0,NSDSLNPNLLTDTPTP,MRHYIIYGFLLIACFYSTQANSDSLNPNLLTDTPTPPGKVGEIVGE...,1.1,2.053846,0.885000
2200,W6IBI6,SP,0.998736,MRH,YIIYGFLLIACFY,STQA,MRHYIIYGFLLIACFYSTQA,20.0,NSDSLNPNLLTDTPTP,MRHYIIYGFLLIACFYSTQANSDSLNPNLLTDTPTPPGKVGEIVGE...,1.1,2.053846,0.885000


In [None]:
# Keep only complete records, then keep one record per unique value of
# column 6 (the full signal-peptide sequence).
# Rows with missing values are discarded *before* de-duplication: otherwise an
# incomplete row that happens to be the first occurrence of a sequence would
# win the de-duplication and then be dropped, removing that sequence entirely
# even though a complete duplicate exists further down the table.
toxin_database = df.dropna().drop_duplicates(subset=6)
# Show the cleaned table as the cell output
toxin_database


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,A0A1S4NYE3,SP,0.974034,MHQPPVRFPYR,LLSYLISTIIAGQPLL,PAVGA,MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGA,32.0,VITPQNGAGMDKAANG,MHQPPVRFPYRLLSYLISTIIAGQPLLPAVGAVITPQNGAGMDKAA...,2.1,1.575000,0.565625
1,D5CBA0,SP,0.998014,MMKQDQVRFSQR,ALSALLSVLLATQPLL,PAVA,MMKQDQVRFSQRALSALLSVLLATQPLLPAVA,32.0,ASITPSGNTQMDKAAN,MMKQDQVRFSQRALSALLSVLLATQPLLPAVAASITPSGNTQMDKA...,2.0,1.800000,0.565625
2,P01555,SP,0.998963,MVK,IIFVFFIFLSSF,SYA,MVKIIFVFFIFLSSFSYA,18.0,NDDKLYRADSRPPDEI,MVKIIFVFFIFLSSFSYANDDKLYRADSRPPDEIKQSGGLMPRGQS...,1.0,2.825000,1.988889
3,P01556,SP,0.998953,MIK,LKFGVFFTVLLSS,AYAHG,MIKLKFGVFFTVLLSSAYAHG,21.0,TPQNITDLCAEYHNTQ,MIKLKFGVFFTVLLSSAYAHGTPQNITDLCAEYHNTQIYTLNDKIF...,1.0,1.661538,1.085714
4,P04977,SP,0.998410,MRCTRAIRQTART,GWLTWLAILAVTAPV,TSPAWA,MRCTRAIRQTARTGWLTWLAILAVTAPVTSPAWA,34.0,DDPPATVYRYDSRPPE,MRCTRAIRQTARTGWLTWLAILAVTAPVTSPAWADDPPATVYRYDS...,4.0,1.633333,0.382353
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2192,W3V795,SP,0.998635,MSKLNNSVVR,GTSYLLIYLTAIQPL,HPAIA,MSKLNNSVVRGTSYLLIYLTAIQPLHPAIA,30.0,AGITPDNNHTQVQNQG,MSKLNNSVVRGTSYLLIYLTAIQPLHPAIAAGITPDNNHTQVQNQG...,2.0,1.046667,0.536667
2193,W3V835,SP,0.999009,MDRCNNPMAR,GACYLLIYLTATYPL,HPAIA,MDRCNNPMARGACYLLIYLTATYPLHPAIA,30.0,AGITPDNNRTQVQNQG,MDRCNNPMARGACYLLIYLTATYPLHPAIAAGITPDNNRTQVQNQG...,1.0,1.233333,0.293333
2194,W3VCV6,SP,0.998977,MDKRSNPLVR,ATSYLLIYLTAIQPL,HPAIA,MDKRSNPLVRATSYLLIYLTAIQPLHPAIA,30.0,AGITPDNDRTQVQHQG,MDKRSNPLVRATSYLLIYLTAIQPLHPAIAAGITPDNDRTQVQHQG...,2.0,1.193333,0.293333
2197,W6HYF7,SP,0.998774,MTN,FIQTVFKIGLFSIFTFFSQL,SSA,MTNFIQTVFKIGLFSIFTFFSQLSSA,26.0,DIDAELIDDMVQVNPL,MTNFIQTVFKIGLFSIFTFFSQLSSADIDAELIDDMVQVNPLISLR...,0.0,1.390000,0.988462


In [None]:
# Write the cleaned table (including its index) to an Excel workbook; the
# relative path resolves against the current working directory
toxin_database.to_excel(excel_writer="toxin_data.xlsx")