In [2]:
import os
import random
import subprocess
from io import StringIO

import numpy as np
import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq

#### Standard AlphaFold

In [10]:
# Split the multi-FASTA file into one single-record FASTA file per sequence.
# Each output file is named after the record ID.

protein='ska3'
directory_output=f'/home/nicola/internship/colabfold-1.5.5/input_fastas/{protein}'
fasta_input=f'/home/nicola/internship/winghel/alignments/{protein}_to_predict_WHD.fasta'

# Create the output directory up front so that open() below cannot fail
# just because the per-protein folder does not exist yet.
os.makedirs(directory_output, exist_ok=True)

for record in SeqIO.parse(fasta_input, "fasta"):
    basename = record.id
    output_file = os.path.join(directory_output, basename + '.fasta')

    with open(output_file, "w") as output_handle:
        SeqIO.write(record, output_handle, "fasta")

In [20]:
# Create MSA of the records
def reorder_msa_by_id(input_fasta_path, target_id):
    """Return the records of a FASTA file with the target record moved to the front.

    :param input_fasta_path: Path (or handle) handed to ``SeqIO.parse`` in "fasta" format.
    :param target_id: ``record.id`` of the sequence that must come first.
    :return: A list with the target record first, followed by every other
        record in file order.
    :raises ValueError: If no record carries the requested ID.
    """
    target_record = None
    other_records = []
    for record in SeqIO.parse(input_fasta_path, "fasta"):
        # Only the first match becomes the target; a later record with the same
        # ID is kept among the others instead of being silently dropped.
        if target_record is None and record.id == target_id:
            target_record = record
        else:
            other_records.append(record)

    # Explicit None check: do not rely on the truthiness of the record object.
    if target_record is None:
        raise ValueError(f"Sequence with ID {target_id} not found in the provided MSA.")

    return [target_record] + other_records


#### Strange domains AlphaFold (MSA)

In [2]:
# Sequence IDs, grouped by protein, for which a target-first MSA is built from
# the curated WHD alignment in the next cell.
# NOTE(review): the name suggests these are sequences without a called WHD - confirm.
no_WHD_prot = {
    "ask1": ["COEREV004001", "DIALUT000917", "PALBIL007647", "PLABRA003073"],
    "dam1": ["BIGNAT010037", "GALSUL003538", "GONPAC051218", "RIGRAM016075"],
    "ska2": ["CHOSPE041422"],
    "ska3": [
        "DIPROT040626",
        "EUGLON011654",
        "GEFOKE024180",
        "LENLIM009702",
        "LEUCOM016550",
        "MANSPH005803",
        "MONEXI012361",
        "RHIIRR025659",
        "TELSPE030098",
        "TRIADH009951",
    ],
    "spc19": ["COLSPE004231", "MANSPH002854", "RAMBRE007886"],
    "spc34": ["AURLIM007031", "COEREV001366", "GONPAC028305", "GUITHE009619"],
}

In [None]:
# For every listed sequence: write a copy of the protein's curated WHD alignment
# with that sequence first, then convert the copy to A3M with reformat.pl.
reformat='/home/nicola/internship/hhsuite/hh-suite/scripts/reformat.pl'

for protein in no_WHD_prot.keys():
    results_dir = f'/home/nicola/internship/colabfold-1.5.5/results_MSA/{protein}'
    # os.makedirs replaces the external `mkdir -p` call: no extra process and
    # a failure raises instead of going unnoticed.
    os.makedirs(results_dir, exist_ok=True)
    input_path = f"/home/nicola/internship/winghel/alignments/{protein}_curated_WHD_aligned_ginsi.fasta"

    # `seq_id` instead of `id`, so the builtin id() is not shadowed in the kernel.
    for seq_id in no_WHD_prot[protein]:
        output_path_fasta = f"{results_dir}/{seq_id}_MSA_temp.fasta"
        output_path_A3M = f"{results_dir}/{seq_id}_MSA.a3m"

        # Reorder the MSA so the target sequence comes first
        new_msa = reorder_msa_by_id(input_path, seq_id)

        try:
            # Write the reordered sequences to the temporary FASTA file
            SeqIO.write(new_msa, output_path_fasta, "fasta")

            # check=True: stop if the conversion fails instead of continuing silently
            subprocess.run(
                ['perl', reformat, 'fas', 'a3m', output_path_fasta, output_path_A3M],
                check=True,
            )
        finally:
            # Remove the temporary FASTA even when the conversion failed
            if os.path.exists(output_path_fasta):
                os.remove(output_path_fasta)


#### CREATE MSA FOR ALL PROTEINS

In [3]:
## FUNCTIONS
def clean_seq(remove_X=True):
    """Prompt for a sequence string and return it stripped of formatting characters.

    Newlines, gap characters ("-") and spaces are removed. When ``remove_X`` is
    true, every "X" is removed as well and the position of the first one (in the
    already stripped string) is printed. The final length is always printed.

    :param remove_X: Whether to drop "X" characters from the sequence.
    :return: The cleaned sequence string.
    """
    input_str = input("Enter a string: ")
    cleaned_str = input_str.replace("\n", "").replace("-", "").replace(" ", "")
    if 'X' in cleaned_str and remove_X:
        index = cleaned_str.find('X')
        cleaned_str = cleaned_str.replace("X", "")
        # Message fixed (the word "replaced" was duplicated) so it matches clean_fasta_seq.
        print(f'An X was replaced at position {index}')
    print(f'Sequence with length: {len(cleaned_str)}')
    return cleaned_str

def remove_gaps_from_alignment(input_file, output_file):
    """Write an ungapped copy of a FASTA alignment.

    Every record of ``input_file`` is written to ``output_file`` with all "-"
    characters removed from its sequence; record order is preserved.

    :param input_file: Path of the aligned FASTA file to read.
    :param output_file: Path of the FASTA file to create (overwritten if present).
    """
    with open(output_file, 'w') as output_handle:
        for record in SeqIO.parse(input_file, "fasta"):
            gapless_sequence = str(record.seq).replace('-', '')
            # Wrap the string in Seq: SeqRecord.seq is expected to be a Seq
            # object, and some Biopython writers reject a plain str.
            record.seq = Seq(gapless_sequence)
            SeqIO.write(record, output_handle, "fasta")

def combine_sequences(basenames, directory, output_file):
    """
    Merge the records of several FASTA files into one FASTA file.

    Files are read in the order given by ``basenames``; a missing file is
    reported on stdout and skipped.

    :param basenames: File names without the ``.fasta`` extension.
    :param directory: Directory in which the FASTA files are looked up.
    :param output_file: Path of the combined FASTA file to write.
    """
    collected = []

    for name in basenames:
        path = os.path.join(directory, name + '.fasta')

        # Guard clause: report and skip anything that is not an existing file
        if not os.path.isfile(path):
            print(f"File {path} not found.")
            continue

        collected.extend(SeqIO.parse(path, "fasta"))

    # A single write at the end, after all inputs have been read
    with open(output_file, "w") as output_handle:
        SeqIO.write(collected, output_handle, "fasta")

    print(f"All sequences have been combined into {output_file}")

def clean_fasta_seq(record, remove_X=True):
    """Return the sequence of ``record`` as a string without gaps and spaces.

    When ``remove_X`` is true, all "X" and then all "*" characters are removed
    too; for each symbol that was present, the position of its first occurrence
    at the time of removal is printed.

    :param record: Object whose ``seq`` attribute holds the sequence.
    :param remove_X: Whether to strip "X" and "*" characters.
    :return: The cleaned sequence string.
    """
    residues = str(record.seq).replace("-", "").replace(" ", "")
    if not remove_X:
        return residues

    # "X" is handled before "*", so the reported "*" position refers to the
    # string after the "X" characters have already been removed.
    for symbol in ("X", "*"):
        first_hit = residues.find(symbol)
        if first_hit != -1:
            residues = residues.replace(symbol, "")
            print(f'An {symbol} was replaced at position {first_hit}')
    return residues

def get_fasta_sequence(protein, sequence_id):
    """Look up one record by ID in the concatenated FASTA file of a protein.

    :param protein: Protein name used to build the path of the ``*_conc.fasta`` file.
    :param sequence_id: ``record.id`` to search for.
    :return: The first matching record, or - when no record matches - a
        message *string* describing the miss. Callers must check the type
        before using the result as a record.
    """
    fasta_file=f"/home/nicola/internship/winghel/fastas/{protein}/{protein}_conc.fasta"
    for record in SeqIO.parse(fasta_file, "fasta"):
        if record.id == sequence_id:
            return record
    # Reached only when the loop finished without a match.
    return f"Sequence ID {sequence_id} not found in the file {fasta_file}."

def clean_fasta_sequences(fasta_input, is_path=False):
    """Return FASTA text in which every sequence has been passed through ``clean_fasta_seq``.

    Each record becomes ``>{id}\\n{cleaned sequence}\\n``, where any "/" in the
    record ID is replaced by a space.

    :param fasta_input: FASTA content as a string, or a file path when ``is_path`` is true.
    :param is_path: Interpret ``fasta_input`` as a path instead of FASTA text.
    :return: The cleaned records concatenated into one string.
    :raises FileNotFoundError: If ``is_path`` is true and the file does not exist.
    """
    if not is_path:
        # In-memory FASTA text
        parsed = list(SeqIO.parse(StringIO(fasta_input), "fasta"))
    elif os.path.isfile(fasta_input):
        with open(fasta_input, 'r') as handle:
            parsed = list(SeqIO.parse(handle, "fasta"))
    else:
        raise FileNotFoundError(f"No file found at the provided path: {fasta_input}")

    entries = []
    for entry in parsed:
        sequence_text = clean_fasta_seq(entry)
        header = entry.id.replace("/", " ")
        entries.append(f">{header}\n{sequence_text}\n")

    return "".join(entries)

def get_fasta_sequences(protein, fasta_input, is_path=False):
    """Build FASTA text holding the looked-up sequence of every record in ``fasta_input``.

    For each input record, the part of its ID before the first "/" is looked up
    with ``get_fasta_sequence`` and emitted as ``>{id}\\n{sequence}\\n``.
    IDs that cannot be found are reported on stdout and skipped.

    :param protein: Protein name forwarded to ``get_fasta_sequence``.
    :param fasta_input: FASTA content as a string, or a file path when ``is_path`` is true.
    :param is_path: Interpret ``fasta_input`` as a path instead of FASTA text.
    :return: The FASTA text of all sequences that were found.
    :raises FileNotFoundError: If ``is_path`` is true and the file does not exist.
    """
    if is_path:
        if not os.path.isfile(fasta_input):
            raise FileNotFoundError(f"No file found at the provided path: {fasta_input}")

        with open(fasta_input, 'r') as file:
            records = list(SeqIO.parse(file, "fasta"))
    else:
        records = list(SeqIO.parse(StringIO(fasta_input), "fasta"))

    fasta_entries = []
    for record in records:
        record_id_modified = record.id.split("/")[0]
        fasta_seq = get_fasta_sequence(protein, record_id_modified)

        # get_fasta_sequence returns a message string when the ID is missing;
        # report it instead of writing the message into the FASTA as a sequence.
        if isinstance(fasta_seq, str):
            print(fasta_seq)
            continue

        # Use the sequence itself: formatting the whole record would insert its
        # multi-line summary (ID/Name/Description/...) rather than the residues.
        fasta_entries.append(f">{record_id_modified}\n{fasta_seq.seq}\n")

    return "".join(fasta_entries)

def create_msa(input_fasta, msa_output, quiet=False):
    """Align ``input_fasta`` with MAFFT and write the alignment to ``msa_output``.

    MAFFT is run with ``--localpair --maxiterate 1000``; its stdout (the
    alignment) is redirected into ``msa_output``.

    :param input_fasta: Path of the unaligned FASTA file.
    :param msa_output: Path of the alignment file to create (overwritten if present).
    :param quiet: Pass ``--quiet`` to MAFFT to suppress its progress log.
    :raises subprocess.CalledProcessError: If MAFFT exits with a non-zero status.
    """
    command = ["mafft", "--localpair", "--maxiterate", "1000"]
    if quiet:
        command.append("--quiet")
    command.append(input_fasta)

    # The context manager closes the output file; the original left it open.
    with open(msa_output, "w") as output_handle:
        # check=True: a failed alignment must not pass unnoticed and leave an
        # empty or partial file for the following steps.
        subprocess.run(command, stdout=output_handle, check=True)


def reorder_msa_by_id(input_fasta_path, target_id):
    """Return the records of a FASTA file with the target record moved to the front.

    NOTE: a function with this name is also defined earlier in the notebook;
    whichever definition is executed last is the one in effect.

    :param input_fasta_path: Path (or handle) handed to ``SeqIO.parse`` in "fasta" format.
    :param target_id: ``record.id`` of the sequence that must come first.
    :return: A list with the target record first, followed by every other
        record in file order.
    :raises ValueError: If no record carries the requested ID.
    """
    target_record = None
    other_records = []
    for record in SeqIO.parse(input_fasta_path, "fasta"):
        # Only the first match becomes the target; a later record with the same
        # ID is kept among the others instead of being silently dropped.
        if target_record is None and record.id == target_id:
            target_record = record
        else:
            other_records.append(record)

    # Explicit None check: do not rely on the truthiness of the record object.
    if target_record is None:
        raise ValueError(f"Sequence with ID {target_id} not found in the provided MSA.")

    return [target_record] + other_records

In [13]:
# Protein processed by the following cells, with an empty ID list registered for it.
protein = 'ska3'
dictonary_of_ids = {protein: []}

In [None]:
## Create the FASTA file with the complete (unaligned) sequences

fasta_input_WHD = f'/home/nicola/internship/winghel/alignments/{protein}_curated_WHD_aligned_ginsi.fasta'
fasta_input_complete_seq = f'/home/nicola/internship/winghel/alignments/{protein}_curated_complete_seq_not_aligned.fasta'

# Look up the sequence of every record in the WHD alignment, then clean the result.
new_fille = clean_fasta_sequences(
    get_fasta_sequences(protein=protein, fasta_input=fasta_input_WHD, is_path=True)
)

with open(fasta_input_complete_seq, 'w') as outfile:
    outfile.write(new_fille)


In [None]:
## Align the complete sequences with MAFFT

fasta_input_complete_seq_aligned = f'/home/nicola/internship/winghel/alignments/{protein}_curated_complete_seq_aligned_linsi.fasta'
create_msa(
    input_fasta=fasta_input_complete_seq,
    msa_output=fasta_input_complete_seq_aligned,
)

In [16]:
## Extract all IDs in the aligned complete-sequences FASTA file

# Rebuild the list from scratch: appending to the existing list would add every
# ID again each time this cell is re-run.
dictonary_of_ids[protein] = [
    record.id for record in SeqIO.parse(fasta_input_complete_seq_aligned, "fasta")
]

In [None]:
# For every sequence of the complete-sequence alignment: write a copy of the
# alignment with that sequence first, then convert the copy to A3M with reformat.pl.
reformat='/home/nicola/internship/hhsuite/hh-suite/scripts/reformat.pl'

# `seq_id` instead of `id`, so the builtin id() is not shadowed in the kernel.
for seq_id in dictonary_of_ids[protein]:
    results_dir = f'/home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/{protein}/{seq_id}_MSA'
    # os.makedirs replaces the external `mkdir -p` call and raises on failure.
    os.makedirs(results_dir, exist_ok=True)
    output_path_fasta = f"{results_dir}/{seq_id}_MSA_temp.fasta"
    output_path_A3M = f"{results_dir}/{seq_id}_MSA.a3m"

    # Reorder the MSA so the target sequence comes first
    new_msa = reorder_msa_by_id(fasta_input_complete_seq_aligned, seq_id)

    try:
        # Write the reordered sequences to the temporary FASTA file
        SeqIO.write(new_msa, output_path_fasta, "fasta")

        # check=True: stop if the conversion fails instead of continuing silently
        subprocess.run(
            ['perl', reformat, 'fas', 'a3m', output_path_fasta, output_path_A3M],
            check=True,
        )
    finally:
        # Remove the temporary FASTA even when the conversion failed
        if os.path.exists(output_path_fasta):
            os.remove(output_path_fasta)

In [None]:
# Full pipeline for every protein: collect the complete sequences, align them,
# and write one target-first A3M file per sequence.
dictonary_of_ids={}

prots=['ask1', 'dam1', 'spc19', 'spc34', 'ska1', 'ska2', 'ska3']
reformat='/home/nicola/internship/hhsuite/hh-suite/scripts/reformat.pl'

for protein in prots:

    ## Create complete sequences fasta file
    fasta_input_WHD=f'/home/nicola/internship/winghel/alignments/{protein}_curated_WHD_aligned_ginsi.fasta'
    fasta_input_complete_seq=f'/home/nicola/internship/winghel/alignments/{protein}_curated_complete_seq_not_aligned.fasta'

    new_fille=get_fasta_sequences(protein=protein, fasta_input=fasta_input_WHD, is_path=True)
    new_fille=clean_fasta_sequences(new_fille)

    with open(fasta_input_complete_seq, 'w') as outfile:
        outfile.write(new_fille)

    ## Create MSA of complete sequences fasta file
    fasta_input_complete_seq_aligned=f'/home/nicola/internship/winghel/alignments/{protein}_curated_complete_seq_aligned_linsi.fasta'
    create_msa(fasta_input_complete_seq, fasta_input_complete_seq_aligned)

    ## Extract all IDs in the aligned complete sequences fasta file
    dictonary_of_ids[protein] = [
        record.id for record in SeqIO.parse(fasta_input_complete_seq_aligned, "fasta")
    ]

    ## Reorder alignment and save
    # `seq_id` instead of `id`, so the builtin id() is not shadowed in the kernel.
    for seq_id in dictonary_of_ids[protein]:
        results_dir = f'/home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/{protein}/{seq_id}_MSA'
        # os.makedirs replaces the external `mkdir -p` call and raises on failure.
        os.makedirs(results_dir, exist_ok=True)
        output_path_fasta = f"{results_dir}/{seq_id}_MSA_temp.fasta"
        output_path_A3M = f"{results_dir}/{seq_id}_MSA.a3m"

        # Reorder the MSA so the target sequence comes first
        new_msa = reorder_msa_by_id(fasta_input_complete_seq_aligned, seq_id)

        try:
            # Write the reordered sequences to the temporary FASTA file
            SeqIO.write(new_msa, output_path_fasta, "fasta")

            # check=True: stop if the conversion fails instead of continuing silently
            subprocess.run(
                ['perl', reformat, 'fas', 'a3m', output_path_fasta, output_path_A3M],
                check=True,
            )
        finally:
            # Remove the temporary FASTA even when the conversion failed
            if os.path.exists(output_path_fasta):
                os.remove(output_path_fasta)

In [4]:
import json

# Load the JSON file of WHD call differences; it is indexed by protein name below.
differences_path = '/home/nicola/internship/winghel/WHD_calls_my_vs_max.json'
with open(differences_path, 'r') as file:
    json_of_differencies = json.load(file)


In [5]:

### Build one MSA (as A3M) for every sequence listed under 'not_in_my'
prots=['ask1', 'dam1', 'spc19', 'spc34', 'ska1', 'ska2', 'ska3']
reformat='/home/nicola/internship/hhsuite/hh-suite/scripts/reformat.pl'

for protein in prots:

    fasta_input_complete_seq=f'/home/nicola/internship/winghel/alignments/{protein}_curated_complete_seq_not_aligned.fasta'

    # For each listed ID, align its sequence together with the protein's
    # complete sequences. `seq_id` avoids shadowing the builtin id().
    for seq_id in json_of_differencies[protein]['not_in_my']:

        # Get and clean the sequence
        sequence = get_fasta_sequence(protein=protein, sequence_id=seq_id)
        if isinstance(sequence, str):
            # get_fasta_sequence returns a message string when the ID is missing;
            # report it and move on instead of failing on `.seq` below.
            print(sequence)
            continue
        # SeqRecord.seq is expected to be a Seq object, not a plain str.
        sequence.seq = Seq(clean_fasta_seq(sequence))

        # Create a new directory for this sequence and set the paths
        results_dir = f'/home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/{protein}/max_{seq_id}_MSA'
        os.makedirs(results_dir, exist_ok=True)
        output_path_fasta = f"{results_dir}/{seq_id}_temp.fasta"
        output_path_MSA = f"{results_dir}/{seq_id}_MSA_temp.fasta"
        output_path_A3M = f"{results_dir}/{seq_id}_MSA.a3m"

        # Put the new sequence in front of the existing complete sequences
        with open(fasta_input_complete_seq, 'r') as file:
            existing_sequences = list(SeqIO.parse(file, 'fasta'))

        all_sequences = [sequence] + existing_sequences

        try:
            with open(output_path_fasta, 'w') as file:
                SeqIO.write(all_sequences, file, 'fasta')

            # Create the MSA
            create_msa(output_path_fasta, output_path_MSA)

            # check=True: stop if the conversion fails instead of continuing silently
            subprocess.run(
                ['perl', reformat, 'fas', 'a3m', output_path_MSA, output_path_A3M],
                check=True,
            )
        finally:
            # Remove both temporary files even when a step above failed
            for temp_path in (output_path_fasta, output_path_MSA):
                if os.path.exists(temp_path):
                    os.remove(temp_path)



outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   20 / 31
done.

Progressive alignment ... 
STEP    24 /30 
Reallocating..done. *alloclen = 4184
STEP    30 /30 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   20 / 31
Segment   1/  1    1-2555
STEP 016-009-1  rejected..    identical.    identical.    rejected. rejected. rejected. rejected. rejected. accepted. accepted. rejected. rejected. rejecte

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ask1/max_NUTLON018888_MSA/NUTLON018888_MSA_temp.fasta with 31 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ask1/max_NUTLON018888_MSA/NUTLON018888_MSA.a3m
An X was replaced at position 72


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   20 / 31
done.

Progressive alignment ... 
STEP    25 /30 
Reallocating..done. *alloclen = 4243
STEP    30 /30 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   20 / 31
Segment   1/  1    1-2588
STEP 008-021-1  rejected..    identical.    rejected. accepted. rejected. rejected. rejected. rejected. rejected. rejected. rejected. accepted. rejected. r

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ask1/max_HALSPE003961_MSA/HALSPE003961_MSA_temp.fasta with 31 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ask1/max_HALSPE003961_MSA/HALSPE003961_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   20 / 31
done.

Progressive alignment ... 
STEP    25 /30 
Reallocating..done. *alloclen = 4155
STEP    30 /30 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   20 / 31
Segment   1/  1    1-2568
STEP 014-009-1  rejected..    accepted. rejected. rejected. rejected. rejected. rejected. rejected. accepted. rejected. rejected. rejected. rejected. rejec

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ask1/max_MICMAR005749_MSA/MICMAR005749_MSA_temp.fasta with 31 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ask1/max_MICMAR005749_MSA/MICMAR005749_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   60 / 68
done.

Progressive alignment ... 
STEP    46 /67 
Reallocating..done. *alloclen = 1809
STEP    67 /67 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   60 / 68
Segment   1/  1    1- 630
STEP 006-008-1  identical.    rejected. rejected. accepted. rejected. rejected. rejected. identical.    identical.    identical.    accepted. accepted. rej

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska1/max_ACASPE024827_MSA/ACASPE024827_MSA_temp.fasta with 68 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska1/max_ACASPE024827_MSA/ACASPE024827_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   60 / 68
done.

Progressive alignment ... 
STEP    44 /67 
Reallocating..done. *alloclen = 1770
STEP    67 /67 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   60 / 68
Segment   1/  1    1- 672
STEP 008-008-1  identical.    rejected. rejected. rejected. rejected. rejected. rejected. rejected. identical.    identical.    accepted. accepted. accepte

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska1/max_CHABRA030472_MSA/CHABRA030472_MSA_temp.fasta with 68 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska1/max_CHABRA030472_MSA/CHABRA030472_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   30 / 37
done.

Progressive alignment ... 
STEP    36 /36 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   30 / 37
Segment   1/  1    1-1226
STEP 006-025-0  rejected..    identical.    identical.    rejected. rejected. accepted. accepted. identical.    rejected. rejected. rejected. rejected. rejected. identical.    identical.    rejected. identica

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_HALSPE003657_MSA/HALSPE003657_MSA_temp.fasta with 37 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_HALSPE003657_MSA/HALSPE003657_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   30 / 37
done.

Progressive alignment ... 
STEP    36 /36 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   30 / 37
Segment   1/  1    1-1229
STEP 006-020-0  rejected..    rejected. identical.    accepted. accepted. accepted. identical.    identical.    accepted. rejected. rejected. accepted. rejected. rejected. rejected. identical.    rejected. re

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_CHOSPE037842_MSA/CHOSPE037842_MSA_temp.fasta with 37 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_CHOSPE037842_MSA/CHOSPE037842_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   30 / 37
done.

Progressive alignment ... 
STEP    36 /36 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   30 / 37
Segment   1/  1    1-1231
STEP 006-018-1  rejected..    accepted. rejected. rejected. rejected. rejected. accepted. rejected. rejected. rejected. rejected. identical.    identical.    rejected. rejected. rejected. rejected. rejected. 

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_PORPUR006809_MSA/PORPUR006809_MSA_temp.fasta with 37 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_PORPUR006809_MSA/PORPUR006809_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   30 / 37
done.

Progressive alignment ... 
STEP    36 /36 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   30 / 37
Segment   1/  1    1-1254
STEP 006-009-1  identical.   epted. rejected. accepted. rejected. rejected. rejected. rejected. identical.    rejected. rejected. rejected. rejected. rejected. accepted. accepted. identical.    rejected. reje

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_GALSUL006602_MSA/GALSUL006602_MSA_temp.fasta with 37 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_GALSUL006602_MSA/GALSUL006602_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   30 / 37
done.

Progressive alignment ... 
STEP    36 /36 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   30 / 37
Segment   1/  1    1-1245
STEP 012-028-1  identical.    accepted. rejected. rejected. accepted. rejected. rejected. rejected. rejected. accepted. rejected. rejected. rejected. accepted. rejected. accepted. identical.    rejected. reje

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_BAFFRI047167_MSA/BAFFRI047167_MSA_temp.fasta with 37 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_BAFFRI047167_MSA/BAFFRI047167_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   30 / 37
done.

Progressive alignment ... 
STEP    36 /36 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   30 / 37
Segment   1/  1    1-1226
STEP 006-006-1  identical.   epted. rejected. rejected. accepted. accepted. rejected. rejected. identical.    rejected. rejected. rejected. rejected. accepted. identical.    identical.    rejected. rejected. 

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_MONBRE003867_MSA/MONBRE003867_MSA_temp.fasta with 37 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_MONBRE003867_MSA/MONBRE003867_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   30 / 37
done.

Progressive alignment ... 
STEP    36 /36 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   30 / 37
Segment   1/  1    1-1240
STEP 008-019-1  identical.   epted. rejected. identical.    rejected. rejected. rejected. accepted. rejected. rejected. rejected. identical.    identical.    rejected. rejected. accepted. identical.    reject

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_DIALUT007416_MSA/DIALUT007416_MSA_temp.fasta with 37 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_DIALUT007416_MSA/DIALUT007416_MSA.a3m
An * was replaced at position 326


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   30 / 37
done.

Progressive alignment ... 
STEP    36 /36 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   30 / 37
Segment   1/  1    1-1235
STEP 006-010-1  rejected..   ected. rejected. rejected. accepted. accepted. rejected. accepted. rejected. rejected. identical.    identical.    rejected. identical.    identical.    rejected. rejected. reject

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_DIPROT021438_MSA/DIPROT021438_MSA_temp.fasta with 37 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska2/max_DIPROT021438_MSA/DIPROT021438_MSA.a3m
An * was replaced at position 259


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   40 / 45
done.

Progressive alignment ... 
STEP    40 /44 
Reallocating..done. *alloclen = 3818
STEP    44 /44 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   40 / 45
Segment   1/  1    1-2070
STEP 014-008-1  rejected..    accepted. rejected. rejected. accepted. accepted. rejected. rejected. rejected. rejected. rejected. rejected. rejected. rejec

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_FABTRO019274_MSA/FABTRO019274_MSA_temp.fasta with 45 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_FABTRO019274_MSA/FABTRO019274_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   40 / 45
done.

Progressive alignment ... 
STEP    40 /44 
Reallocating..done. *alloclen = 3827
STEP    44 /44 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   40 / 45
Segment   1/  1    1-2083
done 016-001-1  rejected..   ected. rejected. rejected. accepted. rejected. rejected. rejected. rejected. accepted. accepted. rejected. rejected. rejected.

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_DIALUT012015_MSA/DIALUT012015_MSA_temp.fasta with 45 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_DIALUT012015_MSA/DIALUT012015_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   40 / 45
done.

Progressive alignment ... 
STEP    40 /44 
Reallocating..done. *alloclen = 3809
STEP    44 /44 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   40 / 45
Segment   1/  1    1-2071
done 016-001-1  identical.    rejected. rejected. rejected. accepted. rejected. identical.    rejected. rejected. rejected. rejected. rejected. rejected. r

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_CAULEN001600_MSA/CAULEN001600_MSA_temp.fasta with 45 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_CAULEN001600_MSA/CAULEN001600_MSA.a3m


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   40 / 45
done.

Progressive alignment ... 
STEP    40 /44 
Reallocating..done. *alloclen = 3791
STEP    44 /44 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   40 / 45
Segment   1/  1    1-2043
STEP 012-003-1  identical.    accepted. rejected. rejected. rejected. rejected. rejected. rejected. rejected. accepted. accepted. rejected. rejected. rejec

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_ARATHA018306_MSA/ARATHA018306_MSA_temp.fasta with 45 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_ARATHA018306_MSA/ARATHA018306_MSA.a3m
An X was replaced at position 0


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   40 / 45
done.

Progressive alignment ... 
STEP    40 /44 
Reallocating..done. *alloclen = 4288
STEP    44 /44 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   40 / 45
Segment   1/  1    1-2540
STEP 016-002-1  rejected..    rejected. rejected. accepted. rejected. rejected. rejected. rejected. rejected. accepted. accepted. rejected. rejected. accep

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_HALSPE001185_MSA/HALSPE001185_MSA_temp.fasta with 45 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_HALSPE001185_MSA/HALSPE001185_MSA.a3m
An * was replaced at position 269


outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.453
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, compacttree = 0
Constructing a UPGMA tree ... 
   40 / 45
done.

Progressive alignment ... 
STEP    40 /44 
Reallocating..done. *alloclen = 3805
STEP    44 /44 
done.
tbfast (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
1 thread(s)

minimumweight = 0.000010
autosubalignment = 0.000000
nthread = 0
randomseed = 0
blosum 62 / kimura 200
poffset = 0
niter = 16
sueff_global = 0.100000
nadd = 16
Loading 'hat3' ... done.
rescale = 1

   40 / 45
Segment   1/  1    1-2060
STEP 016-004-1  rejected..   epted. rejected. rejected. identical.    rejected. rejected. rejected. rejected. accepted. rejected. rejected. rejected. rejec

Reformatted /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_PHYPOL014428_MSA/PHYPOL014428_MSA_temp.fasta with 45 sequences from fas to a3m and written to file /home/nicola/internship/colabfold-1.5.5/results_MSA_complete_seq/ska3/max_PHYPOL014428_MSA/PHYPOL014428_MSA.a3m


STEP 016-002-1  identical.   
Oscillating.

done
dvtditr (aa) Version 7.453
alg=A, model=BLOSUM62, 1.53, -0.00, -0.00, noshift, amax=0.0
0 thread(s)


Strategy:
 L-INS-i (Probably most accurate, very slow)
 Iterative refinement method (<16) with LOCAL pairwise alignment information

If unsure which option to use, try 'mafft --auto input > output'.
For more information, see 'mafft --help', 'mafft --man' and the mafft page.

The default gap scoring scheme has been changed in version 7.110 (2013 Oct).
It tends to insert more gaps into gap-rich regions than previous versions.
To disable this change, add the --leavegappyregion option.

