In [7]:
import pandas as pd
import multiprocessing as mp
import ast
from itertools import product
import numpy as np
import os
from Bio import SeqIO
import subprocess

In [None]:
!cp "Overlap analysis.ipynb" "/n/home11/rkapoor/overlap analysis.ipynb"

In [None]:
d=[x.replace(".csv","") for x in os.listdir("hmmer_result_csv") if ".ipynb" not in x and "XP_050043612.1" not in x] 


# Make a directory for each interval

In [None]:
os.mkdir("hmmer_phylo_data")

In [None]:
def mkdir(x):
    os.mkdir(f"hmmer_phylo_data/{x}")
    subprocess.run(["cp",f"hmmer_result_csv/{x}.csv",f"hmmer_phylo_data/{x}/hmmer_result.tsv"])
    

In [None]:
with mp.Pool(64) as p:
    inter_cs = p.map(mkdir, d)

# Identify putative secondary chimeras 
Secondary chimeras are defined as sequences with intervals with HMMER hits E<.01 to all HGT and metazoan regions. HGT and metazoan homologous regions should overlap<15 residues. 

In [None]:
ds=set([x.split(";")[0]+";"+x.split(";")[1] for x in d if "ipynb" not in x])
#make a dictionary between chimera gene and its intervals
dm={}
for di in ds:
    dm[di]=[x for x in d if di in x]


In [None]:
cols=["target_name", "target_accession","tlen", "query name","accession","qlen", "E-value", "score1", "bias1",   "#", "of", "c-Evalue",  "i-Evalue",  "score",  "bias",  "hmmfrom",  "hmmto",    "alifrom",  "alito", "envfrom",  "envto", "acc", "description of target", "species"]

In [None]:
#takes two lists of intervals and computes the overlap between all intervals in each list. returns the maximum
#written by RK and ChatGPT
def compute_max_overlap(intervals1, intervals2):
    max_overlap = 0
    for interval1 in intervals1:
        for interval2 in intervals2:
            overlap = min(interval1[1], interval2[1]) - max(interval1[0], interval2[0])
            if overlap > max_overlap:
                max_overlap = overlap
    return max_overlap

#takes a HMMER result dataframe and a protein name and returns all hit intervals in the protein
def get_intercs(c,df):
    dfi = df[df.target_name == c]
    return set(zip(dfi['alifrom'].tolist(), dfi['alito'].tolist()))

#finds all putative secondary chimeras and writes to a file 
#written by RK and ChatGPT
def get_2nd_chimera(n):
    #all putative secondary chimeras without sequence overlap
    lt=dm[n]
    ni=lt[0]
    df=pd.read_csv(f"hmmer_phylo_data/{ni}/hmmer_result.tsv",sep="\t",comment='#',names=cols)
    df=df[df["i-Evalue"]<.01]
    chimeras=set(df.target_name)
    for ni in lt[1:]:
        df=pd.read_csv(f"hmmer_phylo_data/{ni}/hmmer_result.tsv",sep="\t",comment='#',names=cols)
        df=df[df["i-Evalue"]<.01]
        chimeras=set(df.target_name)&chimeras
    #make a dataframe where rows are secondary chimeras, columns are all intervals w/in secondary chimera with HMMER E-value<.01
    inters = {}
    for ni in lt:
        df = pd.read_csv(f"hmmer_phylo_data/{ni}/hmmer_result.tsv", sep="\t", comment='#', names=cols)
        df=df[df["i-Evalue"]<.01]
        inters[ni]={}
        for c in chimeras:
            inters[ni][c]=get_intercs(c,df)
    inter_df=pd.DataFrame(inters)
    
    #find maximal overlap between all hgt and metazoan intervals for each putative chimera
    hgts=[x for x in lt if "HGT" in x]
    metas=[x for x in lt if "HGT" not in x]
    pairs = list(product(hgts,metas))
    # Apply the function to compute maximal overlap for each row
    for i in range(len(pairs)):
        inter_df[f'max_overlap_{i}'] = inter_df.apply(lambda row: compute_max_overlap(row[pairs[i][0]], row[pairs[i][1]]), axis=1)
    inter_df["max_overlap"]=inter_df.loc[:,[x for x in inter_df.columns if "max_overlap" in x]].max(axis=1)
    f=open("putative_secondary_chimeras.txt","a")
    f.write(n+":"+str(list(inter_df[inter_df.max_overlap<15].index)))
    f.write("\n")
    f.close()
    return 

In [None]:
with mp.Pool(12) as p:
    inter_cs = p.map(get_2nd_chimera, set(dm.keys())-set(dp2_mp.keys()))

In [None]:
dps2=open("putative_secondary_chimeras.txt","r").readlines()
dp2_mp={x.split(":")[0]:x.split(":")[1].strip().replace("[","").replace("]","").replace(" ","").replace("'","").split(",") for x in dps2}

# Identify Arthropod Specific Chimeras

In [None]:
import matplotlib.pyplot as plt
from Bio import SeqIO
import os
import sys
import os
import subprocess
from Bio import SearchIO
sys.path.insert(0, '/n/home11/rkapoor')
import tax_pkg
from tax_pkg import taxid
from tax_pkg import accession2taxid
import pandas as pd
import multiprocessing as mp
import numpy as np

In [None]:
def get_phylum(x):
    try:
        ti=accession2taxid.get_taxid(x)
        l=taxid.get_lineage(ti,{})

        o=taxid.get_phylum(ti,l)
    except:
        print(x)
        ti="nan"
        return ("nan","nan","nan","nan","nan","nan")

    return o

In [None]:
dps=open("putative_secondary_chimeras.txt","r").readlines()
dp_mp={x.split(":")[0]:x.split(":")[1].strip().replace("[","").replace("]","").replace(" ","").replace("'","").split(",") for x in dps}

In [None]:
def get_ap(x):
    with mp.Pool(63) as p:
        inter_cs = p.map(get_phylum, dp_mp[x])
    amap={xi for xi,yi in zip(dp_mp[x],inter_cs) if yi=="Arthropoda"}
    return amap

In [None]:
mids={}
for x in dp_mp:
    ids=get_ap(x)
    mids[x]=ids

In [None]:
import pickle

In [None]:
with open('arthropod_dps.pickle', 'wb') as file:
    pickle.dump(mids,file)

# Secondary chimera blast

In [None]:
import re

In [None]:
with open('arthropod_dps.pickle', 'rb') as file:
    chimeras=pickle.load(file)

In [None]:
tm=[x for x in os.listdir("hmmer_phylo_data") if ".ipynb" not in x]

In [None]:
rdict=SeqIO.to_dict(SeqIO.parse(f"query_sequences.fasta", "fasta"))

In [None]:
query="XP_023324493.1"

In [None]:
des=rdict[query].description

In [None]:
species = re.findall(r'\[(.*?)\]', des)[-1]


In [None]:
#make a dataframe of all secondary chimeras for each chimeric interval
#limiting to one per species and using the same chimera across all intervals
def make_chimera_df(xi):
    inters=[x for x in tm if xi in x]
    i=0
    for x in inters:
        df=pd.read_csv(f"hmmer_phylo_data/{x}/hmmer_result.tsv",sep="\t",comment='#',names=cols)
        df=df[df.species!="synthetic construct"]
        query=x.split(";")[1]
        if i==0:
            #write info for secondary chimeras to csv
            chs=set(chimeras[xi])-set([query])
            df=df[df.target_name.isin(chs)]
           
            #write info for primary chimera to csv
            leng=len(rdict[query].seq)
            des=rdict[query].description
            coords=ast.literal_eval(x.split("_")[-1])
            qspecies=re.findall(r'\[(.*?)\]', des)[-1]
            df=df[df.species!=qspecies]
            last=df.shape[0]
            df.loc[last,'envfrom']=max(0,coords[0]-10)
            df.loc[last,'envto']=min(coords[1]+10,leng-1)
            df.loc[last,'description of target']=des
            df.loc[last,"target_name"]=query
            df.loc[last,"species"] = qspecies
            
            #limit to one query with lowest e-value per species
            df=df.sort_values("i-Evalue").groupby('species').first()
            chs=set(df.target_name)
        else:
            chs=chs-set([query])
            df=df[df.target_name.isin(chs)]
            
            #write info for primary chimera to csv
            leng=len(rdict[query].seq)
            des=rdict[query].description
            coords=ast.literal_eval(x.split("_")[-1])
            qspecies=re.findall(r'\[(.*?)\]', des)[-1]
            df=df[df.species!=qspecies]
            
            last=df.shape[0]
            df.loc[last,'envfrom']=max(0,coords[0]-10)
            df.loc[last,'envto']=min(coords[1]+10,leng-1)
            df.loc[last,'description of target']=des
            df.loc[last,"target_name"]=query
            df.loc[last,"species"] = qspecies
            
            #limit to one query with lowest e-value per species
            df=df.sort_values("i-Evalue").groupby('species').first()
            chs=set(df.target_name)
            
            
        df.to_csv(f"hmmer_phylo_data/{x}/secondary_chimera.tsv",sep="\t")
        i+=1

In [None]:
cols=["target_name", "target_accession","tlen", "query name","accession","qlen", "E-value", "score1", "bias1",   "#", "of", "c-Evalue",  "i-Evalue",  "score",  "bias",  "hmmfrom",  "hmmto",    "alifrom",  "alito", "envfrom",  "envto", "acc", "description of target", "species"]

In [None]:
with mp.Pool(40) as p:
    inter_cs = p.map(make_chimera_df, chimeras.keys())

In [10]:
#append a fasta of sequence n1 to file n2
def get_fasta(n):
    n1=n[0]
    n2=n[1]
    if n1 in rdict.keys():
        f=open(n2,"a")
        f.write(f">{n1}")
        f.write("\n")
        f.write(str(rdict[n1].seq))
        f.write("\n")
        f.close()
    else:
        subprocess.run(["sh","query_nr_protein.sh",n1,n2])
    return n
from Bio import SeqIO
#make a new fasta in output_file by using the HMMER coordinates in df 
def copy_fasta_with_substr(fasta_file, df, output_file):
    with open(output_file, "w") as out_handle:
        for seq_record in SeqIO.parse(fasta_file, "fasta"):
            seq_name = seq_record.id
            if seq_name in df["target_name"].values:
                sub_df = df[df["target_name"] == seq_name]
                for _, row in sub_df.iterrows():
                    sstart = int(row["envfrom"])
                    send = int(row["envto"])
                    subseq = seq_record.seq[sstart:send]
                    subseq_name = f"{seq_name}_{sstart}_{send}"
                    subseq_record = seq_record
                    subseq_record.id = subseq_name
                    subseq_record.description = ""
                    subseq_record.seq = subseq
                    SeqIO.write(subseq_record, out_handle, "fasta")

In [None]:
def write_fasta(n):
    df=pd.read_csv(f"hmmer_phylo_data/{n}/secondary_chimera.tsv",sep="\t")
    with mp.Pool(40) as p:
        inter_cs = p.map(get_fasta,  [(x,f"hmmer_phylo_data/{n}/secondary_chimera.fasta") for x in df.target_name])
    copy_fasta_with_substr(f"hmmer_phylo_data/{n}/secondary_chimera.fasta",df,f"hmmer_phylo_data/{n}/sub_secondary_chimera.fasta")
    return

In [None]:
for n in tm:
    write_fasta(n)

In [None]:
#merge all secondary chimera intervals into one fasta
#written by RK and ChatGPT
directory = "hmmer_phylo_data"
fasta_files = []
added
# Iterate over the subdirectories within the main directory and append 
for root, dirs, files in os.walk(directory):
    if "sub_secondary_chimera.fasta" in files:
        fasta_path = os.path.join(root, "sub_secondary_chimera.fasta")
        fasta_files.append(fasta_path)

# Create a new file to store the merged fasta data
output_file = "secondary_chimera_intervals.fasta"

# Open the output file in append mode
with open(output_file, "a") as output:
    for fasta_file in fasta_files:
        with open(fasta_file, "r") as input_file:
            fasta_data = input_file.read()
            output.write(fasta_data)


In [None]:
#remove redundant seqs from interval fasta
input_file = "secondary_chimera_intervals.fasta"
output_file = "filtered_secondary_chimera_intervals.fasta"

sequences = {}
current_header = ''

# Read input file and store unique sequences per header
with open(input_file, 'r') as file:
    for line in file:
        line = line.strip()
        if line.startswith('>'):
            current_header = line
            if current_header not in sequences:
                sequences[current_header] = ''
        else:
            sequences[current_header] += line

# Write unique sequences to output file
with open(output_file, 'w') as file:
    for header, sequence in sequences.items():
        file.write(header + '\n')
        file.write(sequence + '\n')


In [None]:
#run diamond blast on secondary chimera intervls
!sbatch secondary_chimera_blast.sh

In [None]:
#split secondary chimera 
!sbatch "split_blast_table.sh" "secondary_chimera_blast_results" "secondary_chimera_results"

# Filter chimeras by secondary chimera results

In [None]:
!cd /n/holyscratch01/extavour_lab/Lab/rkapoor

In [2]:
##annotate each secondary chimera as "Meta" "HGT" or "Indeterminate"
def check_annot(n):
    dfo=pd.read_csv(f"secondary_chimera_blast_results/{n}",sep="\t", names="qseqid sseqid stitle staxids sscinames sphylums skingdoms pident length mismatch gapopen qstart qend sstart send evalue bitscore".split(" "))
    gene=";".join(n.split(";")[0:2])
    dfo=dfo[~dfo.sphylums.astype(str).str.contains("Arthropoda")]
    dfo=dfo[~dfo.sphylums.astype(str).str.contains("Rotifera")]
    dfo=dfo[dfo.staxids.astype(str)!="nan"]
    dfo=dfo[dfo.staxids!=32630]
    dfm=dfo.iloc[0:30000,:]
    dfmeta=dfm[dfm.skingdoms.astype(str).str.contains("Metazoa")]
    dfhgt=dfm[~dfm.skingdoms.astype(str).str.contains("Metazoa")]
    dfhgt["AI"]=np.log10(dfmeta.evalue.min()+1e-200)-np.log10(dfhgt.evalue+1e-200)
    dfmeta["MI"]=np.log10(dfhgt.evalue.min()+1e-200)-np.log10(dfmeta.evalue+1e-200)
    dfmi=dfm.iloc[0:300,:]
    dfmetai=dfmi[dfmi.skingdoms.astype(str).str.contains("Metazoa")]
    dfhgti=dfmi[~dfmi.skingdoms.astype(str).str.contains("Metazoa")]

#     print(len(set(dfhgt[dfhgt.AI>5].staxids)),(len(set(dfhgti.staxids))/len(set(dfmi.staxids)),len(set(dfmeta[dfmeta.MI>5].staxids))),len(set(dfmetai.staxids))/len(set(dfmi.staxids)))
#     print(dfmeta.evalue.min()<.1, len(set(dfmeta.staxids)),len(set(dfmeta[dfmeta.MI>0].staxids)))
    if dfm.shape[0]>0:
        if dfmeta.evalue.min()<.1  and (dfhgt.shape[0]==0 or len(set(dfmeta[dfmeta.MI>1].staxids))>5 or (len(set(dfmetai.staxids))/len(set(dfmi.staxids))>.40 and len(set(dfhgt[dfhgt.AI>5].staxids))<2)):
            ret= "Meta"
        elif dfhgt.evalue.min()<.1 and len(set(dfhgt.staxids))>10 and (dfmeta.shape[0]==0 or len(set(dfhgt[dfhgt.AI>5].staxids))>10 or (len(set(dfhgti.staxids))/len(set(dfmi.staxids))>.90 and len(set(dfmeta[dfmeta.MI>5].staxids))<2)) :
            ret= "HGT"
        else:
            ret="Indeterminate"
    else:
        ret="Indeterminate"
    f=open("secondary_blast_annot.txt","a")
    n0=n.split(".tsv")[0]
    f.write(f"{n0}--{ret}")
    f.write("\n")
    f.close()
    return ret

In [20]:
#run chimera annotations and load output as dictionary
# ls=[x for x in os.listdir("secondary_chimera_blast_results") if "ipynb" not in x]
# with mp.Pool(30) as p:
#     inter_cs = p.map(check_annot, ls)
results=open("/n/holyscratch01/extavour_lab/Lab/rkapoor/secondary_blast_annot.txt","r").readlines()
rm={x.split("--")[0].replace(".txt",""):x.split("--")[1].strip() for x in results}

In [21]:
def add_interval_map(x):
    rdict=SeqIO.to_dict(SeqIO.parse(f"{directory}/{x}/sub_secondary_chimera.fasta", "fasta")).keys()
        
    interval_map[x]=rdict
    print(x)
    return rdict

In [9]:
directory = "/n/holyscratch01/extavour_lab/Lab/rkapoor/hmmer_phylo_data"


In [23]:

# make a dictionary between primary chimera and secondary chimera intervals
directory = "/n/holyscratch01/extavour_lab/Lab/rkapoor/hmmer_phylo_data"
interval_map={}
for x in os.listdir(directory)[::-1]:
    if ".ipynb" not in x:
        
        rdict=SeqIO.to_dict(SeqIO.parse(f"{directory}/{x}/sub_secondary_chimera.fasta", "fasta")).keys()
        
        interval_map[x]=rdict
        
        

    

In [12]:
#make a dictionary storing the percentage of secondary chimeras with each annotation type
pmeta={} 
phgt={}
pint={}
for x in interval_map:
    
    xmp=interval_map[x]

    pmeta[x]=len([rm[i] for i in xmp if i in rm.keys() and rm[i]=="Meta" ])/len([i for i in xmp if i in rm.keys()])
    phgt[x]=len([rm[i] for i in xmp if i in rm.keys() and rm[i]=="HGT"])/len([i for i in xmp if i in rm.keys()])
    pint[x]=len([rm[i] for i in xmp  if i in rm.keys() and rm[i]=="Indeterminate"])/len([i for i in xmp if i in rm.keys()])


In [13]:
#hgt and meta intervals based off percent of secondary chimeras w/ annotation type
hgt_true=[x for x in pmeta.keys() if "HGT" in x and pmeta[x]<=(1/3) and phgt[x]>pmeta[x]]
meta_true=[x for x in pmeta.keys() if "Meta" in x and phgt[x]<=(1/3) and pmeta[x]>phgt[x] ]

In [73]:
len(chimeras)

378

In [14]:
chimeras=set([x.split(";")[1] for x in meta_true])&set([x.split(";")[1] for x in hgt_true])
hgt_true2=[x for x in hgt_true if x.split(";")[1] in chimeras]
meta_true2=[x for x in meta_true if x.split(";")[1] in chimeras]

In [116]:
chimera_intervals=set(hgt_true2)|set(meta_true2)

In [78]:
#make a dictionary between queries and confirmed secondary chimeras
secondary_chimeras={}
for gene in set([x.split(";")[1] for x in chimera_intervals]):
    inters=[x for x in chimera_intervals if gene in x]
    secondary_chimera_map={}
    for i in inters:
        annot=i.split(";")[-1].split("_")[0]
        secondary=[]
        for sec in interval_map[i]:
            if sec in rm.keys() and (rm[sec]==annot or rm[sec]=="Indeterminate"): 
                secondary.append("_".join(sec.split("_")[0:-2]))
        secondary_chimera_map[i]=secondary
    final_secondary = set.intersection(*[set(x) for x in secondary_chimera_map.values()])
    secondary_chimeras[gene]=final_secondary

In [117]:
#write all chimera intervals to an output file
import pickle


file_path = 'chimera_intervals.pickle'
with open(file_path, 'wb') as file:
    pickle.dump(chimera_intervals, file)


In [79]:
file_path = 'secondary_chimera_dict.pickle'
with open(file_path, 'wb') as file:
    pickle.dump(secondary_chimeras, file)

# Make a df with all primary and secondary chimeras and taxonomic info
one per species/taxid 

In [4]:
import pickle
file_path = 'secondary_chimera_dict.pickle'
with open(file_path, 'rb') as file:
    secondary_chimera_dict=pickle.load(file)

In [5]:

file_path = 'chimera_intervals.pickle'
with open(file_path, 'rb') as file:
    chimera_intervals=pickle.load(file)

In [11]:
def make_taxdf(x):
    chdf=pd.read_csv(f"{directory}/{x}/secondary_chimera.tsv",sep="\t")
    chdf=chdf[chdf.target_name.isin(secondary_chimera_dict[x.split(";")[1]])]
    df=pd.read_csv(f"{directory}/{x}/hmmer_result.tsv",sep="\t",names=["target_name", "target_accession","tlen", "query name","accession","qlen", "E-value", "score1", "bias1",   "#", "of", "c-Evalue",  "i-Evalue",  "score",  "bias",  "hmmfrom",  "hmmto",    "alifrom",  "alito", "envfrom",  "envto", "acc", "description of target", "species"])
    df=df[df.species!="synthetic construct"]

    # Sort the dataframe by 'E-value' in ascending order
    sorted_df = df.sort_values('i-Evalue')

    # Group the dataframe by 'species' and select the row with the lowest 'E-value'
    new_df = sorted_df.groupby('species').first().reset_index().sort_values("i-Evalue")
    new_df=new_df.sort_values("i-Evalue")
    new_df=new_df[~new_df.species.isin(list(chdf.species))].iloc[0:1000-chdf.shape[0],:]
    new_df.loc[:,"chimera"]=False
    chdf.loc[:,"chimera"]=True
    df_tax=pd.concat([chdf,new_df])
    df_tax.to_csv(f"{directory}/{x}/phylo_tax.tsv",sep="\t")
    return 

In [12]:
with mp.Pool(63) as p:
    inter_cs = p.map(make_taxdf, chimera_intervals)

In [13]:
import matplotlib.pyplot as plt
from Bio import SeqIO
import os
import sys
import os
import subprocess
from Bio import SearchIO
sys.path.insert(0, '/n/home11/rkapoor')
import tax_pkg
from tax_pkg import taxid
from tax_pkg import accession2taxid
import pandas as pd
import multiprocessing as mp
import numpy as np

In [14]:
def get_taxinfo(x):
    try:
        ti=accession2taxid.get_taxid(x)
        l=taxid.get_lineage(ti,{})
        sk=taxid.get_superkingdom(ti,l)
        k=taxid.get_kingdom(ti,l)
        p=taxid.get_phylum(ti,l)
        o=taxid.get_order(ti,l)
        s=taxid.get_species(ti,l)
        c=taxid.get_colors(ti,l)
        
    except:
        print(x)
        ti="nan"
        return ("nan","nan","nan","nan","nan","nan","nan")

    return (ti,sk,k,p,o,s,c)

In [15]:
def fill_taxonomic_info(x):
    df=pd.read_csv(f"{directory}/{x}/phylo_tax.tsv",sep="\t")
    with mp.Pool(63) as p:
        hgts = p.map(get_taxinfo, df.target_name)
    df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
    chi=df[df.chimera].index
    for index, row in df.iterrows():
        df.loc[index,"rE-value"]=np.min
        df.loc[index, "rE-value"]=row["i-Evalue"]
        
    df2=df.sort_values("rE-value").groupby('taxid').first().reset_index().sort_values("i-Evalue")
    df2=df2.drop("rE-value",axis=1)
    df2.to_csv(f"{directory}/{x}/phylo_tax.tsv",sep="\t")

    

In [None]:
for x in chimera_intervals:
    fill_taxonomic_info(x)

  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF8701893.1
prf||1107279B


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


CRY93850.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


BDA76956.1
QSV51899.1
AHZ97961.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|S38811|
WP_106170182.1
AFR53956.1
QXV47503.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


prf||2121219A


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WP_243864564.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WP_199756008.1
MBE6761797.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdo

HBW63721.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


SJL87863.1
SJL86761.1
SJL87854.1SJL87857.1

SJL87851.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6799352.1
SJL87481.1
SJL87560.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|B84500|
MCF8701928.1
prf||1107279B


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|S22293|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


QIR82180.1
ALC76156.1
CRY97241.1
QGF19362.1
ALC76159.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


DAK69929.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6698785.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBH8560799.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|G89007|
WP_221565183.1
SBS70265.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


DAK69929.1
WP_244896703.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


XP_050048162.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE7013699.1
WP_170215940.1
pir|S54987|
pir|S54991|
pir|S54990|
pir|S54988|
pir|S54995|
pir|B56679|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBL3675340.1
NNH78043.1
pir|S54990|
pir|S54995|
pir|S54997|
pir|S54987|
pir|S54988|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBD5114801.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6801969.1
CAG8998987.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WP_127023951.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WGH58587.1
prf||1007200C


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBY8917251.1
UVF58862.1
QEA06020.1
MBE6784606.1
WP_127022801.1
BBB44434.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6801969.1
MCJ1441678.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


KAH3663165.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


SJL87854.1SJL87851.1

SJL87857.1
SJL86761.1
SJL87863.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBP0949421.1
WP_081897168.1
WP_244862022.1
NER60841.1
HBU00609.1
MCE7970155.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


NLL62962.1
WP_143558977.1
MCU0597179.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBY8917251.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


XP_011196011.1
MBQ3189829.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBW0107275.1
pir|T25782|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


QVT77350.1
QVT77310.1
QVT77330.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF8701924.1
prf||1107279B
MCH8487189.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE7045954.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|B56679|
pir|T19605|
WGH58587.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WP_261916743.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


SJL87863.1
SJL86761.1
SJL87851.1
SJL87857.1
SJL87854.1
7OBI_A


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|B56679|
MBH8560799.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


UVF58862.1
WP_106169939.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6609057.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WGH58587.1
MCU0580733.1
WP_199756008.1
MBT1065318.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


prf||1709357B


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|A39652|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1440324.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|S54988|
pir|S54992|
pir|S54987|
pir|S54990|
pir|T19605|
pir|B56679|
UVF58862.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF8701902.1
prf||1107279B
MCU0597179.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6738399.1
MBE4735250.1
MBM7026257.1
WAC07380.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|B84500|
MCF8701902.1
prf||1107279B


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE7066255.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|B56679|
pir|S42831|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6559409.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|S54987|pir|S54995|

pir|S54990|
pir|S54988|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


ADD95833.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBH8563347.1
VDS02580.1
WP_244967808.1
WP_244590647.1
MBH8560799.1
pir|T26836|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|T34263|
pir|S54988|
pir|S54990|
pir|S54987|
XP_050048162.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


NCJ05912.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


AGU10417.1
MCF8701934.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdo

CRY93850.1
HCS02006.1
DAO96067.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WP_232475500.1
MBK5072365.1
pir|AD0531|
MCC3294819.1
MBE7051595.1
MCU0595913.1
MBA2117270.1
pir|T31583|
MBE6564449.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBH8563215.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MPR12036.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6559409.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|I48283|
pir|S37765|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1442698.1
pir|I48171|
prf||1010303Q
SJL86761.1
SJL87854.1
SJL87863.1
SJL87857.1
SJL87851.1
pir|S54988|
pir|S54990|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdo

WP_274043557.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|I53106|
ATQ62672.1ATQ62666.1ATQ62669.1ATQ62673.1



ATQ62676.1
ATQ62668.1
pir|S54988|
pir|S54995|
pir|S54990|
pir|S54987|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


BDA76956.1
ADZ30891.1
QMU24032.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WGH58587.1
WAC07964.1
ABZ06576.1
1NA0_A


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


SJL86761.1
SJL87854.1SJL87863.1

SJL87857.1
SJL87851.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


CAM98678.1
CAM98677.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


UVF58862.1
6OB5_C
WGH58587.1
NEU80153.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


NHO33720.1
AFR53956.1
MPR10488.1
MBH8566095.1
RUS92242.1
WP_212925619.1
WP_244721351.1
WP_244960958.1
HCB04000.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBV2113383.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF8701907.1
prf||1107279B


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


CRY94011.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCU0595913.1
MBT2988222.1
MBH8560799.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


ABZ10029.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCH8487189.1
WAC06688.1
MBE6918786.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


HBM02918.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF8701927.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1444475.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF8701928.1
pir|PC1232|
KAH3663304.1
prf||1107279B
pir|S18210|
MBE6801969.1
CAG8998987.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBH8560799.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1447719.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1443143.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|B88633|
MBE6537720.1
MCJ1443878.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1440888.1
ABZ10029.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdo

6OB5_C
WP_106169939.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|B56679|
pir|PC1123|
pir|T25005|
MCJ1446699.1
pir|S54991|pir|S54995|

pir|S54988|
pir|S54992|
pir|S54987|
pir|S54990|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


ABZ06576.1
pir|S54987|
pir|S54990|pir|S54988|

pir|S54995|
QJC19409.1
UID85567.1
UID85568.1
AAO43224.1
BAJ41852.1
AFN89782.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


AFR53956.1
QXV47503.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE7011603.1
WP_244966314.1
MBE6913014.1
MBI6120453.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|T26836|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1441902.1
MBB2199797.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


AGU10929.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


AAM82289.1
ADQ57456.1
pir|T26925|
AEZ51500.1
AAT79489.1
NLL62676.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1443143.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBH8560799.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBD9061442.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF8701902.1
pir|PC1232|
prf||1107279B


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


HHY72689.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1441651.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBV2113395.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBV2113489.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


SJL87857.1
SJL86761.1
SJL87863.1
SJL87854.1
SJL87851.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6630047.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WGH58587.1
NEU80153.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF8701899.1
pir|E84492|
pir|S00954|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBH8560799.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WP_199755952.1
RUT03355.1
ABZ80160.1
WAC06707.1
MCU0594663.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBV2113391.1
MCF8701926.1
MBB2199797.1
WP_244899669.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


UID85567.1BAJ41852.1

QJC19409.1
AFN89782.1
AAO43224.1
UID85568.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


HAQ64148.1
MBY0795068.1
WP_219888941.1
SBS75743.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


UVF58862.1
QEA06020.1
UVF58865.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBH8560799.1
CAG8998217.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF6774896.1
ABZ79920.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


2K16_A
MBV4412147.1
MCJ1447719.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6736852.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


YP_010298132.1
YP_010298518.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBE6790735.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WP_244755929.1
MCR6691922.1
WP_219854952.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WP_143558977.1
HCA05370.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


UID85567.1
QJC19409.1AFN89782.1

BAJ41852.1
UID85568.1
AAO43224.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF7184019.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|T31583|
QVT77350.1
WP_143558977.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


UVF58862.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


UVF58865.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


HBH95733.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1449493.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


ABZ79920.1
pir|T25005|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


prf||2117157A
MCJ1440785.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WP_181246423.1
MBE6559409.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdo

XP_011196011.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


pir|T25005|
pir|B56679|
pir|C87861|


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MBS1473995.1
MBH8560799.1
KAH3678297.1
MBH8560799.1
QFR04620.1
HBZ11562.1
QFR04621.1
QFR04619.1
AGT20780.1
AHF53542.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


HCT16532.1
QDS02902.1
3CYW_A


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


UVF58862.1
UVF58865.1
pir|B56679|
pir|T31857|
WAC07380.1
MBE6817847.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCF8701898.1
pir|PC1232|
prf||1107279B
QJC19409.1
AFN89782.1
UID85567.1BAJ41852.1

AAO43224.1
UID85568.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WGH58587.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


WGH58587.1
NEU80153.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


MCJ1444902.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


ABZ79920.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts
  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


HBU00217.1
MBN7784340.1


  df.loc[:,["taxid","superkingdom","kingdom","phylum","order","sci_name","color"]]=hgts


# Run phylogenetics
Makes a fasta file for each chimera interval, then runs MUSCLE and IQ-Tree for tree inference


In [2]:
import pickle
file_path = 'chimera_intervals.pickle'
with open(file_path, 'rb') as file:
    chimera_intervals=pickle.load(file)


In [3]:
#append a fasta of sequence n1 to file n2
def get_fasta(n):
    n1=n[0]
    n2=n[1]
    try:
        subprocess.run(["sh",f"/n/holyscratch01/extavour_lab/Lab/rkapoor/query_nr_protein.sh",n1,n2])
    except:
        print(n1)
    return n
from Bio import SeqIO
#make a new fasta in output_file by using the HMMER coordinates in df 
def copy_fasta_with_substr(fasta_file, df, output_file):
    with open(output_file, "w") as out_handle:
        for seq_record in SeqIO.parse(fasta_file, "fasta"):
            seq_name = seq_record.id
            if seq_name in df["target_name"].values:
                sub_df = df[df["target_name"] == seq_name]
                for _, row in sub_df.iterrows():
                    sstart = int(row["envfrom"])
                    send = int(row["envto"])
                    subseq = seq_record.seq[sstart:send]
                    subseq_name = f"{seq_name}_{sstart}_{send}"
                    subseq_record = seq_record
                    subseq_record.id = subseq_name
                    subseq_record.description = ""
                    subseq_record.seq = subseq
                    SeqIO.write(subseq_record, out_handle, "fasta")

In [4]:
def write_fasta(n):
    df=pd.read_csv(f"{directory}/{n}/phylo_tax.tsv",sep="\t")
    dft=df[df.chimera==False]
    with mp.Pool(39) as p:
        inter_cs = p.map(get_fasta,  [(x,f"{directory}/{n}/phylo_tax.fasta") for x in dft.target_name])
    copy_fasta_with_substr(f"{directory}/{n}/phylo_tax.fasta",dft,f"{directory}/{n}/sub_phylo_tax.fasta")
    # Paths to the FASTA files
    fasta1_path = f'{directory}/{n}/sub_phylo_tax.fasta'
    fasta2_path = f'{directory}/{n}/sub_secondary_chimera.fasta'

    # Read the FASTA files
    fasta1_records = list(SeqIO.parse(fasta1_path, 'fasta'))
    fasta2_records = list(SeqIO.parse(fasta2_path, 'fasta'))

    # Merge the records
    merged_records = fasta1_records + fasta2_records

    # Path to save the merged FASTA file
    merged_fasta_path = f'{directory}/{n}/merged.fasta'

    # Write the merged records to a new FASTA file
    SeqIO.write(merged_records, merged_fasta_path, 'fasta')
    return

In [8]:
for n in chimera_intervals:
    if "merged.fasta" not in os.listdir(f"{directory}/{n}"):
        write_fasta(n)

In [4]:
import subprocess
import os

In [7]:
for n in chimera_intervals:
    if "MSA_hmm_output_final.fasta" not in os.listdir(f"{directory}/{n}"):
        subprocess.run(["sbatch", "align_iq_pipe.sh",n])

Submitted batch job 61796906
Submitted batch job 61796907
Submitted batch job 61796908
Submitted batch job 61796909
Submitted batch job 61796910
Submitted batch job 61796911
Submitted batch job 61796912
Submitted batch job 61796913
Submitted batch job 61796914
Submitted batch job 61796915
Submitted batch job 61796916
Submitted batch job 61796917
Submitted batch job 61796918
Submitted batch job 61796919
Submitted batch job 61796920
Submitted batch job 61796921
Submitted batch job 61796922
Submitted batch job 61796923
Submitted batch job 61796924
Submitted batch job 61796925
Submitted batch job 61796926
Submitted batch job 61796927
Submitted batch job 61796928
Submitted batch job 61796929
Submitted batch job 61796930
Submitted batch job 61796931
Submitted batch job 61796932
Submitted batch job 61796933
Submitted batch job 61796934
Submitted batch job 61796935
Submitted batch job 61796936
Submitted batch job 61796937
Submitted batch job 61796939
Submitted batch job 61796940
Submitted batc

In [80]:
!cp -r /n/holyscratch01/extavour_lab/Lab/rkapoor/root_annotate_upload_trees.ipynb pipeline_final/root_annotate_upload_trees.ipynb

In [10]:
!rm -r pipeline_final/hmmer_pipe/inter_diamond2_split