In [1]:
import pandas as pd
import numpy as np
import csv
from Bio import SeqIO
import os

In [2]:
#functions 

#fn for creating files from a list and file_name
def create_files(pep, file_name):
    """Write a collection of peptides to two text files and report it on stdout.

    Two files are produced from the same peptides:

    * ``<file_name>.fasta``   - FASTA-like: every peptide is preceded by a
      header line that consists of a bare ``>`` (no identifier).
    * ``<file_name>_2.fasta`` - plain list: one peptide per line, no headers.

    Parameters
    ----------
    pep : iterable of str
        Peptide sequences, written in iteration order. Any iterable is
        accepted (list, tuple, pandas Series, generator).
    file_name : str
        Output path without extension; the suffixes above are appended.

    Returns
    -------
    None
        A short confirmation line is printed instead of returning a value.
    """
    # Materialise once so that one-shot iterables can feed both files.
    peptides = list(pep)

    # `with` guarantees the handle is closed even if a write fails.
    with open(file_name + ".fasta", "w") as fasta_file:
        for peptide in peptides:
            fasta_file.write('>' + '\n' + peptide + '\n')

    # peptide list for parsing
    with open(file_name + '_2.fasta', "w") as list_file:
        for peptide in peptides:
            list_file.write(peptide + '\n')

    print("\t" + "files", file_name, ",", file_name + '_2', "created")

#fn for parsing and categorizing blast output
def parse_categorize(database_fasta, blast_out, fasta2): #requires inputs with file extension
    """Categorize tabular BLAST hits and write one annotated row per peptide.

    The BLAST file is expected to have the 14 tab-separated columns used by
    the searches in this notebook:
    ``qseqid sseqid pident qlen mismatch qstart qend sstart send evalue
    bitscore gaps qseq sseq``.

    For every hit a category is derived from the alignment length, identity,
    mismatch count and gap count; for each query only the hit with the lowest
    e-value is kept (the first one wins on ties). The result is written to
    ``'categorized_' + blast_out`` as a tab-separated table with a header row,
    one row per line of ``fasta2``. Peptides without any hit are reported as
    ``novelpep (no match to known protein found)``.

    NOTE(review): hits are keyed by the ``qseq`` column (the aligned query
    segment), not by ``qseqid``. A peptide from ``fasta2`` therefore only picks
    up a hit whose ``qseq`` equals the whole peptide - confirm this is intended.

    Parameters
    ----------
    database_fasta : str
        FASTA file of the BLAST reference database; record ids must equal the
        subject ids reported in ``blast_out``.
    blast_out : str
        Tabular BLAST output (see column layout above).
    fasta2 : str
        Text file with one peptide per line (first tab-separated field is used).

    Returns
    -------
    None
        A confirmation line is printed once the output has been written.

    Raises
    ------
    KeyError
        If a subject id in ``blast_out`` is not a record id of
        ``database_fasta``.
    """
    # Reference database: record id -> sequence (first record wins on duplicates).
    seqdb = {}
    for record in SeqIO.parse(database_fasta, "fasta"):
        if record.id not in seqdb:
            seqdb[record.id] = str(record.seq)

    blastout = {}  # query -> output columns of its best hit
    hits_dic = {}  # query -> lowest e-value seen so far

    with open(blast_out, "r") as blast_handle:
        for line in blast_handle:
            if not line.strip():
                continue  # tolerate blank lines instead of failing on row[-2]
            row = line.strip().split("\t")
            qid = row[-2]
            sid = row[1]
            if sid not in seqdb:
                # Same exception type as a plain dictionary lookup, but with
                # enough context to fix the database / accession mismatch.
                raise KeyError(
                    "subject id %r from %s is not a record id in %s; make sure the "
                    "FASTA ids are the same as the accessions reported by BLAST"
                    % (sid, blast_out, database_fasta)
                )
            sseq = seqdb[sid]
            ident = row[2]
            peplen = int(row[3])
            mismatch = row[4]
            alignlen = int(row[6]) - int(row[5]) + 1
            sstart = int(row[7])
            send = int(row[8])
            gap = row[11]
            evalue = float(row[-5])
            alignseq = row[-1]
            category = "NA"
            single_sub_pos = "NA"

            # Subject context around the alignment start (sstart is 1-based).
            if sstart > 3:
                Nterm_seq = sseq[sstart-4:sstart+2] #check up 3 amino acid before N-term of this peptide
            else:
                Nterm_seq = sseq[:sstart]

            # Subject context around the alignment end.
            if len(sseq) - send < 3:
                Cterm_seq = sseq[send-1:]
            else:
                Cterm_seq = sseq[send-3:send+3]

            if alignlen == peplen:
                if float(ident) == 100:
                    category = "match to known protein"
                elif int(gap) == 0 and int(mismatch) == 1:
                    category = "map to known protein with 1 aa mismatch"
                    # Report the (last) 1-based position where query and subject differ.
                    for i in range(peplen):
                        if qid[i] != alignseq[i]:
                            single_sub_pos = str(i + 1)
                elif int(gap) == 1 and int(mismatch) == 0:
                    category = "map to known protein with 1 aa insertion"
                else:
                    category = "novelpep (map to known protein with more than 2 mismatched aa)"
            elif peplen - alignlen == 1 and float(ident) == 100:
                category = "map to known protein with 1 aa deletion"
            else:
                category = "novelpep (map to known protein with more than 2 mismatched aa)"

            # Keep only the best (lowest e-value) hit per query; first hit wins ties.
            if qid not in hits_dic or evalue < hits_dic[qid]:
                hits_dic[qid] = evalue
                blastout[qid] = [category, sid, ident, peplen, single_sub_pos, Nterm_seq,
                                 alignseq, Cterm_seq, alignlen, mismatch, gap]

    header = ["Query","blastp_category","blastp_match","identity","peplen","sub_pos","Nterm-seq(3aa)","aligned_seq","Cterm-seq(3aa)","alignlen","mismatch","gap"]
    no_hit = ["novelpep (no match to known protein found)"] + ["NA"] * 10

    with open(fasta2, "r") as peptide_handle, open('categorized_' + blast_out, "w") as output:
        output.write("\t".join(header) + "\n")
        for line in peptide_handle:
            row = line.strip().split("\t")
            results = blastout.get(row[0], no_hit)
            output.write("\t".join(map(str, row + results)) + "\n")

    # Every subject id was validated against seqdb while parsing, so reaching
    # this point means the database and the BLAST output agree.
    print("\t" + 'all is gud for', blast_out)
    
#fn that gets matches from peptide_list to fasta
def find_matches(peptide, db):
    """Split peptides into those found inside database sequences and the rest.

    Parameters
    ----------
    peptide : iterable of str
        Peptide strings to look up.
    db : dict
        Maps an entry name to its sequence string.

    Returns
    -------
    tuple
        ``(matched, unmatched)`` where ``matched`` maps each peptide that is a
        substring of at least one sequence to the list of entry names
        containing it, and ``unmatched`` lists the peptides found nowhere.
        Both keep the order in which peptides were first seen.
    """
    # peptide -> names of every database entry whose sequence contains it
    hits_by_peptide = {
        query: [name for name, sequence in db.items() if query in sequence]
        for query in peptide
    }

    matched = {query: names for query, names in hits_by_peptide.items() if names != []}
    unmatched = [query for query, names in hits_by_peptide.items() if names == []]

    # to make sure the split accounts for every peptide
    if len(matched) + len(unmatched) != len(hits_by_peptide):
        return print("\t" + "error in separating 6FT matches")
    return (matched, unmatched)

# main search function

In [30]:
#make database from known HLA peptides fasta file
#!makeblastdb -in db/APD_Hs_all.fasta -dbtype prot -out db/APD_Hs_all

#make blast database from fasta
#!makeblastdb -in db/human_canonical.fasta  -dbtype prot -parse_seqids -out db/human_canonical

#Make sixframe translated database from ref genome
#!gedi -e SixFrame 

In [3]:
def immuno_search(input_file_path, MHC_class, gibbs_cluster, output_file_path, file_name):   #path of files of peptides and gibbs classification, MHC_class as an integer, Gibbs clusters to select as an array, output_file_path with / at the end
    
    # peptide length 8-11, removing gibbs junk and DB matched
    all_data = pd.ExcelFile(input_file_path)
    data = pd.read_excel(all_data, file_name)
    gibbs = pd.read_excel(all_data, 'gibbs_clustering')
    #data = pd.read_csv(path_peptides) #PSM table from PEAKS
    #gibbs = pd.read_csv(path_gibbs) #Gibbs clustering CSV
    print("filtering peptides found by DB search")
    data = data[data["Found By"] != 'DB Search']
    if (len(gibbs_cluster) != 0):
        print("Selected gibbs clusters based on input")
        gibbs = gibbs[gibbs["Gn"].isin(gibbs_cluster)]
    else:
        print("Selecting all gibbs clusters")
        gibbs = gibbs
    
    if (MHC_class == '1'):
        print("selecting peptides with length between 8 and 11 AA")
        gibbs = gibbs[gibbs["Sequence"].str.len().between(8,11)]
    elif (MHC_class == '2'):
        print("selecting peptides with length between 12 and 17 AA")
        gibbs = gibbs[gibbs["Sequence"].str.len().between(12,17)]
    elif (MHC_class == "E"):
        print("selecting peptides with length between 8 and 15 AA")
        gibbs = gibbs[gibbs["Sequence"].str.len().between(8,15)]
    else:
        print("!!! not filtered by length")

    data = data[data["Peptide"].isin(gibbs["Sequence"])]
    list = pd.unique(data["Peptide"])
    pep = list.tolist()
    blast_p = pep
    print("generating lists and files for further analysis")
    create_files(pep,'peptides')


    #blast against canonical HLA peptides
    if(len(pep) != 0):

        if(len(pep) == 1): #coz I'm a grammar Nazi :)
            print(str(len(pep)) + " peptide being searched for known HLAs")
        else:
            print( str(len(pep)) + " peptides being searched for known HLAs")
        
        !blastp -task blastp-short -query peptides.fasta -db db/APD_Hs_all -out HLA_blast_out -evalue 10.0 -outfmt "6 qseqid saccver pident qlen mismatch qstart qend sstart send evalue bitscore gaps qseq sseq"

        #parsing and catergorizing HLA_blast output
        print("\t"+ "reading output")
        parse_categorize('db/APD_Hs_all.fasta', 'HLA_blast_out', 'peptides_2.fasta')

        #known HLA
        print("\t"+ "generating lists and files for further analysis")
        output_HLA = pd.read_table('categorized_HLA_blast_out')
        known = output_HLA[output_HLA["blastp_category"] == 'match to known protein']
        list = known["Query"] 
        pep = list.to_list()

        if (len(pep) != 0):
            print("\t"+ "known HLA found")
            ofile = open("known_HLA.fasta", "w")

            for i in range(len(pep)):
                ofile.write('>' + '\n' + pep[i] + '\n')
            ofile.close()
        else:
            print("\t"+ "no known HLA found")

        known = output_HLA[output_HLA["blastp_category"] != 'match to known protein']
        list = known["Query"] 
        pep = list.to_list()
    else:
        print("No peptides to search for known HLAs")

    
    if (len(pep) != 0): #to prevent blast with empty query
        
        create_files(pep, 'to_blastp')

        if (len(pep) == 1):
            print(str(len(pep)) + " peptide being searched for human canonical proteins")
        else:
            print(str(len(pep)) + " peptides being searched for human canonical proteins")
        
        #blast all proteins against human canonical proteins
        !blastp -task blastp-short -query to_blastp.fasta -db db/human_canonical -out blastp_out_human_canonical -evalue 10.0 -outfmt "6 qseqid sseqid pident qlen mismatch qstart qend sstart send evalue bitscore gaps qseq sseq"
        print("\t"+ "reading output")
        parse_categorize('db/human_canonical.fasta', 'blastp_out_human_canonical', 'to_blastp_2.fasta')
        output  = pd.read_table('categorized_blastp_out_human_canonical')
        
        #preparing fasta files of proteins to search 6FT database
        to_6ft = output[(output["blastp_category"] != 'map to known protein with 1 aa mismatch') & (output["blastp_category"] != 'match to known protein')]
        list = to_6ft["Query"] 
        pep = list.to_list()

        create_files(pep, 'to_6ft')
       
        #preparing fasta files of proteins with 1 AA mismatch
        mismatched = output[output["blastp_category"] == 'map to known protein with 1 aa mismatch']
        list = mismatched["Query"] 
        pep = list.to_list()
        SAAV = pep
    else:
        print("blastp input empty")

    print("\t"+ "generating lists and files for further analysis")
    
    
    if (len(pep) != 0): #to prevent blast with empty query

        create_files(pep,'SAAV')
        
        if (len(pep) == 1):  #more grammar Nazi
            print(str(len(pep)) + " peptide being searched for Single Amino Acid Variants")
        else:
            print(str(len(pep)) + " peptides being searched for Single Amino Acid Variants")
            
        #blastp against db_SAP (single amino acids polymorphisms)
        !blastp -task blastp-short -query SAAV.fasta -db db/sap_db -out blast_out_SAAV -evalue 10.0 -outfmt "6 qseqid saccver pident qlen mismatch qstart qend sstart send evalue bitscore gaps qseq sseq"
        
        print("\t"+ "reading output")
        parse_categorize('db/sap_db.fa', 'blast_out_SAAV', 'SAAV_2.fasta')


        output = pd.read_table("categorized_blast_out_SAAV")
        not_SNP = output[output["blastp_category"] != 'match to known protein']
        pep = not_SNP["Query"]
        pep = pep.to_list()

        ofile = open("not_SAAV.fasta", "w")

        for i in range(len(pep)):
            ofile.write('>' + '\n' + pep[i] + '\n')
        ofile.close()

    else:
        print("no potential SAAVs, proceeding to 6FT search")

    #searches peptides in 6FT db
    
    pep = to_6ft["Query"]

    if(len(pep) != 0):

        if(len(pep) == 1):
            
            print(str(len(pep)) + " peptide being searched in the six frame translated human genome")
            
            !seqkit grep --by-seq --ignore-case --threads 12 --seq-type protein --pattern-file to_6ft_2.fasta db/human_6FT_m.fasta > 6ft_out
            
            input1= SeqIO.parse('6ft_out',"fasta") # 6FT results to dict
            seqdb={}
            for record in input1:
                seq=str(record.seq)
                if record.description not in seqdb:
                    seqdb[record.description]=seq
            
            if (len(seqdb) == 0):
                print("no match to 6FT")
                unmatched = pep
                matched = []
            else:
                matched = pep
                unmatched = []

        else:
            print(str(len(pep)) + " peptides being searched in the six frame translated human genome")

            !seqkit grep --by-seq --ignore-case --threads 12 --seq-type protein --pattern-file to_6ft_2.fasta db/human_6FT_m.fasta > 6ft_out

            print("\t"+ "writing 6FT results to dictionary")

            input1= SeqIO.parse('6ft_out',"fasta") # 6FT results to dict
            seqdb={}
            for record in input1:
                seq=str(record.seq)
                if record.description not in seqdb:
                    seqdb[record.description]=seq
                    
            print("\t"+ "number of matches from 6FT:", len(seqdb))

            input  = open('to_6ft_2.fasta', 'r') #list of peptides, has to be in a list
            pep = []
            for line in input:
                    pep = pep + line.strip().split("\t")

            print("\t"+ "matching peptides to 6FT")

            matched, unmatched = find_matches(pep, seqdb)
            
            if len(matched) == 0:
                print("\t"+ "no peptides matched to 6ft")
            else:
                print("\t"+ "writing " + str(len(matched)) + " peptide matches to 6FT")
                with open('matches_to_6ft.csv','w') as f:
                    w = csv.writer(f)
                    w.writerows(matched.items())

            print("\t"+ "writing " + str(len(unmatched)) + " unmatched peptides")
            create_files(unmatched, 'no_match_6ft')

    else:
        print("no peptides to search in 6FT")
    
    #saving output files
    SAAV_out = pd.read_table('categorized_blast_out_SAAV', sep = "\t")
    Sixframe_out = pd.read_csv('matches_to_6ft.csv', names = ["Peptides", "Matches"])
    Sixframe_notmatched = pd.read_table('no_match_6ft_2.fasta', sep = "\t", names = ["Peptides"])

    #writing data to excel file
    if (len(mismatched) == 0 & len(matched) == 0 & len(unmatched) == 0):
        print("No significant peptides found")
        
    else:
        
        #creates output directory
        try:
            os.makedirs(output_file_path)
            os.chdir(output_file_path)
            print("Search directory created at " + os.getcwd())
        except OSError as error:
            os.chdir(output_file_path)
            print("Directory already exists at " + os.getcwd())
            
        print("Creating excel file with results")
        with pd.ExcelWriter(file_name + '_immuno_search_out.xlsx', engine='openpyxl') as writer:
            # Write each DataFrame to a different sheet
            if (len(mismatched) != 0):
                SAAV_out.to_excel(writer, sheet_name='Single_AA_variants', index=False)
            if (len(matched) != 0):
                Sixframe_out.to_excel(writer, sheet_name='Matches_to_six_frame', index=False)
            if (len(unmatched) != 0):
                Sixframe_notmatched.to_excel(writer, sheet_name='Six_frame_non_matched', index=False)

        os.chdir('D:/Period_6/')
        return print("search and classification done, file saved at ", output_file_path + file_name + '_immuno_search_out.xlsx')
        

In [4]:
# Parameters for the '1148_E' run
file_name = '1148_E'
MHC_class = 'E'
gibbs_cluster = [0,1]
input_file_path = "1148_MHCI/1148_E/1148_E.xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 8 and 15 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
145 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
135 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
7 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
128 peptides being searched in the six frame translated human genome
	writing 6FT results to dictionary


[INFO][0m 128 patterns loaded from file

	number of matches from 6FT: 280
	matching peptides to 6FT
	writing 4 peptide matches to 6FT





	writing 124 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Directory already exists at D:\Period_6\out\1148_E
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1148_E/1148_E_immuno_search_out.xlsx


In [25]:
# Parameters for the '1148_MHCI' run
file_name = '1148_MHCI'
MHC_class = '1'
gibbs_cluster = [0,1,2,3,4]
input_file_path = "1148_MHCI/1148_MHCI/1148_MHCI.xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 8 and 11 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
412 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
394 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
27 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
26 peptides being searched in the six frame translated human genome


[INFO][0m 366 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 342





	matching peptides to 6FT
	writing 32 peptide matches to 6FT
	writing 334 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1148_MHCI
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1148_MHCI/1148_MHCI_immuno_search_out.xlsx


In [27]:
# Parameters for the '1148_MHCII' run
file_name = '1148_MHCII'
MHC_class = '2'
gibbs_cluster = [1]
input_file_path = "1148_MHCI/1148_MHCII/1148_MHCII.xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 12 and 17 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
116 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
108 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
11 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
97 peptides being searched in the six frame translated human genome


[INFO][0m 97 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 0
	matching peptides to 6FT
	no peptides matched to 6ft





	writing 97 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1148_MHCII
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1148_MHCII/1148_MHCII_immuno_search_out.xlsx


In [5]:
# Parameters for the '1150_E' run
file_name = '1150_E'
MHC_class = 'E'
gibbs_cluster = [0,1]
input_file_path = "1148_MHCI/1150_E/1150_E.xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 8 and 15 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
165 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
155 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
6 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
149 peptides being searched in the six frame translated human genome


[INFO][0m 149 patterns loaded from file

	writing 6FT results to dictionary





	number of matches from 6FT: 42
	matching peptides to 6FT
	writing 7 peptide matches to 6FT
	writing 142 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Directory already exists at D:\Period_6\out\1150_E
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1150_E/1150_E_immuno_search_out.xlsx


In [29]:
# Parameters for the '1150_MHCI' run (empty cluster list selects every Gibbs cluster)
file_name = '1150_MHCI'
MHC_class = '1'
gibbs_cluster = []
input_file_path = "1148_MHCI/1150_MHCI/1150_MHCI.xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 8 and 11 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
338 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
330 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
28 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
302 peptides being searched in the six frame translated human genome


[INFO][0m 302 patterns loaded from file

	writing 6FT results to dictionary





	number of matches from 6FT: 707
	matching peptides to 6FT
	writing 42 peptide matches to 6FT
	writing 260 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1150_MHCI
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1150_MHCI/1150_MHCI_immuno_search_out.xlsx


In [30]:
# Parameters for the '1150_MHCII' run
file_name = '1150_MHCII'
MHC_class = '2'
gibbs_cluster = [0]
input_file_path = "1148_MHCI/1150_MHCII/1150_MHCII.xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 12 and 17 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
88 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
81 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
9 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
72 peptides being searched in the six frame translated human genome


[INFO][0m 72 patterns loaded from file


	writing 6FT results to dictionary
	number of matches from 6FT: 0
	matching peptides to 6FT
	no peptides matched to 6ft
	writing 72 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1150_MHCII
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1150_MHCII/1150_MHCII_immuno_search_out.xlsx


In [6]:
# Parameters for the '1153_E' run
file_name = '1153_E'
MHC_class = 'E'
gibbs_cluster = [1,2,3,4]
input_file_path = "1148_MHCI/1153_E/1153_E.xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 8 and 15 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
196 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
191 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
11 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
180 peptides being searched in the six frame translated human genome


[INFO][0m 180 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 390
	matching peptides to 6FT
	writing 10 peptide matches to 6FT
	writing 170 unmatched peptides





	files no_match_6ft , no_match_6ft_2 created
Directory already exists at D:\Period_6\out\1153_E
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1153_E/1153_E_immuno_search_out.xlsx


In [33]:
# Parameters for the '1153_MHCII' run
file_name = '1153_MHCII'
MHC_class = '2'
gibbs_cluster = [1]
input_file_path = "1148_MHCI/1153_MHCII/1153_MHCII.xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 12 and 17 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
59 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
55 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
6 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
49 peptides being searched in the six frame translated human genome


[INFO][0m 49 patterns loaded from file


	writing 6FT results to dictionary
	number of matches from 6FT: 0
	matching peptides to 6FT
	no peptides matched to 6ft
	writing 49 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1153_MHCII
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1153_MHCII/1153_MHCII_immuno_search_out.xlsx


In [7]:
# Parameters for the '1130_E1' run
file_name = '1130_E1'
MHC_class = 'E'
gibbs_cluster = [0,2,3]
input_file_path = "im folder/1130_E1/" + file_name + ".xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 8 and 15 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
21 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	no known HLA found
	files to_blastp , to_blastp_2 created
21 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
1 peptide being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
20 peptides being searched in the six frame translated human genome


[INFO][0m 20 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 10





	matching peptides to 6FT
	writing 1 peptide matches to 6FT
	writing 19 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Directory already exists at D:\Period_6\out\1130_E1
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1130_E1/1130_E1_immuno_search_out.xlsx


In [8]:
# Parameters for the '1130_E2' run
file_name = '1130_E2'
MHC_class = 'E'
gibbs_cluster = [0,1,3]
input_file_path = "im folder/1130_E2/" + file_name + ".xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 8 and 15 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
13 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	no known HLA found
	files to_blastp , to_blastp_2 created
13 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
1 peptide being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
12 peptides being searched in the six frame translated human genome


[INFO][0m 12 patterns loaded from file


	writing 6FT results to dictionary
	number of matches from 6FT: 25
	matching peptides to 6FT
	writing 3 peptide matches to 6FT
	writing 9 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Directory already exists at D:\Period_6\out\1130_E2
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1130_E2/1130_E2_immuno_search_out.xlsx


In [6]:
# Parameters for the '1130_MHCI_1' run (empty cluster list selects every Gibbs cluster)
file_name = '1130_MHCI_1'
MHC_class = '1'
gibbs_cluster = []
input_file_path = "im folder/1130_MHCI_1/" + file_name + ".xlsx"
output_file_path = "D:/Period_6/out/" + file_name  + "/"    # full file path, has to have "/" at the end

immuno_search(
    input_file_path=input_file_path,
    MHC_class=MHC_class,
    gibbs_cluster=gibbs_cluster,
    output_file_path=output_file_path,
    file_name=file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 8 and 11 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
734 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
733 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
50 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
678 peptides being searched in the six frame translated human genome


[INFO][0m 678 patterns loaded from file


	writing 6FT results to dictionary
	number of matches from 6FT: 676
	matching peptides to 6FT
	writing 49 peptide matches to 6FT
	writing 629 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1130_MHCI_1
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1130_MHCI_1/1130_MHCI_1_immuno_search_out.xlsx


In [7]:
# Sample 1130_MHCI_2: MHC class '1'.
file_name = '1130_MHCI_2'
MHC_class = '1'
gibbs_cluster = []  # empty list: the run log reports all Gibbs clusters being selected

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1130_MHCI_2/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 8 and 11 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
746 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
742 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
54 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
684 peptides being searched in the six frame translated human genome
	writing 6FT results to dictionary


[INFO][0m 684 patterns loaded from file


	number of matches from 6FT: 791
	matching peptides to 6FT
	writing 44 peptide matches to 6FT
	writing 640 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1130_MHCI_2
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1130_MHCI_2/1130_MHCI_2_immuno_search_out.xlsx


In [8]:
# Sample 1130_MHCII_1: MHC class '2', restricted to Gibbs clusters 0 and 1.
file_name = '1130_MHCII_1'
MHC_class = '2'
gibbs_cluster = [0, 1]

# Input workbook and output directory (full path; must end with "/").
# Note: the input folder (1130_MHCII_A2) is named differently from the file.
input_file_path = f"im folder/1130_MHCII_A2/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 12 and 17 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
9 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	no known HLA found
	files to_blastp , to_blastp_2 created
9 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
1 peptide being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
8 peptides being searched in the six frame translated human genome


[INFO][0m 8 patterns loaded from file


	writing 6FT results to dictionary
	number of matches from 6FT: 0
	matching peptides to 6FT
	no peptides matched to 6ft
	writing 8 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1130_MHCII_1
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1130_MHCII_1/1130_MHCII_1_immuno_search_out.xlsx


In [9]:
# Sample 1130_MHCII_2: MHC class '2', restricted to Gibbs clusters 1 and 2.
file_name = '1130_MHCII_2'
MHC_class = '2'
gibbs_cluster = [1, 2]

# Input workbook and output directory (full path; must end with "/").
# Note: the input folder (1130_MHCII_B2) is named differently from the file.
input_file_path = f"im folder/1130_MHCII_B2/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 12 and 17 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
14 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	no known HLA found
	files to_blastp , to_blastp_2 created
14 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
3 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
11 peptides being searched in the six frame translated human genome


[INFO][0m 11 patterns loaded from file

	writing 6FT results to dictionary





	number of matches from 6FT: 0
	matching peptides to 6FT
	no peptides matched to 6ft
	writing 11 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1130_MHCII_2
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1130_MHCII_2/1130_MHCII_2_immuno_search_out.xlsx


In [4]:
# Sample 1134_E: MHC class 'E', restricted to Gibbs clusters 0, 1, 2 and 3.
file_name = '1134_E'
MHC_class = 'E'
gibbs_cluster = [0, 1, 2, 3]

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1134_E/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 8 and 15 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
267 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
256 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
15 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
241 peptides being searched in the six frame translated human genome


[INFO][0m 241 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 24
	matching peptides to 6FT
	writing 5 peptide matches to 6FT





	writing 236 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Directory already exists at D:\Period_6\out\1134_E
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1134_E/1134_E_immuno_search_out.xlsx


In [11]:
# Sample 1134_MHCI: MHC class '1'.
file_name = '1134_MHCI'
MHC_class = '1'
gibbs_cluster = []  # empty list: the run log reports all Gibbs clusters being selected

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1134_MHCI/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 8 and 11 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
366 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
357 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
25 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
332 peptides being searched in the six frame translated human genome


[INFO][0m 332 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 771
	matching peptides to 6FT
	writing 43 peptide matches to 6FT





	writing 289 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1134_MHCI
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1134_MHCI/1134_MHCI_immuno_search_out.xlsx


In [12]:
# Sample 1134_MHCII: MHC class '2'.
file_name = '1134_MHCII'
MHC_class = '2'
gibbs_cluster = []  # empty list: the run log reports all Gibbs clusters being selected

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1134_MHCII/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 12 and 17 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
145 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
132 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
6 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
126 peptides being searched in the six frame translated human genome


[INFO][0m 126 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 0





	matching peptides to 6FT
	no peptides matched to 6ft
	writing 126 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1134_MHCII
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1134_MHCII/1134_MHCII_immuno_search_out.xlsx


In [5]:
# Sample 1136_E, restricted to Gibbs clusters 0, 1 and 2.
# NOTE(review): MHC_class is '1' here although the sample name ends in "_E"
# and the other "_E" samples in this notebook are run with MHC_class = 'E'.
# Confirm whether class '1' is really intended for this sample.
file_name = '1136_E'
MHC_class = '1'
gibbs_cluster = [0, 1, 2]

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1136_E/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 8 and 11 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
100 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
98 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
2 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
96 peptides being searched in the six frame translated human genome


[INFO][0m 96 patterns loaded from file


	writing 6FT results to dictionary
	number of matches from 6FT: 60
	matching peptides to 6FT
	writing 9 peptide matches to 6FT
	writing 87 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Directory already exists at D:\Period_6\out\1136_E
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1136_E/1136_E_immuno_search_out.xlsx


In [14]:
# Sample 1136_MHCI: MHC class '1', restricted to Gibbs clusters 0, 1, 2 and 4.
file_name = '1136_MHCI'
MHC_class = '1'
gibbs_cluster = [0, 1, 2, 4]

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1136_MHCI/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 8 and 11 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
191 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
183 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
10 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
173 peptides being searched in the six frame translated human genome


[INFO][0m 173 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 46
	matching peptides to 6FT
	writing 7 peptide matches to 6FT





	writing 166 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1136_MHCI
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1136_MHCI/1136_MHCI_immuno_search_out.xlsx


In [5]:
# Sample 1136_MHCII: MHC class '2'.
file_name = '1136_MHCII'
MHC_class = '2'
gibbs_cluster = []  # empty list: the run log reports all Gibbs clusters being selected

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1136_MHCII/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 12 and 17 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
110 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
100 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
3 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
97 peptides being searched in the six frame translated human genome


[INFO][0m 97 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 0
	matching peptides to 6FT
	no peptides matched to 6ft
	writing 97 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created





Search directory created at D:\Period_6\out\1136_MHCII
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1136_MHCII/1136_MHCII_immuno_search_out.xlsx


In [6]:
# Sample 1137_E1: MHC class 'E'.
file_name = '1137_E1'
MHC_class = 'E'
gibbs_cluster = []  # empty list: the run log reports all Gibbs clusters being selected

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1137_E1/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 8 and 15 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
118 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
117 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
8 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
107 peptides being searched in the six frame translated human genome


[INFO][0m 107 patterns loaded from file

	writing 6FT results to dictionary





	number of matches from 6FT: 326
	matching peptides to 6FT
	writing 18 peptide matches to 6FT
	writing 89 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Directory already exists at D:\Period_6\out\1137_E1
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1137_E1/1137_E1_immuno_search_out.xlsx


In [7]:
# Sample 1137_E2: MHC class 'E'.
file_name = '1137_E2'
MHC_class = 'E'
gibbs_cluster = []  # empty list: the run log reports all Gibbs clusters being selected

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1137_E2/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 8 and 15 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
126 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
125 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
14 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
110 peptides being searched in the six frame translated human genome


[INFO][0m 110 patterns loaded from file


	writing 6FT results to dictionary
	number of matches from 6FT: 488
	matching peptides to 6FT
	writing 18 peptide matches to 6FT
	writing 92 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Directory already exists at D:\Period_6\out\1137_E2
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1137_E2/1137_E2_immuno_search_out.xlsx


In [8]:
# Sample 1137_MHCI_1: MHC class '1'.
file_name = '1137_MHCI_1'
MHC_class = '1'
gibbs_cluster = []  # empty list: the run log reports all Gibbs clusters being selected

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1137_MHCI_1/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 8 and 11 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
99 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	known HLA found
	files to_blastp , to_blastp_2 created
98 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
8 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
90 peptides being searched in the six frame translated human genome


[INFO][0m 90 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 152
	matching peptides to 6FT
	writing 14 peptide matches to 6FT
	writing 76 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created





Search directory created at D:\Period_6\out\1137_MHCI_1
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1137_MHCI_1/1137_MHCI_1_immuno_search_out.xlsx


In [9]:
# Sample 1137_MHCI_2: MHC class '1', restricted to Gibbs clusters 1, 2 and 3.
file_name = '1137_MHCI_2'
MHC_class = '1'
gibbs_cluster = [1, 2, 3]

# Input workbook and output directory (full path; must end with "/").
input_file_path = f"im folder/1137_MHCI_2/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selected gibbs clusters based on input
selecting peptides with length between 8 and 11 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
88 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	no known HLA found
	files to_blastp , to_blastp_2 created
88 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
6 peptides being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
82 peptides being searched in the six frame translated human genome


[INFO][0m 82 patterns loaded from file

	writing 6FT results to dictionary
	number of matches from 6FT: 72





	matching peptides to 6FT
	writing 11 peptide matches to 6FT
	writing 71 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1137_MHCI_2
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1137_MHCI_2/1137_MHCI_2_immuno_search_out.xlsx


In [10]:
# Sample 1137_MHCII_1: MHC class '2'.
file_name = '1137_MHCII_1'
MHC_class = '2'
gibbs_cluster = []  # empty list: the run log reports all Gibbs clusters being selected

# Input workbook and output directory (full path; must end with "/").
# Note: the input folder (1137_MHCII_C2) is named differently from the file.
input_file_path = f"im folder/1137_MHCII_C2/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 12 and 17 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
4 peptides being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	no known HLA found
	files to_blastp , to_blastp_2 created
4 peptides being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
	files SAAV , SAAV_2 created
1 peptide being searched for Single Amino Acid Variants
	reading output
	all is gud for blast_out_SAAV
3 peptides being searched in the six frame translated human genome


[INFO][0m 3 patterns loaded from file


	writing 6FT results to dictionary
	number of matches from 6FT: 0
	matching peptides to 6FT
	no peptides matched to 6ft
	writing 3 unmatched peptides
	files no_match_6ft , no_match_6ft_2 created
Search directory created at D:\Period_6\out\1137_MHCII_1
Creating excel file with results
search and classification done, file saved at  D:/Period_6/out/1137_MHCII_1/1137_MHCII_1_immuno_search_out.xlsx


In [4]:
# Sample 1137_MHCII_2: MHC class '2'.
file_name = '1137_MHCII_2'
MHC_class = '2'
gibbs_cluster = []  # empty list: the run log reports all Gibbs clusters being selected

# Input workbook and output directory (full path; must end with "/").
# Note: the input folder (1137_MHCII_D2) is named differently from the file.
input_file_path = f"im folder/1137_MHCII_D2/{file_name}.xlsx"
output_file_path = f"D:/Period_6/out/{file_name}/"

immuno_search(
    input_file_path,
    MHC_class,
    gibbs_cluster,
    output_file_path,
    file_name,
)

filtering peptides found by DB search
Selecting all gibbs clusters
selecting peptides with length between 12 and 17 AA
generating lists and files for further analysis
	files peptides , peptides_2 created
1 peptide being searched for known HLAs
	reading output
	all is gud for HLA_blast_out
	generating lists and files for further analysis
	no known HLA found
	files to_blastp , to_blastp_2 created
1 peptide being searched for human canonical proteins
	reading output
	all is gud for blastp_out_human_canonical
	files to_6ft , to_6ft_2 created
	generating lists and files for further analysis
no potential SAAVs, proceeding to 6FT search
1 peptide being searched in the six frame translated human genome
no match to 6FT
No significant peptides found


The process cannot access the file because it is being used by another process.
