In [1]:
import pandas as pd
import seaborn as sns
import time
import re
import nltk
import math
import os
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
def fasta_parser(filename):
    if os.path.exists(filename):
        pass
    else:                     
        print("The file, %s, does not exist" % filename)
        
    file = open(filename,mode='r')
    rec_all = file.read()
    file.close()
    # remove all whitespace from string all_of_it
    rec_all = rec_all.replace(' ','')
    # split records by > 
    records = rec_all.split('>')
    # Parse out the headers & sequences for each record
    headers = []
    sequences = []
    for rec in records:
        s = ''
        data = rec.split('\n')
        sq = s.join(data[1:])
        if len(data[0]) > 0:
            headers.append(data[0])
   
        if (len(sq) > 0):
            sequences.append(s.join(data[1:]))
    
    return headers, sequences

In [3]:
# concatenate all relevant fasta files into one fasta file use glob
# see https://stackoverflow.com/questions/17749058/combine-multiple-text-files-into-one-text-file-using-python/17749339
# Note the code below works with no bizzare artifacts from globbing or concatenation
#read_files = glob.glob("*.fasta")

#with open("isomerase_2000.fasta", "wb") as outfile:
    #for f in read_files:
        #with open(f, "rb") as infile:
            #outfile.write(infile.read())

In [4]:
f_name = 'uniprot-oxidoreductase-reviewed.fasta'
t_head, t_seqs = fasta_parser(f_name)

In [5]:
for hd in t_head[0:5]:
    print(hd)
    print()

sp|Q8L540|LTO1_ARATHThiol-disulfideoxidoreductaseLTO1OS=ArabidopsisthalianaOX=3702GN=LTO1PE=1SV=1

sp|Q9AL95|NROR_CLOABNADH-rubredoxinoxidoreductaseOS=Clostridiumacetobutylicum(strainATCC824/DSM792/JCM1419/LMG5710/VKMB-1787)OX=272562GN=nroRPE=1SV=1

sp|P55931|ETFD_PIGElectrontransferflavoprotein-ubiquinoneoxidoreductase,mitochondrialOS=SusscrofaOX=9823GN=ETFDHPE=1SV=2

sp|Q16134|ETFD_HUMANElectrontransferflavoprotein-ubiquinoneoxidoreductase,mitochondrialOS=HomosapiensOX=9606GN=ETFDHPE=1SV=2

sp|Q9NZC7|WWOX_HUMANWWdomain-containingoxidoreductaseOS=HomosapiensOX=9606GN=WWOXPE=1SV=1



In [6]:
for sq in t_seqs[0:5]:
    print(sq)
    print()

MMARFVSVSSCQFHFGFREVSPPSVTSYPRRFEVSDRRFPAIPIKCSSSEPENGEDSAPSLSSSSSSSTSEVSTSNSSTYNWYTGIGGIGMLDTAYLTYLKVTGSDAFCPIGGGTCGDVLNSDYAVVFGVPLPVIGFVMYGVVTALSAELGEGNLPFGISKSNGRFALFGITTAMASASAYFLYILSTKLSGSSCLYCLVSAFLSFSLFFLSVKDVKLQEIQQVVGLQICLAIIVVASLTASYSTAQPIPSRSGDIELPYFRTEISSSSSPYAIALAKHLNSIGAKMYGAFWCSHCLEQKEMFGREAAKELNYVECFPDGYKKGTKILKACADAAIEGFPTWIINDKVLSGEIELAELAEMTGFSLDQANETNQLQ

MKSTKILILGAGPAGFSAAKAALGKCDDITMINSEKYLPYYRPRLNEIIAKNKSIDDILIKKNDWYEKNNIKVITSEFATSIDPNNKLVTLKSGEKIKYEKLIIASGSIANKIKVPHADEIFSLYSYDDALKIKDECKNKGKAFIIGGGILGIELAQAIIDSGTPASIGIILEYPLERQLDRDGGLFLKDKLDRLGIKIYTNSNFEEMGDLIRSSCVITAVGVKPNLDFIKDTEIASKRGILVNDHMETSIKDIYACGDVAEFYGKNPGLINIANKQGEVAGLNACGEDASYSEIIPSPILKVSGISIISCGDIENNKPSKVFRSTQEDKYIVCMLKENKIDAAAVIGDVSLGTKLKKAIDSSKSFDNISSLDAILNNL

MMVPLAKLASPAYQCFHALKIKKNYLPLCATRWSSTCKVPRITTHYTIYPRDQDKRWEGVNMERFAEEADVVIVGAGPAGLSAATRLKQLAAQHEKDLRVCLVEKAAHIGAHTLSGACLDPRAFEELFPDWKEKGAPLNTPVTEDRFGILTEKYRIPVPILPGLPMNNHGNYVVRLGHLVSWMGEQAEALGVEVYPGYAAAEILFHEDGSVKGIATNDVGIQKDGAPKTTFERGLELHAKV

In [7]:
print(len(t_seqs))

46750


In [8]:
t_seqs2 = list(set(t_seqs))
print(len(t_seqs2))

39793


In [9]:
from Bio import SeqIO
records = list(SeqIO.parse(f_name, "fasta"))


In [10]:
seq_ids =[]
seqs = []
#for record in SeqIO.parse("isomerase_2000.fasta", "fasta"):
for record in SeqIO.parse(f_name, "fasta"):
    print(repr(record.seq))
    print()
    seq_ids.append(record.id)
    seqs.append(str(record.seq))

Seq('MMARFVSVSSCQFHFGFREVSPPSVTSYPRRFEVSDRRFPAIPIKCSSSEPENG...QLQ', SingleLetterAlphabet())

Seq('MKSTKILILGAGPAGFSAAKAALGKCDDITMINSEKYLPYYRPRLNEIIAKNKS...NNL', SingleLetterAlphabet())

Seq('MMVPLAKLASPAYQCFHALKIKKNYLPLCATRWSSTCKVPRITTHYTIYPRDQD...NGM', SingleLetterAlphabet())

Seq('MLVPLAKLSCLAYQCFHALKIKKNYLPLCATRWSSTSTVPRITTHYTIYPRDKD...NGM', SingleLetterAlphabet())

Seq('MAALRYAGLDDTDSEDELPPGWEERTTKDGWVYYANHTEEKTQWEHPKTGKRKR...QSG', SingleLetterAlphabet())

Seq('MPSPQLLVLFGSQTGTAQDVSERLGREARRRRLGCRVQALDSYPVVNLINEPLV...TWA', SingleLetterAlphabet())

Seq('MGEKQRKLLVLYASQTGNALDAAERIGREAERRGLPASVVSTDEFDTSSLPHHE...AWS', SingleLetterAlphabet())

Seq('MSSSKKIVILYGSETGNAHDFATILSHRLHRWHFSHTFCSIGDYDPQDILKCRY...ETW', SingleLetterAlphabet())

Seq('MLVRLTKLSCPAYHWFHALKIKKCLPLCAPRCSSTSAVPQITTHYTVHPREKDK...NGM', SingleLetterAlphabet())

Seq('MILSAFSLEGKVAVVTGCDTGLGQGMALGLAQAGCDIVGINIVEPTETIEQVTA...LAR', SingleLetterAlphabet())

Seq('MQVDELLKPFPIKEFHPFPRALLGPGAHEMIGPEALKLGFKKTLVMTSGLRGSD...LLS', Si


Seq('MTTVLFVGLGLIGGSLASNIKYHNPNTNIIAYDADTSQLDKAKSIGIINEKCLN...YIQ', SingleLetterAlphabet())

Seq('MSQVSGTDVPDLGRRQFMNLLTFGTITGVAAGALYPIVKYFIPPSAGGTGGGVT...WWS', SingleLetterAlphabet())

Seq('MTQLSSNDVPSMGRRQFMNLLTFGTATGVALGALYPVANYFMPLRAGGGGGGTS...WWA', SingleLetterAlphabet())

Seq('MKITISGTGYVGLSNGLLIAQNHEVVALDILPSRVAMLNDRISPIVDKEIQQFL...GSD', SingleLetterAlphabet())

Seq('MNITFIGSGYVGLVSGIIMGYLGHNVTCLDNDEVKISKLNKQILPIYEAKLDEY...KAR', SingleLetterAlphabet())

Seq('MMLHIPGVLTKEQVAQCRDILDAADWADGNATSGAQSALAKRNRQLPEGSPAAR...ADA', SingleLetterAlphabet())

Seq('MATSSTSSTTSLCFPATSAAGARTSFRTTDTFLRYRRSRQLTRLKVRKAVVRSD...SLV', SingleLetterAlphabet())

Seq('MELLIHQLLSKEESKKITSNITKDNSCWIDGKTSAGSYAAKVKNNLQLKKDSEV...LGD', SingleLetterAlphabet())

Seq('MFEIKKICCIGAGYVGGPTCSVIAQMCPKIQVTVVDVNEARINAWNSDTLPIYE...PRV', SingleLetterAlphabet())

Seq('MKITTKVLIIGSGPAGLSAAIYTARSSLKPILINGMQPGGQLTMTTDVENYPGF...LNK', SingleLetterAlphabet())

Seq('MSTVLYRCPELLIGGEWRPGRHEQRLVVRNPATGEPLDELRLASADDLQLALQT...YVA', S

Seq('MSTHNEYDYIIIGAGSAGNVLATRLTEDADVSVLLLEAGGPDYRLDFRTQMPAA...KQD', SingleLetterAlphabet())

Seq('MFGKKNLKWLGVVATLMMTFVQLGGALVTKTGSADGCGSSWPLCHGALIPEFFP...DKQ', SingleLetterAlphabet())

Seq('MDQSMKPLLSPTERPRRHLTASVISFFLPNQFRLSTILCIGALLQTILCAVLPL...RDD', SingleLetterAlphabet())

Seq('MILLQNIKRCSLKQLKVLATLLLSLSLPTLEAAENRDSDSIVWHLDYQEALQKS...HQS', SingleLetterAlphabet())

Seq('MQQHYDYIIVGAGSAGCVLADRLSESGDHSVLLLEAGGSDKSIFIQMPTALSYP...PMR', SingleLetterAlphabet())

Seq('MEQNSAWFMEAKPVAIWLFLCSVMVILMVGIGGFTRLSKAGLSITEWKPITGTL...FPS', SingleLetterAlphabet())

Seq('MEYDYIIIGAGSAGNVLAARLTEDADVTVLLLEAGGPDYRLDFRTQMPAALAFP...SYP', SingleLetterAlphabet())

Seq('MSFQYPLLAPMTNSSVATNSNQPFLGQPWLVEERDACGVGFIANLRGKPDHTLV...TSV', SingleLetterAlphabet())

Seq('MKAQPKASHFIGGAFVEDKAGKPLPVIYPATGEEIASLYSATPGIIEAAYAAAL...SPY', SingleLetterAlphabet())

Seq('MRAQPKASHFIDGEYVEDAAGTVIESIYPATGEIIARLHAATPGIVEKAIAAAK...APY', SingleLetterAlphabet())

Seq('MGVIESPSSTTSGSAEEMAQAITGHEDSVLPVAIVGMGMRLPGGIHTPDELWGM...KAP', Si

Seq('MSADIVDIGHTGWMPSVQSLSILLVPGALVLVILYLCERQCNDLMGAPPPGPWG...VQD', SingleLetterAlphabet())

Seq('MLTKLLKISCTSRQCTFAKPYQAIPGPRGPFGMGNLYNYLPGIGSYSWLRLHQA...RRE', SingleLetterAlphabet())

Seq('MITETLLTICAAVFLCLSYRYAVGRPSGFPPGPPKIPLFGSYLFMLIINFKYLH...DRH', SingleLetterAlphabet())

Seq('MSAASSSSVLHLRTNQQLLSLRSLKNSTSVASQLAVTSGVSRRRSCTARCSVKK...ITN', SingleLetterAlphabet())

Seq('MTEKRERPGPLRWLRHLLDQLLVRILSLSLFRSRCDPPPLQRFPATELPPAVAA...RTE', SingleLetterAlphabet())

Seq('MDLIPNFAMETWVLVATSLVLLYIYGTHSHKLFKKLGIPGPTPLPFLGTILFYL...SGP', SingleLetterAlphabet())

Seq('MSNASDHVDVLIIGAGPAGLTTANSFNGSNCRVRLIDWKPAPLETGRADGLKSI...ANL', SingleLetterAlphabet())

Seq('MFTLVGLCLTIVHVAFAVVYFYLTWYHKYWDKRGVVTAEPLTILGSYPGILINK...LQK', SingleLetterAlphabet())

Seq('MDLLSALTLETWVLLAVILVLLYRLGTHRHGIFKKQGIPGPKPLPFLGTVLNYY...NGA', SingleLetterAlphabet())

Seq('MPDLIQTKSVVLGYGVKMVLAPAKAARSLLIREEAVWRFSLPRVLSTPTTMLET...ITV', SingleLetterAlphabet())

Seq('MACTLADSLLLFKGSYQKPVLRRDIAARYSPGIFSLNSNGLIQKRFRRQRNFVT...TCP', Si

Seq('MVKVGIIGGGGYTAGELIRLLINHPNVNIIFVHSYSNARNKIIDIHTGLIGEMD...NAF', SingleLetterAlphabet())

Seq('MKKIGIIGVSGYTGLELIKIILNHSGFKLSYLAATSEGEISEIFPQLAGVLNMK...YGI', SingleLetterAlphabet())

Seq('MVTISIIGGTGYTGSELLRILLNHPQVEIKHITSRKMDGVNITKVHPNLKNIKN...LKP', SingleLetterAlphabet())

Seq('MLKVAIVGASGYTGVELLRILHSHPEVAVTCVTSEQSAGRPVSSVFPSLRGRCD...VFP', SingleLetterAlphabet())

Seq('MKVAIVGASGYAGGELVRLLYHHSSAEVTCVTSRSLAGIPLSEVHPQLTGFSDL...LLP', SingleLetterAlphabet())

Seq('MIRAAIVGGTGYTGVELLRLLANHPNVEIVAITSRTEAGRPVSKLFPNLRGYLD...VIP', SingleLetterAlphabet())

Seq('MSKKIKVGIVGATGYTGVELLRLLAAHPDVEVAAVTSRSEAGTAVADYFPSLRG...LLP', SingleLetterAlphabet())

Seq('MAEISILGAGGLTGKELLLLLSRQKEHEVVHITSDKLAGKTLAEVFPEIPFPKN...LFP', SingleLetterAlphabet())

Seq('MAEKKKIGILGASGYTGAELVRLLLRHPRVEIVLLTADRRAGHKMGDVFPQFAP...LSS', SingleLetterAlphabet())

Seq('MQSTTFTRLLAAAALAATTLFAPATQAQGAQQYVNINPPMPSDTPGKIEVLEFF...SRK', SingleLetterAlphabet())

Seq('MNPLKAGDIAPKFSLPDQDGEQVNLTDFQGQRVLVYFYPKAMTPGCTVQACGLR...EHA', Si

Seq('MTENNEFDVADLRREYIRGGLRRSDLTENPLELFERWLKQACEARLPDPTAMCV...LAP', SingleLetterAlphabet())

Seq('MVNNMTDLTAQEPAWQTRDHLDDPVIGELRNRFGPDAFTVQATRTGVPVVWIKR...VDR', SingleLetterAlphabet())

Seq('MGLTPSATKPEIAQAPQGIVDPSTGRPVGADDPFFKEINSELADKGFILTTADD...IER', SingleLetterAlphabet())

Seq('MARRGLLISFFAIFSVLLQSSTSLISSSSVFVNPSKVKQVSSKPRAFVYEGFLT...KAC', SingleLetterAlphabet())

Seq('MRQRTPFARPGLLASAALALVLGPLAASAQEQVAPPKDPAAALEDHKTRTDNRY...DTY', SingleLetterAlphabet())

Seq('MVNNMTDLTAQEPAWQTRDHLDDPVIGELRNRFGPDAFTVQATRTGVPVVWIKR...VDR', SingleLetterAlphabet())

Seq('MELADIRREYTKGGLRRKDLKNDPIDQFNFWLEQAIKANLSDPTAMTVATVDKD...LAP', SingleLetterAlphabet())

Seq('MDYTLTRIDPNGENDRYPLQTQETVSGDPLEQHVHRSVYMGKLENAMHDMVNWG...DEI', SingleLetterAlphabet())

Seq('MSKSTSVSTILYLRQRLQGLKIYETSDLIQHINTFDELVGEQVSVDVKIEEKTK...NLF', SingleLetterAlphabet())

Seq('MPEGARRDLAGLRREYTRAGLAEDGADPDPIRQFGRWFEEALRAGLYEPNAMVL...LQP', SingleLetterAlphabet())

Seq('MSDNDQLQQIAHLRREYTKGGLRRRDLPAEPLTLFERWLGQACDARLADPTAMV...LAP', Si


Seq('MSCPMRSGFVDSVQGGHHLGSEAGMLYGEYLMLDKVLSAQRMLSVEGKKPVHDE...SLH', SingleLetterAlphabet())

Seq('MQPPGDDAAPRCPFAGAHAPDAPHVPEAAGDDVQAGWHRAQLDFSQSMSYGDYL...TTL', SingleLetterAlphabet())

Seq('MKPPGDNAPAGCPFSGARAAQPAHEAPHVPGDAAGETGWHDAQLDFSKSMSYGD...TTL', SingleLetterAlphabet())

Seq('MKSLKGTKTAENLMKAFAGESQARNRYTFYSNTAKKEGYVQISNIFLETAENER...ENY', SingleLetterAlphabet())

Seq('MFKILRKERLAPGINLFEIESPRIAKHAKPGQFVMIRLHEKGERIPLTIADVDI...RMV', SingleLetterAlphabet())

Seq('MKSLTELFGCFKRQPRQQEASSPANPHVSDTLSMGVAASGMPPPKRPAPAESPT...CUG', SingleLetterAlphabet())

Seq('MAPVVQQPAPSFKKTAVVDGVFEEVTLEQYKGKWVLLAFIPLAFTFVCPSEIIA...VNK', SingleLetterAlphabet())

Seq('MSEEKIYDVIIIGAGPAGMTAAVYTSRANLSTLMIERGIPGGQMANTEDVENYP...TLK', SingleLetterAlphabet())

Seq('MLSKQIPLGIYEKALPAGECWLERLQLAKTLGFDFVEMSVDETDDRLSRLNWSR...EAA', SingleLetterAlphabet())

Seq('MKALNKETAAKTQRPERIIQFGEGNFLRAFVDWIIYNMNEKTDFNSSVVVVQPI...GIL', SingleLetterAlphabet())

Seq('MTSTRGEAAGIPSVSLNDGHSIPVLGLGVGELSEAEAERSVAAALEAGYRLIDT...TGP', S

Seq('MYYPFVRKALFQLDPERAHEFTFQQMRRITGTPLEALLRQKVPSKPVSCMGLTF...THL', SingleLetterAlphabet())

Seq('MLRLRVFDRHKRLWSVLQSTIRGNNISNHIPNTTCRSSAPPHLCARQRAFYSQN...FPD', SingleLetterAlphabet())

Seq('MKNILLLGSGFVAKPALDYLLKREDYFVTIVSLFQNELDSITKGHDTSKFKTIQ...IKK', SingleLetterAlphabet())

Seq('MATQQRPFHLVVFGASGFTGQFVTEEVAREQVSPERTSHLPWAVAGRSREKLLR...TEV', SingleLetterAlphabet())

Seq('MATEQRPFHLVVFGASGFTGQFVTEEVAREQVDPERSSRLPWAVAGRSREKLQR...SEV', SingleLetterAlphabet())

Seq('MATEQRPFHLVVFGASGFTGQFVTEEVAREQIASEQSSRLPWAVAGRSKEKLQQ...SEV', SingleLetterAlphabet())

Seq('MVVSREEVREIIGRGNAIVIYEDHLLNLNGWLERHPGGEKAIHHMIGRDASDEM...IKG', SingleLetterAlphabet())

Seq('MSADLRKRKKDTTADNDAPQEAQAEEENKKEKPSSCRYRFLWKLLLGLLLIALL...LHS', SingleLetterAlphabet())

Seq('MSILLSPPSLLLLLAALVAPATSTTNYRPDWNRLRGLARGRVETCGGUQLNRLK...DDL', SingleLetterAlphabet())

Seq('MWLPLPLLLGLLQLQPILSYQIDWNKLERINRGKVESCGGUQLNRLKEVKGFVT...EDL', SingleLetterAlphabet())

Seq('MIFIPIIILIYLVSIAASDKSPKIKKNPRNVVAVADFPFGGDTQVKGNVVFSAK...IFV', Si

Seq('MAGGSTGERPFTDIITSIRYWVIHSITIPALFIAGWLFVSTGLAYDAFGTPRPN...SAK', SingleLetterAlphabet())

Seq('MATQTVENGSKSGPRRTTVGNLLKPLNSEYGKVAPGWGTTPLMGVAMALFAVFL...SMN', SingleLetterAlphabet())

Seq('MLTLKLFVYTVVIFFVSLFIFGFLSNDPGRNPGREE', SingleLetterAlphabet())

Seq('MLTLKIFVYTVVTFFVSLFIFGFLSNDPGRNPGQRELD', SingleLetterAlphabet())

Seq('MTIALGRIPKEENDLFDTMDDWLRRDRFVFVGWSGLLLFPCAYFALGGVFTGTT...NAL', SingleLetterAlphabet())

Seq('MSGGSTGERPFSDIITSVRYWIIHSITIPSLFVSGWLFVSTGLAYDVFGTPRPN...KGL', SingleLetterAlphabet())

Seq('MLTLKLFVYTVVIFFVSLFIFGFLSNDPGRNPGRDE', SingleLetterAlphabet())

Seq('MSQRTGLGDILKPLNSEYGKVSPGWGTTWVMAVFIGLFFVFLLIILQIYNSSLL...LTK', SingleLetterAlphabet())

Seq('MATQTVENGARSGPRRTTVGDLLKPLNSEYGKVAPGWGTTPLMGVAMALFAVFL...SMN', SingleLetterAlphabet())

Seq('MLTLKLFVYTVVIFFVSLFIFGFLSNDPGRNPGRDE', SingleLetterAlphabet())

Seq('MPTLKLFVYAIVIFFVSPFVFGFLSNDPGRNPGRKD', SingleLetterAlphabet())

Seq('MADTTGRIPLWIIGTVTGITVIGLIGIFFYGSYSGLGSSL', SingleLetterAlphabet())

Seq('MANTTGRVPLWLIGTVA


Seq('MLAELNAEVLENTYISGNLYRLTLRIPDKILKQIEPGHFAMIKPSDTYDPMGRR...VIL', SingleLetterAlphabet())

Seq('MTIRSLPAALSPLSMAVQAVLLVSSLALAPAANAKPVTWEDIANDHLNTQNVLQ...AKR', SingleLetterAlphabet())

Seq('MNFPWDQLLVKGNWMITMAQIGAPFLVIGLIAVITYFKLWKYLYKEWFTSVDHK...HES', SingleLetterAlphabet())

Seq('MVSTKTQIAGFEFDNCLMNAAGVACMTIEELEEVKNSAAGTFVTKTATLDFRQG...YID', SingleLetterAlphabet())

Seq('MSQAVETRTRIKSERYESGVIPYAKMGYWDPEYSIKETDILALFRCTPQPGVDP...ANR', SingleLetterAlphabet())

Seq('DILAAFRVTPQPGVPPEEAGAAVAAESSTGTWTTVWTDGLTSLDRYKGRCYHIE...PEK', SingleLetterAlphabet())

Seq('VGFKAGVKDYKLTYYTPDYDTKDTDILAAFRVTPQPGVPPEEAGAAVAAESSTG...DTL', SingleLetterAlphabet())

Seq('MSPQTETKASVGFKAGVKDYKLTYYTPEYETKDTDILAAFRVTPQLGVPPEEAG...DTL', SingleLetterAlphabet())

Seq('MSPKTETKASVGFKAGVKDYRLTYYTPEYQTKDTDILAAFRVTPQPGVPPEEAG...DYL', SingleLetterAlphabet())

Seq('MVEKFDTIYDYYVDKGYEPSKKRDIIAVFRVTPAEGYTIEQAAGAVAAESSTGT...TPV', SingleLetterAlphabet())

Seq('TPEYQTKDTDILAAFRVTPQPGVPPEEAGAAVAAESSTGTWTTVWTDGLTSLDR...GNE', S


Seq('MYTLARQLLFKLSPETSHDLSLDLIGAGGRLGLNGLLCKAPASLPVNVMGLQFP...AKR', SingleLetterAlphabet())

Seq('MYTLARQLLFKLSPETSHDLSLDLIGAGGRLGLNGMLCKQPASLPVSVMGLNFA...MPR', SingleLetterAlphabet())

Seq('MSYALLRPFLFNMDPEHAHEMTLSLLDKAHKARVLGLVYGQSMQPTDCMGLQFS...MEN', SingleLetterAlphabet())

Seq('MSYALLRPFLFNLDPEHAHELTLQLLEKAHKARALGFIYSQQSLPTECMGLQFS...MML', SingleLetterAlphabet())

Seq('MLNALYPLARPLLFSMDPEDAHHFTLNQLKRAHALGLSGCIGARVAPQPRTVMG...LAR', SingleLetterAlphabet())

Seq('MIDPFQRLARRGLFLFDPETAHGMSIAALKSGLVPACQIAPDPRLRQTIAGLTF...RKV', SingleLetterAlphabet())

Seq('MASIAAKSVSLRAATRRAAPVAAPADARFKVWQPVNNKQYETFSYLPPLTNQKI...RSV', SingleLetterAlphabet())

Seq('MASSMISSPAVTTVNRAGAGTVAPFTGLKSMAGFPTRKTNNDIASIASNGGRVQ...PSF', SingleLetterAlphabet())

Seq('MSFATTNKTIVPCATTKQIVRPRFLSNGTISKSRAMMVWEPFNNKFFETFSYLP...RQV', SingleLetterAlphabet())

Seq('MAEMQDYKQSLKYETFSYLPPMNAERIRAQIKYAIAQGWSPGIEHVEVKNSMNQ...RGN', SingleLetterAlphabet())

Seq('MASSMLSTAAVACINRASPAQASMVAPFTGLKSTSAFPTTRKTTTDITSIASNG...GAE', S

Seq('MVLVYLNSNLNLRSLKETNHDFLINSTERLTTNQILSTSVILTTMNDLTNWARL...TEE', SingleLetterAlphabet())

Seq('MAMTSAATGFILTANVPAAIGGGSSKSTTIVSFLPMRSFGSRLVVRAAEDTPPA...EVK', SingleLetterAlphabet())

Seq('MKALHFGAGNIGRGFIGKLLADSGIQVIFADVNDHVIEQLKTQRAYPVKIVGDR...QKN', SingleLetterAlphabet())

Seq('MRAVHFGAGNIGRGFIGSLLAASGYDVVFVDVNEQIVRLLKERGEYRVIIAGER...KEQ', SingleLetterAlphabet())

Seq('MKTLYSLRRFYPVETLFNGTLALAGRDQETTGFAWWAGNARLINLSGKLLGAHV...PLN', SingleLetterAlphabet())

Seq('MTSILREQRRDNVWDRFCEWVTSTDNRIYVGWFGVLMIPTLLTATICFIVAFIA...IHG', SingleLetterAlphabet())

Seq('MTAILERRDSENLWGRFCNWITTTENRLYIGWFGVLMIPTLLTATSVFIIAFIA...ISG', SingleLetterAlphabet())

Seq('MVQNTDNNTYDQLIRERNDYDDDAGSSGDVAVIGIGLRFPSGSLKESISKPYQL...EIK', SingleLetterAlphabet())

Seq('MKTLYSPRRFYPVETLFNGTLTLAGRDQETTGFAWWAGNARLINLSGKLLGAHV...PLN', SingleLetterAlphabet())

Seq('MTIAVGRAPVERGWFDVLDDWLKRDRFVFIGWSGLLLFPCAFMALGGWLTGTTF...NAL', SingleLetterAlphabet())

Seq('MIMAAGSTGERPFFEIITSIRYWIIHAVTLPAIFIAGFLFVYTGLAYDAFGTPR...RTK', Si


Seq('MLNADLKQQLQQLLELMEGDVEFVASLGSDDKSNELKELLNEIAEMSAHITITE...IRN', SingleLetterAlphabet())

Seq('MSAKIIDGKTIAQQVRNEVAAVVQQRLAAGKRAPGLAVVLVGENPASQIYVASK...SQN', SingleLetterAlphabet())

Seq('MAVTQTAQVCDLVIFGAKGDLARRKLLPSLYQLEKAGQLNPDTRIIGVGRADWD...EFE', SingleLetterAlphabet())

Seq('MLIDVKPLYSEIKNLIMDRMSKLKKVPKLVAVTYKPDPSTISYLKSQEKAAKRF...MNF', SingleLetterAlphabet())

Seq('MTARLIDGRQVAQQVHEEVRQAVERHTAQGRRAPGLAVVLVGENPASQVYVRNK...LGR', SingleLetterAlphabet())

Seq('MEHFQQVEPFDYVIFGATGDLTMRKLLPALYNRLRMGQIPDDACIIGAARTELD...ASE', SingleLetterAlphabet())

Seq('MNDATATRLDGKALAKQIETELSQRVAAVIKKTGRTPILATILVGDDPASATYV...AQA', SingleLetterAlphabet())

Seq('MTQSVCILGVTGSIGQSTLKVLAQHPDKYSIYAITAHSRIQELVEICKQFKPKR...IRG', SingleLetterAlphabet())

Seq('MGNRVSREDFEWVYTDQPHATRRQEILAKYPEIKSLMKPDSNLIWIVIMMVLTQ...VQE', SingleLetterAlphabet())

Seq('MPFIPHTEAEVRDMLAAIGAGSIDELFAEIPPDLRCGELKDLPEALSEMEVCKL...LEC', SingleLetterAlphabet())

Seq('MKKITILGSTGSIGKNTLKIISNNLDKFSVYSLVAYGNNINVLISQCIKYKPNY...KCY', S

Seq('MKITVIGAGHVGATAALRIAEKQLAREVVLIDIIEGIPQGKALDMYESGPVALF...SAI', SingleLetterAlphabet())

Seq('MGMKRKKISIIGAGFTGATTAFILAQKELGDIVLVDIPQLENPTKGKALDMLES...VLQ', SingleLetterAlphabet())

Seq('MHKICVIEGDGIGKEVVPATIQVLEATGLPFEFVYAEAGDEVYKRTGKALPEET...REE', SingleLetterAlphabet())

Seq('MKSMKIAVIPGDGIGVEVMEAALHILNTLDLDLEFIHADAGDACLKRTGTALPE...LED', SingleLetterAlphabet())

Seq('MTARNLFLLPGDGIGPEAMGEVRKIIAYMNEAMDAGFVTDEGLVGGCAYDAHGA...LSA', SingleLetterAlphabet())

Seq('MKAPVRVTVTGAAGQISYSLLFRIAAGDMLGKDQPVILQLLEITPALKALQGVA...LPA', SingleLetterAlphabet())

Seq('MNSPQNVSTKKVTVTGAAGQISYSLLWRIANGEVFGTETPVELKLLEIPQALGG...DLL', SingleLetterAlphabet())

Seq('MARSKIALIGAGQIGGTLAHLAALKELGDIVLFDIAEGTPQGKALDLAESGPVE...LAG', SingleLetterAlphabet())

Seq('MKVAVLGAAGGIGQALALLLKTQLPSGSELSLYDIAPVTPGVAVDLSHIPTAVK...VNK', SingleLetterAlphabet())

Seq('MKIAVLGAAGGIGQALALLLKLQLPAGSELSLYDIAPVTPGVAADVSHIPTAVK...VNN', SingleLetterAlphabet())

Seq('MDLRRRPPKPPVTNNNNSNGSFRSYQPRTSDDDHRRRATTIAPPPKASDALPLP...TTT', Si

Seq('MQAKSICFIGGGNMAQAIIFGLLKQGYPANQITVSDPNAAKRQCLAEKGVNTVE...KLF', SingleLetterAlphabet())

Seq('MSKNFQEPIFDVAQLAHVELLSPKLEESIVFFTKYLGMEVTARAGNSVYLRAYE...VKP', SingleLetterAlphabet())

Seq('MAQTEIKSYTLNFGPQHPAAHGVLRLVLELSGEVIERADPHIGLLHRGTEKLIE...IDR', SingleLetterAlphabet())

Seq('MALRFPRFSQGLAQDPTTRRIWFGIATAHDFESHDDITEERLYQNIFASHFGQL...KFG', SingleLetterAlphabet())

Seq('MSWISPELIEILLTILKAVVILLVVVTCGAFMSFGERRLLGLFQNRYGPNRVGW...QAQ', SingleLetterAlphabet())

Seq('MFVVLFVLTLILLYAVFVVWAERKVAAFIQDRLGPMEVGPYGMLQTIADLIKLL...MKI', SingleLetterAlphabet())

Seq('MSGLIQAAKSLLLLEFVSAFFLAMRQFFSPKPTINYPYEKGVVSQRFRGEHALR...PYR', SingleLetterAlphabet())

Seq('MATTATRFPQFSQDLASDPTTRRLWYGIATAHDFETHDGMTEERLYQKLFATHF...LSG', SingleLetterAlphabet())

Seq('MANKLRNYTINFGPQHPAAHGVLRMILELEGETIVRADPHIGLLHRGTEKLAET...VDR', SingleLetterAlphabet())

Seq('MKLDQVAKSLLLKEFVSGMILGMRYFFKPKATINYPFEMGHRGPRFRGEHALRR...PYR', SingleLetterAlphabet())

Seq('MDWQRLIVSYLVGFVLLNVLLGLMAYMTWFERRVLARMQHRVGPNRTGPFGLLQ...GAL', Si

Seq('MSVREKMLEILEGIDIRFKEPLHSYSYTKVGGEADYLVFPRNRFELARVVKFAN...CKR', SingleLetterAlphabet())

Seq('MGMSVNSDPAALVPEITLLVSAVTGLLAGAWTPRERQGTIHVLAALATVVGLVA...ASG', SingleLetterAlphabet())

Seq('MNNVPWLSVLWLVPLAGAVLIILLPPGRRRLAKWAGMVVSVLTLAVSIVVAAEF...PHP', SingleLetterAlphabet())

Seq('MLELPIISITIFLPLISVLYILLFINQSKKANKLNIYVAMVAMLSSVLTFILTI...IKF', SingleLetterAlphabet())

Seq('MIITGQQLVALSPLLILMSIAIVVMLFIAYNRNRYIVFLLTSFGFLSVFISLFI...IQW', SingleLetterAlphabet())

Seq('MTITPQNLIALLPLLIVGLTVVVVMLSIAWRRNHFLNATLSVIGLNAALVSLWF...PLM', SingleLetterAlphabet())

Seq('MNTLIAITGLGIFCLLFEILNFRKGIVPFTILGLLGVLALNFYEFGTTASYYNN...LLN', SingleLetterAlphabet())

Seq('MTITPQQLIALLPLLIVGLTVVVVMLSIAWRRNHFLNATLSVLGLNAALVSLWF...PLM', SingleLetterAlphabet())

Seq('MNNLNLIPVIPEIFLAAATCAILLIDLFLSDAKRYLTYVLSLATLVVCAVLSLS...LAS', SingleLetterAlphabet())

Seq('MIDKLSWIAVYPELVLLVMACLIALVDLGVKSPRRTLTYALTLLTLGAVAVMEA...LGV', SingleLetterAlphabet())

Seq('MTWNMLLALLPLLVLFGGSITVLFLSERRALLSAAMCAVVAAGFAWWPASLEAA...MLP', Si


Seq('MKTLYSLRRFYPVETLFNGTLTLAGRDQETTGFAWWAGNARLINLSGKLLGAHV...PLN', SingleLetterAlphabet())

Seq('MLTLKLFVYTVVIFFVSLFIFGFLSNDPGRNPGREE', SingleLetterAlphabet())

Seq('MAPWAEAEHSALNPLRAVWLTLTAAFLLTLLLQLLPPGLLPGCAIFQDLIRYGK...FLF', SingleLetterAlphabet())

Seq('MATEKTAQDTGIETALGTLLRPLNSEYGKVAPGWGTTVLMGTFMALFAVFLVII...SWQ', SingleLetterAlphabet())

Seq('MSNVGTTGRIPLWFIGVIAGIAALSIVGLFFYGAYSGLGSSL', SingleLetterAlphabet())

Seq('MSTLPILLATLPEAYLPFRPLVDVLPSIPVLFLLLAFVWQAAVSFR', SingleLetterAlphabet())

Seq('MSFENFAIITLKENVFALLPEAYAPFDPIVDDLPIIPVLFLLLAFVWQSAVKFR', SingleLetterAlphabet())

Seq('MLNIFSLICICLNSALYSSSLFFAKLPEAYAFLNPIVDLMPVIPLFFFLLAFVW...SFR', SingleLetterAlphabet())

Seq('MSKLKGPDGRAGDRLPNGMPAVSWERRWTEGALPLWLVATAGGTAVIFVLGIFF...NAG', SingleLetterAlphabet())

Seq('MLILFNTFAELPEAYKAFAPTVDVLPLIPLFFFLLVFVWQAAVGFK', SingleLetterAlphabet())

Seq('MTDTTGRIPLWLIGTLTGILVIGLIGIFFYGSYSGLGSSL', SingleLetterAlphabet())

Seq('MASTGRIPLWLVATIGGIAVLTVLGLFFYGSYSGLGSSL', SingleLetterAlphabet())



Seq('MSDRFLSAADFVLNWSRKYSLWPLFFGLSCCFVEEATAFTPRYDMARFGAEVLR...IDR', SingleLetterAlphabet())

Seq('MDKQLLPVLLLLLGVSGSWGQGEEPGGPSEVLPEEPTGEEVPKEDGILVLNHRT...EEL', SingleLetterAlphabet())

Seq('MMYLTYYFIEITIFLAILCTIFIISAKNPMVSILYMIALFVIAAMYLYLIGLGI...HKE', SingleLetterAlphabet())

Seq('MARAWGLLLAIGVILPTWLSSTKVSSLIERISDPKDLKKLLRTRNNVLVLYSES...EDL', SingleLetterAlphabet())

Seq('MHVNGKVALVTGAAQGIGRAFAEALLLKGAKVALVDWNLEAGVKCKAALDEQFE...KMQ', SingleLetterAlphabet())

Seq('MEQTYEYAWIIPFIPLPVPMLIGAGLILFPTATKSFRRMWAFQSVLLLSIVMIF...FST', SingleLetterAlphabet())

Seq('MEYTHQYSWIIPFIPFPVPMLIGVGLLLFPTATKYLRRMWAFPSILLLTIVMMF...TNI', SingleLetterAlphabet())

Seq('MSDPTIRPQSELTPGDFTAAENPFALFAEWLAEANKSEPNDPNAMALATVDPDG...LYP', SingleLetterAlphabet())

Seq('MDYTLTRIDPNGENDRYPLQSQEIVSDPLDAHVHRSVYMGKLEHALHDVVNWGR...DEI', SingleLetterAlphabet())

Seq('MDIAALREEYTRHGLSRDDLNVDPFKQFETWFKQACESQLLEPNAMSLATASDQ...LSP', SingleLetterAlphabet())

Seq('MTENNEFDVADLRREYIRGGLRRSDLTENPLELFERWLKQACEARLPDPTAMCV...LAP', Si


Seq('MLKEEDKIFTNLHGQQSHDLKSSKKRGDWDNTKALLDKGREFIIEEVKKSGLRG...SAI', SingleLetterAlphabet())

Seq('MTITPQQLQAMLPLLIVGLTVVVVMLSIAWRRDHFINATLTVIGLNLALLSLYF...PLM', SingleLetterAlphabet())

Seq('MKSSPNQLSIFSYLLGICSLDTLLSPVGRKMDAISPLVILRMDNHISSFFSEFQ...ATR', SingleLetterAlphabet())

Seq('MTITPQQLIAMLPLLIVGLTVVVVMLSIAWRRDHFINATLTVIGLNLALLSLYF...PLM', SingleLetterAlphabet())

Seq('MVQHHQAAANDDHQAIPLLTPYKQAGRPGSKLDLSHRVLLAPMTRCRSYGNVPQ...PSA', SingleLetterAlphabet())

Seq('MGNISPLTGTNGEIR', SingleLetterAlphabet())

Seq('NAPPNLTLR', SingleLetterAlphabet())

Seq('MECYEQSRQRAAFVVLLFIVMLGSQAQAQLRTDFYSDSCPSLLPTVRRVVQREV...RPN', SingleLetterAlphabet())

Seq('ANLFTSDQDLYTDSR', SingleLetterAlphabet())

Seq('MWVCLQLPVFLASVTLFEVAASDTIAQAASTTTISDAVSKVKIQVNKAFLDSRT...REN', SingleLetterAlphabet())

Seq('MRAALLTLAFTALAAAADDATTTVGFFGGGEWENSNDDDSLPLIPSYTSIGASV...VRT', SingleLetterAlphabet())

Seq('MNQNKQFSIVIAGGGSTYTAGIVMMLLENAERFPLRALKLYDIDEERQATIAEA...SEI', SingleLetterAlphabet())

Seq('MRTAVIIGTGMIGTSIGLA

Seq('MTATKLHKKVILVGDGAVGSSYAFALVNQGIAQELGIIEIPQLFEKAVGDALDL...SKN', SingleLetterAlphabet())

Seq('MLRLIDSISDNCTVKTALYGALLLGVYKLTTFALSLVSLVLDLWVLPPVNFAKY...KKD', SingleLetterAlphabet())

Seq('MIKKRNTTKISVIGAGSVGATTAYALMLSGVATEIVLVDVNKAKTEGEAMDLSH...SGL', SingleLetterAlphabet())

Seq('MEFLSKYTACLSNWGLNLEPGLQTVGAAVLLTTGTLFIASRVLTFVRVLLSLFV...KST', SingleLetterAlphabet())

Seq('MIETCLCWVGGIFIVTCIAYVVYVALEVHVFPGRSLTTYGANPKLRGAGSWALV...KSQ', SingleLetterAlphabet())

Seq('MMDLSSKDALSDVLKDTHAQSSDPSLWAIFGHQSSDNVHEGGNESVSVEQEIFD...NFS', SingleLetterAlphabet())

Seq('MMLQPPKVLLLYAHPESQDSVANRVLLQPVQQLEHVTVHDLYAHYPDFFIDIHH...RGL', SingleLetterAlphabet())

Seq('MMSQPAKVLLLYAHPESQDSVANRVLLKPATQLSNVTVHDLYAHYPDFFIDIPR...GGR', SingleLetterAlphabet())

Seq('MLQPPKVLLLYAHPESQDSVANRVLLQPVQQLEHVTVHDLYAHYPDFFIDIHHE...RGL', SingleLetterAlphabet())

Seq('MTVRIGVVGPGGMGRAHIDRITGELAGGAVVAVHDIDEVNARRVAEPIGAKVFG...LYA', SingleLetterAlphabet())

Seq('MGLPQLWLWLKRLVIFLQVALEVAVGKVLMTLFPGRVKQSILAMGQKTGMARNP...PQL', Si

Seq('MSAFVVAYFPHLCLAFGGLLVLCLSMARSVPAGFYPATAALFAALPGLWAVAGP...GGA', SingleLetterAlphabet())

Seq('MMEIGLNHYLVLSTILFAIGLVGVMRRKNLLMLFFATEILLNSVNISFAAISHY...MRG', SingleLetterAlphabet())

Seq('MIKIKKGLNLPIAGRPEQVIYDGPAITEVALLGEEYVGMRPSMKIKEGEAVKKG...KEG', SingleLetterAlphabet())

Seq('MTESVLDYMTRLGRAAREASRVIGRASTAQKNRALQATAAALDEARAELSAANA...GQA', SingleLetterAlphabet())

Seq('MAQMTMVQAINDALKTELKNDQDVLIFGEDVGVNGGVFRVTEGLQKEFGEDRVF...LEF', SingleLetterAlphabet())

Seq('MSSAQNVKKSILAPVLDNNPIALQVLGVCSALAVTTKLETAFVMTLAVTFVTAL...AKE', SingleLetterAlphabet())

Seq('MLKRFVNSIWEICQKDKFQRFTPVADAIDTFCYEPIHQPSSPPFIRDAVDVKRW...RRI', SingleLetterAlphabet())

Seq('MSGKTSYKDLLLAPIAKNNPIALQILGICSALAVTTKLETAFVMAIAVTLVTGL...QEK', SingleLetterAlphabet())

Seq('MPVEILMPALSPTMEEGTLSKWLKNEGDKVSSGDVIAEIETDKATMEVEAVDEG...CYK', SingleLetterAlphabet())

Seq('MAHYISLFVRAVFVENMALAFFLGMCTFLAVSKKVSTASGLGVAVTVVLGLAVP...VQL', SingleLetterAlphabet())

Seq('MATDIEQKVMEAKMASIVLASVDTQTKDNALEAMAKALDANRNKILEANKADLE...INE', Si

Seq('MSNQGEYPEDNRVGKHEPHDLSLTRRDLIKVSAATAATAVVYPHSTLAASVPAA...IKS', SingleLetterAlphabet())

Seq('MINLIITLIINTALSTIIVLIAFWLPQLYLYLEKSSPYECGFDPLGSARLPFSM...WSE', SingleLetterAlphabet())

Seq('MNNFPWLTAILLLPISAGSSILFIPQRGNKAVRWYTICICLLELLLMTYVFYYN...FNG', SingleLetterAlphabet())

Seq('MSSLPWLTIIVLLPICAGLLIPLFPNEGNKIIRWYTLGICIIEFLLITYIFCCH...SQY', SingleLetterAlphabet())

Seq('MLRQIIGQAKRHPSLIPLFIFIGAGGTGAALYVTRLALFNPDVSWDRKNNPEPW...PDF', SingleLetterAlphabet())

Seq('MNYFPWLTIIVVFPIFAGSLIFFLPHKGNRVIRWYTICICILELLLTTYAFCYH...FYR', SingleLetterAlphabet())

Seq('MAGLLKKTTGLVGLAVCDTPHERLTILYTKTLDILKHFPKHAAYRKYTEQITNE...WPI', SingleLetterAlphabet())

Seq('MMLEHVLVLSAYLFSIGIYGLITSRNLVRALMCLELILNAVNLNFVTFSDFFDS...LNK', SingleLetterAlphabet())

Seq('MNYFPWLTLIVVLPISAGSLIFFLPHRGNKVIRWYTIFICMLELLLTTYVFCYH...FYR', SingleLetterAlphabet())

Seq('MLEFAISAPLDTGAELLSDQVSANFPWLSLSILFPIVGAFLVPFIPDEGEGKQV...TRK', SingleLetterAlphabet())

Seq('MGTVKITSRYGILLGFIALLCTIISAGIFFLTKDKIDAVIAAQQRELLLQVIPQ...QVK', Si

Seq('MTKDREIRQTVPAQPTSDGDGVKIQRIAGFQRPNFSPFLMLDELKADSQADYIG...ETY', SingleLetterAlphabet())

Seq('MDDTGAAPVVIFGGRSQIGGELARRLAAGATMVLAARNADQLADQAAALRAAGA...MPR', SingleLetterAlphabet())

Seq('MLLKIPNVLSKEQVETAKSKLLDADWADGNITAGYQSAKAKNNLQLPENSPIAI...AQT', SingleLetterAlphabet())

Seq('MGVGEKKESQKVAIITGASSGIGLECALMLLDQGYKVYALSRHATLCVALNHAL...RDA', SingleLetterAlphabet())

Seq('MTATVEIRRAADRAVTTTSWLKSRHSFSFGDHYDPDNTHHGLLLVNNDDQMEPA...SAT', SingleLetterAlphabet())

Seq('MSKLEGKVAVVTGASKGIGAAIAKALAKDGAAVVVNYASSKAGADAVVEAITAA...GVR', SingleLetterAlphabet())

Seq('MLLTLPDILSPQDLQAARQLLIDAPWADGRDSAGAQARQVKNNAQLPHDCEAAR...ADT', SingleLetterAlphabet())

Seq('MAKVAFIGLGVMGFPMAGHLVKQGHDVTVYNRTGAKATQWVEQYGGKKADTPKD...SRS', SingleLetterAlphabet())

Seq('MAMATQATLFSPSSLSSAKPIDTRLTTSFKQPSAVTFASKPASRHHSIRAAAAA...YDL', SingleLetterAlphabet())

Seq('MKVIEKIQEAAADGRTVFSFEYFPPKTEEGLDNLFERMDRMVAHGPNFCDITWG...KEI', SingleLetterAlphabet())

Seq('MNALHFGAGNIGRGFIGKLLADSGVFVTFADINQTQIDQINQNKQYGVKIVGDA...LYN', Si


Seq('MKVCCIGAGYVGGPTCAVMALKCPDIVITLVDKSSERIAQWNSDKLPIYEPGLD...PQL', SingleLetterAlphabet())

Seq('MAEELAGVRSGVVVSPHSDLSVDVLVLRISQQHNSSRRIDPRLSDGASEEISLV...PVL', SingleLetterAlphabet())

Seq('MSWLTPALVTIILTVVKAIVVLLAVVICGALLSWVERRLLGLWQDRYGPNRVGP...AAQ', SingleLetterAlphabet())

Seq('MGENLFVGMAKGLRTWLESQHFVPLVTDVFLMLLAVVGVLAFLFLNALFLIYYD...IRW', SingleLetterAlphabet())

Seq('MELRFPRFSQGLAQDPTTRRIWFGIATAHDFESHDDITEERLYQNIFASHFGQL...KFG', SingleLetterAlphabet())

Seq('MADAATEAAGLRNFTINFGPQHPAAHGVLRLVLELDGEVVERVDPHIGLLHRGT...VDR', SingleLetterAlphabet())

Seq('MIHPDPTLFGHDPWWLILAKAVGVFVFLVLTVLAAILIERKVLGRMQMRFGPNR...TDA', SingleLetterAlphabet())

Seq('MSASNYATERETAEGKVFTVTGGDWDVVLSGTDPINDERIVVNMGPQHPSTHGV...CDR', SingleLetterAlphabet())

Seq('MTATNNIVNLYQPFLDYALTKLKEELELQPYPIPSGFEHKVAMTGKGKKQQEVQ...NPA', SingleLetterAlphabet())

Seq('MTIIFVDNEEYNVDKSDNLLQACLSSGINIPYFCWHPVLGSIGSCRQCAVTIYK...ISI', SingleLetterAlphabet())

Seq('MPQTQKPDFPGNDLGDQFRFWPKHNEKIYERLENKHAWLEEKRAAAQGDGKPPA...ADK', S

Seq('MRVYYDRDADLNLIKAKKVAVIGYGSQGRAHALNLKDSGAQNLVIALKAGSPTV...SVN', SingleLetterAlphabet())

Seq('MRVYYDRDADLNLIKAKKVAVIGYGSQGRAHALNLKDSGAQNLVIALKAGSPTV...SVN', SingleLetterAlphabet())

Seq('MAKIYTDREASLEPLKGKTIAVIGYGIQGRAQALNLRDSGLEVIIGLRRGGKSW...FGP', SingleLetterAlphabet())

Seq('MSFFHANQREALNQSLAEVQGQINVSFEFFPPRTSEMEQTLWNSIDRLSSLKPK...PGL', SingleLetterAlphabet())

Seq('MKVFYDKDADLSLIKGKNVTIIGYGSQGHAHALNLNDSGVKVTVGLRKNGASWN...TKN', SingleLetterAlphabet())

Seq('MAVEMFYDDDADLSIIQGRKVAVIGYGSQGHAHSLSLRDSGVDVRIGLKEGSKS...ETA', SingleLetterAlphabet())

Seq('MAVEMFYDDDADLSIIQGRKVAVIGYGSQGHAHSLSLRDSGVDVRIGLKEGSKS...ETA', SingleLetterAlphabet())

Seq('MRVYYDRDADLNLIKGKKVVIIGYGSQGHAHALNLKDSGVKDVAIALRKGSTSA...SKN', SingleLetterAlphabet())

Seq('MANYFNTLNLRQQLAQLGKCRFMGRDEFADGASYLQGKKVVIVGCGAQGLNQGL...VAG', SingleLetterAlphabet())

Seq('MSVEVFYDDDADLGLIQGRTVAVIGYGSQGHAHALSLRDSGVAVVIGLPAGSKS...ETA', SingleLetterAlphabet())

Seq('MANYFNSLNLRQQLAQLSQCRFMDRSEFENGCDYIKGWNIVILGCGAQGLNQGL...AAQ', Si


Seq('MAKHRYLPMTEQDEKEMLDVIGVKSIDDLFQDIPEKIRFKRDYDLKPAKSEPAL...GAK', SingleLetterAlphabet())

Seq('MALRLDGKALAKELEQRLAQQVRSGCAKAGRPPGLAVLRVGDDPASAVYVANKE...LVP', SingleLetterAlphabet())

Seq('MAEQVALSRTQVCGILREELYQGDAFHQSDTHIFIIMGASGDLAKKKIYPTIWW...HKL', SingleLetterAlphabet())

Seq('MKHLAVLGSTGSIGRQTLEIVRRYPSEFKIISMASYGNNLRLFFQQLEEFAPLA...QEI', SingleLetterAlphabet())

Seq('MKKQIAILGSTGSIGTQALQVIEEHPDLYEVYALTANNRVDLLVEQARKFMPEA...LSS', SingleLetterAlphabet())

Seq('MAAKIIDGKTIAQQVRSEVAQKVQARVAAGLRAPGLAVVLVGSNPASQIYVASK...QGK', SingleLetterAlphabet())

Seq('MPFIVNTASERDEMLKAIGVSSFDDLLVAIPEEIRLKRALDLFPASDEPQVRKL...TGI', SingleLetterAlphabet())

Seq('MSPRPEKRRVSIFGATGSIGQNTIDLIARAPEAYDVVALTGARNIAQLAADARR...LAR', SingleLetterAlphabet())

Seq('MQNMVILGATGSIGASTLSVISANPFAYSVYGLVANASVDKMLALCVAHKPKVA...SIA', SingleLetterAlphabet())

Seq('MQKRLTLLGSTGSIGDSTLDVVARHPERFSVYALTAHRNGDKLVEQCLRFAPEV...RTV', SingleLetterAlphabet())

Seq('MTPIRHLTILGSTGSIGESTLDVVARHPDRFQAVALTADKNVEKMFEQCKRFYP...WNG', S

Seq('MTQPNPNKQSVELNRTSLYWGLLLIFVLAVLFSNYFFN', SingleLetterAlphabet())

Seq('MEIILGVVMFTLIVLALTVMILFAKSKLVNTGDITVEINEDEDKSFTAPAGDKL...FGG', SingleLetterAlphabet())

Seq('MTQSNPNEQNVELNRTSLYWGLLLIFVLAVLFSNYSFN', SingleLetterAlphabet())

Seq('MFMINILMLIIPILLAVAFLTLVERKVLGYMQLRKGPNVVGPYGLLQPIADAIK...PQT', SingleLetterAlphabet())

Seq('MIIIVIVVFLFYFSFLNLNLNPKKKRPPSPITLPVIGNLISLLNNQPQNILFNY...IKR', SingleLetterAlphabet())

Seq('MAPAEILNGKEISAQIRARLKNQVTQLKEQVPGFTPRLAILQVGNRDDSNLYIN...GLF', SingleLetterAlphabet())

Seq('MANQVIRCKAAVAWEAGKPLSIEEIEVAPPQAHEVRIKIIATAVCHTDAYTLSG...LKM', SingleLetterAlphabet())

Seq('MEVMQLSFSYPALFLFVFFLFMLVKQLRRPKNLPPGPNKLPIIGNLHQLATELP...CQD', SingleLetterAlphabet())

Seq('MEVFMFPTGTTVIISVLSVLLAVIPWYLLNKLWLKPKRFEKLLKAQGFQGEPYN...HKL', SingleLetterAlphabet())

Seq('MCDPIREDGSNKRGAVSKEKRPYIHREWSWADIIRALTVINVHFLCLLAPFNYK...LTR', SingleLetterAlphabet())

Seq('MSLWYIIVAFVFFSSMIIVRIIRKTKKNLPPGPPRLPIIGNLHQLGSKPHRSMF...QWT', SingleLetterAlphabet())

Seq('MPTYALLGATGATGSA

Seq('MKLSLLSTFAAVIIGALALPQGPGGGGSVTCPGGQSTSNSQCCVWFDVLDDLQT...PAP', SingleLetterAlphabet())

Seq('MGITGSLTPDQLDFFHSQGYLVIESFASEDEIRGLRKRMDELLNQFDCSVSSIF...VLP', SingleLetterAlphabet())

Seq('MDRNRASARLTVLLRHLGCRSAGTIIAHHTSGVGSLASFHPQQFQYTRENNVLS...INL', SingleLetterAlphabet())

Seq('MTTPSDLNIYQLIDTQNGRVTPRIYTDPDIYQLELERIFGRCWLFLAHESQIPK...VMK', SingleLetterAlphabet())

Seq('MFCVQCEQTIRTPVGNGCSYAQGMCGKTAETSDLQDLLVAVLQGLSAWALKARE...LAA', SingleLetterAlphabet())

Seq('MFCVQCEQTIRTPAGNGCSYAQGMCGKTAETSDLQDLLIAALQGLSAWAVKARE...LSA', SingleLetterAlphabet())

Seq('MLWVIGINHKVEVDIRQKFSLTKTKLQEKLISLKKLADEVIILSTCNRTEIYFF...FLK', SingleLetterAlphabet())

Seq('MIAVLGIKRNTPIEIREKLTIKVNKHDEYLDKLLKYLEGVVILATCNRTEIYFN...TKK', SingleLetterAlphabet())

Seq('MPQPAPLDFVVVGLNHQTAPVEVRERVAVRPEEEGALLGHLARHADEVLLLATC...ACD', SingleLetterAlphabet())

Seq('MYRTALRPSQSALRAIRSTTSPSALVSSGARRFASTTSAPKKKSTWKGAAVRWG...KQQ', SingleLetterAlphabet())

Seq('MEPKSKKVKQDIFNFPDGKDVPTTKEKAEAYVDALKAHPFYDNVHSVVDVYDSA...ALE', Si

Seq('MTFTASQLLALLPLLLTTGAMVALMLAIAWKRCDDTAFVVTIAGLNLALFSLPI...VLH', SingleLetterAlphabet())

Seq('MTAPAFTAKAFAAFAPFTLLGAVTILVMLLIAVRRDHRLVALSTIAGLLLCCIA...GHG', SingleLetterAlphabet())

Seq('MAAAASTLASLSATAAAAAGKRLLLSSPSRSLSLSLASRGRIAVMPHLRAGILS...KAL', SingleLetterAlphabet())

Seq('MSNFQFAIPTIDYIRILPEIVLAVFGIVVMMADALIPQNNSKKPLGYLSLIGVL...LVR', SingleLetterAlphabet())

Seq('MSIAPPPLSVLLELTHRCPLACPYCSNPIALAALREEMDTAGWRSLLQQAADMG...PLE', SingleLetterAlphabet())

Seq('MSSLLPPLGAVLPELLLALSAVVLVLIGAIQGEKSANLVNGLAIAALVAAGVLV...ALF', SingleLetterAlphabet())

Seq('MGMDIGLSHYLTVAAILFTLGTLGIFLNRKNVIVILMSVELILLAVNINLVSFS...MKG', SingleLetterAlphabet())

Seq('MIQYKVLIPEISLLILAIISFFYGFISRNYRTTYILSFLSILTAIILSVFNFGQ...LIF', SingleLetterAlphabet())

Seq('MSATAVHSLWTTASGVTSAAPGNSFTAPKIEYTQLMPVLIIVVAAVLGILVEAF...FVR', SingleLetterAlphabet())

Seq('MNFVQPVIQWASLAPILIVLGAAVLGVLIEAFAPRRVRRPVQVTLALLAAAGAF...FVV', SingleLetterAlphabet())

Seq('MMLFLPELALLITALVFFILTLKAPDSPKLHGVALISSVVVIITSLLCLGQTGE...TTL', Si


Seq('MQQALFGGGCFWCVEAVFLQIRGVEKVTSGYAGGHTTHPTYEQVCQGDTQHAEV...MKN', SingleLetterAlphabet())

Seq('MSFFDSYRKKMQMPSKEEVLPGRVQPIPTAAAHFVSGHPLKGPWPDGMKQVLFG...LAH', SingleLetterAlphabet())

Seq('MSVVQSSCAYCGVGCGVSVSSNKPNWTDVDAADLILVGDNKHPANYGHLCAKGE...DVA', SingleLetterAlphabet())

Seq('MTERAILAGGCFWGMQDLIRKRPGVLRTRVGYTGGDVPNATYRNHGSHAEAIEI...AGD', SingleLetterAlphabet())

Seq('MSLWGPSKTVMVSADKALPGRASEIRVPERHYVLDTLLKPPFPVGMEVACFGMG...RYG', SingleLetterAlphabet())

Seq('MQQALFGGGCFWCVEAVFLQIRGVEKVTSGYAGGHTTHPTYEQVCQGDTQHAEV...MKN', SingleLetterAlphabet())

Seq('MKLSNEYAWIIPLCPLIASCCTGSLSFFFPRVARGFHRLCALLNVFSLAISMFV...PFP', SingleLetterAlphabet())

Seq('MILKADFLKLSANNLISWARQGSFWPLTFGLACCALEMMHATVSRYDFDRFGVI...KVF', SingleLetterAlphabet())

Seq('MEGNEKAEQKNATSEESTDIFENPGEGLEKATFAAGCFWGIEEAFRQVKGVVAT...FKK', SingleLetterAlphabet())

Seq('MLSRGHCCKLKHSFNGLRNVALKRLVGSKAGYRMFPNLIEKRIRRIDDALADGE...KVT', SingleLetterAlphabet())

Seq('MAMFSWTSSEAINGMYIPSALLIFGTAIVKKEWLPYAVALAAILSGGKVFSNRQ...FCF', S


Seq('MEVNILAFIATALFILVPTAFLLIIYVKTVSQNN', SingleLetterAlphabet())

Seq('MSWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILLLMIDSTSDQ...TLF', SingleLetterAlphabet())

Seq('MIWHVQNENFILDSTSIFMKAFHLLLFNGSFIFPECILIFGLILLLMFDSTSVQ...TLF', SingleLetterAlphabet())

Seq('MIWHVQNENFILDSTRIFMKAFHLLLFNGSFIFPECILIFGLILLLMIDLTSDQ...TLF', SingleLetterAlphabet())

Seq('MIWHVQNENFILDSTRIFMKAFHLLLFHGSFIFPECILIFGLILLLMIDLTSDQ...TLF', SingleLetterAlphabet())

Seq('MIWHVQNENFILDSTRIFMKAFHLLLFHGSFIFPECILIFGLILLLMIDSTSDQ...TLF', SingleLetterAlphabet())

Seq('MKTKLFINNAWIDSSDQQTFERIHPVSSDVVTESANATVTDAIKAAQAAEEAFK...YPF', SingleLetterAlphabet())

Seq('MNDINLFLAKLTYTSAKLTILPEIILILGLVAVVVIDLLSKGKNTFLLYKISMV...TFF', SingleLetterAlphabet())

Seq('MIWHVQNENFILDSTRIFMKAFHLLLFNGSFIFPECILIFGLILLLMIDSTSDQ...TLF', SingleLetterAlphabet())

Seq('MIWHVQNENFILDSTRIFMKAFHLLLFHGSFIFPECILIFGLILLLMIDSTSDQ...TFF', SingleLetterAlphabet())

Seq('MIWHVQNENFILDSTRIFMKAFHLLLFHGSFIFPECILIFGLILLLMIDSTSDQ...TLF', SingleLetterAlphabet())

Se

Seq('MTMHVEVGPDSTAPPHSSGIKVIIIGLGIGGLAAAIECHRKGHSVIAFDKAQEL...QDT', SingleLetterAlphabet())

Seq('MALRVTADVWLARPWQCLHRTRALGTTATLAPKTLQPFEAIPQYSRNKWLKMIQ...PVS', SingleLetterAlphabet())

Seq('MTHRIAFIGLGAIASDVAAGLLADAAQPCQLAALTRNAADLPPALAGRVALLDG...TIG', SingleLetterAlphabet())

Seq('MELMNLASKETSYWMIALPAGFGSQNLHDVSTLGYLFLAVVFLSIVTWALAGGG...EIF', SingleLetterAlphabet())

Seq('MAKVLYITAHPFNELVSNSMAAGKAFIETYQQQHPDDEVKHIDLFETYIPVIDK...TTF', SingleLetterAlphabet())

Seq('MRFLLGVLMLMISGSALATIDVLQFKDEAQEQQFRQLTEELRCPKCQNNSIADS...ESK', SingleLetterAlphabet())

Seq('LVPLFVFILFLHKCFFTTSNNNKKLPPSPRKLPIIGNLHQLGLHPHRSLHKLSK...LLP', SingleLetterAlphabet())

Seq('MEWEWSYVFFSAIIILPAFILFFSQKNTTKSSYKFPPGPPGLPIFGNMFELGTE...KKA', SingleLetterAlphabet())

Seq('MMLEGEFHTHMLPEFMIERRQDRCIRCRVCERQCGFNVHWYDEEMDMMREDEMK...AGE', SingleLetterAlphabet())

Seq('MASEGGSVRHVIVVGAGPGGLSAAINLAGQGFRVTVVEKDAVPGGRMKGLTLGA...RAG', SingleLetterAlphabet())

Seq('MFAEGQIQKVPILGKESIHIGYKMQDHIVSEIVANIKSSTYILVTDTNIEDLGY...LGN', Si

Seq('MKYTLTRVNISDDDQNYPREKKIQVSDPTKKYIQKNVFMGTLSKVLHNLVNWGR...DKN', SingleLetterAlphabet())

Seq('MSDNDELQQIAHLRREYTKGGLRRRDLPADPLTLFERWLSQACEAKLADPTAMV...LAP', SingleLetterAlphabet())

Seq('MSTLADLRKNYSLGSLDISDVDPNPFGQFDRWFKQAIDAQLPEPNTMTLATADA...LSP', SingleLetterAlphabet())

Seq('MGLLDAKISNHNVLVTSVDNVLNWARLSSLWPMGFGLACCAIEMMATNASNYDL...MAS', SingleLetterAlphabet())

Seq('METPSIDIQNIRAKYLNSHDPYLLESKLPTTSPFELFDIWFRNVASQSDLTFEE...LSP', SingleLetterAlphabet())

Seq('MPDPLPDLSSLRLAYTRAELRRADLDPDPLRQFQGWLGEALQAGLREPYALSLA...LMP', SingleLetterAlphabet())

Seq('MELDSGKPLSLLTLDEIDQHEGGIIQSFRTGHTTPYPDPADWLNSEEPPPGVFV...IKR', SingleLetterAlphabet())

Seq('MNDLAHMRTNYVRSVLSEETAGFDPLALFSLWFSEAKEEGELEPNAMSLSTVNT...LSP', SingleLetterAlphabet())

Seq('MITKDPIDLFNIWYQEVLKNYSKDPTAMVLATCSKDLKPSARVVLLKQHSDEGF...LYP', SingleLetterAlphabet())

Seq('MSIEGVLKEGFVTTTADKLINWTRTGSLWPMTFGLACCAVEMMHAGAARYDLDR...ARQ', SingleLetterAlphabet())

Seq('MAAQDCPVRPPVIAPATGGVDAPIVNVAPVQKILDVCRAMSLWPMTFGLACCAI...GGA', Si


Seq('MKILVDENMPYAEELFRRLGDVQAVPGRPIPRDALVDADALMVRSVTKVNEALL...QLL', SingleLetterAlphabet())

Seq('MHNRGMSSPSSPLSSSIADLRKSYERAELGEEASHADPLRQFDQWLQEAVAAQV...LAP', SingleLetterAlphabet())

Seq('MGTVNNAIRDSVLFTTADSIINWSRSSALWPETFGIACCAIEMISAGCARYDLD...IAS', SingleLetterAlphabet())

Seq('MDLHNIREDYSKQELSQANCHADPIQQFEQWLEEAITAKANEPTAMNVATVLDG...LSP', SingleLetterAlphabet())

Seq('MKILIDENMPYAEALFSQLGEVTMKSGRTLTADDLVDVDALMIRSVTKVNESLI...PTL', SingleLetterAlphabet())

Seq('MAGVNDAIRDSVLFTTADSIISWSRRSALWPETFGIACCAIEMISAGCARYDLD...IAS', SingleLetterAlphabet())

Seq('MKILVDENMPYAEELFRRLGDVQAVPGRPIPRDALVDADALMVRSVTKVNEALL...QLL', SingleLetterAlphabet())

Seq('MGLSPGSAKLGSPAQPLIAPAATGILDPRTGRPVGADDRFFVEVNNELSDKGFF...IER', SingleLetterAlphabet())

Seq('MANHPLTLEKDGFIVTTLDAAMAAAQKNSLWYMTFGLACCAVEMMHAAGARYDM...ARP', SingleLetterAlphabet())

Seq('MASELDGLPVIATRREEAEGFLQGLVSKSLGWARKYSLFTYPFVTACCGMEYMT...AGR', SingleLetterAlphabet())

Seq('MSWTIGRPGAEAPEDRMAASHLFTRLDDLVAWSRKHSLWPFNFGLSCCYVEMAT...EST', S

Seq('MAIANKNIIFVAGLGGIGFDTSREIVKSGPKNLVILDRIENPAAIAELKALNPK...SGI', SingleLetterAlphabet())

Seq('MTQEQANQSETKPAFDFKPFAPGYAEDPFPAIERLREATPIFYWDEGRSWVLTR...KAG', SingleLetterAlphabet())

Seq('MAEPNQHYEVIIAGGGIAGVTLALMFEKLDISYFLLEGRDTLESDRGAGIGLQP...VGS', SingleLetterAlphabet())

Seq('MTKLLMINAHPHTTVPSASLTVAASFKTAYQQTHPHDEITTRDLYQDGVPALND...GEF', SingleLetterAlphabet())

Seq('MTLAHIFQTIWDFGTKDSVLQPIWDYVRLNHSETLRSPLFPVVLTVSSYFVLVL...PRE', SingleLetterAlphabet())

Seq('MSTTLTNKNVIFVAGLGGIGLDTSKELVKRDLKNLVILDRIDNPAAIAELKAIN...SGI', SingleLetterAlphabet())

Seq('MILVLASLFAVLILNVLLWRWLKASACKAQRLPPGPPRLPILGNLLQLGPLPHR...LYT', SingleLetterAlphabet())

Seq('MNYPSVSEQKHRVFIIYSAYLRVQFRESARLAVSVRNKNYNLVRADLHNILPQK...LGR', SingleLetterAlphabet())

Seq('MVIANSNIIFVAGLGGIGLDTSREIVKSGPKNLVLLDRIDNPAAIAELSALNPK...SGI', SingleLetterAlphabet())

Seq('MTTFYLSLIISLFFLIITLKVFFNTSRKFKNLPPGPQCLPIIGNLHQLKQPLHH...DVF', SingleLetterAlphabet())

Seq('MAKVLYITAHPFNELVSNSMAAGKAFIETYQQQHPDDEVKHIDLFETYIPVIDK...TTF', Si


Seq('MLKLSSSQPDFAERLKALLAFETAQDPAVDAAVASICADVHHRGDAALVEHTNR...TPR', SingleLetterAlphabet())

Seq('MKGLEEYVREILEDIRRRGLEALREYSERFDNYSGPFRVSEGEFEEAEELVPEE...VRR', SingleLetterAlphabet())

Seq('MVRAKRKLDHIEYALSTGQSRTHGFHDIEFVHQSLPNSSYETITCETKIGELSL...SRR', SingleLetterAlphabet())

Seq('MTNRKDDHIKYALKYQSPYNAFDDIELIHHSLPSYDLSDIDLSTHFAGQDFDFP...QVN', SingleLetterAlphabet())

Seq('MMKAEELNKGFVNEIIEAGTPVPGEKEVASLKSCYQCGTCTGSCPSGRRTAYRT...LKQ', SingleLetterAlphabet())

Seq('MLIAVAGMSHRSAPVEARERVAFAPCAARSFLRRLREEDGVPEAVLLSTCNRTE...REP', SingleLetterAlphabet())

Seq('MLIAVIGVNHRTAPLEVREKLAFTEWGMKDSLKRLMSYPGIDGCAIVSTCNRTE...SRE', SingleLetterAlphabet())

Seq('MSDNKREQRKNEHVEIAMAQGDATISDFDEIRFVHHSIPSVDVDDIDLTSQLKD...DIR', SingleLetterAlphabet())

Seq('MHILVVSVNYRTAPVEFREKLTFQAAELERAMTTLQNQKSVLENVIVSTCNRTE...PSL', SingleLetterAlphabet())

Seq('MNIIVVGLSHKTAAVEIREKVAFSPTQMEKPLHALVSIPDITEGVIVSTCNRVE...LEE', SingleLetterAlphabet())

Seq('MNIISVGVNHKTAPIEIRERIALSEVQNKEFVTDLVSSGLASEAMVVSTCNRTE...SLQ', S

Seq('MKLLLSKPRGFCAGVERAIETVEKALLLWGSPIYVKHEIVHNRHVVQGLKTKGA...SVD', SingleLetterAlphabet())

Seq('MTVGYDEEIPFEQVRAHNKPDDAWCAIHGHVYDVTKFASVHPGGDIILLAAGKE...RAA', SingleLetterAlphabet())

Seq('MDSPRFPEETVLITGGGGYFGFRLGCALNQKGARVILFDITQPAQNLPEGIKFV...LSI', SingleLetterAlphabet())

Seq('MARRAAAGLLRRHLGPLAAGETLQARGMYPKQYGAANHAFSRFYSIQGQQRSLY...SVL', SingleLetterAlphabet())

Seq('MRVIVLGSGVIGVASAYYLARQGAEVTVLDRQSGPAEETSFGNAGQISPGYSTP...HAA', SingleLetterAlphabet())

Seq('MRVVILGSGVVGVASAWYLNQAGHEVTVIDREPGAALETSAANAGQISPGYAAP...AHS', SingleLetterAlphabet())

Seq('MRVVILGSGVVGVASAWYLNQAGHEVTVIDREPGAALETSAANAGQISPGYAAP...AHS', SingleLetterAlphabet())

Seq('MRVVVLGSGVVGVTSAWYLCQAGHEVTVIDREPGSALETSAANAGQISPGYAAP...AHN', SingleLetterAlphabet())

Seq('MRVVILGSGVVGVASAWYLNQAGHEVTVIDREPGAALETSAANAGQISPGYAAP...AHS', SingleLetterAlphabet())

Seq('MRLVLSGASSFTSNLFCSSQQVNGRGKELKNPISLNHNKDLDFLLKKLAPPLTA...STD', SingleLetterAlphabet())

Seq('MKVIVLGAGIVGVTSAYQLAKAGHDVTVVDRQPGPALETSFANAGEVSFGYCSP...RYG', Si


Seq('MAAKIIDGKTIAQQVRSEVAQKVQARIAAGLRAPGLAVVLVGSNPASQIYVASK...QGE', SingleLetterAlphabet())

Seq('MAAKIIDGKTIAQQVRSEVAQKVQARIAAGLRAPGLAVVLVGSNPASQIYVASK...QGE', SingleLetterAlphabet())

Seq('MGVILEGKPVAEKLEKEIKEKIEIYKTKGIIPNLCIVKVGENEEAEAYAKSIER...ARN', SingleLetterAlphabet())

Seq('MTVKAGINGFGRIGRIVLRASLLNPEVEVVAINDPFIDLEYMVYMFKYDSTHGR...GRL', SingleLetterAlphabet())

Seq('MGVILEGKPVAEKLEKEIKEKIQFYKSQGVIPTLCIVKVGKNAEAEAYAKSIEK...VEA', SingleLetterAlphabet())

Seq('MIKVAINGYGTIGKRVADAVAAQPDMEVIGVSKTSVSAEAYIAKERGYPLYIAD...AIG', SingleLetterAlphabet())

Seq('MRVILGPMEGVLDHLMRDMLTQINDYDFCVTEFVRVVNLLLPDHVFYRLCPELQ...LPV', SingleLetterAlphabet())

Seq('MTKILDGKKIASILREELKQDIITLKERGIEPKLAVVLVGEDPASVAYAKFLQK...TQE', SingleLetterAlphabet())

Seq('MKENFWSELPRPFFILAPMEDVTDIVFRHVVSEAARPDVFFTEFTNTESFCHPE...IEL', SingleLetterAlphabet())

Seq('MTLIDGKKLSIDLKERLTDQLREYKEQTGIVPKLVAIIVGDDPASKTYVGSKEK...LNR', SingleLetterAlphabet())

Seq('MKIKVGINGYGTIGKRVAYAVTKQDDMELIGVTKTKPDFEAYRAKELGIPVYAA...ILK', S


Seq('MDIIFYHPTFDTQWWIEALRKAIPQARVRAWKSGDNDSADYALVWHPPVEMLAG...RGY', SingleLetterAlphabet())

Seq('MSRKEELYDITIIGGGPTGLFAAFYGGMRQAKVKIIESMPQLGGQLAALYPEKY...SLF', SingleLetterAlphabet())

Seq('MKFDCAIVGGGLAGLLCGLALNQYGLRSVIISRGQSALHFSSASLDLLSALPNG...DRP', SingleLetterAlphabet())

Seq('MLNYDIAVIGGGIAGYCAALNAIEAGKKTVLISQGQSALHFSSGSIDVMAKTPS...CVL', SingleLetterAlphabet())

Seq('MKFDTVIMGGGLAGLLCGLQLQQHGLRCAIVTRGQSALHFSSGSLDLLSALPDG...EQQ', SingleLetterAlphabet())

Seq('MDIIFYHPTFDTQWWIEALRKAIPQARVRAWKSGDNDSADYALVWHPPVEMLAG...RGY', SingleLetterAlphabet())

Seq('MKPSVILYKALPDDLLQRLQEHFTVHQVANLSPQTVEQNAAIFAEAEGLLGSNE...VAD', SingleLetterAlphabet())

Seq('MDIIFYHPTFDTQWWIEALRKAIPQARVRAWKSGDNDSADYALVWHPPVEMLAG...RGY', SingleLetterAlphabet())

Seq('MKILLIGYGAMNQRVARLAEEKGHEIVGVIENTPKATTPYQQYQHIADVKDADV...DNL', SingleLetterAlphabet())

Seq('MKTLTTDVAIIGAGPVGLFQIFELGLQGLSTVVIDSLPEIGGQCSELYPDKPIY...MLS', SingleLetterAlphabet())

Seq('MMNHQFSSLERDRMLTDMTKKTYDLFIIGGGITGAGTALDAASRGMKVALSEMQ...LEQ', S

Seq('MIESQRHSYHLVDPSPWPISGSLGALATTVGGVMYMHSFQGGATLLSLGLIFLL...GGT', SingleLetterAlphabet())

Seq('MIYPIPNSETSTVHFKDVYDNYIGGQWMKPHSGEYFSNTSPVNGLVFCRVARSS...GLF', SingleLetterAlphabet())

Seq('MSPITREERLERRIQDLYANDPQFAAAKPATAITAAIERPGLPLPQIIETVMTG...GLL', SingleLetterAlphabet())

Seq('MSAHANHPFHLVDYSPWPLTGAIGAMTTVSGLVQWFHQYTMTLFILGNIITILT...WGS', SingleLetterAlphabet())

Seq('MSSLTMQVTKRLETFLQGTKKLYIDGKFVPSASGATFDTPNPATGETLMTLYEA...LED', SingleLetterAlphabet())

Seq('MKLFGLIGEKLGHSLSPEIHNKVFKDNNIDGLYNLFSVKKDFENNIVESLKCLG...CRF', SingleLetterAlphabet())

Seq('MHVFGLIGYPLNYTLSPQIHNYVFKRLRIDAAYVPLRVASKRLLHFIEFSRDAL...NPS', SingleLetterAlphabet())

Seq('MLEEINYDTKLFGLIGKNIKYTLSPYIHNFSFRTLGINAVYLVFDLDEMKFKRS...LVW', SingleLetterAlphabet())

Seq('MKFAVIGNPISHSLSPVMHRANFNSLGLDDTYEALNIPIEDFHLIKEIISKKEL...KGE', SingleLetterAlphabet())

Seq('MTMAALDVFNVPYSVPLLGSTVVILIGFIAIKALRVGSRPKGLPPGPPTEFIWG...VGK', SingleLetterAlphabet())

Seq('MATATTQRPLKGPAKRMSTWTMTREAITIGFDAGDGFLGRLRGSDITRFRCAGR...RRR', Si

Seq('MAVQMEYEKDVKVPALDGKKIAVIGYGSQGHAHSQNLRDTGHDVIIGVRPGKSF...IYN', SingleLetterAlphabet())

Seq('MATPEKWRKYLEDYIPCSFPTFLDNEGADSKSFASVLVRLEHPYGRLMSFSNNC...SRL', SingleLetterAlphabet())

Seq('MKSTSKIYTDKDSNLDVIKGKRIAVLGYGSQGRAWAQNLRDSGLNVVVGLEREG...PKS', SingleLetterAlphabet())

Seq('MTVQMEYEKDVKVAALDGKKIAVIGYGSQGHAHAQNLRDSGRDVIIGVRPGKSF...IYN', SingleLetterAlphabet())

Seq('MKSTSKIYTDKDSNLDVIKGKRIAVLGYGSQGRAWAQNLRDSGLNVVVGLEREG...PKS', SingleLetterAlphabet())

Seq('MKSTSKIYTDKDSNLDVIKGKRIAVLGYGSQGRAWAQNLRDSGLNVVVGLEREG...KKS', SingleLetterAlphabet())

Seq('MARLYYDTDANLDLLDGKTVAIIGYGSQGHAHALNLRDSGVNVLVGLYPGSPSW...LKK', SingleLetterAlphabet())

Seq('MATLYYDTDADLGLLSGKTVAIIGYGSQGHAHALNLKDSGVDVVVGLYEGSRSA...KAA', SingleLetterAlphabet())

Seq('MARMYYDADANLDLLNGKTVAIIGYGSQGHAHALNLKDSGINVVIGLYAGSKST...KES', SingleLetterAlphabet())

Seq('MAQLFYDSDADLGLLNGKTVAIIGYGSQGHAHALNLKDSGVNVVVGLYDGSRSA...KDA', SingleLetterAlphabet())

Seq('MAAQMYYDDDADLNIIQGRTVAVIGYGSQGHAHALSLRDSGVDVRVGLPESSKS...LKN', Si

Seq('MAITLRQSDADFEQRFAAFLTTKREVSEDVDAGVRQIIARVRAEGDAALIDYTQ...LNM', SingleLetterAlphabet())

Seq('MTKGLKIVTIGGGSSYTPELVEGFIKRYDELPVRELWLVDIPEGEEKLNIVGTL...IEA', SingleLetterAlphabet())

Seq('MFCVQCEQTIRTPAGNGCSYAQGMCGKTAETSDLQDLLIAALQGLSAWAVKARE...LNA', SingleLetterAlphabet())

Seq('MFCVQCEQTIRTPAGNGCSYAQGMCGKTAETSDLQDLLIAALQGLSAWAVKARE...LSA', SingleLetterAlphabet())

Seq('MRGFPREVLESVWKIVDDVQSGGLKAALEYSKRLDGVAPEPHLVTPRQGGDPEV...ALR', SingleLetterAlphabet())

Seq('MKIKKAAVIGAGVMGAAIAAQLANAGIPVLLLDIVLPDKPDRNFLAKAGVERAL...APP', SingleLetterAlphabet())

Seq('MIIVTNTAKITKGNGHKLIDRFNKVGQVETMPGFLGLEVLLTQNTVDYDEVTIS...AAQ', SingleLetterAlphabet())

Seq('MMNMKEDKKNTMDMKNMKHHDERKKLNSSQGKNEIIFPEVAESKKDNNGYKNYT...VTN', SingleLetterAlphabet())

Seq('MTLWVLGLNHQTAPMELRERASFVGDALPRALDSLRNLPNVNEAALLSTCNRTE...PAS', SingleLetterAlphabet())

Seq('MHILVVSVNYRTAPVEFREKLTFQAAELEQAMTTLQNQKSVLENVIVSTCNRTE...PSL', SingleLetterAlphabet())

Seq('MSMLSRRLFSTSRLAAFSKIKVKQPVVELDGDEMTRIIWDKIKKKLILPYLDVD...SIE', Si


Seq('MAAAVRSVKGLVAVVTGGASGPWLATAKRLVGQGATAVLLDVPDSEGESQAKKL...MQP', SingleLetterAlphabet())

Seq('MDTQAFRRSLHHSDRYNRRGFDSPTKRAQALEEAYQSDLISSIRDNGFTYTKGR...IAS', SingleLetterAlphabet())

Seq('MYDFVIIGGGIIGMSTAMQLIDVYPDARIALLEKESAPACHQTGHNSGVIHAGV...FNQ', SingleLetterAlphabet())

Seq('HGSTKWCPSPDAAQKYACCHHGMATYVLGSENEMPWKFDRAYKSDITHVMDEMK...NNH', SingleLetterAlphabet())

Seq('MALINRLDSRDPGFKTALSQLLAFEAEQDESIDQAAAGILADVRRRGDAALLEY...DQP', SingleLetterAlphabet())

Seq('MTTPSDLNIYQLIDTQNGRVTPRIYTDPDIYQLELERIFGRCWLFLAHESQIPK...VMK', SingleLetterAlphabet())

Seq('MAFKQLLSAVTLALAASAASVTRRATCPDGTQLMNAECCALLAVRDDLQNNMFN...PRD', SingleLetterAlphabet())

Seq('MSMFCYQCQEAAGCKGCTVRGVCGKTEDLAKKQDLLIYTLKVVSLYNVEARKLN...ENL', SingleLetterAlphabet())

Seq('MSKAEMVANKIKERFPNAEVVVKTNKWGRERVWVRISREEYKELMKFIRELDPE...EKK', SingleLetterAlphabet())

Seq('MHILVVSVNYRTAPVEFREKLTFQAAELEQAMNTLQNQKSVLENVIVSTCNRTE...PSL', SingleLetterAlphabet())

Seq('MALVTLGINHRTAPVEIRERVAFTPERMAEAFSELRAASGASEAAILSTCNRTE...EKL', S

Seq('MSQLLQQLGTDNEFIRRHNGPASSEHQHMLNTIGAETLQQLIEETVPSSIRLPQ...YEE', SingleLetterAlphabet())

Seq('MAGSSLRQVAVFGATGSIGASALDVIARHPERLRASLLSAGSKVDELLALCATH...LHA', SingleLetterAlphabet())

Seq('MIIKPRVRGFICVTTHPVGCEANVKEQIDYVTSHGPIANGPKKVLVIGASTGYG...TTV', SingleLetterAlphabet())

Seq('MGFLQGKKILITGMISERSIAYGIAKACREQGAELAFTYVVDKLEERVRKMAAE...TEG', SingleLetterAlphabet())

Seq('MKLEHPDRLMNRTPLSLAALETHDAFAERHIGPDAASQQAMLDTLGFATRAALI...DYA', SingleLetterAlphabet())

Seq('MTKQTLTQLEQHDLFLRRHIGPDSSQQQEMLNYVGAESLDDLTAQIVPESIRLS...DYE', SingleLetterAlphabet())

Seq('MKLEHPDRLMNRTPLSLAALETHDAFAERHIGPDAASQQAMLDTLGFASRAALI...DYA', SingleLetterAlphabet())

Seq('MSDHSTFADRHIGLDSQAVATMLAVIGVDSLDDLAVKAVPAGILDTLTDTGAAP...AFA', SingleLetterAlphabet())

Seq('MIAVRHLTILGSTGSIGVSTLDVVARHPDRFRVVALTANKSVQKILEQCRRFEP...CLA', SingleLetterAlphabet())

Seq('MSWHQAVWNEPLIFEYKGKGRIGFKIPEEEELKKEISINIPEKLRRKEIDLPEL...SLK', SingleLetterAlphabet())

Seq('MTQNLSQLEHNDAFIQRHIGSSVEQQQQMLAAVGASSLSTLIQQIVPADIQLPG...DYE', Si

Seq('MIIYLISLLPIIVATLMLYQRWWRSNIPPGPKPKFLLGNLHQMKPLWTHSFSEW...PLD', SingleLetterAlphabet())

Seq('MKAVIFAYHDMGCQGVQAVLDAGYEIAAIFTHADNPAENTFFGSVSRQAAELGI...RAS', SingleLetterAlphabet())

Seq('MLDIKGYLVLFFLWFISTILIRSIFKKPQRLRLPPGPPISIPLLGHAPYLRSLL...FAA', SingleLetterAlphabet())

Seq('MKAVIFAYHDMGCQGVQAVLDAGYEIAAIFTHADNPAENTFFGSVSRLAAGLGI...RAS', SingleLetterAlphabet())

Seq('MESRKGIKEVSMNFLDQLDAIIQNKHMLEHPFYMKWSKGELTKEQLQAYAKDYY...HKA', SingleLetterAlphabet())

Seq('MDFNLNDEQELFVAGIRELMASENWEAYFAECDRDSVYPERFVKALADMGIDSL...QYR', SingleLetterAlphabet())

Seq('MVVKKIALFGATGNTGLTTLAQAVQAGYEVTVLVRDPSRLPSEGPQPAHVVVGD...VYE', SingleLetterAlphabet())

Seq('MSLINKEILPFTAQAFDPKKDQFKEVTQEDLKGSWSVVCFYPADFSFVCPTELE...GKI', SingleLetterAlphabet())

Seq('MTVRDWANRGQTYVNARAPNLLGRFRSTDDEDENNPSTELATDTTSAYGSTAAS...IAN', SingleLetterAlphabet())

Seq('MKLAVYGKGGIGKSTTSCNISIALAKRGKKVLQIGCDPKSDSTFTLTGFLIPTI...DLV', SingleLetterAlphabet())

Seq('MSSELVKRHGHTHYANIPYGYYVLIVSFFYLVFLGVLRIILKPRAAGFNSSKRS...YAL', Si

Seq('MTKQTLTELEQHELFLTRHIGPDADEQQAMLNYVGAESLEDLTAQIVPESIRLG...DYE', SingleLetterAlphabet())

Seq('MSDHSTFADRHIGLDSQAVATMLAVIGVDSLDDLAVKAVPAGILDTLTDTGAAP...AFA', SingleLetterAlphabet())

Seq('MIIKPRVRGFICVTTHPVGCEANVKEQIDYVTSHGPIANGPKKVLVIGASTGYG...TTA', SingleLetterAlphabet())

Seq('MQNMVILGATGSIGASTLSVISANPTAYRVYALVANASVDKMLTLCLAHRPQVA...KFA', SingleLetterAlphabet())

Seq('MTQTLSQLENSGAFIERHIGPDAAQQQEMLNAVGAQSLNALTGQIVPKDIQLAT...EYQ', SingleLetterAlphabet())

Seq('MVVLGSTGSIGVNTLEIARRYDIEIEGLVAGNNYEVLNAQIKEFKPKYVVVKDL...NFV', SingleLetterAlphabet())

Seq('MGATALRKVSIFGATGSIGQNTIDLIARDPDAYDVVALSGGANIAQLAADARRL...RAG', SingleLetterAlphabet())

Seq('MPLNLPLSALEQHDEFIGRHIGPCSTEMATMLTAIGADSLEQLIDQTVPAAIRL...YAD', SingleLetterAlphabet())

Seq('MPVSRSPIEDLANGFARRHIGPSPQEIAAMLRAVGAPSLDALMGETLPAAIRQA...AAE', SingleLetterAlphabet())

Seq('MFLQNKNVVVMGVANKKSIAWGCAKALKDQGANVIYTYQNERMKKQVVKLADEN...HLT', SingleLetterAlphabet())

Seq('MTRTSPTLPVIILGAGMVGLTLAQALKKAGIPYEVYERDSAADTEKGRGWALTV...IAV', Si

Seq('MTQTLSQLENRGAFIERHIGPDAAQQQEMLNAVGAESLNALTGQIVPKDIQLAT...DYQ', SingleLetterAlphabet())

Seq('MSDHSTFADRHIGLDSQAVATMLAVIGVDSLDDLAVKAVPAGILDTLTDTGAAP...AFA', SingleLetterAlphabet())

Seq('MQYLTILGSTGSIGRSTLDIVRRHPDRFAVTALTANQGVAQMMSDCLEFHPSYA...LAV', SingleLetterAlphabet())

Seq('MTAHRTPLSELEQAMPFEQRHIGPDHEARAKMLAQVGYGSLDELTAAAVPDVIK...YED', SingleLetterAlphabet())

Seq('MTQTLSQLENSGAFIERHIGPDAAQQQEMLNAVGAQSLNALTGQIVPKDIQLAT...EYQ', SingleLetterAlphabet())

Seq('MSGSSLRRVAVFGATGSIGASALDVIARHPERLRASVLSAGSKVDALLALCVLH...LHA', SingleLetterAlphabet())

Seq('MKKISVLGSTGSIGKKTVDLLLKRKEEYQVEALSTCSNFALLACQAKLLNARYV...AYS', SingleLetterAlphabet())

Seq('MTQTLSQLENSGAFIERHIGPDAAQQQEMLNAVGAQSLNALTGQIVPKDIQLAT...EYQ', SingleLetterAlphabet())

Seq('MQQEEIIEGYYGASKGLKKSGIYAKLDFLQSATGLILALFMIAHMFLVSSILIS...HKE', SingleLetterAlphabet())

Seq('MTHPIIHDLENRYTSKKYDPSKKVSQEDLAVLLEALRLSASSINSQPWKFIVIE...TIL', SingleLetterAlphabet())

Seq('MTLSIIVAHDKQRVIGYQNQLPWHLPNDLKHVKQLTTGNTLVMGRKTFNSIGKP...KGK', Si

Seq('MSWGRLSRLLKPALLCGALAVPGLAGTMCASRDDWRCARSMHEFAAKDIDGHMV...CYL', SingleLetterAlphabet())

Seq('MTLSNQLSDLRLFRQYAYIDGKWTHGDAGREEAVFDPATGEAIGHIPVLEVEQI...SGS', SingleLetterAlphabet())

Seq('MKMMNGEDATDQMIKESFFITHGNPILTVEDTHPLRPFFETWREKIFSKKPKAI...TSA', SingleLetterAlphabet())

Seq('MLRECDYSQALLEQVNQAISDKTPLVIQGSNSKAFLGRPVTGQTLDVRCHRGIV...AEL', SingleLetterAlphabet())

Seq('MKAIVVKPPKPGVEVRDLSQVIRHGSGTVKVRILENGICGSDREIVKGELTTAR...KWS', SingleLetterAlphabet())

Seq('MSQNSNPAVVLEKVGDIAIEQRPIPTIKDPHYVKLAIKATGICGSDIHYYRSGG...GPE', SingleLetterAlphabet())

Seq('MDRIIQSPGKYIQGADVINRLGEYLKPLAERWLVVGDKFVLGFAQSTVEKSFKD...EWE', SingleLetterAlphabet())

Seq('MSILYEERLDGALPDVDRTSVLMALREHVPGLEILHTDEEIIPYECDGLSAYRT...ERF', SingleLetterAlphabet())

Seq('MVTEQEVEAIGKTLVDSTQPLQARFRALFTLRGLGGPDAISWISRGFEDSSALL...PPP', SingleLetterAlphabet())

Seq('MEFSFSSPALYIVYFLLFFVVRQLLKPKSKKKLPPGPRTLPLIGNLHQLSGPLP...VND', SingleLetterAlphabet())

Seq('MGFSPSSSWFLHPQLHHVVSKMSYFDAFLFYIVHLVDKLGLWHRFPVLLGVAYL...SAP', Si

In [11]:
print(len(seq_ids))
print()
print(len(seqs))

46750

46750


In [12]:
seqs[0:3]

['MMARFVSVSSCQFHFGFREVSPPSVTSYPRRFEVSDRRFPAIPIKCSSSEPENGEDSAPSLSSSSSSSTSEVSTSNSSTYNWYTGIGGIGMLDTAYLTYLKVTGSDAFCPIGGGTCGDVLNSDYAVVFGVPLPVIGFVMYGVVTALSAELGEGNLPFGISKSNGRFALFGITTAMASASAYFLYILSTKLSGSSCLYCLVSAFLSFSLFFLSVKDVKLQEIQQVVGLQICLAIIVVASLTASYSTAQPIPSRSGDIELPYFRTEISSSSSPYAIALAKHLNSIGAKMYGAFWCSHCLEQKEMFGREAAKELNYVECFPDGYKKGTKILKACADAAIEGFPTWIINDKVLSGEIELAELAEMTGFSLDQANETNQLQ',
 'MKSTKILILGAGPAGFSAAKAALGKCDDITMINSEKYLPYYRPRLNEIIAKNKSIDDILIKKNDWYEKNNIKVITSEFATSIDPNNKLVTLKSGEKIKYEKLIIASGSIANKIKVPHADEIFSLYSYDDALKIKDECKNKGKAFIIGGGILGIELAQAIIDSGTPASIGIILEYPLERQLDRDGGLFLKDKLDRLGIKIYTNSNFEEMGDLIRSSCVITAVGVKPNLDFIKDTEIASKRGILVNDHMETSIKDIYACGDVAEFYGKNPGLINIANKQGEVAGLNACGEDASYSEIIPSPILKVSGISIISCGDIENNKPSKVFRSTQEDKYIVCMLKENKIDAAAVIGDVSLGTKLKKAIDSSKSFDNISSLDAILNNL',
 'MMVPLAKLASPAYQCFHALKIKKNYLPLCATRWSSTCKVPRITTHYTIYPRDQDKRWEGVNMERFAEEADVVIVGAGPAGLSAATRLKQLAAQHEKDLRVCLVEKAAHIGAHTLSGACLDPRAFEELFPDWKEKGAPLNTPVTEDRFGILTEKYRIPVPILPGLPMNNHGNYVVRLGHLVSWMGEQAEALGVEVYPGYAAAEILFHEDGSVKGIATNDVGIQKDGAPKTTFER

In [13]:
seqID = []
for item in records:
    seqID.append(item.id)
    print(item.id)
    print()

sp|Q8L540|LTO1_ARATH

sp|Q9AL95|NROR_CLOAB

sp|P55931|ETFD_PIG

sp|Q16134|ETFD_HUMAN

sp|Q9NZC7|WWOX_HUMAN

sp|Q9UHB4|NDOR1_HUMAN

sp|Q6NPS8|NDOR1_ARATH

sp|Q12181|NDOR1_YEAST

sp|Q921G7|ETFD_MOUSE

sp|P37769|KDUD_ECOLI

sp|Q9RCG0|MNO_AMYME

sp|O22854|ETFQO_ARATH

sp|Q8J2V8|P2OX_TRIMT

sp|Q9T0A4|NDHS_ARATH

sp|Q6QWR1|P2OX_PHACH

sp|Q6UG02|P2OX_PHLGI

sp|Q6UPE1|ETFD_RAT

sp|Q9LVM2|NDHN_ARATH

sp|Q8PZ67|FPOF_METMA

sp|Q9CAC5|NDHL_ARATH

sp|P77526|YFCG_ECOLI

sp|Q8U3K2|GAPOR_PYRFU

sp|P72579|OFOB_SULSP

sp|P72578|OFOA_SULSP

sp|Q91WL8|WWOX_MOUSE

sp|P15559|NQO1_HUMAN

sp|Q8YW84|NDHM_NOSS1

sp|Q8DKZ3|NDHL_THEEB

sp|Q8YQS7|PSBA3_NOSS1

sp|A6QJ00|MTLD_STAAE

sp|Q033G3|MTLD_LACLS

sp|B5LMP9|PSBB_CICAR

sp|A0T0G9|PSBA_PHATC

sp|A2BP21|PSBA_PROMS

sp|Q09MG2|PSBF_CITSI

sp|Q08CS6|MOXD2_DANRE

sp|A4QK44|PSBB_ARAHI

sp|Q81MI9|MTND_BACAN

sp|P41644|PSBD_PINTH

sp|Q8RSW3|PSBE_SYNP2

sp|B5LMM1|PSBD_CICAR

sp|Q06GW8|PSBH_DRIGR

sp|Q31ZB5|MTOX_SHIBS

sp|P69384|PSBE_TOBAC

sp|Q7HIV0|PSBF_HYDCA

sp|B2J6T

sp|C4ZR74|ULAE_ECOBW

sp|B1XDU8|ULAE_ECODH

sp|O49312|YUC7_ARATH

sp|Q8Y5F1|Y2113_LISMO

sp|O32175|YUSI_BACSU

sp|O05616|VANA_PSEUH

sp|O28082|Y2201_ARCFU

sp|A7GVD5|Y3909_BACCN

sp|O26799|VORC_METTH

sp|P0C1E8|Y2355_CORGL

sp|P0A3X4|Y2533_BORPA

sp|P9WJJ0|Y1812_MYCTO

sp|P9WJJ1|Y1812_MYCTU

sp|A0A443HK11|VDTE1_BYSSP

sp|Q02873|YP107_YEAST

sp|Q9F723|Y301_CHLTE

sp|Q9LIG0|Y3136_ARATH

sp|Q9HZ00|Y3240_PSEAE

sp|B7LNV2|SSUD_ESCF3

sp|Q2YS77|Y537_STAAB

sp|A6UU98|WECC_META3

sp|Q6GBP9|Y546_STAAS

sp|Q72X61|Y5517_BACC1

sp|P23171|YRUB_CLOPA

sp|O89049|TRXR1_RAT

sp|P43496|TRXB_PENCH

sp|Q56632|VIBA_VIBCH

sp|P0CR36|TSC10_CRYNJ

sp|A7WZ30|Y585_STAA1

sp|Q7AKN0|WHIB_STRCO

sp|Q7AKI9|WHID_STRCO

sp|Q05016|YM71_YEAST

sp|Q68WY1|Y384_RICTY

sp|Q03102|YMN1_YEAST

sp|Q10RE2|YUC8_ORYSJ

sp|G2QDQ9|VAO15_MYCTT

sp|O34295|TTUC5_AGRVI

sp|Q46799|XDHA_ECOLI

sp|P80907|VORA_METTM

sp|O31820|YNEN_BACSU

sp|P53878|YNS1_YEAST

sp|P9WMV4|Y1279_MYCTO

sp|Q2JRM5|Y2611_SYNJA

sp|Q6GDV6|Y2567_STAAR

sp|P9WJP8|Y

sp|Q89KD9|CTAA_BRADU

sp|A0A0C6DUU3|BET2_PLEBE

sp|A9VUA7|CTAA_BACMK

sp|B7GYG5|BETA_ACIB3

sp|A7Z4B0|CTAA_BACVZ

sp|Q8YGI7|CTAA_BRUME

sp|A0A0C6DWS6|BET3_PLEBE

sp|A1USH8|CTAA_BARBK

sp|A9IVC8|CTAA_BART1

sp|Q1QXE1|BETA1_CHRSD

sp|A9MAG4|CTAA_BRUC2

sp|B0V945|BETA_ACIBY

sp|C0RIC2|CTAA_BRUMB

sp|Q6FDF9|BETA_ACIAD

sp|P94346|CTAA_GEOTD

sp|P0AA88|DSBE_SHIFL

sp|A4ILW9|CTAA_GEOTN

sp|B8H551|CTAA_CAUVN

sp|Q1BQE2|BETA_BURCA

sp|Q11J69|CTAA_CHESB

sp|C4L5D6|CTAA_EXISA

sp|A4JJG6|BETA_BURVG

sp|Q9PAN4|DSBE_XYLFA

sp|A8AJN0|BETA_CITK8

sp|B0UR12|CTAA_METS4

sp|Q2GCS4|CTAA_NEOSM

sp|B1XE52|BETA_ECODH

sp|B7KV80|CTAA_METC4

sp|A7ZWV4|BETA_ECOHS

sp|A9W350|CTAA_METEP

sp|B7MCD0|BETA_ECO45

sp|Q0TKW1|BETA_ECOL5

sp|B2TCJ8|BETA_PARPJ

sp|B3CRU7|CTAA_ORITI

sp|Q02DZ0|BETA_PSEAB

sp|A6X2G7|BETA_OCHA4

sp|A7HXA1|CTAA_PARL1

sp|Q6D6D9|BETA_PECAS

sp|B0KN19|BETA_PSEPG

sp|Q2K9W9|CTAA_RHIEC

sp|B4EX94|BETA_PROMH

sp|Q4ZM63|BETA_PSEU2

sp|C3PMV5|CTAA_RICAE

sp|B5ZUG2|BETA_RHILW

sp|A1VZK8|DSBI_CAMJJ

s


sp|B0CLB5|ARGC_BRUSI

sp|C4ZZA2|ASTD_ECOBW

sp|B1WYI6|ARGC_CROS5

sp|Q5P7T9|ARGC_AROAE

sp|D3J0Z1|CHADH_ASPFM

sp|Q41931|ACCO2_ARATH

sp|Q94A78|ACCH4_ARATH

sp|Q5AUZ6|ATNA_EMENI

sp|P55544|CPXP_SINFN

sp|P31718|CPXX_RHORH

sp|P31237|ACCO_ACTDE

sp|P24467|CPXD_RHIRD

sp|Q1B0P7|ACDH2_MYCSS

sp|Q0S005|ACDH5_RHOJR

sp|Q5AUZ9|ATNE_EMENI

sp|C1AYB3|ACDH5_RHOOB

sp|Q6I681|ACET1_MAIZE

sp|P37845|CHLB_EQUAR

sp|Q85FG5|CHLL_ADICA

sp|A9BEG6|CHLL_PROM4

sp|Q85X76|CHLB_PINKO

sp|A2CDU0|ACSF_PROM3

sp|C3NEV8|G3P_SULIY

sp|O34591|ACOB_BACSU

sp|P34731|FAS1_CANAX

sp|Q2UB88|3HAO2_ASPOR

sp|B1HRX9|FOLD_LYSSC

sp|Q2HD63|3HAO_CHAGB

sp|Q89187|3BHS_VAR67

sp|Q5JHB5|G3P_THEKO

sp|Q83V26|3HAO_PSEFL

sp|Q28SE8|3HAO_JANSC

sp|Q6CKK7|ACOX_KLULA

sp|P26439|3BHS2_HUMAN

sp|A2VCW9|AASS_RAT

sp|P05335|ACOX4_CANMA

sp|Q8NLB6|3HBH_CORGL

sp|Q67477|3BHS_FOWPN

sp|Q5EXK1|3HBH_KLEOX

sp|Q949M8|3HID3_ARATH

sp|Q9CR21|ACPM_MOUSE

sp|P71864|3O1D1_MYCTU

sp|Q0TTH1|ANSME_CLOP1

sp|Q83R90|ABDH_SHIFL

sp|Q93VC3|ANS1_ORYSJ



sp|B0FWC6|NU2M_AEDAE

sp|O78680|NU2M_CARAU

sp|P0CD45|NU2C2_SOLBU

sp|Q534E0|NU2M_AILFU

sp|P0CC31|NU2C2_ANTAG

sp|Q9MTP3|NDHJ_OENEH

sp|B2J6S6|NDHK_NOSP7

sp|Q9S831|PSAE1_ARATH

sp|A4QJT7|NDHK_OLIPU

sp|Q9M3I7|NDHI_SPIOL

sp|Q9M3M1|NDHJ_SPIOL

sp|P09424|MTLD_ECOLI

sp|Q4L9Y4|MTLD_STAHJ

sp|B0Z5A8|PSBA_OENPA

sp|Q332Y2|PSBC_LACSA

sp|Q635P0|MTND_BACCZ

sp|Q7U4Z3|PSBA3_SYNPX

sp|Q68S10|PSBC_PANGI

sp|B2XWK1|PSBC_FAGEA

sp|Q1KVW6|PSBD_TETOB

sp|A6BM30|PSBC_GNEPA

sp|Q70Y08|PSBD_AMBTC

sp|Q0G9K2|PSBE_LIRTU

sp|Q2MIH1|PSBF_SOLBU

sp|Q67HB3|PSBF_ASPOF

sp|Q20EW2|PSBE_OLTVI

sp|Q731Q9|MTND_BACC1

sp|Q56317|PORB_THEMA

sp|Q49L03|PSBD_EUCGG

sp|B0JUK9|MTND_MICAN

sp|Q4VZH6|PSBF_CUCSA

sp|B1WQ89|PSBD_CROS5

sp|Q8CM25|PSBD_THEEB

sp|A4QJI3|PSBI_AETGR

sp|F9VEN7|QUEG_LACGL

sp|Q68RX8|PSBH_PANGI

sp|Q1XDD0|PSBD_PYRYE

sp|B2VAA3|MTND_SULSY

sp|Q85CN1|PSBI_ANTAG

sp|Q9KPA4|NADB_VIBCH

sp|B0Z4W7|PSBI_OENBI

sp|P12240|PSBI_THEVL

sp|Q31CN2|PSBJ_PROM9

sp|Q06GP6|PSBJ_PIPCE

sp|A7Y3F7|PSBJ_IPOPU

sp|Q8M

sp|A0A067XMV3|PTAB_PESFW

sp|A3NUS1|PYRD_BURP0

sp|Q7NC99|PYRD_GLOVI

sp|Q8NQC0|PYRD_CORGL

sp|A5EV79|PYRD_DICNV

sp|C1DDY1|PYRD_AZOVD

sp|Q64FG0|RETST_MACFA

sp|Q44240|NDHK_NOSS1

sp|Q8PXV8|MTD_METMA

sp|Q8HVQ0|NDHI_LOXSI

sp|A4QLJ7|NDHJ_LOBMA

sp|A2BZX1|NDHM_PROM1

sp|A2BQ51|NDHL_PROMS

sp|B1YHJ8|MTLD_EXIS2

sp|Q70Y17|PSBA_AMBTC

sp|C4L274|MTLD_EXISA

sp|A3PF48|NDHN_PROM0

sp|A7ZTE9|MTLD_ECO24

sp|B2X1W8|PSBC_OEDCA

sp|P24725|PSBA_GALSU

sp|A4QJR4|PSBA_OLIPU

sp|A5GTY4|PSBA1_SYNR3

sp|P10503|PUTA_SALTY

sp|Q5R7S9|PXL2B_PONAB

sp|A5EF40|RBL1B_BRASB

sp|P92445|RBL_ASPEL

sp|Q479W5|RBL2_DECAR

sp|A9BEH3|RBL_PROM4

sp|Q948U0|RBOHA_SOLTU

sp|P26963|RBL_PINKR

sp|H9BFQ1|TPRL2_ERYCB

sp|Q569D5|SBP1_XENTR

sp|A0A084B9Y9|SAT1_STACB

sp|A0A084B9Z0|SAT2_STACB

sp|Q665B4|SSUD_YERPS

sp|Q89ER2|SSUD_BRADU

sp|B7UN21|SSUD_ECO27

sp|A8AID7|SSUD_CITK8

sp|E3PZS3|STAD2_OPHAA

sp|P32061|STAD_CUCSA

sp|P46253|STAD_SOLTU

sp|P22243|STAD_CARTI

sp|A0A144KPK9|TES1_ALTAL

sp|Q8L7W5|SMO11_ARATH

sp|Q00681|ST


sp|Q669J9|MTOX_YERPS

sp|Q76KC2|PLD_MICLT

sp|Q67HN7|PSBF_ANACO

sp|Q67H93|PSBF_LEOCS

sp|A3PAX2|PSBH_PROM0

sp|Q8XWM7|NADB1_RALSO

sp|Q9MTK7|PSBJ_OENEH

sp|A0T0P8|PSBH_THAPS

sp|Q2MI86|PSBJ_SOLLC

sp|O78676|PSBI_OROMI

sp|Q1ACJ0|PSBJ_CHAVU

sp|Q3KSU5|RIR1_EBVG

sp|P00335|RIDH_KLEAE

sp|P95629|PUTA_RHIML

sp|Q9ZKP7|RDXA_HELPJ

sp|A4IGL7|PXDN_XENTR

sp|Q4WLD1|PYR5_ASPFU

sp|P50523|QCR8_SCHPO

sp|Q6AYT0|QOR_RAT

sp|P34956|QOX1_BACSU

sp|P0DD67|PYRDA_STRPQ

sp|Q5HH23|QOX2_STAAC

sp|Q53FA7|QORX_HUMAN

sp|Q5HQB0|QOX1_STAEQ

sp|E0TW65|QOX3_BACPZ

sp|Q7A6A1|QOX4_STAAN

sp|A4IQF5|RESA_GEOTN

sp|Q6GAF5|QOX4_STAAS

sp|A4J560|PYRDB_DESRM

sp|A0RBT0|RESA_BACAH

sp|Q819S5|PYRDB_BACCR

sp|Q9CFC9|PYRDA_LACLA

sp|P0DH74|PYRDB_ENTFA

sp|Q8EUY2|PYRDB_MYCPE

sp|B1YIR7|PYRDB_EXIS2

sp|O08841|QSOX1_CAVPO

sp|B0CA74|PYRD_ACAM1

sp|Q8JGM4|QSOX1_CHICK

sp|O00391|QSOX1_HUMAN

sp|Q1LT62|PYRD_BAUCH

sp|A9AJX4|PYRD_BURM1

sp|A1WTJ3|PYRD_HALHL

sp|Q5P1A9|PYRD_AROAE

sp|A2S374|PYRD_BURM9

sp|B6I919|PYRD_ECOSE

sp|


sp|A0AUJ5|POLG_BVY3

sp|M1WG92|PIG5_CLAP2

sp|M1WCF5|PIG9_CLAP2

sp|Q5W271|PIGA_SERS3

sp|Q5W270|PIGB_SERS3

sp|P26698|PIGM_RHOST

sp|O87605|PIKC_STRVZ

sp|Q9FVQ6|PILR1_ARATH

sp|Q9LD12|PILR4_THUPL

sp|Q4R0H9|PILR1_LINUS

sp|Q9HVQ7|PIUC_PSEAE

sp|Q92HU9|POLOX_RICCN

sp|Q9ZD53|POLOX_RICPR

sp|Q7A617|POLOX_STAAN

sp|Q6GHP8|POLOX_STAAR

sp|P55826|PPOC_ARATH

sp|P45497|POLOX_STRCO

sp|Q9LRI8|PPOC_SPIOL

sp|P43311|PPO_VITVI

sp|Q9CCE3|POLOX_MYCLE

sp|P50336|PPOX_HUMAN

sp|C7FF05|PPO4_AGABI

sp|P40012|PPOX_YEAST

sp|P43310|PPO_SPIOL

sp|Q559A9|PKS13_DICDI

sp|P80900|PORA_METTM

sp|P21218|PORB_ARATH

sp|Q42850|PORB_HORVU

sp|Q9HTB6|RMD_PSEAE

sp|A5GVQ6|PEBB_SYNR3

sp|P42032|NUOH_RHOCA

sp|Q6FZY8|NUOD_BARQU

sp|Q5ASN8|PRX5_EMENI

sp|A8G798|PEBA_PROM2

sp|Q72XF6|NUOD_BACC1

sp|Q9K4U5|PEBB_PROMA

sp|Q2P0W2|NUOH_XANOM

sp|B5FPG3|NUOH_SALDC

sp|Q2MIA0|PSAA_SOLLC

sp|Q7MA48|NUOD_WOLSU

sp|Q49CB2|PSAB_CUSSA

sp|Q5P5I4|PED_AROAE

sp|B0BVB1|NUOH_RICRO

sp|Q8HVK7|NDHI_STERE

sp|A7FP92|MTLD_YERP3

sp|Q

sp|Q3K0Y5|MURB_STRA1

sp|Q8DUF8|MURB_STRMU

sp|B0RTS4|MURB_XANCB

sp|A5FUM6|PQQC_ACICJ

sp|P9WJL9|MURB_MYCTU

sp|Q3SMH1|MURB_THIDA

sp|Q8ZAN4|MURB_YERPE

sp|Q73HL4|MURB_WOLPM

sp|Q9ZJV7|NUOK_HELPJ

sp|A0A2I6PJ01|NODY1_HYPPI

sp|Q0IE56|MURB_SYNS3

sp|Q2S2L3|NUOK_SALRD

sp|Q8PHY3|PQQC_XANAC

sp|Q0T2K8|NUOK_SHIF8

sp|Q6D2S6|NUOK_PECAS

sp|Q5H234|PQQE_XANOR

sp|B7GME1|NUON_ANOFW

sp|A9R6L2|NUOK_YERPG

sp|Q9RUA0|NUON_DEIRA

sp|Q9PGI5|NUOK_XYLFA

sp|B9LGS9|NUON_CHLSY

sp|Q8F7R0|NUON_LEPIN

sp|A0LDR4|NUON_MAGMM

sp|Q2W3J7|NUON_MAGSA

sp|Q9ZD13|NUON_RICPR

sp|C1DCB6|NUON_LARHH

sp|B3DZT0|NUON_METI4

sp|A8GYE0|NUOF_RICB8

sp|Q30PJ2|NUON_SULDN

sp|Q4FM78|NUON_PELUB

sp|B1KJW0|NUON_SHEWM

sp|Q8GYB8|OPR2_ARATH

sp|M2YJQ2|NORB_DOTSN

sp|J4VWM7|OPS4_BEAB2

sp|Q9V3P0|PRDX1_DROME

sp|Q6DV14|PRDX1_GEKJA

sp|P42116|NURM_NEUCR

sp|P0CV91|PRDX4_CROAT

sp|Q3SYT8|NCPR_BOVIN

sp|Q9SI17|PER14_ARATH

sp|P50126|NCPR_CANMA

sp|A8ANR7|NORV_CITK8

sp|P86067|PER10_DAUCA

sp|P37201|NCPR_CANTR

sp|A7ZQD9|NORV_ECO24




sp|C1CEH8|LDH_STRZJ

sp|Q9KVY6|MIOC_VIBCH

sp|C1FMZ1|LDH_CLOBJ

sp|A9G9T3|NUOD2_SORC5

sp|Q9L4N4|PSAA_PROMA

sp|Q4UM09|NUOE_RICFE

sp|Q8K9Y2|NUOG_BUCAP

sp|Q0ICV5|PEBB_SYNS3

sp|Q92YN8|NUOI2_RHIME

sp|P9WQE9|PHAS_MYCTU

sp|Q9RU94|NUOH_DEIRA

sp|Q8P7T5|NUOD_XANCP

sp|P29166|PHF1_CLOPA

sp|Q49W60|NMO_STAS1

sp|A8GRR5|NUOD_RICRS

sp|Q6HAY8|NUOD_BACHK

sp|B6H063|PRX2_PENRW

sp|Q9RUK6|MSRB_DEIRA

sp|A3M1W6|MSRA_ACIBT

sp|B5YSJ7|MSRP_ECO5E

sp|A0KSX7|NRFA_SHESA

sp|B4TX85|MSRP_SALSV

sp|Q07500|NDH2_YEAST

sp|A4QKG3|NDHH_BARVE

sp|A9L9F3|NDHH_LEMMI

sp|A6H5P8|NDHH_CYCTA

sp|Q0G9Q6|NDHH_DAUCA

sp|B0Z4T1|NDHH_OENAR

sp|B0Z515|NDHH_OENBI

sp|Q7TUL1|NDHH_PROMM

sp|B3QK65|MQO_RHOPT

sp|O98692|NDHI_HORVU

sp|Q8HVQ1|NDHI_ECLPR

sp|Q8HVS3|NDHI_FLOPE

sp|A4QJK2|NDHJ_AETGR

sp|P0C384|NDHI_ORYSA

sp|Q8HVT6|NDHI_DUGME

sp|A0T0W4|PSAC_THAPS

sp|P0C342|NDHK_ORYSI

sp|A6MMU7|NDHJ_ILLOL

sp|Q06J24|PSBB_BIGNA

sp|Q4FFN5|PSBC_RANMC

sp|O19928|PSBB_CYACA

sp|A0A315|PSBA_COFAR

sp|Q19VC7|PSBD_CHLAT

sp|B2LMI9|P

sp|Q85PQ1|NU2M_VIVTA

sp|A6MMC7|NU3C_CHLSC

sp|C1KYN8|NAMA_LISMC

sp|P26847|NU3M_MARPO

sp|O21598|NU3M_ISTPI

sp|O21595|NU3M_ONYLE

sp|P06263|NU4C_MARPO

sp|O21604|NU3M_PERER

sp|Q8DKY0|NU4C1_THEEB

sp|Q3V4Y4|NU4C_ACOCL

sp|O21560|NU3M_ORYPA

sp|Q7NP39|NU4C_GLOVI

sp|B2K4J6|RNFE_YERPB

sp|Q6YXQ3|NU4C_PHYPA

sp|O21592|NU3M_REIFU

sp|P75188|MSRA_MYCPN

sp|A1E952|MSRB1_PIG

sp|C0MCD1|MSRA_STRS7

sp|B4RUW0|MSRB_ALTMD

sp|A0A286LF02|PSIH_PSICY

sp|Q87SW6|MSRA_VIBPA

sp|Q1I3L0|MSRA_PSEE4

sp|Q66F89|MSRA_YERPS

sp|B1YRN6|MSRB_BURA4

sp|Q1C7U0|MSRB_YERPA

sp|Q1WVT3|MSRB_LACS1

sp|P0A4R0|MSRP_BRUME

sp|Q0HRB0|NRFA_SHESR

sp|B7N2T7|NRFA_ECO81

sp|Q3Z0L8|MSRP_SHISS

sp|B8DDP3|MSRB_LISMH

sp|Q06PW6|NRFA_MANHA

sp|Q8PLY8|MSRP_XANAC

sp|Q9HVA4|MSRP_PSEAE

sp|Q4ZQC6|MSRB_PSEU2

sp|C0Q554|NRFA_SALPC

sp|Q665F0|MSRP_YERPS

sp|A1JQG8|MSRB_YERE8

sp|Q888N2|MSRP_PSESM

sp|A7FI81|MSRB_YERP3

sp|P67348|MSRP_SALTI

sp|A1AIR1|NRFA_ECOK1

sp|B8GMG5|MSRB_THISH

sp|P67347|MSRP_SALTY

sp|A0JXQ4|MSRA_ARTS2

sp|A8I

sp|C5D5V2|LDH_GEOSW

sp|Q6HK31|LDH1_BACHK

sp|P22988|LDHA_HORVU

sp|B0K226|LDH_THEPX

sp|Q6GH69|GUAC_STAAR

sp|P24273|LUXG_ALIFS

sp|A9MQG7|KEFF_SALAR

sp|Q83SM9|GUAC_SHIFL

sp|Q9KMW9|GUAC_VIBCH

sp|Q5I3B2|IOD2_BOVIN

sp|A7ZW55|GUAC_ECOHS

sp|C5B9H0|GUAC_EDWI9

sp|A5ISL9|GUAC_STAA9

sp|P18300|LUXB_KRYAS

sp|Q46888|LTND_ECOLI

sp|A8GKL4|KEFG_SERP5

sp|Q32551|NU5C_MUTAC

sp|P0DD21|PROA_STRPQ

sp|Q0SZW5|KEFG_SHIF8

sp|Q6AC27|IOLG_LEIXX

sp|Q32RH9|NU5C_ZYGCR

sp|P46620|NU5C_MAIZE

sp|O74298|LYS2_PENCH

sp|Q37372|NU5M_ACACA

sp|Q95885|NU5M_PAPHA

sp|O79422|NU5M_BRALA

sp|P12438|IPNS_STRMI

sp|O68575|PFLA_STRMU

sp|P11993|NU5M_PATPE

sp|P28301|LYOX_MOUSE

sp|Q06GU3|NU6C_DRIGR

sp|Q37680|NU5M_WHEAT

sp|Q49KU5|NU6C_EUCGG

sp|Q09FQ7|NU6C_NANDO

sp|P03916|NU5M_PANPA

sp|P11628|NU5M_EMEND

sp|Q95695|NU6C_ARATH

sp|B0Z4S8|NU6C_OENAR

sp|A4GYW9|NU6C_POPTR

sp|A6MMZ8|NU6C_ILLOL

sp|Q05528|KDUD_DICD3

sp|P18933|NU6M_DROME

sp|Q34944|NU6M_LUMTE

sp|P92670|NU6M_MACRO

sp|O21407|NU6M_STRCA

sp|Q3L243|PD


sp|B7IUW8|ROCA_BACC2

sp|Q7BUE1|RIFL_AMYMS

sp|Q5R673|RIOX2_PONAB

sp|P09368|PUT1_YEAST

sp|Q9Y8G7|C505_FUSOX

sp|O94038|ADH2_CANAL

sp|P48815|ADH2_CERCA

sp|Q6WKZ1|C71DI_MENGR

sp|A6VK38|ASPD_METM7

sp|Q4PD66|CCPR2_USTMA

sp|Q7NWH3|AZOR_CHRVO

sp|P91615|ADHR_DROME

sp|P25141|ADH1_PETHY

sp|Q9W011|C4D20_DROME

sp|Q9I2E2|AZOR2_PSEAE

sp|B2IX28|AZOR_NOSP7

sp|A0A140IL90|C96T1_NARAP

sp|Q6HK44|AZOR1_BACHK

sp|P0CP55|CCPR_CRYNB

sp|Q94IA6|C90D1_ARATH

sp|Q4L3N6|AZO1_STAHJ

sp|Q9S1U6|AZOR_STRCO

sp|B5EGQ4|AZOR_GEOBB

sp|B5EZH8|ARNA_SALA4

sp|C4LEW9|AZOR_TOLAT

sp|Q9X4K2|AZOR_GEOSE

sp|Q42569|C90A1_ARATH

sp|C0NL63|ARO1_AJECG

sp|P41407|AZOR_ECOLI

sp|Q0T8F5|CAIA_SHIF8

sp|P35183|AST1_YEAST

sp|A1A789|CAIA_ECOK1

sp|P85916|CADH2_PSEMZ

sp|P80239|AHPC_BACSU

sp|Q02971|CADH7_ARATH

sp|P31875|CYBH_WOLSU

sp|Q8UCK6|DPS_AGRFC

sp|P9WJF1|DPRE1_MYCTU

sp|Q7WTJ6|DMPL_ACICP

sp|Q8F1H5|ISPG_LEPIN

sp|B5YSA4|DPS_ECO5E

sp|Q9LVI9|DPYD_ARATH

sp|O89000|DPYD_RAT

sp|Q9V3A6|ERO1L_DROME

sp|Q75BB5|ERO1_ASH


sp|B5R0T7|MSRA_SALEP

sp|P40915|NDUV2_NEUCR

sp|Q9D6Y7|MSRA_MOUSE

sp|Q2UFN3|NCB5R_ASPOR

sp|Q0MQI9|NDUV2_PANTR

sp|O22769|NDUV2_ARATH

sp|Q8EVK2|MSRA_MYCPE

sp|Q32238|NU5C_FLARA

sp|P51100|NU5C_GERJA

sp|P19319|NARZ_ECOLI

sp|B0CQN7|NCB5R_LACBS

sp|Q0HMH8|NRFA_SHESM

sp|Q9Z4P4|NRFA_SULDE

sp|P0AAK8|NRFC_ECO57

sp|Q49178|MVHG_METFE

sp|Q92AE9|MSRB_LISIN

sp|B7L6Q3|MSRB_ECO55

sp|Q62G98|MSRP_BURMA

sp|A6VQE0|MSRP_ACTSZ

sp|P0ABK9|NRFA_ECOLI

sp|P58770|MSRP_AGRFC

sp|A6T7R0|MSRB_KLEP7

sp|Q00673|NDUS3_CANMA

sp|A2RHS0|MSRB_LACLM

sp|B4UGL0|MSRP_ANASK

sp|Q7NZY0|MSRP_CHRVO

sp|A6TUE9|MSRA_ALKMQ

sp|A8GK68|MSRP_SERP5

sp|C1DGW5|MSRA_AZOVD

sp|G8QMC1|MSRQ_AZOOP

sp|P76343|MSRQ_ECOLI

sp|Q8PA99|MSRQ_XANCP

sp|Q9S3Q5|MQO_PSEFL

sp|A1KWH2|MQO_NEIMF

sp|A1R7M9|MQO_PAEAT

sp|A5U6K4|MQO_MYCTA

sp|A9WQR6|MQO_RENSM

sp|C6DAM1|MQO_PECCP

sp|P31075|PSRA_WOLSU

sp|A1UEQ5|MQO_MYCSK

sp|A9BE38|MQO_PROM4

sp|Q46GZ7|MQO_PROMT

sp|B8HAT7|MQO_PSECP

sp|P40215|NDH1_YEAST

sp|A1EA65|NDHH_AGRST

sp|Q7YJS8|NDH

sp|Q8EI34|NUOG_SHEON

sp|B2HU48|NUOI_ACIBC

sp|Q33C36|PSAA_NICTO

sp|Q5RCH8|PECR_PONAB

sp|Q20EW7|PSAB_OLTVI

sp|Q47452|PCOA_ECOLX

sp|Q16W06|NO66_AEDAE

sp|O07112|PSAC_MASLA

sp|P41649|PSAC_PINTH

sp|Q2JFL2|NUOI_FRACC

sp|A5U7G9|NUOI_MYCTA

sp|Q57C80|MURB_BRUAB

sp|Q2RU32|NUOI_RHORT

sp|Q3ZIZ3|PSAB_TUPAK

sp|P9WQE6|PPSA_MYCTO

sp|P57260|NUOJ_BUCAI

sp|Q1GZL7|NUOI_METFK

sp|Q7N2J4|NUOI_PHOLL

sp|Q3JVB9|MURB_BURP1

sp|Q0KCS0|NUOK_CUPNH

sp|Q89AT8|NUOJ_BUCBP

sp|A7MK78|PPTA_CROS8

sp|A5FXJ1|NUOK_ACICJ

sp|P15109|PCXA_BURCE

sp|P9WQE5|PPSB_MYCTU

sp|Q0AJE3|MURB_NITEC

sp|B7GZB2|NUOK_ACIB3

sp|Q6JHU7|P3H2_CHICK

sp|Q1D8T0|NUOI_MYXXD

sp|P06235|NODG_RHIML

sp|Q2RI41|OORA_MOOTA

sp|P0AFE0|NUOJ_ECOLI

sp|P0DC49|MURB_STRPQ

sp|Q7MGQ8|MURB_VIBVY

sp|B3QBT0|PQQC_RHOPT

sp|A6TBW5|NUOK_KLEP7

sp|Q07QW6|NUOK_RHOP5

sp|Q3YV08|MURB_SHISS

sp|Q2NSK8|NUOK_SODGM

sp|A8M612|NUOK_SALAI

sp|A8GW95|MURB_RICB8

sp|B0RRA8|NUOK_XANCB

sp|A9MJA6|NUOK_SALAR

sp|Q88A82|PQQC_PSESM

sp|Q9X239|MURB_THEMA

sp|A0LEQ8|

sp|Q10058|YAM3_SCHPO

sp|P9WGQ6|Y1144_MYCTO

sp|P9WGQ7|Y1144_MYCTU

sp|G3Y420|YANH_ASPNA

sp|P0AFP4|YBBO_ECOLI

sp|Q7VDC0|UCRI_PROMA

sp|Q5R7B3|UGDH_PONAB

sp|Q9S3V1|VIOA_CHRVO

sp|O34678|YTBE_BACSU

sp|A7Z9Y9|Y3486_BACVZ

sp|Q8XD64|XDHD_ECO57

sp|Q9Z4S6|TTRA_SALTY

sp|Q7A3L9|Y2266_STAAN

sp|P29908|YNQ2_PARDE

sp|P77783|YNFF_ECOLI

sp|P9WLE6|Y2286_MYCTO

sp|P47373|Y127_MYCGE

sp|Q4UX14|Y1340_XANC8

sp|O05619|VDH_PSEUH

sp|P9WGQ1|Y1856_MYCTU

sp|P9WQA5|Y2971_MYCTU

sp|A5WFM3|Y1523_PSYWF

sp|P9WIZ7|Y3131_MYCTU

sp|P44247|Y1536_HAEIN

sp|P45250|Y1556_HAEIN

sp|A0QJV0|NUOD_MYCA1

sp|A6H1Q6|NUOH_FLAPJ

sp|Q32RS4|PSAB_STAPU

sp|Q07NL5|NUOI2_RHOP5

sp|Q92QP4|NUOI1_RHIME

sp|Q9MUJ5|PSAA_ANGEV

sp|A7FGR0|NUOH_YERP3

sp|Q53212|PRX5_SINFN

sp|A1WXW0|NUOH_HALHL

sp|Q72NT2|NUOH_LEPIC

sp|Q9MTN7|PSAB_OENEH

sp|B2K815|NUOH_YERPB

sp|Q6FE65|NUOH_ACIAD

sp|B1JVN3|NUOI_BURCC

sp|Q67P14|NUOI1_SYMTH

sp|Q5SCZ7|PSAC_HUPLU

sp|Q922Q4|P5CR2_MOUSE

sp|O65361|P5CS_MESCR

sp|B3NU20|NO66_DROER

sp|B7KQ58|NUOI_ME

sp|Q4KI35|HGD_PSEF5

sp|Q81LV7|ISPG_BACAN

sp|B7JN03|ISPG_BACC0

sp|Q730Q8|ISPG_BACC1

sp|Q6EMI9|HGD_PSEPU

sp|A6TV07|KATG2_ALKMQ

sp|Q2K9E5|HGD_RHIEC

sp|Q0B385|KATG2_BURCM

sp|A4JPG0|KATG2_BURVG

sp|Q7BSW8|KATG2_ECO57

sp|A7GSX5|ISPG_BACCN

sp|Q4ZR64|HGD_PSEU2

sp|O59651|KATG2_HALMA

sp|Q9X4F5|HGD_RHIME

sp|Q64N34|ISPG_BACFR

sp|B3QB98|HGD_RHOPT

sp|Q8EIV5|KATG2_SHEON

sp|Q9Z311|MECR_RAT

sp|Q4UZI9|HGD_XANC8

sp|Q6HDN9|ISPG_BACHK

sp|Q0HES2|KATG2_SHESM

sp|Q8PDA2|HGD_XANCP

sp|A9VH61|ISPG_BACMK

sp|A1TRV8|KATG_ACIAC

sp|Q2NYM7|HGD_XANOM

sp|A0QYP1|KATG3_MYCS2

sp|P54482|ISPG_BACSU

sp|B2I352|KATG_ACIBC

sp|A4SS04|KATG_AERS4

sp|C1F960|KATG_ACIC5

sp|B9K0G2|KATG_AGRVS

sp|B0YAK0|KATG_ASPFC

sp|Q0A8G6|KATG_ALKEH

sp|Q7Z7W6|KATG_ASPFU

sp|Q8G7Y6|ISPG_BIFLO

sp|A8EV24|KATG_ARCB4

sp|Q492E0|ISPG_BLOPB

sp|Q9KEE6|KATG_BACHD

sp|Q0CD12|KATG_ASPTN

sp|Q89VV9|ISPG_BRADU

sp|Q07923|LOT6_YEAST

sp|A4YKQ8|ISPG_BRASO

sp|A4YNR8|KATG_BRASO

sp|A9AGE5|KATG_BURM1

sp|A9M820|ISPG_BRUC2

sp|Q9Y7C8|LOV

sp|A7ZQE0|NORW_ECO24

sp|O35244|PRDX6_RAT

sp|A1AEQ1|NORW_ECOK1

sp|B5Z372|NORW_ECO5E

sp|Q8FEN4|NORW_ECOL6

sp|B5RDG6|NORW_SALG2

sp|P84516|PER1_SORBI

sp|Q42580|PER21_ARATH

sp|A8GG95|NORW_SERP5

sp|A6TCX6|NORW_KLEP7

sp|B2U036|NORW_SHIB3

sp|P36587|NCPR_SCHPO

sp|Q32CL7|NORW_SHIDS

sp|P37116|NCPR_VIGRR

sp|Q3YYF3|NORW_SHISS

sp|P16603|NCPR_YEAST

sp|P77258|NEMA_ECOLI

sp|Q9LSP0|PER29_ARATH

sp|P86055|PER2_DAUCA

sp|Q9SD46|PER36_ARATH

sp|Q9Z0J4|NOS1_MOUSE

sp|O19132|NOS1_RABIT

sp|Q9LHA7|PER31_ARATH

sp|A7NY33|PER4_VITVI

sp|Q6YPG5|NOS_ORYSJ

sp|Q6LG35|HCP_PHOPR

sp|P69743|MBHT_ECO57

sp|Q3AD53|HISX_CARHZ

sp|P0ACE1|MBHM_ECO57

sp|Q5H0L1|HISX_XANOR

sp|G3XD67|HCNA_PSEAE

sp|P53982|IDHH_YEAST

sp|Q46WL2|HISX_CUPPJ

sp|B4E6U7|HEM1_BURCJ

sp|P96797|HDRD_METBF

sp|A0A0E3D8N7|JANQ_PENJA

sp|P48077|HEM1_CYAPA

sp|P65103|IDI2_STRP1

sp|B0VTW7|HEM1_ACIBS

sp|B1YJV2|HEM1_EXIS2

sp|A4FPY4|HEM1_SACEN

sp|Q58DS6|JMJD6_BOVIN

sp|A1VDW9|HEM1_DESVV

sp|A6R1T7|MCR1_AJECN

sp|B2VI13|HEM6_ERWT9

sp|A

sp|Q5SLR3|ODBB_THET8

sp|Q09FQ4|NDHH_NANDO

sp|P12694|ODBA_HUMAN

sp|Q5SLR4|ODBA_THET8

sp|Q33BW6|NDHH_NICTO

sp|A5A6H9|ODBA_PANTR

sp|A4GGF2|NDHH_PHAVU

sp|Q6YXP7|NDHH_PHYPA

sp|Q06GL1|NDHH_PIPCE

sp|A3PAP0|NDHH_PROM0

sp|Q89AJ7|ODO1_BUCBP

sp|A1XGT5|NDHH_RANMC

sp|C3LEW9|PROA_BACAC

sp|P51056|ODO1_COXBU

sp|P20967|ODO1_YEAST

sp|Q2MID1|NDHH_SOLBU

sp|Q5HPC6|ODO1_STAEQ

sp|A0KNQ8|PROA_AERHH

sp|A0K4I3|PROA_BURCH

sp|P29925|NQO13_PARDE

sp|A1A1U9|PROA_BIFAA

sp|B2S5Q9|NQOR_BRUA1

sp|B0V4S6|PROA_ACIBY

sp|A4SJF6|PROA_AERS4

sp|C4Z9V4|PROA_AGARV

sp|B9MK80|PROA_CALBD

sp|B1X9C5|NQOR_ECODH

sp|A5VSI3|PROA_BRUO2

sp|P05982|NQO1_RAT

sp|Q69PS6|NTRA_ORYSJ

sp|Q59637|ODP1_PSEAE

sp|Q56217|NQO7_THET8

sp|Q3ARL1|PROA_CHLCH

sp|B9JEB9|NQOR_AGRRK

sp|Q46XE1|PROA_CUPPJ

sp|P29920|NQO8_PARDE

sp|P0AFG8|ODP1_ECOLI

sp|B7LP27|NQOR_ESCF3

sp|Q1LFW3|NQOR_CUPMC

sp|A9AT43|NQOR_BURM1

sp|B9M4V3|NQOR_GEODF

sp|Q9RYU4|NQOR_DEIRA

sp|P0C1E0|PROA_CORGL

sp|B3E9H6|NQOR_GEOLS

sp|Q4JWT3|PROA_CORJK

sp|Q74F05|N

sp|P0CH36|ADHC1_MYCS2

sp|P28036|ADHA_GLUPO

sp|O68098|COBK_RHOCB

sp|P80468|ADH4_STRCA

sp|Q9FRX6|AS1_ANTMA

sp|Q07586|ADHR_DROIM

sp|Q44002|ADHA_KOMEU

sp|P81747|ADHN_RHOER

sp|P54202|ADH2_EMENI

sp|Q9M7B7|C79D2_MANES

sp|Q50EK4|C75A1_PINTA

sp|Q7VX83|ASPD2_BORPE

sp|Q9X1X6|ASPD_THEMA

sp|O81117|C94A1_VICSA

sp|Q6Z5I7|C76M6_ORYSJ

sp|B6HV36|ADRH_PENRW

sp|Q6FDH0|ASPD_ACIAD

sp|O49342|C71AD_ARATH

sp|Q9LVD6|C81F2_ARATH

sp|H2DH16|C7A47_PANGI

sp|B8AFK5|CCMH_ORYSI

sp|Q1C742|ARNA_YERPA

sp|Q59I44|CAA43_BURSP

sp|B7N5M0|ARNA_ECOLU

sp|B6I7J8|ARNA_ECOSE

sp|P21762|AHPC_HELPY

sp|P0A156|AHPF_PSEPU

sp|Q00912|AOX_WICAO

sp|Q9SA99|BBE2_ARATH

sp|Q7V2D5|CHLN_PROMP

sp|Q9LKN0|CHMO_ATRHO

sp|Q7NI15|CHLN_GLOVI

sp|P96977|CHRR_PSEUG

sp|Q85FG6|CHLN_ADICA

sp|A4SCU6|BCHL_CHLPM

sp|B0KR47|ASTD_PSEPG

sp|Q5X4K4|ASTD_LEGPA

sp|B3QZE1|BCHL_CHLT3

sp|P71857|CHSE1_MYCTU

sp|P26179|BCHZ_RHOCB

sp|B7NT31|ASTD_ECO7I

sp|A5UUJ6|BCHN_ROSS1

sp|B4EK77|ACDH1_BURCJ

sp|Q2HEW4|CHG3_CHAGB

sp|C9K7C5|AMT13_ALTAL



sp|Q9KD93|AROE_BACHD

sp|P50271|COX3_DROSI

sp|Q08BA6|ALKB5_DANRE

sp|Q0I1B7|AROE_HAES1

sp|A8WGA0|C27C1_DANRE

sp|A0A2U1Q018|ADH1_ARTAN

sp|Q9ZSY9|C74B2_ARATH

sp|Q6CAB5|CCPR2_YARLI

sp|O64899|C80B1_ESCCA

sp|Q54KG7|ADRO_DICDI

sp|Q9LXM3|C71BZ_ARATH

sp|Q2ST93|AZOR_MYCCT

sp|O48923|C71DA_SOYBN

sp|Q9SHG5|C72C1_ARATH

sp|Q2P3X9|AZOR_XANOM

sp|P63463|AZOR_SALTI

sp|Q43078|C97B1_PEA

sp|B7LM76|ARNA_ESCF3

sp|Q5BEJ5|AFOF_EMENI

sp|Q9FUY7|C79F2_ARATH

sp|A3CZE0|AZOR_SHEB5

sp|Q2FDU6|CRTN_STAA3

sp|P66012|AHPF_STAAM

sp|P48034|AOXA_BOVIN

sp|Q9X5V1|AHPD_STRVD

sp|P16640|CAMA_PSEPU

sp|A3LQW6|CHO2_PICST

sp|Q5HIR6|AHPF_STAAC

sp|P9WGT2|FABG_MYCTO

sp|Q0BN71|GCSPA_FRATO

sp|Q0HTK5|FOLD_SHESR

sp|P50941|FABG_RICPR

sp|Q8E168|FOLD_STRA5

sp|Q72U08|DXR_LEPIC

sp|C4L656|DXR_EXISA

sp|B8DFX8|GCSPB_LISMH

sp|B6J4T8|GCSPB_COXB1

sp|P9WNS0|DXR_MYCTO

sp|Q6LN30|DXR_PHOPR

sp|B8CW61|DXR_HALOH

sp|Q9RCT1|DXR_SYNP6

sp|B8FR31|DXR_DESHD

sp|A5IIP4|DXR_THEP1

sp|Q7W1C4|GCSP_BORPA

sp|Q0HT68|DXR_SHESR

sp|


sp|Q937N7|PRPF_CUPNE

sp|A0KV91|PDXB_SHESA

sp|B1ZRT2|NUOB1_OPITP

sp|A8H7G5|MAO1_SHEPA

sp|Q1CGT4|MAO1_YERPN

sp|A5UZH9|NUOB2_ROSS1

sp|Q5MNI1|LOLP1_EPIUN

sp|P0C0H7|IMDH_STRP1

sp|Q43578|LCYB_TOBAC

sp|O00086|IMDH_CANAX

sp|O15054|KDM6B_HUMAN

sp|Q59011|IMDH_METJA

sp|Q8PAD3|KMO_XANCP

sp|O15550|KDM6A_HUMAN

sp|O15229|KMO_HUMAN

sp|Q38932|LCYE_ARATH

sp|Q6GJQ7|IMDH_STAAR

sp|P0CO49|KMO_CRYNB

sp|Q6GMG5|IMDH1_DANRE

sp|A0A179H0I7|LCSI_PURLI

sp|P69081|LDHA_CHIRA

sp|P26298|LDHD_LACPE

sp|Q9PW07|LDHA_COLLI

sp|Q5FKK7|LDH2_LACAC

sp|Q7SI97|LDH_PLABA

sp|Q57956|KORC_METJA

sp|P06150|LDH_THECA

sp|A4J898|LDH_DESRM

sp|A4QHW5|LDH_CORGB

sp|O27113|KORB_METTH

sp|P56512|LDH1_LACPL

sp|Q6CZ26|LTND_PECAS

sp|Q6MUI1|GUAC_MYCMS

sp|Q3Z5W3|KEFF_SHISS

sp|B2K5P7|KEFG_YERPB

sp|A6T4P6|GUAC_KLEP7

sp|P60560|GUAC_ECOLI

sp|Q8K9U0|GUAC_BUCAP

sp|C4ZRJ8|GUAC_ECOBW

sp|Q82NQ8|IOLG_STRAW

sp|G0RH19|LXR3_HYPJQ

sp|Q2JQB4|PROA_SYNJA

sp|Q5SH02|PROA_THET8

sp|B1NWJ6|NU5C_MANES

sp|Q34313|NU5M_DICDI

sp|Q9M

sp|A9R623|LLDD_YERPG

sp|Q1CGZ1|LLDD_YERPN

sp|A6TFK0|LLDD_KLEP7

sp|Q8PE75|LLDD_XANCP

sp|B4TZU7|LLDD_SALSV

sp|Q55P57|LAC1_CRYNB

sp|A4XYG7|LLDD_PSEMY

sp|Q0T8E3|PDXA_SHIF8

sp|Q9FJD5|LAC17_ARATH

sp|Q99044|LAC1_TRAVI

sp|B4SK42|PDXA_STRM5

sp|Q12729|LAC1_PLEOS

sp|Q02081|LAC4_THACU

sp|P97609|HAIR_RAT

sp|P86327|LACC2_CERUI

sp|Q6YYZ1|LDL2_ORYSJ

sp|Q9BYZ2|LDH6B_HUMAN

sp|Q66EZ4|LSRG_YERPS

sp|A7FMK2|LSRG_YERP3

sp|P97840|LEG9_RAT

sp|B2K4G3|GUAC_YERPB

sp|P57790|KEAP1_RAT

sp|Q3K110|GUAC_STRA1

sp|C1CEK5|GUAC_STRZJ

sp|B7MAG9|KEFF_ECO45

sp|Q92EQ7|IOLA_LISIN

sp|C5CT60|GUAC_VARPS

sp|Q8Y9Y4|IOLA_LISMO

sp|Q1RG92|GUAC_ECOUT

sp|A4FID1|IOLG2_SACEN

sp|Q0KBC7|LTND_CUPNH

sp|Q72WY4|GUAC_BACC1

sp|A7YD35|IOD3_SPAAU

sp|Q09FZ9|NU5C_PLAOC

sp|P34854|NU5M_ANOGA

sp|Q3C1N9|NU5C_NICSY

sp|Q2YKW2|AHPD_BRUA2

sp|Q9UV71|AOX2_CANAX

sp|Q945B5|AOP2V_ARATH

sp|Q96UR9|AOX_MONFR

sp|Q54NS9|AIFA_DICDI

sp|A5GUA8|CHLN_SYNR3

sp|Q9SA86|BBE9_ARATH

sp|Q9FKU9|BBE25_ARATH

sp|Q61419|CMAH_MOUSE

sp|B3Q7C5|

sp|Q8XU98|GCSP_RALSO

sp|Q31N26|DXR_SYNE7

sp|A6SZP5|DXR_JANMA

sp|Q11W68|FABV_CYTH3

sp|Q9K168|DYR_NEIMB

sp|P15093|DYRA_HALVD

sp|Q3T094|ETHE1_BOVIN

sp|Q04800|FRP1_SCHPO

sp|Q6ETL8|GLO12_ORYSJ

sp|B1LDD2|E4PD_ECOSM

sp|A0A0E4AZP0|FSA1_FUSSF

sp|A8APE1|E4PD_CITK8

sp|Q8HXW0|GGLO_PIG

sp|A2TBU0|EASA_EPIFI

sp|P80706|FEDB_AMYME

sp|Q9ZE14|DAPB_RICPR

sp|O94207|EASD_CLAPU

sp|Q8TUV8|GGR2_METKA

sp|Q16CE5|DAPB_ROSDO

sp|A8C7R3|EASA_CLAFS

sp|Q8TVE9|GGR1_METKA

sp|P43127|FRE_VIBHA

sp|B5F744|DAPB_SALA4

sp|A1KV41|GPXA_NEIMF

sp|Q1H537|DCVR_ARATH

sp|Q8L910|GPX4_ARATH

sp|O34215|GADH2_PANCY

sp|P0A9S4|GATD_ECO57

sp|Q4AEH0|GPX4_MACFU

sp|A5JYX5|DHS3_CAEEL

sp|Q67KE6|GRDA_SYMTH

sp|Q54823|DNRQ_STRPE

sp|Q0IRB0|GRC13_ORYSJ

sp|C4ZZR6|CYSH_ECOBW

sp|Q2P0H1|CYSH_XANOM

sp|B7LEI2|CYSI_ECO55

sp|Q82W45|CYSI_NITEU

sp|B7LWN9|CYSI_ESCF3

sp|A6TD47|CYSH_KLEP7

sp|P9WIK2|CYSH_MYCTO

sp|Q11B66|CYSH_CHESB

sp|B7PP17|DRE2_IXOSC

sp|A4SLF8|DAPB_AERS4

sp|A8MF41|DAPB_ALKOO

sp|C6DCZ7|CYSI1_PECCP

sp|Q63D

sp|Q8FJE0|HCP_ECOL6

sp|B8EBN7|HCP_SHEB2

sp|Q64UD5|HCP_BACFR

sp|Q9EUT9|LIMB_RHOER

sp|Q7M826|MFRB_WOLSU

sp|P44001|HISX_HAEIN

sp|P58674|ISPH_NOSS1

sp|Q2RQM7|HISX_RHORT

sp|Q8ESR8|HISX_OCEIH

sp|B7UMW5|HCP_ECO27

sp|O94230|IDH2_KLULA

sp|Q47QS9|HISX_THEFY

sp|A7MPE1|AROE_CROS8

sp|P11947|COX1_TETPY

sp|A0KR69|AROE_SHESA

sp|C1CEW5|AROE_STRZJ

sp|P56119|AROE_HELPY

sp|Q96062|COX1_RHIUN

sp|P29163|COX2_PNECA

sp|Q4VKV0|ALD_METSP

sp|O67049|AROE_AQUAE

sp|P54115|ALDH6_YEAST

sp|A0L3K8|AROE_MAGMM

sp|Q8EPU9|AROE_OCEIH

sp|P26858|COX3_MARPO

sp|Q730K1|AROE_BACC1

sp|Q3SFZ9|AROE_THIDA

sp|Q04442|COX3_BACPE

sp|P80980|COXN_THUOB

sp|Q9V4T3|C4AD1_DROME

sp|P49383|ADH2_KLULA

sp|P0DKI2|C76AN_BETVU

sp|Q9LVZ3|ADS32_ARATH

sp|P39451|ADHP_ECOLI

sp|P22797|ADH1_PELPE

sp|P9WQC7|ADHB_MYCTU

sp|Q7XU38|C87A3_ORYSJ

sp|Q6C0Z6|CCPR_YARLI

sp|Q99643|C560_HUMAN

sp|Q2YV20|AZOR_STAAB

sp|Q43147|C85A1_SOLLC

sp|Q5CCK1|C90A4_ORYSJ

sp|Q7W1T9|ASPD1_BORPA

sp|Q28056|ASPH_BOVIN

sp|P43013|AZOR_HAEIN

sp|A9N5

sp|Q9I5F3|AZOR1_PSEAE

sp|A1SB44|AZOR_SHEAM

sp|A1L515|ASPH1_BOVIN

sp|Q9P7R0|ARO1_SCHPO

sp|B5R1R2|CAIA_SALEP

sp|B4T6J8|CAIA_SALNS

sp|Q53TZ2|ARAA_AZOBR

sp|P26830|AHPC_BACAY

sp|C5G8R4|ARO1_AJEDR

sp|C0ZYQ9|AHPD_RHOE4

sp|Q945B4|AOP3L_ARATH

sp|Q9T4F6|CHLN_NEPOL

sp|A9BEG8|CHLN_PROM4

sp|B0C7T1|CHLN_ACAM1

sp|O49814|BCH2_CAPAN

sp|Q62LN5|ASTD_BURMA

sp|B9LKM2|BCHL_CHLSY

sp|Q27793|DRTS_TRYCR

sp|A8LQ27|BCHN_DINSH

sp|Q6FCQ0|ASTD_ACIAD

sp|Q3AWT5|CHLB_SYNS9

sp|Q95666|CHLB_PINST

sp|P14062|6PGD_SALTY

sp|Q9PKX7|6PGD_CHLMU

sp|G3KLH4|ADAC_ASPNG

sp|Q3SZB4|ACADM_BOVIN

sp|A5V4U1|FOLD1_SPHWW

sp|B0B9I6|FOLD_CHLT2

sp|P00360|G3P1_YEAST

sp|Q6FJ14|DUS3_CANGA

sp|Q7XP65|G2OX6_ORYSJ

sp|A1CNY3|DUS3_ASPCL

sp|O43026|G3P2_SCHPO

sp|Q8EAJ0|DUSA_SHEON

sp|B8GGP5|G1PDH_METPE

sp|Q8K979|FOLD_BUCAP

sp|Q8LEA2|G2OX1_ARATH

sp|P17329|G3P2_CAEEL

sp|Q88KX0|DUSA_PSEPK

sp|Q2GLF2|FOLD_ANAPZ

sp|Q8ZJ14|DUSA_YERPE

sp|Q3AAY6|FOLD_CARHZ

sp|P33642|GLYOX_PSEAE

sp|Q9LSV0|GLYR1_ARATH

sp|A5IBF0|FOLD_LEGPC




sp|Q8PXL6|DAPB_METMA

sp|Q11190|ETFD_CAEEL

sp|O46119|FRIH_ECHGR

sp|Q8K385|FRRS1_MOUSE

sp|B5FUI3|E4PD_SALDC

sp|A1RN69|E4PD_SHESW

sp|B5QXJ7|E4PD_SALEP

sp|Q8MIP0|FRIH_HORSE

sp|Q07103|FDH_NEUCR

sp|M1BYJ7|ENDB1_SOLTU

sp|Q7V7D4|DAPB_PROMM

sp|A0A0N0DCA3|DEP5_FUSLA

sp|B8B4W4|GLO19_ORYSI

sp|A8C7R6|EASC_CLAFS

sp|D5IGG6|FDR_SPHSX

sp|P00380|DYR_ENTFC

sp|Q5LLT7|DAPB_RUEPO

sp|Q2NER9|GGR3_METST

sp|D4AK47|EASE_ARTBC

sp|P19920|DCMM_OLICO

sp|A4Q8F7|GH109_ELIME

sp|Q6YT73|GLO5_ORYSJ

sp|A6LB54|GH109_PARD8

sp|Q9ZLI1|FTN_HELPJ

sp|A4FN60|GH109_SACEN

sp|S3D9F9|GLOC_GLAL2

sp|Q4WAW8|FTME_ASPFU

sp|E1WS50|FTN_BACF6

sp|Q8CNP5|FTN_STAES

sp|S0DQ98|FSR4_GIBF5

sp|Q8BGW1|FTO_MOUSE

sp|D4GYI2|GLPA1_HALVD

sp|Q65GY3|FENR1_BACLD

sp|Q1XDA1|FTRC_PYRYE

sp|Q5R7X0|FTO_PONAB

sp|A0R951|FENR1_BACAH

sp|Q8ETS1|FENR1_OCEIH

sp|A0LXL9|FENR1_GRAFK

sp|Q55389|FTRC_SYNY3

sp|A7GKN4|FENR1_BACCN

sp|B7MG07|GLPB_ECO45

sp|Q81XS0|FENR2_BACAN

sp|A8FYT6|DAPB_SHESH

sp|B5ET22|GLPB_ALIFM

sp|A8ADX8|GLPB_CITK8



sp|P77437|HYFF_ECOLI

sp|Q5WDH8|HISX_BACSK

sp|Q5LDB2|HCP_BACFN

sp|Q7M827|MFRA_WOLSU

sp|P0CI31|HCAB_ECOLI

sp|Q82W26|HISX_NITEU

sp|Q32DZ1|HCP_SHIDS

sp|B7JR48|HDOX_BACC0

sp|B4TRQ3|HCP_SALSV

sp|Q5L782|HEM1_CHLAB

sp|Q6AB02|HEM1_CUTAK

sp|A1K442|HEM1_AZOSB

sp|C3MJ24|HEM1_SULIL

sp|A0Q2B3|HEM1_CLONN

sp|Q9SLK0|ICDHX_ARATH

sp|A9WPH1|HEM1_RENSM

sp|Q4WR19|HELC_ASPFU

sp|A9L2D5|HEM1_SHEB9

sp|Q2KHZ4|MIA40_BOVIN

sp|A4HVU6|JBP2_LEIIN

sp|A3N0L8|MNMC_ACTP2

sp|B0BPE2|MNMC_ACTPJ

sp|A7H2B2|MNMC_CAMJD

sp|A2QK68|KTND_ASPNC

sp|A6VMR0|MNMC_ACTSZ

sp|Q62FV6|MNMC_BURMA

sp|Q6LNT9|MNMC_PHOPR

sp|Q8F0E4|MNMC_LEPIN

sp|Q8DB38|MNMC_VIBVU

sp|Q8ECR0|MNMC_SHEON

sp|A3M2P8|NUOA_ACIBT

sp|Q6G0J2|LLDD_BARQU

sp|B0V6L1|LLDD_ACIBY

sp|A3N3E5|LLDD_ACTP2

sp|B1IZI5|LLDD_ECOLC

sp|A5UFG9|LLDD_HAEIG

sp|B7L725|LLDD_ECO55

sp|C0Q1T7|LLDD_SALPC

sp|A4TKI4|LLDD_YERPP

sp|Q8FCB1|LLDD_ECOL6

sp|Q2R0L2|LAC19_ORYSJ

sp|Q62IN5|NUOA_BURMA

sp|Q03966|LAC1_CRYPA

sp|C3LWP7|LLDD_VIBCM

sp|Q12541|LAC1_AGABI

sp|B4T986|

sp|Q0DK35|GRXC7_ORYSJ

sp|P80873|GS39_BACSU

sp|A9R691|GPDA_YERPG

sp|P51110|DFRA_VITVI

sp|P80874|GS69_BACSU

sp|Q3MDZ9|GPDA_TRIV2

sp|P35571|GPDM_RAT

sp|Q9WWR3|CYOC_PSEPU

sp|O67716|DHAS_AQUAE

sp|P57523|DHAS_BUCAI

sp|O31440|CYPC_BACSU

sp|Q2NI00|DHQS_METST

sp|O76062|ERG24_HUMAN

sp|A0A0U2V7I8|CYP3_USTMD

sp|Q9LDR4|ERG24_ARATH

sp|P68434|ERG3_MYCBO

sp|P83695|DPS_BREBE

sp|P0C935|DPS_PORGI

sp|Q5PG12|DPS_SALPA

sp|Q6Y1R6|DPS_PROHU

sp|P43313|DPS_HELPY

sp|Q8XF78|DPS_SALTI

sp|A8XKG6|DPYD_CAEBR

sp|Q7XKU5|DAO_ORYSJ

sp|Q9C7S7|ERO1_ARATH

sp|P0AEA9|CYSG_ECO57

sp|B3GZA0|CYSG_ACTP7

sp|Q65T49|CYSG_MANSM

sp|B0BTC2|CYSG_ACTPJ

sp|A9MME5|CYSG_SALAR

sp|Q3KA85|CYSG_PSEPF

sp|Q15YU1|CYSG_PSEA6

sp|Q1QAX7|CYSG_PSYCK

sp|C6UCN7|CYSI_ECOBR

sp|P17846|CYSI_ECOLI

sp|A0BV78|DRE21_PARTE

sp|O32213|CYSI_BACSU

sp|B1MN34|CYSH_MYCA9

sp|B5QW35|CYSI_SALEP

sp|Q6FZ13|DAPB_BARQU

sp|Q4JK73|DHB11_MACFA

sp|P19574|DHF5_ERWCI

sp|Q9ZL20|DHOM_HELPJ

sp|P00372|DHML_METEA

sp|P00365|DHE2_NEUCR

sp|P28998|

sp|Q9LPC3|BBE1_ARATH

sp|Q944X7|AOP3V_ARATH

sp|P53109|AIM14_YEAST

sp|P84729|BAS1_PINST

sp|B5F7J0|ASTD_SALA4

sp|A5UUJ4|BCHL_ROSS1

sp|Q2ISY7|BCHB_RHOP2

sp|Q7ADE6|ASTD_ECO57

sp|Q7X2C7|BCHE_RUBGE

sp|A8H8E0|ASTD_SHEPA

sp|P26277|BCHZ_ROSDO

sp|B1XGK7|ASTD_ECODH

sp|Q81M95|ARGC_BACAN

sp|A9MAG5|ARGC_BRUC2

sp|B0RHD0|ARGC_CLAMS

sp|Q9A8H5|ARGC_CAUVC

sp|Q9RY72|ARGC_DEIRA

sp|C1DHY0|ARGC_AZOVD

sp|B8E0N8|ARGC_DICTD

sp|A0QHB4|ARGC_MYCA1

sp|O26934|ARGC_METTH

sp|P9WPZ9|ARGC_MYCTU

sp|A0B6F6|ARGC_METTP

sp|A4G1T4|ARGC_HERAR

sp|O87890|ARGC_NOSEL

sp|O87874|BCRC_THAAR

sp|B3E0U0|ARGC_METI4

sp|A0AJ40|ARGC_LISW6

sp|Q5ATH2|APDB_EMENI

sp|Q72RJ9|ARGC_LEPIC

sp|P44411|BCP_HAEIN

sp|Q9I5Q9|ARGC_PSEAE

sp|B1JE24|ARGC_PSEPW

sp|Q9VAI1|CIA30_DROME

sp|B8HGC3|ARGC_PSECP

sp|Q889Z3|ARGC_PSESM

sp|Q0MQ84|CIA30_PANTR

sp|C3K2Z7|ARGC_PSEFS

sp|Q1MIP7|ARGC_RHIL3

sp|Q9LQI7|CIA30_ARATH

sp|Q982X3|ARGC_RHILO

sp|A4FKC8|ARGC_SACEN

sp|A6UZG3|ARGC_PSEA7

sp|B5ZXZ1|ARGC_RHILW

sp|A5VXM5|ARGC_PSEP1

sp|A1Y

sp|V3TQ67|FRDA_SERS3

sp|P53573|ETFA_BRADU

sp|Q2KIG0|ETFD_BOVIN

sp|P0AEN2|FRE_ECOL6

sp|Q666Q0|E4PD_YERPS

sp|A7MJQ2|E4PD_CROS8

sp|A1A835|FRMA_ECOK1

sp|P06131|FDHA_METFO

sp|Q10CE4|GLO1_ORYSJ

sp|Q9FM82|GGLO4_ARATH

sp|Q2NFF7|GGR2_METST

sp|O27753|GGR2_METTH

sp|A8C7R7|EASD_CLAFS

sp|O57920|GGR_PYRHO

sp|P19917|DCMM_PSECH

sp|A9MR41|DAPB_SALAR

sp|P19915|DCMS_HYDPS

sp|Q9LJH5|GLO4_ARATH

sp|B5R1Q4|DAPB_SALEP

sp|B8B7C5|GLO5_ORYSI

sp|Q4WAX0|FTMG_ASPFU

sp|Q01772|GLOX_PHACH

sp|Q6G840|FTN_STAAS

sp|P0A999|FTNA_ECOL6

sp|B1KNF7|GH109_SHEWM

sp|S3E7Q2|GLOE_GLAL2

sp|Q01S58|GH109_SOLUE

sp|A9L0Q8|DAPB_SHEB9

sp|A3QGV8|DAPB_SHELP

sp|Q73EA6|FENR1_BACC1

sp|O31475|FENR1_BACSU

sp|P0A9C0|GLPA_ECOLI

sp|A8H756|DAPB_SHEPA

sp|B0BTN3|GLPB_ACTPJ

sp|P41344|FENR1_ORYSJ

sp|Q8D7Y4|GLPB_VIBVU

sp|B5YVQ3|GHRA_ECO5E

sp|A4W948|GHRA_ENT38

sp|A9C3H6|FENR_DELAS

sp|Q8Z7M6|GHRA_SALTI

sp|A5FJT9|FENR_FLAJ1

sp|A8AX95|DAPB_STRGC

sp|B1VXZ4|DAPB_STRGG

sp|B4SHT7|DAPB_STRM5

sp|B3WCB2|FENR_LACCB

sp|A4IS

## Create pandas dataframe using seqID & sequences

In [14]:
# create dataframe of 2000 additional isomerase sequences
#d_iso = {'name':seq_ids,'sequence':seqs}
d_1 = {'name':seqID,'sequence':t_seqs}
df = pd.DataFrame(d_1)
df.head()

Unnamed: 0,name,sequence
0,sp|Q8L540|LTO1_ARATH,MMARFVSVSSCQFHFGFREVSPPSVTSYPRRFEVSDRRFPAIPIKC...
1,sp|Q9AL95|NROR_CLOAB,MKSTKILILGAGPAGFSAAKAALGKCDDITMINSEKYLPYYRPRLN...
2,sp|P55931|ETFD_PIG,MMVPLAKLASPAYQCFHALKIKKNYLPLCATRWSSTCKVPRITTHY...
3,sp|Q16134|ETFD_HUMAN,MLVPLAKLSCLAYQCFHALKIKKNYLPLCATRWSSTSTVPRITTHY...
4,sp|Q9NZC7|WWOX_HUMAN,MAALRYAGLDDTDSEDELPPGWEERTTKDGWVYYANHTEEKTQWEH...


In [15]:
d_2 = {'name':seq_ids,'sequence':seqs}

df2 = pd.DataFrame(d_2)
df2.head()

Unnamed: 0,name,sequence
0,sp|Q8L540|LTO1_ARATH,MMARFVSVSSCQFHFGFREVSPPSVTSYPRRFEVSDRRFPAIPIKC...
1,sp|Q9AL95|NROR_CLOAB,MKSTKILILGAGPAGFSAAKAALGKCDDITMINSEKYLPYYRPRLN...
2,sp|P55931|ETFD_PIG,MMVPLAKLASPAYQCFHALKIKKNYLPLCATRWSSTCKVPRITTHY...
3,sp|Q16134|ETFD_HUMAN,MLVPLAKLSCLAYQCFHALKIKKNYLPLCATRWSSTSTVPRITTHY...
4,sp|Q9NZC7|WWOX_HUMAN,MAALRYAGLDDTDSEDELPPGWEERTTKDGWVYYANHTEEKTQWEH...


In [16]:
df.shape

(46750, 2)

In [17]:
df2.shape

(46750, 2)

In [18]:
# remove non-coding amino acid letters like X,B,U
df['sequence'] = df['sequence'].str.replace('U','G')
df['sequence'] = df['sequence'].str.replace('X','G')
df['sequence'] = df['sequence'].str.replace('B','G')

In [19]:
# remove non-coding amino acid letters like X,B,U
df2['sequence'] = df2['sequence'].str.replace('U','G')
df2['sequence'] = df2['sequence'].str.replace('X','G')
df2['sequence'] = df2['sequence'].str.replace('B','G')

In [20]:
#df['MW'] = df['sequence'].apply(lambda seq: ProteinAnalysis(seq).molecular_weight())
#df.head()

In [21]:
#df2['MW'] = df2['sequence'].apply(lambda seq: ProteinAnalysis(seq).molecular_weight())
#df2.head()

In [22]:
# Create 2 columns for the dataframe
df['classification'] = 'OXIDOREDUCTASE'
df['type'] = 5

df.head(10)

Unnamed: 0,name,sequence,classification,type
0,sp|Q8L540|LTO1_ARATH,MMARFVSVSSCQFHFGFREVSPPSVTSYPRRFEVSDRRFPAIPIKC...,OXIDOREDUCTASE,5
1,sp|Q9AL95|NROR_CLOAB,MKSTKILILGAGPAGFSAAKAALGKCDDITMINSEKYLPYYRPRLN...,OXIDOREDUCTASE,5
2,sp|P55931|ETFD_PIG,MMVPLAKLASPAYQCFHALKIKKNYLPLCATRWSSTCKVPRITTHY...,OXIDOREDUCTASE,5
3,sp|Q16134|ETFD_HUMAN,MLVPLAKLSCLAYQCFHALKIKKNYLPLCATRWSSTSTVPRITTHY...,OXIDOREDUCTASE,5
4,sp|Q9NZC7|WWOX_HUMAN,MAALRYAGLDDTDSEDELPPGWEERTTKDGWVYYANHTEEKTQWEH...,OXIDOREDUCTASE,5
5,sp|Q9UHB4|NDOR1_HUMAN,MPSPQLLVLFGSQTGTAQDVSERLGREARRRRLGCRVQALDSYPVV...,OXIDOREDUCTASE,5
6,sp|Q6NPS8|NDOR1_ARATH,MGEKQRKLLVLYASQTGNALDAAERIGREAERRGLPASVVSTDEFD...,OXIDOREDUCTASE,5
7,sp|Q12181|NDOR1_YEAST,MSSSKKIVILYGSETGNAHDFATILSHRLHRWHFSHTFCSIGDYDP...,OXIDOREDUCTASE,5
8,sp|Q921G7|ETFD_MOUSE,MLVRLTKLSCPAYHWFHALKIKKCLPLCAPRCSSTSAVPQITTHYT...,OXIDOREDUCTASE,5
9,sp|P37769|KDUD_ECOLI,MILSAFSLEGKVAVVTGCDTGLGQGMALGLAQAGCDIVGINIVEPT...,OXIDOREDUCTASE,5


In [23]:
df.shape

(46750, 4)

In [24]:
# remove the duplicate protein sequences
df = df.drop_duplicates(subset='sequence', keep="first")
df.shape

(39793, 4)

In [25]:
df.head(10)

Unnamed: 0,name,sequence,classification,type
0,sp|Q8L540|LTO1_ARATH,MMARFVSVSSCQFHFGFREVSPPSVTSYPRRFEVSDRRFPAIPIKC...,OXIDOREDUCTASE,5
1,sp|Q9AL95|NROR_CLOAB,MKSTKILILGAGPAGFSAAKAALGKCDDITMINSEKYLPYYRPRLN...,OXIDOREDUCTASE,5
2,sp|P55931|ETFD_PIG,MMVPLAKLASPAYQCFHALKIKKNYLPLCATRWSSTCKVPRITTHY...,OXIDOREDUCTASE,5
3,sp|Q16134|ETFD_HUMAN,MLVPLAKLSCLAYQCFHALKIKKNYLPLCATRWSSTSTVPRITTHY...,OXIDOREDUCTASE,5
4,sp|Q9NZC7|WWOX_HUMAN,MAALRYAGLDDTDSEDELPPGWEERTTKDGWVYYANHTEEKTQWEH...,OXIDOREDUCTASE,5
5,sp|Q9UHB4|NDOR1_HUMAN,MPSPQLLVLFGSQTGTAQDVSERLGREARRRRLGCRVQALDSYPVV...,OXIDOREDUCTASE,5
6,sp|Q6NPS8|NDOR1_ARATH,MGEKQRKLLVLYASQTGNALDAAERIGREAERRGLPASVVSTDEFD...,OXIDOREDUCTASE,5
7,sp|Q12181|NDOR1_YEAST,MSSSKKIVILYGSETGNAHDFATILSHRLHRWHFSHTFCSIGDYDP...,OXIDOREDUCTASE,5
8,sp|Q921G7|ETFD_MOUSE,MLVRLTKLSCPAYHWFHALKIKKCLPLCAPRCSSTSAVPQITTHYT...,OXIDOREDUCTASE,5
9,sp|P37769|KDUD_ECOLI,MILSAFSLEGKVAVVTGCDTGLGQGMALGLAQAGCDIVGINIVEPT...,OXIDOREDUCTASE,5


## Save dataframe of sequences as a csv file

In [27]:
df.to_csv('uniprot-oxidoreductase-Feb1.csv',index = None)

proteins = pd.read_csv('uniprot-oxidoreductase-Feb1.csv')

proteins.head()

Unnamed: 0,name,sequence,classification,type
0,sp|Q8L540|LTO1_ARATH,MMARFVSVSSCQFHFGFREVSPPSVTSYPRRFEVSDRRFPAIPIKC...,OXIDOREDUCTASE,5
1,sp|Q9AL95|NROR_CLOAB,MKSTKILILGAGPAGFSAAKAALGKCDDITMINSEKYLPYYRPRLN...,OXIDOREDUCTASE,5
2,sp|P55931|ETFD_PIG,MMVPLAKLASPAYQCFHALKIKKNYLPLCATRWSSTCKVPRITTHY...,OXIDOREDUCTASE,5
3,sp|Q16134|ETFD_HUMAN,MLVPLAKLSCLAYQCFHALKIKKNYLPLCATRWSSTSTVPRITTHY...,OXIDOREDUCTASE,5
4,sp|Q9NZC7|WWOX_HUMAN,MAALRYAGLDDTDSEDELPPGWEERTTKDGWVYYANHTEEKTQWEH...,OXIDOREDUCTASE,5


In [28]:
proteins.shape

(39793, 4)