In [1]:
import pandas as pd
import numpy as np

In [2]:
from Bio.Seq import Seq
from Bio import SeqIO
#from Bio.Alphabet import IUPAC
import sys
# import pyensembl
import os
#https://towardsdatascience.com/a-simple-guide-to-command-line-arguments-with-argparse-6824c30ab1c3
import warnings
warnings.filterwarnings("ignore")
from Bio.SeqUtils import seq3

In [3]:
# SK: Map each translation record to its amino-acid sequence. The key is the
# second "|"-delimited field of the FASTA record id with everything after the
# first "." removed.
# NOTE(review): that field is presumably the ENST transcript ID - confirm.
proteins = {
    rec.id.split("|")[1].split(".")[0]: str(rec.seq)
    for rec in SeqIO.parse("../raw_files/gencode.v36.pc_translations.fa", "fasta")
}
    

# SK: Map each transcript record (first "|"-delimited field of the FASTA record
# id, with everything after the first "." removed) to the nucleotide sequence
# of its CDS, cut out of the full transcript with the header's "CDS:" range.
dna_transcripts = {}
for record in SeqIO.parse("../raw_files/gencode.v36.pc_transcripts.fa", "fasta"):
    header_fields = record.id.split("|")
    name = header_fields[0].split(".")[0]

    # Reset per record and match the "CDS:<start>-<end>" field by prefix.
    # A substring test would also match other header fields that merely
    # contain "CDS", and without the reset a record lacking the field would
    # silently reuse the coordinates of the previous record.
    coords = None
    for field in header_fields:
        if field.startswith("CDS:"):
            coords = field[len("CDS:"):]
    if coords is None:
        # No CDS range in this header: nothing to slice, skip the record.
        continue

    bounds = coords.split("-")
    start = int(bounds[0])
    end = int(bounds[1])
    # The slice treats the header range as 1-based and inclusive.
    dna_seq = str(record.seq)[start-1:end]
    dna_transcripts[name] = dna_seq
    
# Load the uniprotID / ENST table, keep only those two columns, and cut each
# ENST value at its first "." so it matches the keys of dna_transcripts.
uniprotID_ENST_mapping = (
    pd.read_csv("../../data/SFARI_TFs_with_ENST.csv")
    .loc[:, ["uniprotID", "ENST"]]
    .assign(ENST=lambda table: table["ENST"].str.split(".").str[0])
)

# uniprotID -> ENST lookup built from the table. The O60479 entry is set by
# hand afterwards, adding or overriding whatever the table contained.
uniprotID_ENST_mapping_dict = dict(
    zip(uniprotID_ENST_mapping["uniprotID"], uniprotID_ENST_mapping["ENST"])
)
uniprotID_ENST_mapping_dict['O60479'] = 'ENST00000434704'

# Plan:
1. Using the CDS coordinates, create a df with the genomic coordinate as one column and the corresponding nt as a second column
2. Use the position in the variant file to replace the value in the nt column
3. Translate the nt column

In [4]:
# Directory whose entry names are later passed, one by one, to
# save_variant_fasta() as uniprotIDs.
directory = "../outputs/mutations/domains_bed_format/"
# os.listdir() returns entries in arbitrary order; sort them so every run
# processes (and prints) the proteins in the same order.
files = sorted(os.listdir(directory))

In [17]:
def save_variant_fasta(uniprotID, verbose=True):
    """Build single-variant mutant proteins for one TF and write per-AD FASTA files.

    The transcript mapped to ``uniprotID`` is looked up, and every row of its
    classified variant table whose column 22 equals "No-Syn" is substituted,
    one variant at a time, into the wild-type CDS. Each mutated CDS is
    translated and every residue that differs from the wild-type protein is
    recorded. For each row of the known-AD table belonging to ``uniprotID``,
    the variants whose changed residue lies inside [Start, End] are written to
    ``../outputs/AD_variant_fasta/<gene>_<uniprotID>_AD_<start>-<end>`` as
    FASTA records containing the AD slice of the mutant protein.

    Parameters
    ----------
    uniprotID : str
        Key into ``uniprotID_ENST_mapping_dict``.
    verbose : bool, default True
        When True (the previous, unconditional behaviour) the wild-type
        protein, the AD table and every per-AD variant table are displayed and
        the AD bounds are printed. Reference mismatches are always printed.

    Returns
    -------
    pandas.DataFrame or None
        One row per amino-acid change with columns ``name``
        ("g.<col0>.<pos><ref>><alt>(p.<Ref3><pos><Alt3>)"), ``TF_seq`` (full
        mutant protein) and ``prot_pos`` (1-based residue number). ``None``
        when ``uniprotID`` has no entry in ``uniprotID_ENST_mapping_dict``.

    Raises
    ------
    KeyError
        If the ENST is missing from ``dna_transcripts`` or a variant position
        is not covered by the CDS intervals.
    ValueError
        If the CDS length is not a multiple of three or does not match the
        total length of the CDS intervals.
    """
    if uniprotID not in uniprotID_ENST_mapping_dict:
        return None

    # Transcript for this protein and its classified variant table.
    ENST = uniprotID_ENST_mapping_dict[uniprotID]
    variants = pd.read_csv("../outputs/mutations/domains_expanded_iWES_v2_variants_snv_classified/" + ENST + ".bed", sep = "\t", header = None)
    # .copy() so the "name" column added below lands on an independent frame
    # instead of a slice of `variants`.
    # SK: 3/14 - do I also need to filter on variants[3] == "AD"?
    no_syn_variants = variants[variants[22] == "No-Syn"].copy()

    # Strand comes from column 13 of the first available row. The previous
    # .iloc[1] read the *second* No-Syn row and raised IndexError whenever the
    # transcript had fewer than two of them.
    # NOTE(review): assumes every row of a per-transcript file has the same strand.
    strand_source = variants if no_syn_variants.empty else no_syn_variants
    strand = strand_source[13].iloc[0]

    # Wild-type CDS of the transcript and its translation.
    wt_nt_seq = dna_transcripts[ENST]
    wt_AA_seq = str(Seq(wt_nt_seq).translate())
    if verbose:
        display(wt_AA_seq)

    # For "-" strand transcripts the CDS is complemented (not reversed) so the
    # stored bases can be compared with the ref/alt bases of the variant table
    # while staying in transcript order. It is complemented back before
    # translation further down.
    if strand == "-":
        wt_nt_seq = str(Seq(wt_nt_seq).complement())

    nt_df = pd.DataFrame({"nt" : [*wt_nt_seq]})
    # Codon number of every nucleotide; the assignment raises ValueError when
    # the CDS length is not a multiple of three.
    nt_df["pred_prot_pos"] = [codon for codon in range(1, len(nt_df) // 3 + 1) for _ in range(3)]

    # CDS intervals of the transcript. The start column is shifted by one so
    # that [start, end] can be expanded as an inclusive range below.
    cds_bed = pd.read_csv("../outputs/mutations/cds_bed_format/" + ENST, sep = "\t", header = None)
    cds_bed = cds_bed[[1, 2]].copy()
    cds_bed[1] += 1
    # Intervals are walked in transcript order: ascending for "+", descending for "-".
    cds_bed = cds_bed.sort_values(by = 1, ascending = (strand != "-"))

    # Genomic coordinate of every CDS nucleotide, in transcript order.
    range_col = []
    for start, end in zip(cds_bed[1], cds_bed[2]):
        if strand == "-":
            range_col.extend(range(end, start - 1, -1))
        else:
            range_col.extend(range(start, end + 1))

    # Raises ValueError if the intervals do not add up to the CDS length.
    nt_df["gen_pos"] = range_col
    nt_df = nt_df.set_index("gen_pos")

    # Sanity pass: the reference base of every variant should equal the CDS
    # base stored at that genomic position.
    for pos, sfari_nt in zip(no_syn_variants[2], no_syn_variants[17]):
        if sfari_nt != nt_df.at[pos, "nt"]:
            print("mismatch!")

    no_syn_variants["name"] = "g." + no_syn_variants[0].astype(str) + "."+ no_syn_variants[2].astype(str) + no_syn_variants[17] + ">" + no_syn_variants[18]

    names = []
    TF_seqs = []
    prot_positions = []

    variant_rows = zip(no_syn_variants[2], no_syn_variants[17],
                       no_syn_variants[18], no_syn_variants["name"])
    for var_pos, wt_nt, var_nt, g_name in variant_rows:
        if nt_df.at[var_pos, "nt"] != wt_nt:
            # Reference disagrees with the CDS: nothing is substituted, so the
            # protein would equal the wild type and yield no rows anyway.
            print(uniprotID + " mismatch!")
            continue

        # Substitute the single variant into a private copy of the CDS.
        nt_df_var_copy = nt_df.copy(deep = True)
        nt_df_var_copy.at[var_pos, "nt"] = var_nt

        mutant_cds = Seq("".join(nt_df_var_copy["nt"]))
        if strand == "-":
            mutant_cds = mutant_cds.complement()
        new_AA_seq = str(mutant_cds.translate())

        # Record every residue that differs from the wild type (1-based).
        for aa_idx, (wt_aa, new_aa) in enumerate(zip(wt_AA_seq, new_AA_seq)):
            if new_aa != wt_aa:
                prot_change_descrip = "p." + seq3(wt_aa) + str(aa_idx + 1) + seq3(new_aa)
                prot_positions.append(aa_idx + 1)
                names.append(g_name + "(" + prot_change_descrip + ")")
                TF_seqs.append(new_AA_seq)

    variant_TF_seqs_df = pd.DataFrame({"name": names,
                                       "TF_seq": TF_seqs,
                                       "prot_pos": prot_positions})

    known_ADs = pd.read_csv("../../output/known_ADs_considering_isoforms_and_canonical.csv")
    AD_rows = known_ADs[known_ADs["uniprotID"] == uniprotID]
    if AD_rows.empty:
        # No AD listed for this protein: there is no gene name to build a file
        # name from and nothing to write.
        return variant_TF_seqs_df

    # Gene name made safe for use in a file name.
    gene = AD_rows["Gene"].iloc[0].replace(" ", "").replace("/", "-")

    if verbose:
        display(AD_rows)

    for AD_start, AD_end in zip(AD_rows["Start"], AD_rows["End"]):
        if verbose:
            print(AD_start)
            print(AD_end)

        # Variants whose changed residue lies inside the AD (bounds inclusive).
        in_AD = (AD_start <= variant_TF_seqs_df["prot_pos"]) & (variant_TF_seqs_df["prot_pos"] <= AD_end)
        within_AD = variant_TF_seqs_df[in_AD].copy()
        # AD slice of each mutant protein; Start/End are used as 1-based inclusive.
        within_AD["AD_seq"] = [tf_seq[AD_start - 1: AD_end] for tf_seq in within_AD["TF_seq"]]

        if verbose:
            display(within_AD)

        out_path = "../outputs/AD_variant_fasta/" + gene + "_" + uniprotID + "_" + "AD_" + str(AD_start) + "-" + str(AD_end)
        # Context manager closes the file even if a write fails.
        with open(out_path, "w") as ofile:
            for record_name, AD_seq in zip(within_AD["name"], within_AD["AD_seq"]):
                ofile.write(">" + record_name + "\n" + AD_seq + "\n")

    return variant_TF_seqs_df


In [18]:
# Run the per-protein pipeline for every entry of the listed directory; names
# without an ENST mapping are ignored inside save_variant_fasta().
for uniprotID in files:
    save_variant_fasta(uniprotID)

'MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEKRMSKDEERAVKDELLGEKPEIKQKWASRLLAKLRKDIRPEFREDFVLTITGKKPPCCVLSNPDQKGKIRRIDCLRQADKVWRLDLVMVILFKGIPLESTDGERLYKSPQCSNPGLCVQPHHIGVTIKELDLYLAYFVHTPESGQSDSSNQQGDADIKPLPNGHLSFQDCFVTSGVWNVTELVRVSQTPVATASGPNFSLADLESPSYYNINQVTLGRRSITSPPSTSTTKRPKSIDDSEMESPVDDVFYPGTGRSPAAGSSQSSGWPNDVDAGPASLKKSGKLDFCSALSSQGSSPRMAFTHHPLPVLAGVRPGSPRATASALHFPSTSIIQQSSPYFTHPTIRYHHHHGQDSLKEFVQFVCSDGSGQATGQPNGSGQGKVPGSFLLPPPPPVARPVPLPMPDSKSTSTAPDGAALTPPSPSFATTGASSANRFVSIGPRDGNFLNIPQQSQSWFL*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
403,NFIX,414,502,Q14938,[['Q14938']],,Q14938,"PMID: 21189253, Soto",TF,QATGQPNGSGQGKVPGSFLLPPPPPVARPVPLPMPDSKSTSTAPDG...,


414
502


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
31,g.19.13081853C>G(p.Gln418Glu),MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEK...,418,QATGEPNGSGQGKVPGSFLLPPPPPVARPVPLPMPDSKSTSTAPDG...
32,g.19.13088001G>A(p.Gly423Ser),MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEK...,423,QATGQPNGSSQGKVPGSFLLPPPPPVARPVPLPMPDSKSTSTAPDG...
33,g.19.13088017C>T(p.Pro428Leu),MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEK...,428,QATGQPNGSGQGKVLGSFLLPPPPPVARPVPLPMPDSKSTSTAPDG...
34,g.19.13088038C>T(p.Pro435Leu),MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEK...,435,QATGQPNGSGQGKVPGSFLLPLPPPVARPVPLPMPDSKSTSTAPDG...
35,g.19.13088049G>A(p.Val439Met),MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEK...,439,QATGQPNGSGQGKVPGSFLLPPPPPMARPVPLPMPDSKSTSTAPDG...
36,g.19.13088058C>G(p.Pro442Ala),MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEK...,442,QATGQPNGSGQGKVPGSFLLPPPPPVARAVPLPMPDSKSTSTAPDG...
37,g.19.13088092C>T(p.Thr453Ile),MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEK...,453,QATGQPNGSGQGKVPGSFLLPPPPPVARPVPLPMPDSKSISTAPDG...
38,g.19.13088097A>G(p.Thr455Ala),MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEK...,455,QATGQPNGSGQGKVPGSFLLPPPPPVARPVPLPMPDSKSTSAAPDG...
39,g.19.13088104C>T(p.Pro457Leu),MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEK...,457,QATGQPNGSGQGKVPGSFLLPPPPPVARPVPLPMPDSKSTSTALDG...
40,g.19.13088118T>A(p.Leu462Met),MYSPYCLTQDEFHPFIEALLPHVRAFSYTWFNLQARKRKYFKKHEK...,462,QATGQPNGSGQGKVPGSFLLPPPPPVARPVPLPMPDSKSTSTAPDG...


'MAQRYDELPHYGGMDGVGVPASMYGDPHAPRPIPPVHHLNHGPPLHATQHYGAHAPHPNVMPASMGSAVNDALKRDKDAIYGHPLFPLLALVFEKCELATCTPREPGVAGGDVCSSDSFNEDIAVFAKQVRAEKPLFSSNPELDNLMIQAIQVLRFHLLELEKVHELCDNFCHRYISCLKGKMPIDLVIDERDGSSKSDHEELSGSSTNLADHNPSSWRDHDDATSTHSAGTPGPSSGGHASQSGDNSSEQGDGLDNSVASPGTGDDDDPDKDKKRQKKRGIFPKVATNIMRAWLFQHLTHPYPSEEQKKQLAQDTGLTILQVNNWFINARRRIVQPMIDQSNRAGFLLDPSVSQGAAYSPEGQPMGSFVLDGQQHMGIRPAGLQSMPGDYVSQGGPMGMSMAQPSYTPPQMTPHPTQLRHGPPMHSYLPSHPHHPAMMMHGGPPTHPGMTMSAQSPTMLNSVDPNVGGQVMDIHAQ*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
333,MEIS2,340,477,O14770,[['O14770']],,O14770,"activation_regions.txt, GSL",TF,DQSNRAGFLLDPSVSQGAAYSPEGQPMGSFVLDGQQHMGIRPAGLQ...,


340
477


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
5,g.15.36892177T>C(p.Gln477Arg),MAQRYDELPHYGGMDGVGVPASMYGDPHAPRPIPPVHHLNHGPPLH...,477,DQSNRAGFLLDPSVSQGAAYSPEGQPMGSFVLDGQQHMGIRPAGLQ...
6,g.15.36892183T>C(p.His475Arg),MAQRYDELPHYGGMDGVGVPASMYGDPHAPRPIPPVHHLNHGPPLH...,475,DQSNRAGFLLDPSVSQGAAYSPEGQPMGSFVLDGQQHMGIRPAGLQ...
7,g.15.36892191C>T(p.Met472Ile),MAQRYDELPHYGGMDGVGVPASMYGDPHAPRPIPPVHHLNHGPPLH...,472,DQSNRAGFLLDPSVSQGAAYSPEGQPMGSFVLDGQQHMGIRPAGLQ...
8,g.15.36892192A>G(p.Met472Thr),MAQRYDELPHYGGMDGVGVPASMYGDPHAPRPIPPVHHLNHGPPLH...,472,DQSNRAGFLLDPSVSQGAAYSPEGQPMGSFVLDGQQHMGIRPAGLQ...
9,g.15.36892197C>A(p.Gln470His),MAQRYDELPHYGGMDGVGVPASMYGDPHAPRPIPPVHHLNHGPPLH...,470,DQSNRAGFLLDPSVSQGAAYSPEGQPMGSFVLDGQQHMGIRPAGLQ...
...,...,...,...,...
67,g.15.36895169T>G(p.Met377Leu),MAQRYDELPHYGGMDGVGVPASMYGDPHAPRPIPPVHHLNHGPPLH...,377,DQSNRAGFLLDPSVSQGAAYSPEGQPMGSFVLDGQQHLGIRPAGLQ...
68,g.15.36895181C>T(p.Gly373Ser),MAQRYDELPHYGGMDGVGVPASMYGDPHAPRPIPPVHHLNHGPPLH...,373,DQSNRAGFLLDPSVSQGAAYSPEGQPMGSFVLDSQQHMGIRPAGLQ...
69,g.15.36895229C>T(p.Ala357Thr),MAQRYDELPHYGGMDGVGVPASMYGDPHAPRPIPPVHHLNHGPPLH...,357,DQSNRAGFLLDPSVSQGTAYSPEGQPMGSFVLDGQQHMGIRPAGLQ...
70,g.15.36895247G>A(p.Pro351Ser),MAQRYDELPHYGGMDGVGVPASMYGDPHAPRPIPPVHHLNHGPPLH...,351,DQSNRAGFLLDSSVSQGAAYSPEGQPMGSFVLDGQQHMGIRPAGLQ...


'MEQKPSKVECGSDPEENSARSPDGKRKRKNGQCSLKTSMSGYIPSYLDKDEQCVVCGDKATGYHYRCITCEGCKGFFRRTIQKNLHPTYSCKYDSCCVIDKITRNQCQLCRFKKCIAVGMAMDLVLDDSKRVAKRKLIEQNRERRRKEEMIRSLQQRPEPTPEEWDLIHIATEAHRSTNAQGSHWKQRRKFLPDDIGQSPIVSMPDGDKVDLEAFSEFTKIITPAITRVVDFAKKLPMFSELPCEDQIILLKGCCMEIMSLRAAVRYDPESDTLTLSGEMAVKREQLKNGGLGVVSDAIFELGKSLSAFNLDDTEVALLQAVLLMSTDRSGLLCVDKIEKSQEAYLLAFEHYVNHRKHNIPHFWPKLLMKEREVQSSILYKGAAAEGRPGGSLGVHPEGQQLLGMHVVQGPQVRQLEQQLGEAGSLQGPVLQHQSPKSPQQRLLELLHRSGILHARAVCGEDDSSEADSPSSSEEEPEVCEDLAGNAASP*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
662,THRA,1,52,P10827,"[['P10827', 'P10827-2', 'P10827-3', 'P10827-4']]",,P10827,"PMID: 27347890, Soto",TF,MEQKPSKVECGSDPEENSARSPDGKRKRKNGQCSLKTSMSGYIPSY...,


1
52


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
18,g.17.40074491G>T(p.Met1Ile),IEQKPSKVECGSDPEENSARSPDGKRKRKNGQCSLKTSMSGYIPSY...,1,IEQKPSKVECGSDPEENSARSPDGKRKRKNGQCSLKTSMSGYIPSY...
19,g.17.40074525G>A(p.Asp13Asn),MEQKPSKVECGSNPEENSARSPDGKRKRKNGQCSLKTSMSGYIPSY...,13,MEQKPSKVECGSNPEENSARSPDGKRKRKNGQCSLKTSMSGYIPSY...
20,g.17.40074528C>T(p.Pro14Ser),MEQKPSKVECGSDSEENSARSPDGKRKRKNGQCSLKTSMSGYIPSY...,14,MEQKPSKVECGSDSEENSARSPDGKRKRKNGQCSLKTSMSGYIPSY...
21,g.17.40074532A>C(p.Glu15Ala),MEQKPSKVECGSDPAENSARSPDGKRKRKNGQCSLKTSMSGYIPSY...,15,MEQKPSKVECGSDPAENSARSPDGKRKRKNGQCSLKTSMSGYIPSY...
22,g.17.40074538A>G(p.Asn17Ser),MEQKPSKVECGSDPEESSARSPDGKRKRKNGQCSLKTSMSGYIPSY...,17,MEQKPSKVECGSDPEESSARSPDGKRKRKNGQCSLKTSMSGYIPSY...
23,g.17.40076872G>A(p.Ala19Thr),MEQKPSKVECGSDPEENSTRSPDGKRKRKNGQCSLKTSMSGYIPSY...,19,MEQKPSKVECGSDPEENSTRSPDGKRKRKNGQCSLKTSMSGYIPSY...
24,g.17.40076875A>G(p.Arg20Gly),MEQKPSKVECGSDPEENSAGSPDGKRKRKNGQCSLKTSMSGYIPSY...,20,MEQKPSKVECGSDPEENSAGSPDGKRKRKNGQCSLKTSMSGYIPSY...
25,g.17.40076877G>C(p.Arg20Ser),MEQKPSKVECGSDPEENSASSPDGKRKRKNGQCSLKTSMSGYIPSY...,20,MEQKPSKVECGSDPEENSASSPDGKRKRKNGQCSLKTSMSGYIPSY...
26,g.17.40076879C>T(p.Ser21Leu),MEQKPSKVECGSDPEENSARLPDGKRKRKNGQCSLKTSMSGYIPSY...,21,MEQKPSKVECGSDPEENSARLPDGKRKRKNGQCSLKTSMSGYIPSY...
27,g.17.40076894G>A(p.Arg26Gln),MEQKPSKVECGSDPEENSARSPDGKQKRKNGQCSLKTSMSGYIPSY...,26,MEQKPSKVECGSDPEENSARSPDGKQKRKNGQCSLKTSMSGYIPSY...


'MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSSDYGQTSKMSPRVPQQDWLSQPPARVTIKMECNPSQVNGSRNSPDECSVAKGGKMVGSPDTVGMNYGSYMEEKHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVNILLFQNIDGKELCKMTKDDFQRLTPSYNADILLSHLHYLRETPLPHLTSDDVDKALQNSPRLMHARNTGGAAFIFPNTSVYPEATQRITTRPDLPYEPPRRSAWTGHGHPTPQSKAAQPSPSTVPKTEDQRPQLDPYQILGPTSSRLANPGSGQIQLWQFLLELLSDSSNSSCITWEGTNGEFKMTDPDEVARRWGERKSKPNMNYDKLSRALRYYYDKNIMTKVHGKRYAYKFDFHGIAQALQPHPPESSLYKYPSDLPYMGSYHAHPQKMNFVAPHPPALPVTSSSFFAAPNPYWNSPTGGIYPNTRLPTSHMPSHLGTYY*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
125,ERG,433,479,P11308,[['P11308-3']],,P11308,"PMID: 9681824, Soto",TF,PHPPALPVTSSSFFAAPNPYWNSPTGGIYPNTRLPTSHMPSHLGTYY,
126,ERG,118,261,P11308,"[['P11308-1', 'P11308-3', 'P11308-5', 'P11308-...",nan / ENST00000288319,P11308 / P11308,"PMID: 14603248, Soto / DelRosso et al.",TF,MTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVNILLFQNI...,


433
479


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
7,g.21.38383429G>C(p.Pro479Ala),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,479,QKMNFVAPHPPALPVTSSSFFAAPNPYWNSPTGGIYPNTRLPTSHMA
8,g.21.38383431A>G(p.Met478Thr),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,478,QKMNFVAPHPPALPVTSSSFFAAPNPYWNSPTGGIYPNTRLPTSHTP
9,g.21.38383437C>G(p.Ser476Thr),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,476,QKMNFVAPHPPALPVTSSSFFAAPNPYWNSPTGGIYPNTRLPTTHMP
10,g.21.38383438T>C(p.Ser476Gly),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,476,QKMNFVAPHPPALPVTSSSFFAAPNPYWNSPTGGIYPNTRLPTGHMP
11,g.21.38383450T>C(p.Arg472Gly),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,472,QKMNFVAPHPPALPVTSSSFFAAPNPYWNSPTGGIYPNTGLPTSHMP
12,g.21.38383470C>T(p.Gly465Glu),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,465,QKMNFVAPHPPALPVTSSSFFAAPNPYWNSPTEGIYPNTRLPTSHMP
13,g.21.38383477G>C(p.Pro463Ala),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,463,QKMNFVAPHPPALPVTSSSFFAAPNPYWNSATGGIYPNTRLPTSHMP
14,g.21.38383489A>T(p.Tyr459Asn),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,459,QKMNFVAPHPPALPVTSSSFFAAPNPNWNSPTGGIYPNTRLPTSHMP
15,g.21.38383493G>T(p.Asn457Lys),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,457,QKMNFVAPHPPALPVTSSSFFAAPKPYWNSPTGGIYPNTRLPTSHMP
16,g.21.38383498G>C(p.Pro456Ala),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,456,QKMNFVAPHPPALPVTSSSFFAAANPYWNSPTGGIYPNTRLPTSHMP


118
261


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
25,g.21.38400580T>C(p.Arg254Gly),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,254,KHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVN...
26,g.21.38400585G>T(p.Thr252Lys),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,252,KHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVN...
27,g.21.38400591C>T(p.Arg250Lys),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,250,KHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVN...
28,g.21.38400597G>A(p.Thr248Met),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,248,KHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVN...
29,g.21.38400625G>T(p.Pro239Thr),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,239,KHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVN...
30,g.21.38400639G>A(p.Ala234Val),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,234,KHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVN...
31,g.21.38402573C>T(p.Met226Ile),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,226,KHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVN...
32,g.21.38402574A>G(p.Met226Thr),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,226,KHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVN...
33,g.21.38402580C>T(p.Arg224Gln),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,224,KHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVN...
34,g.21.38402625T>C(p.His209Arg),MIQTVPDPAAHIKEALSVVSEDQSLFECAYGTPHLAKTEMTASSSS...,209,KHMPPPNMTTNERRVIVPADPTLWSTDHVRQWLEWAVKEYGLPDVN...


'MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAMTFYSPAVMNYSIPSNVTNLEGGPGRQTTSPNVLWPTPGHLSPLVVHRQLSHLYAEPQKSPWCEARSLEHTLPVNRETLKRKVSGNRCASPVTGPGSKRDAHFCAVCSDYASGYHYGVWSCEGCKAFFKRSIQGHNDYICPATNQCTIDKNRRKSCQACRLRKCYEVGMVKCGSRRERCGYRLVRRQRSADEQLHCAGKAKRSGGHAPRVRELLLDALSPEQLVLTLLEAEPPHVLISRPSAPFTEASMMMSLTKLADKELVHMISWAKKIPGFVELSLFDQVRLLESCWMEVLMMGLMWRSIDHPGKLIFAPDLVLDRDEGKCVEGILEIFDMLLATTSRFRELKLQHKEYLCVKAMILLNSSMYPLVTATQDADSSRKLAHLLNAVTDALVWVIAKSGISSQQQSMRLANLLMLLSHVRHASNKGMEHLLNMKCKNVVPVYDLLLEMLNAHVLRGCKSSITGSECSPAEDSKSKEGSQNPQSQ*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
129,ESR2,1,148,Q92731,"[['Q92731', 'Q92731-2', 'Q92731-3', 'Q92731-4'...",,Q92731,"PMID: 21964318, Soto",TF,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,
130,ESR2,304,500,Q92731,[['Q92731']],,Q92731,"PMID: 21964318, Soto",TF,KELVHMISWAKKIPGFVELSLFDQVRLLESCWMEVLMMGLMWRSID...,


1
148


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
27,g.14.64280082T>C(p.Asp145Gly),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,145,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...
28,g.14.64280085C>G(p.Arg144Thr),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,144,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...
29,g.14.64280089T>G(p.Lys143Gln),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,143,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...
30,g.14.64280091G>A(p.Ser142Leu),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,142,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...
31,g.14.64280095C>A(p.Gly141Cys),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,141,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...
32,g.14.64280106A>G(p.Val137Ala),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,137,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...
33,g.14.64280115G>A(p.Ala134Val),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,134,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...
34,g.14.64280116C>A(p.Ala134Ser),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,134,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...
35,g.14.64280116C>T(p.Ala134Thr),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,134,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...
36,g.14.64280121C>T(p.Arg132His),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,132,MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...


304
500


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
77,g.14.64233234A>G(p.Val499Ala),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,499,KELVHMISWAKKIPGFVELSLFDQVRLLESCWMEVLMMGLMWRSID...
78,g.14.64233235C>T(p.Val499Met),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,499,KELVHMISWAKKIPGFVELSLFDQVRLLESCWMEVLMMGLMWRSID...
79,g.14.64233240G>A(p.Ala497Val),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,497,KELVHMISWAKKIPGFVELSLFDQVRLLESCWMEVLMMGLMWRSID...
80,g.14.64233248C>T(p.Met494Ile),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,494,KELVHMISWAKKIPGFVELSLFDQVRLLESCWMEVLMMGLMWRSID...
81,g.14.64233264T>G(p.Asp489Ala),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,489,KELVHMISWAKKIPGFVELSLFDQVRLLESCWMEVLMMGLMWRSID...
...,...,...,...,...
148,g.14.64257341C>T(p.Asp326Asn),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,326,KELVHMISWAKKIPGFVELSLFNQVRLLESCWMEVLMMGLMWRSID...
149,g.14.64257341C>G(p.Asp326His),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,326,KELVHMISWAKKIPGFVELSLFHQVRLLESCWMEVLMMGLMWRSID...
150,g.14.64257346A>C(p.Leu324Arg),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,324,KELVHMISWAKKIPGFVELSRFDQVRLLESCWMEVLMMGLMWRSID...
151,g.14.64260470T>C(p.Ser311Gly),MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAM...,311,KELVHMIGWAKKIPGFVELSLFDQVRLLESCWMEVLMMGLMWRSID...


'MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVRRQSYSSTSRGISVTKKTHTSQIEIIPCKICGDKSSGIHYGVITCEGCKGFFRRSQQSNATYSCPRQKNCLIDRTSRNRCQHCRLQKCLAVGMSRDAVKFGRMSKKQRDSLYAEVQKHRMQQQQRDHQQQPGEAEPLTPTYNISANGLTELHDDLSNYIDGHTPEGSKADSAVSSFYLDIQPSPDQSGLDINGIKPEPICDYTPASGFFPYCSFTNGETSPTVSMAELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNKQREVMWQLCAIKITEAIQYVVEFAKRIDGFMELCQNDQIVLLKAGSLEVVFIRMCRAFDSQNNTVYFDGKYASPDVFKSLGCEDFISFVFEFGKSLCSMHLTEDEIALFSAFVLMSADRSWLQEKVKIEKLQQKIQLALQHVLQKNHREDGILTKLICKVSTLRALCGRHTEKLMAFKAIYPDIVRLHFPPLYKELFTSEFEPAMQIDG*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
539,RORA,272,385,P35398,[['P35398']],,P35398,"PMID: 10478845, Soto",TF,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNKQRE...,


272
385


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
5,g.15.60502790A>G(p.Tyr385His),MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVR...,385,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNKQRE...
6,g.15.60502808C>T(p.Val379Met),MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVR...,379,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNKQRE...
7,g.15.60502823A>G(p.Ser374Pro),MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVR...,374,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNKQRE...
8,g.15.60503622C>T(p.Ala330Thr),MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVR...,330,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNKQRE...
9,g.15.60503637T>C(p.Ile325Val),MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVR...,325,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNKQRE...
10,g.15.60503663C>T(p.Arg316Gln),MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVR...,316,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNKQQE...
11,g.15.60503663C>A(p.Arg316Leu),MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVR...,316,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNKQLE...
12,g.15.60503664G>A(p.Arg316Trp),MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVR...,316,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNKQWE...
13,g.15.60505510T>C(p.Lys314Glu),MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVR...,314,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQNEQRE...
14,g.15.60505512T>C(p.Asn313Ser),MESAPAAPDPAASEPGSSGADAAAGSRETPLNQESARKSEPPAPVR...,313,ELEHLAQNISKSHLETCQYLREELQQITWQTFLQEEIENYQSKQRE...


'MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHPEVNGDTKWHSFKSYYGIPCMKGSQNSRVSPDFTQESRGYSKCLQNGGIKRTVSEPSLSGLLQIKKLKQDQKANGERRNFGVSQERNPGESSQPNVSDLSDKKESVSSVAQENAVKDFTSFSTHNCSGPENPELQILNEQEGKSANYHDKNIVLLKNKAVLMPNGATVSASSVEHTHGELLEKTLSQYYPDCVSIAVQKTTSHINAINSQATNELSCEITHPSHTSGQINSAQTSNSELPPKPAAVVSEACDADDADNASKLAAMLNTCSFQKPEQLQQQKSVFEICPSPAENNIQGTTKLASGEEFCSGSSSNLQAPGGSSERYLKQNEMNGAYFKQSSVFTKDSFSATTTPPPPSQLLLSPPPPLPQVPQLPSEGKSTLNGGVLEEHHHYPNQSNTTLLREVKIEGKPEAPPSQSPNPSTHVCSPSPMLSERPQNNCVNRNDIQTAGTMTVPLCSEKTRPMSEHLKHNPPIFGSSGELQDNCQQLMRNKEQEILKGRDKEQTRDLVPPTQHYLKPGWIELKAPRFHQAESHLKRNEASLPSILQYQPNLSNQMTSKQYTGNSNMPGGLPRQAYTQKTTQLEHKSQMYQVEMNQGQSQGTVDQHLQFQKPSHQVHFSKTDHLPKAHVQSLCGTRFHFQQRADSQTEKLMSPVLKQHLNQQASETEPFSNSHLLQHKPHKQAAQTQPSQSSHLPQNQQQQQKLQIKNKEEILQTFPHPQSNNDQQREGSFFGQTKVEECFHGENQYSKSSEFETHNVQMGLEEVQNINRRNSPYSQTMKSSACKIQVSCSNNTHLVSENKEQTTHPELFAGNKTQNLHHMQYFPNNVIPKQDLLHRCFQEQEQKSQQASVLQGYKNRNQDMSGQQAAQLAQQRYLIHNHANVFPVPDQGGSHTQTPPQKDTQKHAALRWHLLQKQEQQQTQQPQTESCHSQMHRPIKVEPGCKPHACMHTAPPE

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
647,TET2,1582,1751,Q6N021,"[['Q6N021'], ['Q6N021'], ['Q6N021']]",ENST00000540549 / ENST00000540549 / ENST000005...,Q6N021 / Q6N021 / Q6N021,DelRosso et al. / DelRosso et al. / DelRosso e...,TF,SSHTSDIYGSTSPMNFYSTSSQAAGSYLNSSNPMNPYPGLLNQNTQ...,


1582
1751


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.4.105275270A>G(p.Asp1587Gly),MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHP...,1587,SSHTSGIYGSTSPMNFYSTSSQAAGSYLNSSNPMNPYPGLLNQNTQ...
1,g.4.105275276A>G(p.Tyr1589Cys),MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHP...,1589,SSHTSDICGSTSPMNFYSTSSQAAGSYLNSSNPMNPYPGLLNQNTQ...
2,g.4.105275281A>T(p.Ser1591Cys),MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHP...,1591,SSHTSDIYGCTSPMNFYSTSSQAAGSYLNSSNPMNPYPGLLNQNTQ...
3,g.4.105275293A>G(p.Met1595Val),MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHP...,1595,SSHTSDIYGSTSPVNFYSTSSQAAGSYLNSSNPMNPYPGLLNQNTQ...
4,g.4.105275303A>G(p.Tyr1598Cys),MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHP...,1598,SSHTSDIYGSTSPMNFCSTSSQAAGSYLNSSNPMNPYPGLLNQNTQ...
5,g.4.105275306C>T(p.Ser1599Phe),MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHP...,1599,SSHTSDIYGSTSPMNFYFTSSQAAGSYLNSSNPMNPYPGLLNQNTQ...
6,g.4.105275306C>G(p.Ser1599Cys),MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHP...,1599,SSHTSDIYGSTSPMNFYCTSSQAAGSYLNSSNPMNPYPGLLNQNTQ...
7,g.4.105275323G>T(p.Ala1605Ser),MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHP...,1605,SSHTSDIYGSTSPMNFYSTSSQASGSYLNSSNPMNPYPGLLNQNTQ...
8,g.4.105275326G>T(p.Gly1606Cys),MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHP...,1606,SSHTSDIYGSTSPMNFYSTSSQAACSYLNSSNPMNPYPGLLNQNTQ...
9,g.4.105275327G>C(p.Gly1606Ala),MEQDRTNHVEGNRLSPFLIPSPPICQTEPLATKLQNGSPLPERAHP...,1606,SSHTSDIYGSTSPMNFYSTSSQAAASYLNSSNPMNPYPGLLNQNTQ...


'MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDRTTVLIEQDPGTLEDEDDDGQCGEHLPFLVGGEEGFHLIDHEAMSQGYVQHIISPDQIHLTINPGSTPMPRNIEGATLTLQSECPETKRKEVKRYQCTFEGCPRTYSTAGNLRTHQKTHRGEYTFVCNQEGCGKAFLTSYSLRIHVRVHTKEKPFECDVQGCEKAFNTLYRLKAHQRLHTGKTFNCESEGCSKYFTTLSDLRKHIRTHTGEKPFRCDHDGCGKAFAASHHLKTHVRTHTGERPFFCPSNGCEKTFSTQYSLKSHMKGHDNKGHSYNALPQHNGSEDTNHSLCLSDLSLLSTDSELRENSSTTQGQDLSTISPAIIFESMFQNSDDTAIQEDPQQTASLTESFNGDAESVSDVPPSTGNSASLSLPLVLQPGLSEPPQPLLPASAPSAPPPAPSLGPGSQQAAFGNPPALLQPPEVPVPHSTQFAANHQEFLPHPQAPQPIVPGLSVVAGASASAAAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPTPNTAILQSSLVMGEQNLQWILNGATSSPQNQEQIQQASKVEKVFFTTAVPVASSPGSSVQQIGLSVPVIIIKQEEACQCQCACRDSAKERASSRRKGCSSPPPPEPSPQAPDGPSLQLPAQTFSSAPVPGSSSSTLPSSCEQSRQAETPSDPQTETLSAMDVSEFLSLQSLDTPSNLIPIEALLQGEEEMGLTSSFSK*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
348,MTF1,510,624,Q14872,[['Q14872']],,Q14872,"PMID: 7610056, Soto",TF,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...,
349,MTF1,328,509,Q14872,"[['Q14872'], ['Q14872']]",nan / nan,Q14872 / Q14872,"PMID: 7610056, Soto / PMID: 7610056, Soto",TF,GSEDTNHSLCLSDLSLLSTDSELRENSSTTQGQDLSTISPAIIFES...,


510
624


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.1.37815545C>T(p.Gly618Asp),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,618,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...
1,g.1.37815561A>C(p.Ser613Ala),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,613,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...
2,g.1.37815564T>A(p.Ser612Cys),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,612,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...
3,g.1.37817422G>T(p.Pro610Thr),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,610,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...
4,g.1.37817458C>G(p.Val598Leu),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,598,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...
5,g.1.37822134T>G(p.Asn585Thr),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,585,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...
6,g.1.37822163T>C(p.Ile575Met),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,575,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...
7,g.1.37822165T>C(p.Ile575Val),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,575,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...
8,g.1.37822188A>G(p.Met567Thr),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,567,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...
9,g.1.37822189T>C(p.Met567Val),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,567,AAVASAVAAPAPPQSTTEPLPAMVQTLPLGANSVLTNNPTITITPT...


328
509


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
30,g.1.37822381C>T(p.Ala503Thr),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,503,GSEDTNHSLCLSDLSLLSTDSELRENSSTTQGQDLSTISPAIIFES...
31,g.1.37822390A>G(p.Ser500Pro),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,500,GSEDTNHSLCLSDLSLLSTDSELRENSSTTQGQDLSTISPAIIFES...
32,g.1.37822409C>A(p.Gln493His),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,493,GSEDTNHSLCLSDLSLLSTDSELRENSSTTQGQDLSTISPAIIFES...
33,g.1.37822413G>A(p.Pro492Leu),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,492,GSEDTNHSLCLSDLSLLSTDSELRENSSTTQGQDLSTISPAIIFES...
34,g.1.37822422G>A(p.Pro489Leu),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,489,GSEDTNHSLCLSDLSLLSTDSELRENSSTTQGQDLSTISPAIIFES...
...,...,...,...,...
93,g.1.37832250T>C(p.Ser355Gly),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,355,GSEDTNHSLCLSDLSLLSTDSELRENSGTTQGQDLSTISPAIIFES...
94,g.1.37832261C>T(p.Arg351Gln),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,351,GSEDTNHSLCLSDLSLLSTDSELQENSSTTQGQDLSTISPAIIFES...
95,g.1.37832274C>T(p.Asp347Asn),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,347,GSEDTNHSLCLSDLSLLSTNSELRENSSTTQGQDLSTISPAIIFES...
96,g.1.37832282A>G(p.Leu344Pro),MGEHSPDNNIIYFEAEEDELTPDDKMLRFVDKNGLVPSSSGTVYDR...,344,GSEDTNHSLCLSDLSLPSTDSELRENSSTTQGQDLSTISPAIIFES...


'MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAHQGVRPCDISRQLRVSHGCVSKILGRYYETGSIKPGVIGGSKPKVATPKVVEKIAEYKRQNPTMFAWEIRDRLLAERVCDNDTVPSVSSINRIIRTKVQQPPNQPVPASSHSIVSTGSVTQVSSVSTDSAGSSYSISGILGITSPSADTNKRKRDEGIQESPVPNGHSLPGRDFLRKQMRGDLFTQQQLEVLDRVFERQHYSDIFTTTEPIKPEQTTEYSAMASLAGGLDDMKANLASPTPADIGSSVPGPQSYPIVTGRDLASTTLPGYPPHVPPAGQGSYSAPTLTGMVPGSEFSGSPYSHPQYSSYNDSWRFPNPGLLGSPYYYSAAARGAAPPAAATAYDRH*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
459,PAX5,304,358,Q02548,"[['Q02548'], ['Q02548']]",nan / nan,Q02548 / Q02548,"PMID: 8617244, Soto / Choi 2000 list, GSL",TF,GRDLASTTLPGYPPHVPPAGQGSYSAPTLTGMVPGSEFSGSPYSHP...,


304
358


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
10,g.9.36846888A>G(p.Ser352Pro),MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAH...,352,GRDLASTTLPGYPPHVPPAGQGSYSAPTLTGMVPGSEFSGSPYSHP...
11,g.9.36846915C>T(p.Gly343Arg),MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAH...,343,GRDLASTTLPGYPPHVPPAGQGSYSAPTLTGMVPGSEFSRSPYSHP...
12,g.9.36846921A>T(p.Phe341Ile),MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAH...,341,GRDLASTTLPGYPPHVPPAGQGSYSAPTLTGMVPGSEISGSPYSHP...
13,g.9.36846926C>T(p.Ser339Asn),MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAH...,339,GRDLASTTLPGYPPHVPPAGQGSYSAPTLTGMVPGNEFSGSPYSHP...
14,g.9.36882039C>T(p.Ser326Asn),MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAH...,326,GRDLASTTLPGYPPHVPPAGQGNYSAPTLTGMVPGSEFSGSPYSHP...
15,g.9.36882052C>T(p.Ala322Thr),MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAH...,322,GRDLASTTLPGYPPHVPPTGQGSYSAPTLTGMVPGSEFSGSPYSHP...
16,g.9.36882054G>C(p.Pro321Arg),MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAH...,321,GRDLASTTLPGYPPHVPRAGQGSYSAPTLTGMVPGSEFSGSPYSHP...
17,g.9.36882054G>T(p.Pro321His),MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAH...,321,GRDLASTTLPGYPPHVPHAGQGSYSAPTLTGMVPGSEFSGSPYSHP...
18,g.9.36882055G>A(p.Pro321Ser),MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAH...,321,GRDLASTTLPGYPPHVPSAGQGSYSAPTLTGMVPGSEFSGSPYSHP...
19,g.9.36882061C>T(p.Val319Ile),MDLEKNYPTPRTSRTGHGGVNQLGGVFVNGRPLPDVVRQRIVELAH...,319,GRDLASTTLPGYPPHIPPAGQGSYSAPTLTGMVPGSEFSGSPYSHP...


'MDLGTAEGTRCTDPPAGKPAMAPKRKGGLKLNAICAKLSRQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARAPRSEEDKRRAVIEKWVNGEYSEEPAPTPVLGRIAREGLELPPEGVYMVQPQGCSDEEDHAEEPSKDGGALEEKDSDGAASKEDSGPSTRQASGEASSLRDYAASTMTEFLGMFGYDDQNTRDELARKISFEKLHAGSTPEAATSSMLPTSEDTLSKRARFSKYEEYIRKLKAGEQLSWPAPSTKTEERVGKEVVGTLPGLRLPSSTAHLETKATILPLPSHSSVQMQNLVARASKYDFFIQKLKTGENLRPQNGSTYKKPSKYDLENVKYLHLFKPGEGSPDMGGAIAFKTGKVGRPSKYDVRGIQKPGPAKVPPTPSLAPAPLASVPSAPSAPGPGPEPPASLSFNTPEYLKSTFSKTDSITTGTVSTVKNGLPTDKPAVTEDVNIYQKYIARFSGSQHCGHIHCAYQYREHYHCLDPECNYQRFTSKQDVIRHYNMHKKRDNSLQHGFMRFSPLDDCSVYYHGCHLNGKSTHYHCMQVGCNKVYTSTSDVMTHENFHKKNTQLINDGFQRFRATEDCGTADCQFYGQKTTHFHCRRPGCTFTFKNKCDIEKHKSYHIKDDAYAKDGFKKFYKYEECKYEGCVYSKATNHFHCIRAGCGFTFTSTSQMTSHKRKHERRHIRSSGALGLPPSLLGAKDTEHEESSNDDLVDFSALSSKNSSLSASPTSQQSSASLAAATAATEAGPSATKPPNSKISGLLPQGLPGSIPLALALSNSGLPTPTPYFPILAGRGSTSLPVGTPSLLGAVSSGSAASATPDTPTLVASGAGDSAPVAAASVPAPPASIMERISASKGLISPMMARLAAAALKPSATFDPGSGQQVTPARFPPAQVKPEPGESTGAPGPHEASQDRSLDLTVKEPSNESNGHAVPANSSLLSSLMNKMSQGNPGLGSLLNIKAEAEGSPAAEPSPFLGKAVKALVQ

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
50,CASZ1,31,185,Q86V15,"[['Q86V15', 'Q86V15-2']]",,Q86V15,"PMID: 22331471, 26296975, Soto",TF,LNAICAKLSRQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARA...,


31
185


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.1.10660487G>T(p.Phe185Leu),MDLGTAEGTRCTDPPAGKPAMAPKRKGGLKLNAICAKLSRQVVVEK...,185,LNAICAKLSRQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARA...
1,g.1.10660490C>G(p.Glu184Asp),MDLGTAEGTRCTDPPAGKPAMAPKRKGGLKLNAICAKLSRQVVVEK...,184,LNAICAKLSRQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARA...
2,g.1.10660509G>A(p.Ala178Val),MDLGTAEGTRCTDPPAGKPAMAPKRKGGLKLNAICAKLSRQVVVEK...,178,LNAICAKLSRQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARA...
3,g.1.10660510C>T(p.Ala178Thr),MDLGTAEGTRCTDPPAGKPAMAPKRKGGLKLNAICAKLSRQVVVEK...,178,LNAICAKLSRQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARA...
4,g.1.10660512T>C(p.Tyr177Cys),MDLGTAEGTRCTDPPAGKPAMAPKRKGGLKLNAICAKLSRQVVVEK...,177,LNAICAKLSRQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARA...
...,...,...,...,...
82,g.1.10665469C>T(p.Arg40His),MDLGTAEGTRCTDPPAGKPAMAPKRKGGLKLNAICAKLSHQVVVEK...,40,LNAICAKLSHQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARA...
83,g.1.10665470G>A(p.Arg40Cys),MDLGTAEGTRCTDPPAGKPAMAPKRKGGLKLNAICAKLSCQVVVEK...,40,LNAICAKLSCQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARA...
84,g.1.10665473T>G(p.Ser39Arg),MDLGTAEGTRCTDPPAGKPAMAPKRKGGLKLNAICAKLRRQVVVEK...,39,LNAICAKLRRQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARA...
85,g.1.10665482C>T(p.Ala36Thr),MDLGTAEGTRCTDPPAGKPAMAPKRKGGLKLNAICTKLSRQVVVEK...,36,LNAICTKLSRQVVVEKRADAGSHTEGSPSQPRDQERSGPESGAARA...


'MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGGSSGSSGSGSGGGRRGAAAAAAAMASETSGHQGYQGFRKEAGDFYYMAGNKDPVTTGTPQPPQRRPSGPVQSYGPPQGSSFGNQYGSEGHVGQFQAQHSGLGGVSHYQQDYTGPFSPGSAQYQQQASSQQQQQQVQQLRQQLYQSHQPLPQATGQPASSSSHLQPMQRPSTLPSSAAGYQLRVGQFGQHYQSSASSSSSSSFPSPQRFSQSGQSYDGSYNVNAGSQYEGHNVGSNAQAYGTQSNYSYQPQSMKNFEQAKIPQGTQQGQQQQQPQQQQHPSQHVMQYTNAATKLPLQSQVGQYNQPEVPVRSPMQFHQNFSPISNPSPAASVVQSPSCSSTPSPLMQTGENLQCGQGSVPMGSRNRILQLMPQLSPTPSMMPSPNSHAAGFKGFGLEGVPEKRLTDPGLSSLSALSTQVANLPNTVQHMLLSDALTPQKKTSKRPSSSKKADSCTNSEGSSQPEEQLKSPMAESLDGGCSSSSEDQGERVRQLSGQSTSSDTTYKGGASEKAGSSPAQGAQNEPPRLNASPAAREEATSPGAKDMPLSSDGNPKVNEKTVGVIVSREAMTGRVEKPGGQDKGSQEDDPAATQRPPSNGGAKETSHASLPQPEPPGGGGSKGNKNGDNNSNHNGEGNGQSGHSAAGPGFTSRTEPSKSPGSLRYSYKDSFGSAVPRNVSGFPQYPTGQEKGDFTGHGERKGRNEKFPSLLQEVLQGYHHHPDRRYSRSTQEHQGMAGSLEGTTRPNVLVSQTNELASRGLLNKSIGSLLENPHWGPWERKSSSTAPEMKQINLTDYPIPRKFEIEPQSSAHEPGGSLSERRSVICDISPLRQIVRDPGAHSLGHMSADTRIGRNDRLNPTLSQSVILPGGLVSMETKLKSQSGQIKEEDFEQSKSQASFNNKKSGDHCHPPSIKHESYRGNASPGAATHDSLSDYGPQDSRPTPMRRVPGRVGGRE

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
630,TCF20,1,327,Q9UGU0,"[['Q9UGU0', 'Q9UGU0-2']]",,Q9UGU0,"PMID: 10995766, Soto",TF,MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...,


1
327


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
2,g.22.42214326T>C(p.His327Arg),MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...,327,MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...
3,g.22.42214328C>G(p.Gln326His),MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...,326,MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...
4,g.22.42214330G>C(p.Gln326Glu),MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...,326,MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...
5,g.22.42214336G>A(p.Pro324Ser),MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...,324,MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...
6,g.22.42214353G>A(p.Pro318Leu),MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...,318,MQSFREQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...
...,...,...,...,...
116,g.22.42215259C>G(p.Ser16Thr),MQSFREQSSYHGNQQTYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...,16,MQSFREQSSYHGNQQTYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...
117,g.22.42215272C>T(p.Gly12Arg),MQSFREQSSYHRNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...,12,MQSFREQSSYHRNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...
118,g.22.42215288C>A(p.Glu6Asp),MQSFRDQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...,6,MQSFRDQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...
119,g.22.42215292C>T(p.Arg5Gln),MQSFQEQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...,5,MQSFQEQSSYHGNQQSYPQEVHGSSRLEEFSPRQAQMFQNFGGTGG...


'MASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEEEDDDDEDGGGGDHGGGGGHGHAGHHHHHHHHHHHPPMIALQPLVTDDPTQVHHHQEVILVQTREEVVGGDDSDGLRAEDGFEDQILIPVPAPAGGDDDYIEQTLVTVAAAGKSGGGGSSSSGGGRVKKGGGKKSGKKSYLSGGAGAAGGGGADPGNKKWEQKQVQIKTLEGEFSVTMWSSDEKKDIDHETVVEEQIIGENSPPDYSEYMTGKKLPPGGIPGIDLSDPKQLAEFARMKPRKIKEDDAPRTIACPHKGCTKMFRDNSAMRKHLHTHGPRVHVCAECGKAFVESSKLKRHQLVHTGEKPFQCTFEGCGKRFSLDFNLRTHVRIHTGDRPYVCPFDGCNKKFAQSTNLKSHILTHAKAKNNQ*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
699,YY1,1,69,P25490,[['P25490']],,P25490,"PMID: 7731805, Soto",TF,MASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,
700,YY1,371,397,P25490,[['P25490']],,P25490,"activation_regions.txt, GSL",TF,RTHVRIHTGDRPYVCPFDGCNKKFAQS,


1
69


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.14.100239245A>C(p.Met1Leu),LASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,1,LASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...
1,g.14.100239246T>A(p.Met1Lys),KASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,1,KASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...
2,g.14.100239246T>G(p.Met1Arg),RASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,1,RASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...
3,g.14.100239251T>G(p.Ser3Ala),MAAGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,3,MAAGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...
4,g.14.100239251T>A(p.Ser3Thr),MATGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,3,MATGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...
5,g.14.100239264T>A(p.Leu7His),MASGDTHYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,7,MASGDTHYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...
6,g.14.100239266T>A(p.Tyr8Asn),MASGDTLNIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,8,MASGDTLNIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...
7,g.14.100239270T>A(p.Ile9Asn),MASGDTLYNATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,9,MASGDTLYNATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...
8,g.14.100239292G>A(p.Met16Ile),MASGDTLYIATDGSEIPAEIVELHEIEVETIPVETIETTVVGEEEE...,16,MASGDTLYIATDGSEIPAEIVELHEIEVETIPVETIETTVVGEEEE...
9,g.14.100239301G>C(p.Glu19Asp),MASGDTLYIATDGSEMPADIVELHEIEVETIPVETIETTVVGEEEE...,19,MASGDTLYIATDGSEMPADIVELHEIEVETIPVETIETTVVGEEEE...


371
397


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
30,g.14.100277467G>A(p.Arg371His),MASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,371,HTHVRIHTGDRPYVCPFDGCNKKFAQS
31,g.14.100277469A>G(p.Thr372Ala),MASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,372,RAHVRIHTGDRPYVCPFDGCNKKFAQS
32,g.14.100277490G>A(p.Gly379Arg),MASGDTLYIATDGSEMPAEIVELHEIEVETIPVETIETTVVGEEEE...,379,RTHVRIHTRDRPYVCPFDGCNKKFAQS


'MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKSSLVNESETNQNSSSDSEAERRPPPRSESFRDKSRESLEEAAKRQDGGLFKGPPYPGYPFIMIPDLTSPYLPNGSLSPTARTLHFQSGSTHYSAYKTIEHQIAVQYLQMKWPLLDVQAGSLQSRQALKDARSPSPAHIVSNKVPVVQHPHHVHPLTPLITYSNEHFTPGNPPPHLPADVDPKTGIPRPPHPPDISPYYPLSPGTVGQIPHPLGWLVPQQGQPVYPITTGGFRHPYPTALTVNASMSRFPPHMVPPHHTLHTTGIPHPAIVTPTVKQESSQSDVGSLHSSKHQDSKKEEEKKKPHIKKPLNAFMLYMKEMRAKVVAECTLKESAAINQILGRRWHALSREEQAKYYELARKERQLHMQLYPGWSARDNYGKKKKRKRDKQPGETNEHSECFLNPCLSLPPITDLSAPKKCRARFGLDQQNNWCGPCRRKKKCVRYIQGEGSCLSPPSSDGSLLDSPPPSPNLLGSPPRDAKSQTEQTQPLSLSLKPDPLAHLSMMPPPPALLLAEATHKASALCPNGALDLPPAALQPAAPSSSIAQPSTSSLHSHSSLAGTQPQPLSLVTKSLE*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
637,TCF7L2,1,53,Q9NQB0,"[['Q9NQB0', 'Q9NQB0-10', 'Q9NQB0-11', 'Q9NQB0-...",,Q9NQB0,"PMID: 21281469, Soto",TF,MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,
638,TCF7L2,459,505,Q9NQB0,"[['Q9NQB0'], ['Q9NQB0']]",nan / nan,Q9NQB0 / Q9NQB0,"PMID: 21281469, Soto / activation_regions.txt,...",TF,SAPKKCRARFGLDQQNNWCGPCRRKKKCVRYIQGEGSCLSPPSSDGS,


1
53


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
6,g.10.112950823G>A(p.Asp23Asn),MPQLNGGGGDDLGANDELISFKNEGEQEEKSSENSSAERDLADVKS...,23,MPQLNGGGGDDLGANDELISFKNEGEQEEKSSENSSAERDLADVKS...
7,g.10.112950840G>C(p.Glu28Asp),MPQLNGGGGDDLGANDELISFKDEGEQDEKSSENSSAERDLADVKS...,28,MPQLNGGGGDDLGANDELISFKDEGEQDEKSSENSSAERDLADVKS...
8,g.10.112950847A>G(p.Ser31Gly),MPQLNGGGGDDLGANDELISFKDEGEQEEKGSENSSAERDLADVKS...,31,MPQLNGGGGDDLGANDELISFKDEGEQEEKGSENSSAERDLADVKS...
9,g.10.112950848G>T(p.Ser31Ile),MPQLNGGGGDDLGANDELISFKDEGEQEEKISENSSAERDLADVKS...,31,MPQLNGGGGDDLGANDELISFKDEGEQEEKISENSSAERDLADVKS...
10,g.10.112950854A>G(p.Glu33Gly),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSGNSSAERDLADVKS...,33,MPQLNGGGGDDLGANDELISFKDEGEQEEKSSGNSSAERDLADVKS...
11,g.10.112950856A>G(p.Asn34Asp),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSEDSSAERDLADVKS...,34,MPQLNGGGGDDLGANDELISFKDEGEQEEKSSEDSSAERDLADVKS...
12,g.10.112950860C>G(p.Ser35Cys),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENCSAERDLADVKS...,35,MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENCSAERDLADVKS...
13,g.10.112950869A>C(p.Glu38Ala),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAARDLADVKS...,38,MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAARDLADVKS...


459
505


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
14,g.10.113159928G>A(p.Ala460Thr),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,460,STPKKCRARFGLDQQNNWCGPCRRKKKCVRYIQGEGSCLSPPSSDGS
15,g.10.113159975C>G(p.Asn475Lys),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,475,SAPKKCRARFGLDQQNKWCGPCRRKKKCVRYIQGEGSCLSPPSSDGS
16,g.10.113159983G>A(p.Gly478Asp),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,478,SAPKKCRARFGLDQQNNWCDPCRRKKKCVRYIQGEGSCLSPPSSDGS
17,g.10.113165569G>C(p.Cys486Ser),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,486,SAPKKCRARFGLDQQNNWCGPCRRKKKSVRYIQGEGSCLSPPSSDGS
18,g.10.113165571G>A(p.Val487Ile),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,487,SAPKKCRARFGLDQQNNWCGPCRRKKKCIRYIQGEGSCLSPPSSDGS
19,g.10.113165590A>G(p.Glu493Gly),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,493,SAPKKCRARFGLDQQNNWCGPCRRKKKCVRYIQGGGSCLSPPSSDGS
20,g.10.113165593G>A(p.Gly494Asp),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,494,SAPKKCRARFGLDQQNNWCGPCRRKKKCVRYIQGEDSCLSPPSSDGS
21,g.10.113165601C>G(p.Leu497Val),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,497,SAPKKCRARFGLDQQNNWCGPCRRKKKCVRYIQGEGSCVSPPSSDGS
22,g.10.113165610C>A(p.Pro500Thr),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,500,SAPKKCRARFGLDQQNNWCGPCRRKKKCVRYIQGEGSCLSPTSSDGS
23,g.10.113165614C>G(p.Ser501Cys),MPQLNGGGGDDLGANDELISFKDEGEQEEKSSENSSAERDLADVKS...,501,SAPKKCRARFGLDQQNNWCGPCRRKKKCVRYIQGEGSCLSPPCSDGS


'MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEKQQMGADGMYDKLRMLNGQTGSWGTRPGWYPGTSVPGQPTQDGCQQQEGGGENTNSISSNGEDSDEAQMRLQLKRKLQRNRTSFTQEQIEALEKEFERTHYPDVFARERLAAKIDLPEARIQVWFSNRRAKWRREEKLRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLGRTDTALTNTYSALPPMPSFTMANNLPMQPPVPSQTSSYSCMLPTSPSVNGRSYDTYTPPHMQTHMNSQPMGTSGTTSTGLISPGVSVPVQVPGSEPDMSQYWPRLQ*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
460,PAX6,271,422,P26367,[['P26367']],,P26367,"PMID: 7951315, Soto",TF,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...,


271
422


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
13,g.11.31789937C>A(p.Gln422His),MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRIL...,422,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...
14,g.11.31789937C>G(p.Gln422His),MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRIL...,422,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...
15,g.11.31789938T>C(p.Gln422Arg),MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRIL...,422,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...
16,g.11.31789938T>G(p.Gln422Pro),MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRIL...,422,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...
17,g.11.31789939G>T(p.Gln422Lys),MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRIL...,422,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...
18,g.11.31789943T>A(p.Arg420Ser),MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRIL...,420,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...
19,g.11.31789944C>A(p.Arg420Ile),MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRIL...,420,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...
20,g.11.31789944C>G(p.Arg420Thr),MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRIL...,420,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...
21,g.11.31789957G>T(p.Gln416Lys),MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRIL...,416,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...
22,g.11.31789961C>A(p.Met414Ile),MQNSHSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRIL...,414,LRNQRRQASNTPSHIPISSSFSTSVYQPIPQPTTPVSSFTSGSMLG...


'MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPVGGGGPGAPPSPPAVAAAAAAAGSSGAGVPGGAAAASAASSSSASSSSSSSSSASSGPALLRVGPGFDAALQVSAAIGTNLRRFRAVFGESGGGGGSGEDEQFLGFGSDEEVRVRSPTRSPSVKTSPRKPRGRPRSGSDRNSAILSDPSVFSPLNKSETKSGDKIKKKDSKSIEKKRGRPPTFPGVKIKITHGKDISELPKGNKEDSLKKIKRTPSATFQQATKIKKLRAGKLSPLKSKFKTGKLQIGRKGVQIVRRRGRPPSTERIKTPSGLLINSELEKPQKVRKDKEGTPPLTKEDKTVVRQSPRRIKPVRIIPSSKRTDATIAKQLLQRAKKGAQKKIEKEAAQLQGRKVKTQVKNIRQFIMPVVSAISSRIIKTPRRFIEDEDYDPPIKIARLESTPNSRFSAPSCGSSEKSSAASQHSSQMSSDSSRSSSPSVDTSTDSQASEEIQVLPEERSDTPEVHPPLPISQSPENESNDRRSRRYSVSERSFGSRTTKKLSTLQSAPQQQTSSSPPPPLLTPPPPLQPASSISDHTPWLMPPTIPLASPFLPASTAPMQGKRKSILREPTFRWTSLKHSRSEPQYFSSAKYAKEGLIRKPIFDNFRPPPLTPEDVGFASGFSASGTAASARLFSPLHSGTRFDMHKRSPLLRAPRFTPSEAHSRIFESVTLPSNRTSAGTSSSGVSNRKRKRKVFSPIRSEPRSPSHSMRTRSGRLSSSELSPLTPPSSVSSSLSISVSPLATSALNPTFTFPSHSLTQSGESAEKNQRPRKQTSAPAEPFSSSSPTPLFPWFTPGSQTERGRNKDKAPEELSKDRDADKSVEKDKSRERDREREKENKRESRKEKRKKGSEIQSSSALYPVGRVSKEKVVGEDVATSSSAKKATGRKKSSSHDSGTDITSVTLGDTTAVKTKILIKKGRGNLEKTNLDLGPTAPSLEKEKTLCLSTPSSSTV

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
307,KMT2A / KMT2A/ALL1/HRX / KMT2A/MLL1/CXXC7,2782,2921,Q03164,"[['Q03164'], ['Q03164'], ['Q03164-3'], ['Q0316...",ENST00000389506 / nan / nan / nan,Q03164 / Q03164 / Q03164 / Q03164,"DelRosso et al. / PMID: 8618864, Soto / Choi 2...",TF,NCHSVSRVKTQGQDSLEAQLSSLESSRRVHTSTPSDKNLLDTYNTE...,


2782
2921


Unnamed: 0,name,TF_seq,prot_pos,AD_seq


'MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLMIPEDGANDEELEAEFLALVGGQPPALEKLKGKGPLPMEAIEKMASLCMRDPDEDEEEGTDEDDLEADDDLLAELNEVLGEEQKASETPPPVAQPKPEAPHPGLETTLQERLALYQTAIESARQAGDSAKMRRYDRGLKTLENLLASIRKGNAIDEADIPPPVAIGKGPASTPTYSPAPTQPAPRIASAPEPRVTLEGPSATAPASSPGLAKPQMPPGPCSPGPLAQLQSRQRDYKLAALHAKQQGDTTAAARHFRVAKSFDAVLEALSRGEPVDLSCLPPPPDQLPPDPPSPPSQPPTPATAPSTTEVPPPPRTLLEALEQRMERYQVAAAQAKSKGDQRKARMHERIVKQYQDAIRAHKAGRAVDVAELPVPPGFPPIQGLEATKPTQQSLVGVLETAMKLANQDEGPEDEEDEVPKKQNSPVAPTAQPKAPPSRTPQSGSAPTAKAPPKATSTRAQQQLAFLEGRKKQLLQAALRAKQKNDVEGAKMHLRQAKGLEPMLEASRNGLPVDITKVPPAPVNKDDFALVQRPGPGLSQEAARRYGELTKLIRQQHEMCLNHSNQFTQLGNITETTKFEKLAEDCKRSMDILKQAFVRGLPTPTARFEQRTFSVIKIFPDLSSNDMLLFIVKGINLPTPPGLSPGDLDVFVRFDFPYPNVEEAQKDKTSVIKNTDSPEFKEQFKLCINRSHRGFRRAIQTKGIKFEVVHKGGLFKTDRVLGTAQLKLDALEIACEVREILEVLDGRRPTGGRLEVMVRIREPLTAQQLETTTERWLVIDPVPAAVPTQVAGPKGKAPPVPAPARESGNRSARPLHSLSVLAFDQERLERKILALRQARRPVPPEVAQQYQDIMQRSQWQRAQLEQGGVGIRREYAAQLERQLQFYTEAARRLGNDGSRDAAKEALYRRNLVESELQRLRR*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
45,C2D1A,22,60,Q6P1N0,"[['Q6P1N0', 'Q6P1N0-2']]",,Q6P1N0,Staller Activity Data,TF,GLLVDLSPDGLMIPEDGANDEELEAEFLALVGGQPPALE,


22
60


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.19.13909859A>T(p.Met33Leu),MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLLIPEDGANDEELEA...,33,GLLVDLSPDGLLIPEDGANDEELEAEFLALVGGQPPALE
1,g.19.13909871G>A(p.Asp37Asn),MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLMIPENGANDEELEA...,37,GLLVDLSPDGLMIPENGANDEELEAEFLALVGGQPPALE
2,g.19.13909874G>A(p.Gly38Arg),MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLMIPEDRANDEELEA...,38,GLLVDLSPDGLMIPEDRANDEELEAEFLALVGGQPPALE
3,g.19.13909875G>A(p.Gly38Glu),MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLMIPEDEANDEELEA...,38,GLLVDLSPDGLMIPEDEANDEELEAEFLALVGGQPPALE
4,g.19.13909877G>A(p.Ala39Thr),MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLMIPEDGTNDEELEA...,39,GLLVDLSPDGLMIPEDGTNDEELEAEFLALVGGQPPALE
5,g.19.13909883G>A(p.Asp41Asn),MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLMIPEDGANNEELEA...,41,GLLVDLSPDGLMIPEDGANNEELEAEFLALVGGQPPALE
6,g.19.13909884A>G(p.Asp41Gly),MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLMIPEDGANGEELEA...,41,GLLVDLSPDGLMIPEDGANGEELEAEFLALVGGQPPALE
7,g.19.13909893T>C(p.Leu44Pro),MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLMIPEDGANDEEPEA...,44,GLLVDLSPDGLMIPEDGANDEEPEAEFLALVGGQPPALE
8,g.19.13909898G>T(p.Ala46Ser),MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLMIPEDGANDEELES...,46,GLLVDLSPDGLMIPEDGANDEELESEFLALVGGQPPALE
9,g.19.13909899C>T(p.Ala46Val),MHKRKGPPGPPGRGAAAARQLGLLVDLSPDGLMIPEDGANDEELEV...,46,GLLVDLSPDGLMIPEDGANDEELEVEFLALVGGQPPALE


'MKHLKRWWSAGGGLLHLTLLLSLAGLRVDLDLYLLLPPPTLLQDELLFLGGPASSAYALSPFSASGGWGRAGHLHPKGRELDPAAPPEGQLLREVRALGVPFVPRTSVDAWLVHSVAAGSADEAHGLLGAAAASSTGGAGASVDGGSQAVQGGGGDPRAARSGPLDAGEEEKAPAEPTAQVPDAGGCASEENGVLREKHEAVDHSSQHEENEERVSAQKENSLQQNDDDENKIAEKPDWEAEKTTESRNERHLNGTDTSFSLEDLFQLLSSQPENSLEGISLGDIPLPGSISDGMNSSAHYHVNFSQAISQDVNLHEAILLCPNNTFRRDPTARTSQSQEPFLQLNSHTTNPEQTLPGTNLTGFLSPVDNHMRNLTSQDLLYDLDINIFDEINLMSLATEDNFDPIDVSQLFDEPDSDSGLSLDSSHNNTSVIKSNSSHSVCDEGAIGYCTDHESSSHHDLEGAVGGYYPEPSKLCHLDQSDSDFHGDLTFQHVFHNHTYHLQPTAPESTSEPFPWPGKSQKIRSRYLEDTDRNLSRDEQRAKALHIPFSVDEIVGMPVDSFNSMLSRYYLTDLQVSLIRDIRRRGKNKVAAQNCRKRKLDIILNLEDDVCNLQAKKETLKREQAQCNKAINIMKQKLHDLYHDIFSRLRDDQGRPVNPNHYALQCTHDGSILIVPKELVASGHKKETQKGKRK*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
400,NFE2L3 / NF2L3,252,451,Q9Y4A8,"[['Q9Y4A8'], ['Q9Y4A8'], ['Q9Y4A8'], ['Q9Y4A8']]",ENST00000056233 / nan / ENST00000056233 / nan,Q9Y4A8 / Q9Y4A8 / Q9Y4A8 / Q9Y4A8,"DelRosso et al. / PMID: 15388789, Soto / DelRo...",TF,HLNGTDTSFSLEDLFQLLSSQPENSLEGISLGDIPLPGSISDGMNS...,


252
451


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.7.26183704C>G(p.His252Asp),MKHLKRWWSAGGGLLHLTLLLSLAGLRVDLDLYLLLPPPTLLQDEL...,252,DLNGTDTSFSLEDLFQLLSSQPENSLEGISLGDIPLPGSISDGMNS...
1,g.7.26183704C>T(p.His252Tyr),MKHLKRWWSAGGGLLHLTLLLSLAGLRVDLDLYLLLPPPTLLQDEL...,252,YLNGTDTSFSLEDLFQLLSSQPENSLEGISLGDIPLPGSISDGMNS...
2,g.7.26183705A>G(p.His252Arg),MKHLKRWWSAGGGLLHLTLLLSLAGLRVDLDLYLLLPPPTLLQDEL...,252,RLNGTDTSFSLEDLFQLLSSQPENSLEGISLGDIPLPGSISDGMNS...
3,g.7.26183719G>C(p.Asp257His),MKHLKRWWSAGGGLLHLTLLLSLAGLRVDLDLYLLLPPPTLLQDEL...,257,HLNGTHTSFSLEDLFQLLSSQPENSLEGISLGDIPLPGSISDGMNS...
4,g.7.26183761T>C(p.Ser271Pro),MKHLKRWWSAGGGLLHLTLLLSLAGLRVDLDLYLLLPPPTLLQDEL...,271,HLNGTDTSFSLEDLFQLLSPQPENSLEGISLGDIPLPGSISDGMNS...
...,...,...,...,...
93,g.7.26185039A>G(p.Ile447Met),MKHLKRWWSAGGGLLHLTLLLSLAGLRVDLDLYLLLPPPTLLQDEL...,447,HLNGTDTSFSLEDLFQLLSSQPENSLEGISLGDIPLPGSISDGMNS...
94,g.7.26185040G>A(p.Gly448Ser),MKHLKRWWSAGGGLLHLTLLLSLAGLRVDLDLYLLLPPPTLLQDEL...,448,HLNGTDTSFSLEDLFQLLSSQPENSLEGISLGDIPLPGSISDGMNS...
95,g.7.26185041G>T(p.Gly448Val),MKHLKRWWSAGGGLLHLTLLLSLAGLRVDLDLYLLLPPPTLLQDEL...,448,HLNGTDTSFSLEDLFQLLSSQPENSLEGISLGDIPLPGSISDGMNS...
96,g.7.26185047G>A(p.Cys450Tyr),MKHLKRWWSAGGGLLHLTLLLSLAGLRVDLDLYLLLPPPTLLQDEL...,450,HLNGTDTSFSLEDLFQLLSSQPENSLEGISLGDIPLPGSISDGMNS...


'MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQHDYYSGQPYGQTVNPYTYHHQFNLNGLAGTGAYSPKSEYTYGASYRQYGAYREQPLPAQDPVSVKEEPEAEVRMVNGKPKKVRKPRTIYSSYQLAALQRRFQKAQYLALPERAELAAQLGLTQTQVKIWFQNRRSKFKKLYKNGEVPLEHSPNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPSYLDDPTNSWYHAQNLSGPHLQQQPPQPATLHHASPGPPPNPGAVY*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
88,DLX3,199,263,O60479,[['O60479']],,O60479,"PMID: 9889271, Soto",TF,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPSYL...,
89,DLX3,2,91,O60479,"[['O60479'], ['O60479']]",ENST00000434704 / nan,O60479 / O60479,"DelRosso et al. / PMID: 9889271, Soto",TF,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...,


199
263


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
12,g.17.49991594G>C(p.Gln263Glu),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,263,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPSYL...
13,g.17.49991621C>T(p.Ala254Thr),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,254,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPSYL...
14,g.17.49991621C>A(p.Ala254Ser),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,254,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPSYL...
15,g.17.49991641G>A(p.Pro247Leu),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,247,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPSYL...
16,g.17.49991642G>A(p.Pro247Ser),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,247,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPSYL...
17,g.17.49991645C>G(p.Asp246His),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,246,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPSYL...
18,g.17.49991645C>T(p.Asp246Asn),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,246,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPSYL...
19,g.17.49991651G>C(p.Leu244Val),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,244,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPSYV...
20,g.17.49991657T>A(p.Ser242Cys),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,242,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPYSASPCYL...
21,g.17.49991671T>C(p.Tyr237Cys),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,237,PNNSDSMACNSPPSPALWDTSSHSTPAPARSQLPPPLPCSASPSYL...


2
91


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
35,g.17.49994731C>A(p.Ala90Ser),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,90,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...
36,g.17.49994736T>C(p.Tyr88Cys),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,88,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...
37,g.17.49994739G>T(p.Thr87Asn),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,87,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...
38,g.17.49994767C>T(p.Gly78Ser),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,78,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...
39,g.17.49994772C>A(p.Gly76Val),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,76,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...
40,g.17.49994772C>T(p.Gly76Asp),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,76,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...
41,g.17.49994775G>C(p.Ala75Gly),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,75,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...
42,g.17.49994785T>G(p.Asn72His),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,72,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...
43,g.17.49994791T>A(p.Asn70Tyr),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,70,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...
44,g.17.49994805T>C(p.Tyr65Cys),MSGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAP...,65,SGSFDRKLSSILTDISSSLSCHAGSKDSPTLPESSVTDLGYYSAPQ...


'MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEKRMSKEEERAVKDELLSEKPEVKQKWASRLLAKLRKDIRPEYREDFVLTVTGKKPPCCVLSNPDQKGKMRRIDCLRQADKVWRLDLVMVILFKGIPLESTDGERLVKSPQCSNPGLCVQPHHIGVSVKELDLYLAYFVHAADSSQSESPSQPSDADIKDQPENGHLGFQDSFVTSGVFSVTELVRVSQTPIAAGTGPNFSLSDLESSSYYSMSPGAMRRSLPSTSSTSSTKRLKSVEDEMDSPGEEPFYTGQGRSPGSGSQSSGWHEVEPGMPSPTTLKKSEKSGFSSPSPSQTSSLGTAFTQHHRPVITGPRASPHATPSTLHFPTSPIIQQPGPYFSHPAIRYHPQETLKEFVQLVCPDAGQQAGQVGFLNPNGSSQGKVHNPFLPTPMLPPPPPPPMARPVPLPVPDTKPPTTSTEGGAASPTSPTYSTPSTSPANRFVSVGPRDPSFVNIPQQTQSWYLG*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
401,NFIA,427,509,Q12857,[['Q12857']],,Q12857,"PMID: 9325160, Soto",TF,VHNPFLPTPMLPPPPPPPMARPVPLPVPDTKPPTTSTEGGAASPTS...,


427
509


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
38,g.1.61406629C>T(p.Pro441Leu),MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK...,441,VHNPFLPTPMLPPPLPPPMARPVPLPVPDTKPPTTSTEGGAASPTS...
39,g.1.61406638C>T(p.Pro444Leu),MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK...,444,VHNPFLPTPMLPPPPPPLMARPVPLPVPDTKPPTTSTEGGAASPTS...
40,g.1.61406652G>C(p.Val449Leu),MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK...,449,VHNPFLPTPMLPPPPPPPMARPLPLPVPDTKPPTTSTEGGAASPTS...
41,g.1.61406662C>T(p.Pro452Leu),MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK...,452,VHNPFLPTPMLPPPPPPPMARPVPLLVPDTKPPTTSTEGGAASPTS...
42,g.1.61406719C>A(p.Thr471Lys),MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK...,471,VHNPFLPTPMLPPPPPPPMARPVPLPVPDTKPPTTSTEGGAASPKS...
43,g.1.61426468A>G(p.Tyr475Cys),MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK...,475,VHNPFLPTPMLPPPPPPPMARPVPLPVPDTKPPTTSTEGGAASPTS...
44,g.1.61426491G>A(p.Ala483Thr),MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK...,483,VHNPFLPTPMLPPPPPPPMARPVPLPVPDTKPPTTSTEGGAASPTS...
45,g.1.61426498G>A(p.Arg485Gln),MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK...,485,VHNPFLPTPMLPPPPPPPMARPVPLPVPDTKPPTTSTEGGAASPTS...
46,g.1.61426503G>A(p.Val487Ile),MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK...,487,VHNPFLPTPMLPPPPPPPMARPVPLPVPDTKPPTTSTEGGAASPTS...
47,g.1.61426503G>C(p.Val487Leu),MYSPLCLTQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK...,487,VHNPFLPTPMLPPPPPPPMARPVPLPVPDTKPPTTSTEGGAASPTS...


'MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENSASESSDTELPEKERGGEPKGPEDSGAGGTGCGGADDPAKKKKQRRQRTHFTSQQLQELEATFQRNRYPDMSMREEIAVWTNLTEPRVRVWFKNRRAKWRKRERNQQLDLCKGGYVPQFSGLVQPYEDVYAAGYSYNNWAAKSLAPAPLSTKSFTFFNSMSPLSSQSMFSAPSSISSMTMPSSMGPGAVPGMPNSGLNNINNLTGSSLNSAMSPGACPYGTPASPYSVYRDTCNSSLASLRLKSKQHSSFGYGGLQGPASGLNACQYNS*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
476,PITX1,234,283,P78337,[['P78337']],,P78337,"PMID: 12242290, Soto",TF,VPGMPNSGLNNINNLTGSSLNSAMSPGACPYGTPASPYSVYRDTCN...,


234
283


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
10,g.5.135028885G>C(p.Ser280Trp),MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENS...,280,VPGMPNSGLNNINNLTGSSLNSAMSPGACPYGTPASPYSVYRDTCN...
11,g.5.135028887G>C(p.Asn279Lys),MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENS...,279,VPGMPNSGLNNINNLTGSSLNSAMSPGACPYGTPASPYSVYRDTCK...
12,g.5.135028901G>A(p.Arg275Trp),MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENS...,275,VPGMPNSGLNNINNLTGSSLNSAMSPGACPYGTPASPYSVYWDTCN...
13,g.5.135028907C>A(p.Val273Phe),MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENS...,273,VPGMPNSGLNNINNLTGSSLNSAMSPGACPYGTPASPYSFYRDTCN...
14,g.5.135028910T>C(p.Ser272Gly),MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENS...,272,VPGMPNSGLNNINNLTGSSLNSAMSPGACPYGTPASPYGVYRDTCN...
15,g.5.135028918G>A(p.Ser269Leu),MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENS...,269,VPGMPNSGLNNINNLTGSSLNSAMSPGACPYGTPALPYSVYRDTCN...
16,g.5.135028928T>C(p.Thr266Ala),MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENS...,266,VPGMPNSGLNNINNLTGSSLNSAMSPGACPYGAPASPYSVYRDTCN...
17,g.5.135028931C>G(p.Gly265Arg),MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENS...,265,VPGMPNSGLNNINNLTGSSLNSAMSPGACPYRTPASPYSVYRDTCN...
18,g.5.135028936G>A(p.Pro263Leu),MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENS...,263,VPGMPNSGLNNINNLTGSSLNSAMSPGACLYGTPASPYSVYRDTCN...
19,g.5.135028945C>T(p.Gly260Asp),MDAFKGGMSLERLPEGLRPPPPPPHDMGPAFHLARPADPREPLENS...,260,VPGMPNSGLNNINNLTGSSLNSAMSPDACPYGTPASPYSVYRDTCN...


'MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGAAQSLPAPLTSRADPEKAVQGSPKSSSAPFEAELHLPPKLRRLYGPGGGRLLQGAAAAAAAAAAAAAAAATATAGPRGEAPPPPPPTARPGERPDGAGAAAAAAAAAAAAWDTLKISQAPQVSISRSKSYRENGAPFVPPPPALDELGGPGGVTHPEERLGVAGGPGSAPAAGGGTGTEDDEEELLEDEEDEDEEEELLEDDEEELLEDDARALLKEPRRCPVAATGAVAAAAAAAVATEGGELSPKEELLLHPEDAEGKDGEDSVCLSAGSDSEEGLLKRKQRRYRTTFTSYQLEELERAFQKTHYPDVFTREELAMRLDLTEARVQVWFQNRRAKWRKREKAGAQTHPPGLPFPGPLSATHPLSPYLDASPFPPHHPALDSAWTAAAAAAAAAFPSLPPPPGSASLPPSGAPLGLSTFLGAAVFRHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGALADPATAAADRRASSIAALRLKAKEHAAQLTQLNILPGTSTGKEVC*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
22,ARX,472,562,Q96QS3,[['Q96QS3']],,Q96QS3,"PMID: 17331656, Soto",TF,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGAL...,


472
562


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
3,g.X.25004733G>C(p.His542Gln),MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGA...,542,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGAL...
4,g.X.25004741T>C(p.Lys540Glu),MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGA...,540,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGAL...
5,g.X.25004774C>T(p.Ala529Thr),MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGA...,529,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGAL...
6,g.X.25004785G>A(p.Ala525Val),MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGA...,525,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGAL...
7,g.X.25004786C>A(p.Ala525Ser),MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGA...,525,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGAL...
8,g.X.25004794G>C(p.Thr522Arg),MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGA...,522,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGAL...
9,g.X.25004798C>T(p.Ala521Thr),MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGA...,521,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGAL...
10,g.X.25004801G>A(p.Pro520Ser),MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGA...,520,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGAL...
11,g.X.25004810G>C(p.Leu517Val),MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGA...,517,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGAV...
12,g.X.25004813C>A(p.Ala516Ser),MSNQYQEEGCSERPECKSKSPTLLSSYCIDSILGRRSPCKMRLLGA...,516,RHPAFISPAFGRLFSTMAPLTSASTAAALLRQPTPAVEGAVASGSL...


'MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPESGIVADIELENVLDPDSFYELKSQPLPLRSSLPISLQATPATPATLSASSSAGGSRTPAMSSSSSSRVLLRQQLMRAQAQEQERRERREQAAAAPFPSPAPASPAISVVGVSAGGHTLSRPPPAQVPREVLKVQTHLENPTRYHLQQARRQQVKQYLSTTLGPKLASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSSEKEIDDVIDEIISLESSYNDEMLSYLPGGTTGLQLPSTLPVSGNLLDVYSSQGVATPAITVSNSCPAELPNIKREISETEAKALLKERQKKDNHNLIERRRRFNINDRIKELGTLIPKSSDPEMRWNKGTILKASVDYIRKLQKEQQRSKDLESRQRSLEQANRSLQLRIQELELQAQIHGLPVPPTPGLLSLATTSASDSLKPEQLDIEEEGRPGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPFHLGLEDILMEEEEGVVGGLSGGALSPLRAASDPLLSSVSPAVSKASSRRSSFSMEEES*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
655,TFE3,212,333,P19532,"[['P19532'], ['P19532']]",nan / ENST00000315869,P19532 / P19532 / P19532,"Choi 2000 list / activation_regions.txt, GSL /...",TF,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...,
656,TFE3,472,575,P19532,"[['P19532'], ['P19532'], ['P19532']]",nan / nan / ENST00000315869,P19532 / P19532 / P19532,"PMID: 7479029, Soto / Staller Activity Data / ...",TF,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...,
657,TFE3,1,127,P19532,[['P19532']],,P19532,"PMID: 7479029, Soto",TF,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,


212
333


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.X.49034145C>A(p.Arg331Leu),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,331,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...
1,g.X.49034214C>T(p.Ser308Asn),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,308,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...
2,g.X.49034217C>T(p.Ser307Asn),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,307,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...
3,g.X.49034225A>T(p.Asp304Glu),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,304,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...
4,g.X.49038015T>A(p.Ser294Cys),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,294,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...
5,g.X.49038033T>C(p.Thr288Ala),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,288,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...
6,g.X.49038042C>T(p.Gly285Arg),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,285,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...
7,g.X.49038050T>C(p.Tyr282Cys),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,282,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...
8,g.X.49038094C>G(p.Glu267Asp),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,267,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...
9,g.X.49038097A>C(p.Asp266Glu),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,266,ASQALTPPPGPASAQPLPAPEAAHTTGPTGSAPNSPMALLTIGSSS...


472
575


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
23,g.X.49030170T>G(p.Glu572Asp),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,572,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...
24,g.X.49030177C>T(p.Ser570Asn),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,570,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...
25,g.X.49030190G>A(p.Arg566Cys),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,566,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...
26,g.X.49030193G>A(p.Arg565Cys),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,565,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...
27,g.X.49030213G>A(p.Ala558Val),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,558,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...
28,g.X.49030252C>T(p.Arg545Gln),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,545,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...
29,g.X.49030252C>A(p.Arg545Leu),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,545,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...
30,g.X.49030253G>A(p.Arg545Trp),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,545,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...
31,g.X.49030259G>T(p.Pro543Thr),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,543,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...
32,g.X.49030262A>T(p.Ser542Thr),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,542,PGAATFHVGGGPAQNAPHQQPPAPPSDALLDLHFPSDHLGDLGDPF...


1
127


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
63,g.X.49039291C>T(p.Arg117Gln),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,117,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...
64,g.X.49039301C>T(p.Val114Ile),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,114,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...
65,g.X.49039301C>A(p.Val114Phe),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,114,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...
66,g.X.49039345C>T(p.Gly99Glu),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,99,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...
67,g.X.49039369G>A(p.Thr91Ile),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,91,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...
68,g.X.49039390G>C(p.Ala84Gly),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,84,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...
69,g.X.49039408A>G(p.Leu78Pro),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,78,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...
70,g.X.49040459A>G(p.Ser76Pro),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,76,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...
71,g.X.49040462G>A(p.Arg75Cys),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,75,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...
72,g.X.49040468G>A(p.Pro73Ser),MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...,73,MSHAAEPARDGVEASAEGPRAVFVLLEERRPADSAQLLSLNSLLPE...


'MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSGVGLARAHFEKQPPSNLRKSNFFHFVLALYDRQGQPVEIERTAFVDFVEKEKEPNNEKTNNGIHYKLQLLYSNGVRTEQDLYVRLIDSMTKQAIVYEGQDKNPEMCRVLLTHEIMCSRCCDKKSCGNRNETPSDPVIIDRFFLKFFLKCNQNCLKNAGNPRDMRRFQVVVSTTVNVDGHVLAVSDNMFVHNNSKHGRRARRLDPSEGTAPSYLENATPCIKAISPSEGWTTGGATVIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPPRHIPGVVEVTLSYKSKQFCKGAPGRFVYTALNEPTIDYGFQRLQKVIPRHPGDPERLPKEVLLKRAADLVEALYGMPHNNQEIILKRAADIAEALYSVPRNHNQIPTLGNNPAHTGMMGVNSFSSQLAVNVSETSQANDQVGYSRNTSSVSPRGYVPSSTPQQSNYNTVSTSMNGYGSGAMASLGVPGSPGFLNGSSANSPYGIVPSSPTMAASSVTLPSNCSSTHGIFSFSPANVISAVKQKSAFAPVVRPQASPPPSCTSANGNGLQAMSGLVVPPM*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
101,EBF3,281,360,Q9H4W6,[['Q9H4W6-2']],ENST00000368648,Q9H4W6,DelRosso et al.,TF,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPPRHIPGVVEV...,


281
360


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.10.129848444C>G(p.Arg359Thr),MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSG...,359,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPPRHIPGVVEV...
1,g.10.129848463T>C(p.Ile353Val),MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSG...,353,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPPRHIPGVVEV...
2,g.10.129848478G>A(p.Leu348Phe),MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSG...,348,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPPRHIPGVVEV...
3,g.10.129867141C>T(p.Ala347Thr),MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSG...,347,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPPRHIPGVVEV...
4,g.10.129867156G>A(p.Arg342Cys),MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSG...,342,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPPRHIPGVVEV...
5,g.10.129867213C>T(p.Val323Ile),MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSG...,323,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPPRHIPGIVEV...
6,g.10.129867227C>G(p.Arg318Thr),MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSG...,318,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPPTHIPGVVEV...
7,g.10.129867230G>A(p.Pro317Leu),MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSG...,317,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPLRHIPGVVEV...
8,g.10.129867231G>T(p.Pro317Thr),MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSG...,317,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTPTRHIPGVVEV...
9,g.10.129867233G>A(p.Pro316Leu),MFGIQENIPRGGTTMKEEPLGSGMNPVRSWMHTAGVVDANTAAQSG...,316,VIIIGDNFFDGLQVVFGTMLVWSELITPHAIRVQTLPRHIPGVVEV...


'MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAARREAASPGTPGPPPPPPAASGPGPGAAAAPHLLAASILADLRGGPGAAPGGASPASSSSAASSPSSGRAPGAAPSAAAKSHRCPFPDCAKAYYKSSHLKSHLRTHTGERPFACDWQGCDKKFARSDELARHHRTHTGEKRFSCPLCSKRFTRSDHLAKHARRHPGFHPDLLRRPGARSTSPSDSLPCSLAGSPAPSPAPSPAPAGL*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
297,KLF16,209,252,Q9BXK1,[['Q9BXK1']],,Q9BXK1,"PMID: 22203677, Soto",TF,HPGFHPDLLRRPGARSTSPSDSLPCSLAGSPAPSPAPSPAPAGL,


209
252


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.19.1854470C>T(p.Ala250Thr),MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAA...,250,HPGFHPDLLRRPGARSTSPSDSLPCSLAGSPAPSPAPSPAPTGL
1,g.19.1854472G>C(p.Pro249Arg),MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAA...,249,HPGFHPDLLRRPGARSTSPSDSLPCSLAGSPAPSPAPSPARAGL
2,g.19.1854473G>A(p.Pro249Ser),MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAA...,249,HPGFHPDLLRRPGARSTSPSDSLPCSLAGSPAPSPAPSPASAGL
3,g.19.1854473G>C(p.Pro249Ala),MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAA...,249,HPGFHPDLLRRPGARSTSPSDSLPCSLAGSPAPSPAPSPAAAGL
4,g.19.1854484G>C(p.Pro245Arg),MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAA...,245,HPGFHPDLLRRPGARSTSPSDSLPCSLAGSPAPSPARSPAPAGL
5,g.19.1854485G>C(p.Pro245Ala),MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAA...,245,HPGFHPDLLRRPGARSTSPSDSLPCSLAGSPAPSPAASPAPAGL
6,g.19.1854487G>A(p.Ala244Val),MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAA...,244,HPGFHPDLLRRPGARSTSPSDSLPCSLAGSPAPSPVPSPAPAGL
7,g.19.1854504G>C(p.Ser238Arg),MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAA...,238,HPGFHPDLLRRPGARSTSPSDSLPCSLAGRPAPSPAPSPAPAGL
8,g.19.1854524G>A(p.Pro232Ser),MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAA...,232,HPGFHPDLLRRPGARSTSPSDSLSCSLAGSPAPSPAPSPAPAGL
9,g.19.1854544G>A(p.Thr225Ile),MSAAVACVDYFAADVLMAISSGAVVHRGRPGPEGAGPAAGLDVRAA...,225,HPGFHPDLLRRPGARSISPSDSLPCSLAGSPAPSPAPSPAPAGL


'MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKGFFRRSMKRKALFTCPFNGDCRITKDNRRHCQACRLKRCVDIGMMKEFILTDEEVQRKREMILKRKEEEALKDSLRPKLSEEQQRIIAILLDAHHKTYDPTYSDFCQFRPPVRVNDGGGSHPSRPNSRHTPSFSGDSSSSCSDHCITSSDMMDSSSFSNLDLSEEDSDDPSVTLELSQLSMLPHLADLVSYSIQKVIGFAKMIPGFRDLTSEDQIVLLKSSAIEVIMLRSNESFTMDDMSWTCGNQDYKYRVSDVTKAGHSLELIEPLIKFQVGLKKLNLHEEEHVLLMAICIVSPDRPGVQDAALIEAIQDRLSNTLQTYIRCRHPPPGSHLLYAKMIQKLADLRSLNEEHSKQYRCLSFQPECSMKLTPLVLEVFGNEIS*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
691,VDR,415,427,P11473,[['P11473']],,P11473,"PMID: 15908514, Soto",TF,TPLVLEVFGNEIS,
692,VDR,195,238,P11473,"[['P11473'], ['P11473']]",nan / nan,P11473 / P11473,"Choi 2000 list OR uniprot. check, GSL / Stalle...",TF,DMMDSSSFSNLDLSEEDSDDPSVTLELSQLSMLPHLADLVSYSI,


415
427


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
38,g.12.47844750G>A(p.Ser427Phe),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,427,TPLVLEVFGNEIF
39,g.12.47844759T>C(p.Asn424Ser),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,424,TPLVLEVFGSEIS


195
238


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
40,g.12.47855697G>C(p.Leu230Val),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,230,DMMDSSSFSNLDLSEEDSDDPSVTLELSQLSMLPHVADLVSYSI
41,g.12.47855720G>C(p.Ser222Cys),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,222,DMMDSSSFSNLDLSEEDSDDPSVTLELCQLSMLPHLADLVSYSI
42,g.12.47855721A>C(p.Ser222Ala),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,222,DMMDSSSFSNLDLSEEDSDDPSVTLELAQLSMLPHLADLVSYSI
43,g.12.47855725C>G(p.Glu220Asp),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,220,DMMDSSSFSNLDLSEEDSDDPSVTLDLSQLSMLPHLADLVSYSI
44,g.12.47855742G>A(p.Pro215Ser),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,215,DMMDSSSFSNLDLSEEDSDDSSVTLELSQLSMLPHLADLVSYSI
45,g.12.47855743G>T(p.Asp214Glu),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,214,DMMDSSSFSNLDLSEEDSDEPSVTLELSQLSMLPHLADLVSYSI
46,g.12.47855744T>C(p.Asp214Gly),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,214,DMMDSSSFSNLDLSEEDSDGPSVTLELSQLSMLPHLADLVSYSI
47,g.12.47855753T>C(p.Asp211Gly),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,211,DMMDSSSFSNLDLSEEGSDDPSVTLELSQLSMLPHLADLVSYSI
48,g.12.47855762C>T(p.Ser208Asn),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,208,DMMDSSSFSNLDLNEEDSDDPSVTLELSQLSMLPHLADLVSYSI
49,g.12.47855773A>C(p.Asn204Lys),MEAMAASTSLPDPGDFDRNVPRICGVCGDRATGFHFNAMTCEGCKG...,204,DMMDSSSFSKLDLSEEDSDDPSVTLELSQLSMLPHLADLVSYSI


'MTTLDSNNNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQGCPTYFPPSPTGSLTQDPARSFGSIPPSLSDDGSPSSSSSSSSSSSSFYNGSPPGSLQVAMEDSSRVSPSKSTSNITKLNGMVLLCKVCGDVASGFHYGVHACEGCKGFFRRSIQQNIQYKRCLKNENCSIVRINRNRCQQCRFKKCLSVGMSRDAVRFGRIPKREKQRMLAEMQSAMNLANNQLSSQCPLETSPTQHPTPGPMGPSPPPAPVPSPLVGFSQFPQQLTPPRSPSPEPTVEDVISQVARAHREIFTYAHDKLGSSPGNFNANHASGSPPATTPHRWENQGCPPAPNDNNTLAAQRHNEALNGLRQAPSSYPPTWPPGPAHHSCHQSNSNGHRLCPTHVYAAPEGKAPANSPRQGNSKNVLLACPMNMYPHGRSGRTVQEIWEDFSMSFTPAVREVVEFAKHIPGFRDLSQHDQVTLLKAGTFEVLMVRFASLFNVKDQTVMFLSRTTYSLQELGAMGMGDLLSAMFDFSEKLNSLALTEEELGLFTAVVLVSADRSGMENSASVEQLQETLLRALRALVLKNRPLETSRFTKLLLKLPDLRTLNNMHSEKLLSFRVDAQ*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
418,NR1D1,8,285,P20393,"[['P20393'], ['P20393'], ['P20393']]",nan / nan / nan,P20393 / P20393 / P20393,"PMID: 23201262, Soto / activation_regions.txt,...",TF,NNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQGCPTYFP...,


8
285


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.17.40096082G>A(p.Arg204Cys),MTTLDSNNNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQ...,204,NNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQGCPTYFP...
1,g.17.40096463G>A(p.Ser195Phe),MTTLDSNNNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQ...,195,NNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQGCPTYFP...
2,g.17.40096469C>G(p.Cys193Ser),MTTLDSNNNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQ...,193,NNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQGCPTYFP...
3,g.17.40096503G>A(p.Arg182Cys),MTTLDSNNNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQ...,182,NNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQGCPTYFP...
4,g.17.40096506T>G(p.Asn181His),MTTLDSNNNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQ...,181,NNTGGVITYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQGCPTYFP...
...,...,...,...,...
97,g.17.40097328T>C(p.Asn36Ser),MTTLDSNNNTGGVITYIGSSGSSPSRTSPESLYSDSSNGSFQSLTQ...,36,NNTGGVITYIGSSGSSPSRTSPESLYSDSSNGSFQSLTQGCPTYFP...
98,g.17.40097355G>A(p.Thr27Ile),MTTLDSNNNTGGVITYIGSSGSSPSRISPESLYSDNSNGSFQSLTQ...,27,NNTGGVITYIGSSGSSPSRISPESLYSDNSNGSFQSLTQGCPTYFP...
99,g.17.40097385A>G(p.Ile17Thr),MTTLDSNNNTGGVITYTGSSGSSPSRTSPESLYSDNSNGSFQSLTQ...,17,NNTGGVITYTGSSGSSPSRTSPESLYSDNSNGSFQSLTQGCPTYFP...
100,g.17.40097391G>A(p.Thr15Ile),MTTLDSNNNTGGVIIYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQ...,15,NNTGGVIIYIGSSGSSPSRTSPESLYSDNSNGSFQSLTQGCPTYFP...


'MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFTGSNVEDRSSSGSWGNGGHPSPSRNYGDGTPYDHMTSRDLGSHDNLSPPFVNSRIQSKTERGSYSSYGRESNLQGCHQQSLLGGDMDMGNPGTLSPTKPGSQYYQYSSNNPRRRPLHSSAMEVQTKKVRKVPPGLPSSVYAPSASTADYNRDSPGYPSSKPATSTFPSSFFMQDGHHSSDPWSSSSGMNQPGYAGMLGNSSHIPQSSSYCSLHPHERLSYPSHSSADINSSLPPMSTFHRSGTNHYSTSSCTPPANGTDSIMANRGSGAAGSSQTGDALGKALASIYSPDHTNNSFSSNPSTPVGSPPSLSAGTAVWSRNGGQASSSPNYEGPLHSLQSRIEDRLERLDDAIHVLRNHAVGPSTAMPGGHGDMHGIIGPSHNGAMGGLGSGYGTGLLSANRHSLMVGTHREDGVALRGSHSLLPNQVPVPQLPVQSATSPDLNPPQDPYRGMPPGLQGQSVSSGSSEIKSDDEGDENLQDTKSSEDKKLDDDKKDIKSITSNNDDEDLTPEQKAEREKERRMANNARERLRVRDINEAFKELGRMVQLHLKSDKPQTKLLILHQAVAVILSLEQQVRERNLNPKAACLKRREEEKVSSEPPPLSLAGPHPGMGDASNHMGQM*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
633,TCF4,1,100,P15884,"[['P15884', 'P15884-3', 'P15884-4', 'P15884-5']]",,P15884,"PMID: 24594265, Soto",TF,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,
634,TCF4,340,400,P15884,"[['P15884', 'P15884-3']]",,P15884,"PMID: 24594265, Soto",TF,SFSSNPSTPVGSPPSLSAGTAVWSRNGGQASSSPNYEGPLHSLQSR...,


1
100


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.18.55461054T>C(p.Asn90Ser),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,90,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...
1,g.18.55461064A>G(p.Ser87Pro),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,87,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...
2,g.18.55461078C>A(p.Ser82Ile),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,82,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...
3,g.18.55461083C>T(p.Met80Ile),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,80,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...
4,g.18.55461086G>C(p.His79Gln),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,79,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...
5,g.18.55461088G>C(p.His79Asp),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,79,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...
6,g.18.55461090T>C(p.Asp78Gly),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,78,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...
7,g.18.55461093T>C(p.Tyr77Cys),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,77,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...
8,g.18.55461100T>C(p.Thr75Ala),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,75,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...
9,g.18.55461111T>C(p.Tyr71Cys),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,71,MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...


340
400


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
16,g.18.55254652C>T(p.Val399Ile),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,399,SFSSNPSTPVGSPPSLSAGTAVWSRNGGQASSSPNYEGPLHSLQSR...
17,g.18.55254667C>G(p.Asp394His),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,394,SFSSNPSTPVGSPPSLSAGTAVWSRNGGQASSSPNYEGPLHSLQSR...
18,g.18.55254690A>G(p.Ile386Thr),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,386,SFSSNPSTPVGSPPSLSAGTAVWSRNGGQASSSPNYEGPLHSLQSR...
19,g.18.55254693C>T(p.Arg385Gln),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,385,SFSSNPSTPVGSPPSLSAGTAVWSRNGGQASSSPNYEGPLHSLQSQ...
20,g.18.55257321G>C(p.His380Gln),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,380,SFSSNPSTPVGSPPSLSAGTAVWSRNGGQASSSPNYEGPLQSLQSR...
21,g.18.55257356C>T(p.Ala369Thr),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,369,SFSSNPSTPVGSPPSLSAGTAVWSRNGGQTSSSPNYEGPLHSLQSR...
22,g.18.55257362C>G(p.Gly367Arg),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,367,SFSSNPSTPVGSPPSLSAGTAVWSRNGRQASSSPNYEGPLHSLQSR...
23,g.18.55259955G>T(p.Leu355Ile),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,355,SFSSNPSTPVGSPPSISAGTAVWSRNGGQASSSPNYEGPLHSLQSR...
24,g.18.55259982A>C(p.Ser346Ala),MHHQQRMAALGTDKELSDLLDFSAMFSPPVSSGKNGPTSLASGHFT...,346,SFSSNPATPVGSPPSLSAGTAVWSRNGGQASSSPNYEGPLHSLQSR...


'MDPEQSVKGTKKAEGSPRKRLTKGEAIQTSVSSSVPYPGSGTAATQESPAQELLAPQPFPGPSSVLREGSQEKTGQQQKPPKRPPIEASVHISQLPQHPLTPAFMSPGKPEHLLEGSTWQLVDPMRPGPSGSFVAPGLHPQSQLLPSHASIIPPEDLPGVPKVFVPRPSQVSLKPTEEAHKKERKPQKPGKYICQYCSRPCAKPSVLQKHIRSHTGERPYPCGPCGFSFKTKSNLYKHRKSHAHRIKAGLASGMGGEMYPHGLEMERIPGEEFEEPTEGESTDSEEETSATSGHPAELSPRPKQPLLSSGLYSSGSHSSSHERCSLSQSSTAQSLEDPPPFVEPSSEHPLSHKPEDTHTIKQKLALRLSERKKVIDEQAFLSPGSKGSTESGYFSRSESAEQQVSPPNTNAKSYAEIIFGKCGRIGQRTAMLTATSTQPLLPLSTEDKPSLVPLSVPRTQVIEHITKLITINEAVVDTSEIDSVKPRRSSLSRRSSMESPKSSLYREPLSSHSEKTKPEQSLLSLQHPPSTAPPVPLLRSHSMPSAACTISTPHHPFRGSYSFDDHITDSEALSHSSHVFTSHPRMLKRQPAIELPLGGEYSSEEPGPSSKDTASKPSDEVEPKESELTKKTKKGLKTKGVIYECNICGARYKKRDNYEAHKKYYCSELQIAKPISAGTHTSPEAEKSQIEHEPWSQMMHYKLGTTLELTPLRKRRKEKSLGDEEEPPAFESTKSQFGSPGPSDAARNLPLESTKSPAEPSKSVPSLEGPTGFQPRTPKPGSGSESGKERRTTSKEISVIQHTSSFEKSDSLEQPSGLEGEDKPLAQFPSPPPAPHGRSAHSLQPKLVRQPNIQVPEILVTEEPDRPDTEPEPPPKEPEKTEEFQWPQRSQTLAQLPAEKLPPKKKRLRLAEMAQSSGESSFESSVPLSRSPSQESNVSLSGSSRSASFERDDHGKAEAPSPSSDMRPKPLGTHMLTVPSHHPHAREMRRSASEQSPNV

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
234,HIVEP3,211,1074,Q5T1R4,"[['Q5T1R4', 'Q5T1R4-2']]",,Q5T1R4,"activation_regions.txt, GSL",TF,IRSHTGERPYPCGPCGFSFKTKSNLYKHRKSHAHRIKAGLASGMGG...,


211
1074


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.1.41581583G>A(p.Pro1072Leu),MDPEQSVKGTKKAEGSPRKRLTKGEAIQTSVSSSVPYPGSGTAATQ...,1072,IRSHTGERPYPCGPCGFSFKTKSNLYKHRKSHAHRIKAGLASGMGG...
1,g.1.41581587C>G(p.Glu1071Gln),MDPEQSVKGTKKAEGSPRKRLTKGEAIQTSVSSSVPYPGSGTAATQ...,1071,IRSHTGERPYPCGPCGFSFKTKSNLYKHRKSHAHRIKAGLASGMGG...
2,g.1.41581590C>T(p.Glu1070Lys),MDPEQSVKGTKKAEGSPRKRLTKGEAIQTSVSSSVPYPGSGTAATQ...,1070,IRSHTGERPYPCGPCGFSFKTKSNLYKHRKSHAHRIKAGLASGMGG...
3,g.1.41581592C>T(p.Ser1069Asn),MDPEQSVKGTKKAEGSPRKRLTKGEAIQTSVSSSVPYPGSGTAATQ...,1069,IRSHTGERPYPCGPCGFSFKTKSNLYKHRKSHAHRIKAGLASGMGG...
4,g.1.41581593T>A(p.Ser1069Cys),MDPEQSVKGTKKAEGSPRKRLTKGEAIQTSVSSSVPYPGSGTAATQ...,1069,IRSHTGERPYPCGPCGFSFKTKSNLYKHRKSHAHRIKAGLASGMGG...
...,...,...,...,...
331,g.1.41584074G>A(p.His242Tyr),MDPEQSVKGTKKAEGSPRKRLTKGEAIQTSVSSSVPYPGSGTAATQ...,242,IRSHTGERPYPCGPCGFSFKTKSNLYKHRKSYAHRIKAGLASGMGG...
332,g.1.41584117G>C(p.Phe227Leu),MDPEQSVKGTKKAEGSPRKRLTKGEAIQTSVSSSVPYPGSGTAATQ...,227,IRSHTGERPYPCGPCGLSFKTKSNLYKHRKSHAHRIKAGLASGMGG...
333,g.1.41584131C>T(p.Gly223Ser),MDPEQSVKGTKKAEGSPRKRLTKGEAIQTSVSSSVPYPGSGTAATQ...,223,IRSHTGERPYPCSPCGFSFKTKSNLYKHRKSHAHRIKAGLASGMGG...
334,g.1.41584154G>A(p.Thr215Ile),MDPEQSVKGTKKAEGSPRKRLTKGEAIQTSVSSSVPYPGSGTAATQ...,215,IRSHIGERPYPCGPCGFSFKTKSNLYKHRKSHAHRIKAGLASGMGG...


'MMSYLKQPPYGMNGLGLAGPAMDLLHPSVGYPATPRKQRRERTTFTRSQLDVLEALFAKTRYPDIFMREEVALKINLPESRVQVWFKNRRAKCRQQQQSGSGTKSRPAKKKSSPVRESSGSESSGQFTPPAVSSSASSSSSASSSSANPAAAAAAGLGGNPVAAASSLSTPAASSIWSPASISPGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATAAASYPMSYGQGGSYGQGYPTPSSSYFGGVDCSSYLAPMHSHHHPHQLSPMAPSSMAGHHHHHPHAHHPLSQSSGHHHHHHHHHHQGYGGSGLAFNSADCLDYKEPGAAAASSAWKLNFNSPDCLDYKDQASWRFQVL*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
449,OTX1,172,354,P32242,[['P32242']],,P32242,"PMID: 19890851, Soto",TF,AASSIWSPASISPGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATA...,


172
354


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
10,g.2.63055787C>T(p.Pro179Leu),MMSYLKQPPYGMNGLGLAGPAMDLLHPSVGYPATPRKQRRERTTFT...,179,AASSIWSLASISPGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATA...
11,g.2.63055790C>T(p.Ala180Val),MMSYLKQPPYGMNGLGLAGPAMDLLHPSVGYPATPRKQRRERTTFT...,180,AASSIWSPVSISPGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATA...
12,g.2.63055792T>C(p.Ser181Pro),MMSYLKQPPYGMNGLGLAGPAMDLLHPSVGYPATPRKQRRERTTFT...,181,AASSIWSPAPISPGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATA...
13,g.2.63055801C>G(p.Pro184Ala),MMSYLKQPPYGMNGLGLAGPAMDLLHPSVGYPATPRKQRRERTTFT...,184,AASSIWSPASISAGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATA...
14,g.2.63055801C>T(p.Pro184Ser),MMSYLKQPPYGMNGLGLAGPAMDLLHPSVGYPATPRKQRRERTTFT...,184,AASSIWSPASISSGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATA...
...,...,...,...,...
72,g.2.63056264C>A(p.Pro338His),MMSYLKQPPYGMNGLGLAGPAMDLLHPSVGYPATPRKQRRERTTFT...,338,AASSIWSPASISPGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATA...
73,g.2.63056270G>C(p.Cys340Ser),MMSYLKQPPYGMNGLGLAGPAMDLLHPSVGYPATPRKQRRERTTFT...,340,AASSIWSPASISPGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATA...
74,g.2.63056283G>T(p.Lys344Asn),MMSYLKQPPYGMNGLGLAGPAMDLLHPSVGYPATPRKQRRERTTFT...,344,AASSIWSPASISPGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATA...
75,g.2.63056290G>T(p.Ala347Ser),MMSYLKQPPYGMNGLGLAGPAMDLLHPSVGYPATPRKQRRERTTFT...,347,AASSIWSPASISPGSAPASVSVPEPLAAPSNTSCMQRSVAAGAATA...


'MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGISPQHIAQDSSLDGPPGPPDGATVPLEGFSLSQAADLANKGPKWEKSHAEIAEQAKHEAEIETRIAELRKEGFWSLKRLPKVPEPPRPKGHWDYLCEEMQWLSADFAQERRWKRGVARKVVRMVIRHHEEQRQKEERARREEQAKLRRIASTMAKDVRQFWSNVEKVVQFKQQSRLEEKRKKALDLHLDFIVGQTEKYSDLLSQSLNQPLTSSKAGSSPCLGSSSAASSPPPPASRLDDEDGDFQPQEDEEEDDEETIEVEEQQEGNDAEAQRREIELLRREGELPLEELLRSLPPQLLEGPSSPSQTPSSHDSDTRDGPEEGAEEEPPQVLEIKPPPSAVTQRNKQPWHPDEDDEEFTANEEEAEDEEDTIAAEEQLEGEVDHAMELSELAREGELSMEELLQQYAGAYAPGSGSSEDEDEDEVDANSSDCEPEGPVEAEEPPQEDSSSQSDSVEDRSEDEEDEHSEEEETSGSSASEESESEESEDAQSQSQADEEEEDDDFGVEYLLARDEEQSEADAGSGPPTPGPTTLGPKKEITDIAAAAESLQPKGYTLATTQVKTPIPLLLRGQLREYQHIGLDWLVTMYEKKLNGILADEMGLGKTIQTISLLAHLACEKGNWGPHLIIVPTSVMLNWEMELKRWCPSFKILTYYGAQKERKLKRQGWTKPNAFHVCITSYKLVLQDHQAFRRKNWRYLILDEAQNIKNFKSQRWQSLLNFNSQRRLLLTGTPLQNSLMELWSLMHFLMPHVFQSHREFKEWFSNPLTGMIEGSQEYNEGLVKRLHKVLRPFLLRRVKVDVEKQMPKKYEHVIRCRLSKRQRCLYDDFMAQTTTKETLATGHFMSVINILMQLRKVCNHPNLFDPRPVTSPFITPGICFSTASLVLRATDVHPLQRIDMGRFDLIGLEGRVSRYEADTFLPRHRLSRRVLLEVATAPDPPPRPKPVKMKVNRMLQ

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
600,SRCAP,2575,3230,Q6ZRS2,[['Q6ZRS2']],,Q6ZRS2,"PMID: 11522779, Soto",TF,SSLSLVPPKDLLPVAVEILPVSEKNLSLTPSAPSLTLEAGSIPNGQ...,
601,SRCAP,198,1445,Q6ZRS2,[['Q6ZRS2']],,Q6ZRS2,"PMID: 11522779, Soto",TF,MAKDVRQFWSNVEKVVQFKQQSRLEEKRKKALDLHLDFIVGQTEKY...,


2575
3230


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.16.30738612C>T(p.Arg2858Cys),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,2858,SSLSLVPPKDLLPVAVEILPVSEKNLSLTPSAPSLTLEAGSIPNGQ...
1,g.16.30738616G>A(p.Arg2859Gln),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,2859,SSLSLVPPKDLLPVAVEILPVSEKNLSLTPSAPSLTLEAGSIPNGQ...
2,g.16.30738643G>A(p.Arg2868Lys),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,2868,SSLSLVPPKDLLPVAVEILPVSEKNLSLTPSAPSLTLEAGSIPNGQ...
3,g.16.30737767C>T(p.Ser2576Leu),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,2576,SLLSLVPPKDLLPVAVEILPVSEKNLSLTPSAPSLTLEAGSIPNGQ...
4,g.16.30737773C>G(p.Ser2578Cys),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,2578,SSLCLVPPKDLLPVAVEILPVSEKNLSLTPSAPSLTLEAGSIPNGQ...
...,...,...,...,...
334,g.16.30739717G>A(p.Arg3226His),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,3226,SSLSLVPPKDLLPVAVEILPVSEKNLSLTPSAPSLTLEAGSIPNGQ...
335,g.16.30739717G>T(p.Arg3226Leu),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,3226,SSLSLVPPKDLLPVAVEILPVSEKNLSLTPSAPSLTLEAGSIPNGQ...
336,g.16.30739723C>T(p.Ala3228Val),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,3228,SSLSLVPPKDLLPVAVEILPVSEKNLSLTPSAPSLTLEAGSIPNGQ...
337,g.16.30739726A>G(p.Lys3229Arg),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,3229,SSLSLVPPKDLLPVAVEILPVSEKNLSLTPSAPSLTLEAGSIPNGQ...


198
1445


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
339,g.16.30707687G>A(p.Arg203Lys),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,203,MAKDVKQFWSNVEKVVQFKQQSRLEEKRKKALDLHLDFIVGQTEKY...
340,g.16.30709519C>A(p.Gln214Lys),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,214,MAKDVRQFWSNVEKVVKFKQQSRLEEKRKKALDLHLDFIVGQTEKY...
341,g.16.30709523T>A(p.Phe215Tyr),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,215,MAKDVRQFWSNVEKVVQYKQQSRLEEKRKKALDLHLDFIVGQTEKY...
342,g.16.30709552C>A(p.Arg225Ser),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,225,MAKDVRQFWSNVEKVVQFKQQSRLEEKSKKALDLHLDFIVGQTEKY...
343,g.16.30709588G>C(p.Val237Leu),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,237,MAKDVRQFWSNVEKVVQFKQQSRLEEKRKKALDLHLDFILGQTEKY...
...,...,...,...,...
742,g.16.30723680A>G(p.Asn1419Ser),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,1419,MAKDVRQFWSNVEKVVQFKQQSRLEEKRKKALDLHLDFIVGQTEKY...
743,g.16.30723692T>G(p.Leu1423Arg),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,1423,MAKDVRQFWSNVEKVVQFKQQSRLEEKRKKALDLHLDFIVGQTEKY...
744,g.16.30723715G>A(p.Val1431Ile),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,1431,MAKDVRQFWSNVEKVVQFKQQSRLEEKRKKALDLHLDFIVGQTEKY...
745,g.16.30723739C>A(p.Leu1439Ile),MQSSPSPAHPQLPVLQTQMVSDGMTGSNPVSPASSSSPASSGAGGI...,1439,MAKDVRQFWSNVEKVVQFKQQSRLEEKRKKALDLHLDFIVGQTEKY...


'MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAGPLGQGALDAVQSLPLKNPFYDSSDNPYTRWLASTEGLQYSLHGLAAGAPPQDSSSKSPEPSADESPDNDKETPGGGGDAGKKRKRRVLFSKAQTYELERRFRQQRYLSAPEREHLASLIRLTPTQVKIWFQNHRYKMKRARAEKGMEVTPLPSPRRVAVPVLVRDGKPCHALKAQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPQYPTAHPLVQAQQWTW*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
411,NKX2-2,220,273,O95096,[['O95096']],,O95096,"PMID: 10944215, Soto",TF,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPQYPTAHPL...,


220
273


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
11,g.20.21511936T>G(p.Gln270Pro),MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAG...,270,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPQYPTAHPL...
12,g.20.21511943C>A(p.Ala268Ser),MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAG...,268,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPQYPTAHPL...
13,g.20.21511949C>A(p.Val266Phe),MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAG...,266,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPQYPTAHPL...
14,g.20.21511955G>C(p.Pro264Ala),MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAG...,264,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPQYPTAHAL...
15,g.20.21511956G>T(p.His263Gln),MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAG...,263,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPQYPTAQPL...
16,g.20.21511961C>G(p.Ala262Pro),MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAG...,262,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPQYPTPHPL...
17,g.20.21511964T>C(p.Thr261Ala),MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAG...,261,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPQYPAAHPL...
18,g.20.21511967G>C(p.Pro260Ala),MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAG...,260,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPQYATAHPL...
19,g.20.21511973G>T(p.Gln258Lys),MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAG...,258,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTPKYPTAHPL...
20,g.20.21511975G>A(p.Pro257Leu),MSLTNTKTGFSVKDILDLPDTNDEEGSVAEGPEEENEGPEPAKRAG...,257,AQDLAAATFQAGIPFSAYSAQSLQHMQYNAQYSSASTLQYPTAHPL...


'MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEELAELLSANISDIDSLSVKPDKCKILKKTVDQIQLMKRMEQEKSTTDDDVQKSDISSSSQGVIEKESLGPLLLEALDGFFFVVNCEGRIVFVSENVTSYLGYNQEELMNTSVYSILHVGDHAEFVKNLLPKSLVNGVPWPQEATRRNSHTFNCRMLIHPPDEPGTENQEACQRYEVMQCFTVSQPKSIQEDGEDFQSCLICIARRLPRPPAITGVESFMTKQDTTGKIISIDTSSLRAAGRTGWEDLVRKCIYAFFQPQGREPSYARQLFQEVMTRGTASSPSYRFILNDGTMLSAHTKCKLCYPQSPDMQPFIMGIHIIDREHSGLSPQDDTNSGMSIPRVNPSVNPSISPAHGVARSSTLPPSNSNMVSTRINRQQSSDLHSSSHSNSSNSQGSFGCSPGSQIVANVALNQGQASSQSSNPSLNLNNSPMEGTGISLAQFMSPRRQVTSGLATRPRMPNNSFPPNISTLSSPVGMTSSACNNNNRSYSNIPVTSLQGMNEGPNNSVGFSASSPVLRQMSSQNSPSRLNIQPAKAESKDNKEIASILNEMIQSDNSSSDGKPLDSGLLHNNDRLSDGDSKYSQTSHKLVQLLTTTAEQQLRHADIDTSCKDVLSCTGTSNSASANSSGGSCPSSHSSLTERHKILHRLLQEGSPSDITTLSVEPDKKDSASTSVSVTGQVQGNSSIKLELDASKKKESKDHQLLRYLLDKDEKDLRSTPNLSLDDVKVKVEKKEQMDPCNTNPTPMTKPTPEEIKLEAQSQFTADLDQFDQLLPTLEKAAQLPGLCETDRMDGAVTSVTIKSEILPASLQSATARPTSRLNRLPELELEAIDNQFGQPGTGDQIPWTNNTVTAINQSKSEDQCISSQLDELLCPPTTVEGRNDEKALLEQLVSFLSGKDETELAELDRALGIDKLVQGGGLDVLSERFPPQQATPPLIMEERPNLYSQPYSSPS

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
372,NCOA1,1,93,Q15788,"[['Q15788', 'Q15788-2', 'Q15788-3']]",,Q15788,"PMID: 9575154, Soto",TF,MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,
373,NCOA1,1241,1385,Q15788,"[['Q15788', 'Q15788-2', 'Q15788-3']]",,Q15788,"PMID: 9427757, Soto",TF,GEANFAPSLSPGSSMVPMPIPPPQSSLLQQTPPASGYQSPDMKAWQ...,
374,NCOA1,840,1011,Q15788,"[['Q15788', 'Q15788-2', 'Q15788-3'], ['Q15788-...",nan / ENST00000288599,Q15788 / Q15788,"PMID: 9427757, 9575154, Soto / DelRosso et al.",TF,VTSVTIKSEILPASLQSATARPTSRLNRLPELELEAIDNQFGQPGT...,


1
93


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.2.24658690G>A(p.Gly5Arg),MSGLRDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,5,MSGLRDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...
1,g.2.24658705G>A(p.Asp10Asn),MSGLGDSSSNPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,10,MSGLGDSSSNPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...
2,g.2.24658708C>T(p.Pro11Ser),MSGLGDSSSDSANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,11,MSGLGDSSSDSANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...
3,g.2.24658714A>G(p.Asn13Asp),MSGLGDSSSDPADPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,13,MSGLGDSSSDPADPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...
4,g.2.24658728T>G(p.His17Gln),MSGLGDSSSDPANPDSQKRKGSPCDTLASSTEKRRREQENKYLEEL...,17,MSGLGDSSSDPANPDSQKRKGSPCDTLASSTEKRRREQENKYLEEL...
5,g.2.24658744C>A(p.Pro23Thr),MSGLGDSSSDPANPDSHKRKGSTCDTLASSTEKRRREQENKYLEEL...,23,MSGLGDSSSDPANPDSHKRKGSTCDTLASSTEKRRREQENKYLEEL...
6,g.2.24658747T>C(p.Cys24Arg),MSGLGDSSSDPANPDSHKRKGSPRDTLASSTEKRRREQENKYLEEL...,24,MSGLGDSSSDPANPDSHKRKGSPRDTLASSTEKRRREQENKYLEEL...
7,g.2.24658750G>A(p.Asp25Asn),MSGLGDSSSDPANPDSHKRKGSPCNTLASSTEKRRREQENKYLEEL...,25,MSGLGDSSSDPANPDSHKRKGSPCNTLASSTEKRRREQENKYLEEL...
8,g.2.24658753A>G(p.Thr26Ala),MSGLGDSSSDPANPDSHKRKGSPCDALASSTEKRRREQENKYLEEL...,26,MSGLGDSSSDPANPDSHKRKGSPCDALASSTEKRRREQENKYLEEL...
9,g.2.24658753A>C(p.Thr26Pro),MSGLGDSSSDPANPDSHKRKGSPCDPLASSTEKRRREQENKYLEEL...,26,MSGLGDSSSDPANPDSHKRKGSPCDPLASSTEKRRREQENKYLEEL...


1241
1385


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
19,g.2.24752006A>G(p.Asn1244Ser),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,1244,GEASFAPSLSPGSSMVPMPIPPPQSSLLQQTPPASGYQSPDMKAWQ...
20,g.2.24752007C>G(p.Asn1244Lys),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,1244,GEAKFAPSLSPGSSMVPMPIPPPQSSLLQQTPPASGYQSPDMKAWQ...
21,g.2.24752015C>T(p.Pro1247Leu),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,1247,GEANFALSLSPGSSMVPMPIPPPQSSLLQQTPPASGYQSPDMKAWQ...
22,g.2.24752017T>C(p.Ser1248Pro),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,1248,GEANFAPPLSPGSSMVPMPIPPPQSSLLQQTPPASGYQSPDMKAWQ...
23,g.2.24752024G>T(p.Ser1250Ile),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,1250,GEANFAPSLIPGSSMVPMPIPPPQSSLLQQTPPASGYQSPDMKAWQ...
24,g.2.24752029G>A(p.Gly1252Arg),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,1252,GEANFAPSLSPRSSMVPMPIPPPQSSLLQQTPPASGYQSPDMKAWQ...
25,g.2.24752053A>G(p.Ile1260Val),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,1260,GEANFAPSLSPGSSMVPMPVPPPQSSLLQQTPPASGYQSPDMKAWQ...
26,g.2.24752056C>G(p.Pro1261Ala),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,1261,GEANFAPSLSPGSSMVPMPIAPPQSSLLQQTPPASGYQSPDMKAWQ...
27,g.2.24752081A>G(p.Gln1269Arg),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,1269,GEANFAPSLSPGSSMVPMPIPPPQSSLLRQTPPASGYQSPDMKAWQ...
28,g.2.24752089C>T(p.Pro1272Ser),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,1272,GEANFAPSLSPGSSMVPMPIPPPQSSLLQQTSPASGYQSPDMKAWQ...


840
1011


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
67,g.2.24711069T>A(p.Ser853Thr),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,853,VTSVTIKSEILPATLQSATARPTSRLNRLPELELEAIDNQFGQPGT...
68,g.2.24711081G>A(p.Ala857Thr),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,857,VTSVTIKSEILPASLQSTTARPTSRLNRLPELELEAIDNQFGQPGT...
69,g.2.24726616T>C(p.Ile876Thr),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,876,VTSVTIKSEILPASLQSATARPTSRLNRLPELELEATDNQFGQPGT...
70,g.2.24726618G>T(p.Asp877Tyr),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,877,VTSVTIKSEILPASLQSATARPTSRLNRLPELELEAIYNQFGQPGT...
71,g.2.24726628T>C(p.Phe880Ser),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,880,VTSVTIKSEILPASLQSATARPTSRLNRLPELELEAIDNQSGQPGT...
72,g.2.24726630G>A(p.Gly881Arg),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,881,VTSVTIKSEILPASLQSATARPTSRLNRLPELELEAIDNQFRQPGT...
73,g.2.24726637C>G(p.Pro883Arg),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,883,VTSVTIKSEILPASLQSATARPTSRLNRLPELELEAIDNQFGQRGT...
74,g.2.24726648G>T(p.Asp887Tyr),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,887,VTSVTIKSEILPASLQSATARPTSRLNRLPELELEAIDNQFGQPGT...
75,g.2.24726648G>A(p.Asp887Asn),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,887,VTSVTIKSEILPASLQSATARPTSRLNRLPELELEAIDNQFGQPGT...
76,g.2.24726649A>G(p.Asp887Gly),MSGLGDSSSDPANPDSHKRKGSPCDTLASSTEKRRREQENKYLEEL...,887,VTSVTIKSEILPASLQSATARPTSRLNRLPELELEAIDNQFGQPGT...


'MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEPRRISETFGEDLDCFLHASPPPCIEESFRRLDPLLLPVEAAICEKSSAVDILLSRDKLLSETCLSLQPASSSLDSYTAVNQAQLNAVTSLTPPSSPELSRHLVKTSQTLSAVDGTVTLKLVAKKAALSSVKVGGVATAAAAVTAAGAVKSGQSDSDQGGLGAEACPENKKRVHRCQFNGCRKVYTKSSHLKAHQRTHTGEKPYKCSWEGCEWRFARSDELTRHYRKHTGAKPFKCNHCDRCFSRSDHLALHMKRHI*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
303,KLF7,2,101,O75840,"[['O75840', 'O75840-3']]",ENST00000309446,O75840,DelRosso et al.,TF,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...,


2
101


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.2.207124205C>T(p.Arg101Gln),MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTE...,101,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...
1,g.2.207124206G>C(p.Arg101Gly),MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTE...,101,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...
2,g.2.207124211A>G(p.Leu99Pro),MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTE...,99,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...
3,g.2.207124214A>C(p.Leu98Trp),MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTE...,98,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...
4,g.2.207124218T>A(p.Ile97Phe),MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTE...,97,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...
5,g.2.207124229G>A(p.Ser93Leu),MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTE...,93,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...
6,g.2.207124231G>C(p.Ser92Arg),MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTE...,92,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...
7,g.2.207124241C>G(p.Cys89Ser),MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTE...,89,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...
8,g.2.207124241C>T(p.Cys89Tyr),MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTE...,89,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...
9,g.2.207124248C>T(p.Ala87Thr),MDVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTE...,87,DVLASYSIFQELQLVHDTGYFSALPSLEETWQQTCLELERYLQTEP...


'MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKKLKEKKDELDSLITAITTNGAHPSKCVTIQRTLDGRLQVAGRKGFPHVIYARLWRWPDLHKNELKHVKYCQYAFDLKCDSVCVNPYHYERVVSPGIDLSGLTLQSNAPSSMMVKDEYVHDFEGQPSLSTEGHSIQTIQHPPSNRASTETYSTPALLAPSESNATSTANFPNIPVASTSQPASILGGSHSEGLLQIASGPQPGQQQNGFTGQPATYHHNSTTTWTGSRTAPYTPNLPHHQNGHLQHHPPMPPHPGHYWPVHNELAFQPPISNHPAPEYWCSIAYFEMDVQVGETFKVPSSCPIVTVDGYVDPSGGDRFCLGQLSNVHRTEAIERARLHIGKGVQLECKGEGDVWVRCLSDHAVFVQSYYLDREAGRAPGDAVHKIYPSAYIKVFDLRQCHRQMQQQAATAQAAAAAQAAAVAGNIPGPGSVGGIAPAISLSAAAGIGVDDLRRLCILRMSFVKGWGPDYPRQSIKETPCWIEIHLHRALQLLDEVLHTMPIADPQPLD*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
561,SMAD4,30,60,Q13485,[['Q13485']],,Q13485,"PMID: 9707553, Soto",TF,GESETFAKRAIESLVKKLKEKKDELDSLITA,
562,SMAD4,275,552,Q13485,"[['Q13485'], ['Q13485']]",nan / nan,Q13485 / Q13485,"Choi 2000 list, GSL / PMID: 9707553, Soto",TF,PYTPNLPHHQNGHLQHHPPMPPHPGHYWPVHNELAFQPPISNHPAP...,


30
60


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.18.51047152G>T(p.Ala36Ser),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFSKRAIESLVKK...,36,GESETFSKRAIESLVKKLKEKKDELDSLITA
1,g.18.51047165T>C(p.Ile40Thr),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRATESLVKK...,40,GESETFAKRATESLVKKLKEKKDELDSLITA
2,g.18.51047193G>C(p.Glu49Asp),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,49,GESETFAKRAIESLVKKLKDKKDELDSLITA


275
552


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
3,g.18.51058381A>C(p.Thr277Pro),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,277,PYPPNLPHHQNGHLQHHPPMPPHPGHYWPVHNELAFQPPISNHPAP...
4,g.18.51058394C>T(p.Pro281Leu),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,281,PYTPNLLHHQNGHLQHHPPMPPHPGHYWPVHNELAFQPPISNHPAP...
5,g.18.51058396C>T(p.His282Tyr),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,282,PYTPNLPYHQNGHLQHHPPMPPHPGHYWPVHNELAFQPPISNHPAP...
6,g.18.51058408G>A(p.Gly286Ser),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,286,PYTPNLPHHQNSHLQHHPPMPPHPGHYWPVHNELAFQPPISNHPAP...
7,g.18.51058427C>T(p.Pro292Leu),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,292,PYTPNLPHHQNGHLQHHLPMPPHPGHYWPVHNELAFQPPISNHPAP...
8,g.18.51058432A>G(p.Met294Val),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,294,PYTPNLPHHQNGHLQHHPPVPPHPGHYWPVHNELAFQPPISNHPAP...
9,g.18.51058436C>T(p.Pro295Leu),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,295,PYTPNLPHHQNGHLQHHPPMLPHPGHYWPVHNELAFQPPISNHPAP...
10,g.18.51058451A>G(p.His300Arg),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,300,PYTPNLPHHQNGHLQHHPPMPPHPGRYWPVHNELAFQPPISNHPAP...
11,g.18.51059883C>G(p.Leu308Val),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,308,PYTPNLPHHQNGHLQHHPPMPPHPGHYWPVHNEVAFQPPISNHPAP...
12,g.18.51059893A>G(p.Gln311Arg),MDNMSITNTPTSNDACLSIVHSLMCHRQGGESETFAKRAIESLVKK...,311,PYTPNLPHHQNGHLQHHPPMPPHPGHYWPVHNELAFRPPISNHPAP...


'MAEARTSLSAHCRGPLATGLHPDLDLPGRSLATPAPSCYLLGSEPSSGLGLQPETHLPEGSLKRCCVLGLPPTSPASSSPCASSDVTSIIRSSQTSLVTCVNGLRSPPLTGDLGGPSKRARPGPASTDSHEGSLQLEACRKASFLKQEPADEFSELFGPHQQGLPPPYPLSQLPPGPSLGGLGLGLAGRVVAGRQACRWVDCCAAYEQQEELVRHIEKSHIDQRKGEDFTCFWAGCVRRYKPFNARYKLLIHMRVHSGEKPNKCMFEGCSKAFSRLENLKIHLRSHTGEKPYLCQHPGCQKAFSNSSDRAKHQRTHLDTKPYACQIPGCSKRYTDPSSLRKHVKAHSAKEQQVRKKLHAGPDTEADVLTECLVLQQLHTSTQLAASDGKGGCGLGQELLPGVYPGSITPHNGLASGLLPPAHDVPSRHHPLDATTSSHHHLSPLPMAESTRDGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLPSKPSYPPFQSPPPPPLPSPQGYQGSFHSIQSCFPYGDCYRMAEPAAGGDGLVGETHGFNPLRPNGYHSLSTPLPATGYEALAEASCPTALPQQPSEDVVSSGPEDCGFFPNGAFDHCLGHIPSIYTDT*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
214,GLIS1,447,620,Q8NBF1,[['Q8NBF1']],,Q8NBF1,"PMID: 12042312, Soto",TF,AESTRDGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLP...,


447
620


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.1.53506627C>T(p.Asp619Asn),MAEARTSLSAHCRGPLATGLHPDLDLPGRSLATPAPSCYLLGSEPS...,619,AESTRDGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLP...
1,g.1.53506630T>C(p.Thr618Ala),MAEARTSLSAHCRGPLATGLHPDLDLPGRSLATPAPSCYLLGSEPS...,618,AESTRDGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLP...
2,g.1.53506633A>G(p.Tyr617His),MAEARTSLSAHCRGPLATGLHPDLDLPGRSLATPAPSCYLLGSEPS...,617,AESTRDGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLP...
3,g.1.53506638G>A(p.Ser615Phe),MAEARTSLSAHCRGPLATGLHPDLDLPGRSLATPAPSCYLLGSEPS...,615,AESTRDGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLP...
4,g.1.53506641G>A(p.Pro614Leu),MAEARTSLSAHCRGPLATGLHPDLDLPGRSLATPAPSCYLLGSEPS...,614,AESTRDGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLP...
...,...,...,...,...
70,g.1.53514628T>C(p.Asp452Gly),MAEARTSLSAHCRGPLATGLHPDLDLPGRSLATPAPSCYLLGSEPS...,452,AESTRGGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLP...
71,g.1.53514631C>T(p.Arg451Gln),MAEARTSLSAHCRGPLATGLHPDLDLPGRSLATPAPSCYLLGSEPS...,451,AESTQDGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLP...
72,g.1.53514631C>A(p.Arg451Leu),MAEARTSLSAHCRGPLATGLHPDLDLPGRSLATPAPSCYLLGSEPS...,451,AESTLDGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLP...
73,g.1.53514632G>A(p.Arg451Trp),MAEARTSLSAHCRGPLATGLHPDLDLPGRSLATPAPSCYLLGSEPS...,451,AESTWDGLGPGLLSPIVSPLKGLGPPPLPPSSQSHSPGGQPFPTLP...


'MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
267,IKZF1,284,365,Q13422,[['Q13422']],,Q13422,"PMID: 8895580, Soto",TF,GDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRP...,


284
365


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.7.50399920G>A(p.Asp285Asn),MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQS...,285,GNKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRP...
1,g.7.50399939C>G(p.Thr291Arg),MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQS...,291,GDKGLSDRPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRP...
2,g.7.50399941C>T(p.Pro292Ser),MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQS...,292,GDKGLSDTSYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRP...
3,g.7.50399953A>C(p.Ser296Arg),MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQS...,296,GDKGLSDTPYDSRASYEKENEMMKSHVMDQAINNAINYLGAESLRP...
4,g.7.50399954G>T(p.Ser296Ile),MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQS...,296,GDKGLSDTPYDSIASYEKENEMMKSHVMDQAINNAINYLGAESLRP...
5,g.7.50399956G>A(p.Ala297Thr),MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQS...,297,GDKGLSDTPYDSSTSYEKENEMMKSHVMDQAINNAINYLGAESLRP...
6,g.7.50399994C>A(p.His309Gln),MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQS...,309,GDKGLSDTPYDSSASYEKENEMMKSQVMDQAINNAINYLGAESLRP...
7,g.7.50400017A>G(p.Asn317Ser),MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQS...,317,GDKGLSDTPYDSSASYEKENEMMKSHVMDQAINSAINYLGAESLRP...
8,g.7.50400020C>T(p.Ala318Val),MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQS...,318,GDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNVINYLGAESLRP...
9,g.7.50400053C>A(p.Pro329Gln),MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQS...,329,GDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRQ...


'MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEERRSSAAGKSEPLEKQPKTEPSTSASSGCGSDSGYGNSSESLEEKDIQMELQGSELWKRFHDIGTEMIITKAGRRMFPSVRVKVKGLDPGKQYHVAIDVVPVDSKRYRYVYHSSQWMVAGNTDHLCIIPRFYVHPDSPCSGETWMRQIISFDRMKLTNNEMDDKGHIILQSMHKYKPRVHVIEQGSSVDLSQIQSLPTEGVKTFSFKETEFTTVTAYQNQQITKLKIERNPFAKGFRDTGRNRGVLDGLLETYPWRPSFTLDFKTFGADTQSGSSGSSPVTSSGGAPSPLNSLLSPLCFSPMFHLPTSSLGMPCPEAYLPNVNLPLCYKICPTNFWQQQPLVLPAPERLASSNSSQSLAPLMMEVPMLSSLGVTNSKSGSSEDSSDQYLQAPNSTNQMLYGLQSPGNIFLPNSITPEALSCSFHPSYDFYRYNFSMPSRLISGSNHLKVNDDSQVSFGEGKCNHVHWYPAINHYL*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
622,TBX22,402,481,Q9Y458,[['Q9Y458']],ENST00000373294,Q9Y458,DelRosso et al.,TF,SLAPLMMEVPMLSSLGVTNSKSGSSEDSSDQYLQAPNSTNQMLYGL...,


402
481


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
42,g.X.80030758G>A(p.Ala404Thr),MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEE...,404,SLTPLMMEVPMLSSLGVTNSKSGSSEDSSDQYLQAPNSTNQMLYGL...
43,g.X.80030771T>C(p.Met408Thr),MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEE...,408,SLAPLMTEVPMLSSLGVTNSKSGSSEDSSDQYLQAPNSTNQMLYGL...
44,g.X.80030772G>A(p.Met408Ile),MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEE...,408,SLAPLMIEVPMLSSLGVTNSKSGSSEDSSDQYLQAPNSTNQMLYGL...
45,g.X.80030774A>G(p.Glu409Gly),MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEE...,409,SLAPLMMGVPMLSSLGVTNSKSGSSEDSSDQYLQAPNSTNQMLYGL...
46,g.X.80030798G>T(p.Gly417Val),MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEE...,417,SLAPLMMEVPMLSSLVVTNSKSGSSEDSSDQYLQAPNSTNQMLYGL...
47,g.X.80030818G>A(p.Gly424Ser),MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEE...,424,SLAPLMMEVPMLSSLGVTNSKSSSSEDSSDQYLQAPNSTNQMLYGL...
48,g.X.80030837G>A(p.Ser430Asn),MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEE...,430,SLAPLMMEVPMLSSLGVTNSKSGSSEDSNDQYLQAPNSTNQMLYGL...
49,g.X.80030846A>C(p.Tyr433Ser),MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEE...,433,SLAPLMMEVPMLSSLGVTNSKSGSSEDSSDQSLQAPNSTNQMLYGL...
50,g.X.80030863T>C(p.Ser439Pro),MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEE...,439,SLAPLMMEVPMLSSLGVTNSKSGSSEDSSDQYLQAPNPTNQMLYGL...
51,g.X.80030870A>G(p.Asn441Ser),MALSSRARAFSVEALVGRPSKRKLQDPIQAEQPELREKKGGEEEEE...,441,SLAPLMMEVPMLSSLGVTNSKSGSSEDSSDQYLQAPNSTSQMLYGL...


'MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEEIASYLITFEKHDEWLSCAPKTRPQNGSIILYNRKKVKYRKDGYLWKKRKDGKTTREDHMKLKVQGMECLYGCYVHSSIVPTFHRRCYWLLQNPDIVLVHYLNVPALEDCGKGCSPIFCSISSDRREWLKWSREELLGQLKPMFHGIKWSCGNGTEEFSVEHLVQQILDTHPTKPAPRTHACLCSGGLGSGSLTHKCSSTKHRIISPKVEPRALTLTSIPHAHPPEPPPLIAPLPPELPKAHTSPSSSSSSSSSGFAEPLEIRPSPPTSRGGSSRGGTAILLLTGLEQRAGGLTPTRHLAPQADPRPSMSLAVVVGTEPSAPPAPPSPAFDPDRFLNSPQRGQTYGGGQGVSPDFPEAEAAHTPCSALEPAAALEPQAAARGPPPQSVAGGRRGNCFFIQDDDSGEELKGHGAAPPIPSPPPSPPPSPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDEAPSIPAPTPQLSPALSTITDFSPEWSYPEGGVKVLITGPWTEAAEHYSCVFDHIAVPASLVQPGVLRCYCPAHEVGLVSLQVAGREGPLSASVLFEYRARRFLSLPSTQLDWLSLDDNQFRMSILERLEQMEKRMAEIAAAGQVPCQGPDAPPVQDEGQGPGFEARVVVLVESMIPRSTWKGPERLAHGSPFRGMSLLHLAAAQGYARLIETLSQWRSVETGSLDLEQEVDPLNVDHFSCTPLMWACALGHLEAAVLLFRWNRQALSIPDSLGRLPLSVAHSRGHVRLARCLEELQRQEPSVEPPFALSPPSSSPDTGLSSVSSPSELSDGTFSVTSAYSSAPDGSPPPAPLPASEMTMEDMAPGQLSSGVPEAPLLLMDYEATNSKGPLSSLPALPPASDDGAAPEDADSPQAVDVIPVDMISLAKQIIEATPERIKREDFVGLPEAGASMRERTGAVGLSETMSWLASYLENVDHFPSST

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
48,CAMTA2,285,468,O94983,"[['O94983', 'O94983-2'], ['O94983', 'O94983-2']]",nan / ENST00000348066,O94983 / O94983,"PMID: 16678093, Soto / DelRosso et al.",TF,KAHTSPSSSSSSSSSGFAEPLEIRPSPPTSRGGSSRGGTAILLLTG...,
49,CAMTA2,472,581,O94983,"[['O94983', 'O94983-2']]",ENST00000348066,O94983,DelRosso et al.,TF,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...,


285
468


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
23,g.17.4979920A>T(p.Ser468Thr),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,468,KAHTSPSSSSSSSSSGFAEPLEIRPSPPTSRGGSSRGGTAILLLTG...
24,g.17.4979920A>G(p.Ser468Pro),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,468,KAHTSPSSSSSSSSSGFAEPLEIRPSPPTSRGGSSRGGTAILLLTG...
25,g.17.4979920A>C(p.Ser468Ala),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,468,KAHTSPSSSSSSSSSGFAEPLEIRPSPPTSRGGSSRGGTAILLLTG...
26,g.17.4979929G>T(p.Pro465Thr),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,465,KAHTSPSSSSSSSSSGFAEPLEIRPSPPTSRGGSSRGGTAILLLTG...
27,g.17.4979935G>T(p.Pro463Thr),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,463,KAHTSPSSSSSSSSSGFAEPLEIRPSPPTSRGGSSRGGTAILLLTG...
...,...,...,...,...
85,g.17.4980411G>A(p.Pro304Leu),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,304,KAHTSPSSSSSSSSSGFAELLEIRPSPPTSRGGSSRGGTAILLLTG...
86,g.17.4980418C>A(p.Ala302Ser),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,302,KAHTSPSSSSSSSSSGFSEPLEIRPSPPTSRGGSSRGGTAILLLTG...
87,g.17.4980451A>G(p.Ser291Pro),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,291,KAHTSPPSSSSSSSSGFAEPLEIRPSPPTSRGGSSRGGTAILLLTG...
88,g.17.4980453G>A(p.Pro290Leu),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,290,KAHTSLSSSSSSSSSGFAEPLEIRPSPPTSRGGSSRGGTAILLLTG...


472
581


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
90,g.17.4978536A>T(p.Val578Asp),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,578,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...
91,g.17.4978537C>T(p.Val578Ile),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,578,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...
92,g.17.4978540G>C(p.Leu577Val),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,577,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...
93,g.17.4978555C>T(p.Ala572Thr),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,572,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...
94,g.17.4978558T>G(p.Ile571Leu),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,571,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...
95,g.17.4978558T>C(p.Ile571Val),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,571,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...
96,g.17.4978573A>T(p.Cys566Ser),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,566,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...
97,g.17.4978581T>C(p.His563Arg),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,563,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...
98,g.17.4978585C>T(p.Glu562Lys),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,562,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...
99,g.17.4978588C>T(p.Ala561Thr),MNTKDTTEVAENSHHLKIFLPKKLLECLPRCPLLPPERLRWNTNEE...,561,SPAPLEPSSRVGRGEALFGGPVGASELEPFSLSSFPDLMGELISDE...


'MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKRRSGMDFDDEDGEGPSKFSRENHSEIERRRRNKMTQYITELSDMVPTCSALARKPDKLTILRMAVSHMKSMRGTGNKSTDGAYKPSFLTEQELKHLILEAADGFLFVVAAETGRVIYVSDSVTPVLNQPQSEWFGSTLYEQVHPDDVEKLREQLCTSENSMTGRILDLKTGTVKKEGQQSSMRMCMGSRRSFICRMRCGNAPLDHLPLNRITTMRKRFRNGLGPVKEGEAQYAVVHCTGYIKAWPPAGMTIPEEDADVGQGSKYCLVAIGRLQVTSSPVCMDMNGMSVPTEFLSRHNSDGIITFVDPRCISVIGYQPQDLLGKDILEFCHPEDQSHLRESFQQVVKLKGQVLSVMYRFRTKNREWMLIRTSSFTFQNPYSDEIEYIICTNTNVKQLQQQQAELEVHQRDGLSSYDLSQVPVPNLPAGVHEAGKSVEKADAIFSQERDPRFAEMFAGISASEKKMMSSASAAGTQQIYSQGSPFPSGHSGKAFSSSVVHVPGVNDIQSSSSTGQNMSQISRQLNQSQVAWTGSRPPFPGQQIPSQSSKTQSSPFGIGTSHTYPADPSSYSPLSSPATSSPSGNAYSSLANRTPGFAESGQSSGQFQGRPSEVWSQWQSQHHGQQSGEQHSHQQPGQTEVFQDMLPMPGDPTQGTGNYNIEDFADLGMFPPFSE*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
20,ARNT2,524,717,Q9HBZ2,[['Q9HBZ2']],,Q9HBZ2,"PMID: 8657146, Soto",TF,QGSPFPSGHSGKAFSSSVVHVPGVNDIQSSSSTGQNMSQISRQLNQ...,


524
717


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.15.80576924A>C(p.Gln524His),MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKR...,524,HGSPFPSGHSGKAFSSSVVHVPGVNDIQSSSSTGQNMSQISRQLNQ...
1,g.15.80576938C>G(p.Pro529Arg),MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKR...,529,QGSPFRSGHSGKAFSSSVVHVPGVNDIQSSSSTGQNMSQISRQLNQ...
2,g.15.80576940T>C(p.Ser530Pro),MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKR...,530,QGSPFPPGHSGKAFSSSVVHVPGVNDIQSSSSTGQNMSQISRQLNQ...
3,g.15.80576941C>A(p.Ser530Tyr),MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKR...,530,QGSPFPYGHSGKAFSSSVVHVPGVNDIQSSSSTGQNMSQISRQLNQ...
4,g.15.80576952G>A(p.Gly534Arg),MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKR...,534,QGSPFPSGHSRKAFSSSVVHVPGVNDIQSSSSTGQNMSQISRQLNQ...
5,g.15.80580433G>A(p.Gly546Arg),MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKR...,546,QGSPFPSGHSGKAFSSSVVHVPRVNDIQSSSSTGQNMSQISRQLNQ...
6,g.15.80580436G>C(p.Val547Leu),MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKR...,547,QGSPFPSGHSGKAFSSSVVHVPGLNDIQSSSSTGQNMSQISRQLNQ...
7,g.15.80580457T>C(p.Ser554Pro),MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKR...,554,QGSPFPSGHSGKAFSSSVVHVPGVNDIQSSPSTGQNMSQISRQLNQ...
8,g.15.80580464C>T(p.Thr556Met),MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKR...,556,QGSPFPSGHSGKAFSSSVVHVPGVNDIQSSSSMGQNMSQISRQLNQ...
9,g.15.80580491G>A(p.Arg565Gln),MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKR...,565,QGSPFPSGHSGKAFSSSVVHVPGVNDIQSSSSTGQNMSQISQQLNQ...


'MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLPFSVEALMSDKKPPKEASPLPAESASAGATLRPLLLSGHGAREAHSPGPLVKPFETASVKSENSEDGAAWMQEPGRYSPPPRHMSPTTCTLRKHKTNRKPRTPFTTSQLLALERKFRQKQYLSIAERAEFSSSLNLTETQVKIWFQNRRAKAKRLQEAELEKLKMAAKPMLPSSFSLPFPISSPLQAASIYGASYPFHRPVLPIPPVGLYATPVGYGMYHLS*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
346,MSX2,234,267,P35548,[['P35548']],,P35548,"PMID: 19338779, Soto",TF,SIYGASYPFHRPVLPIPPVGLYATPVGYGMYHLS,


234
267


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
15,g.5.174729482A>G(p.Ile235Val),MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLP...,235,SVYGASYPFHRPVLPIPPVGLYATPVGYGMYHLS
16,g.5.174729482A>T(p.Ile235Leu),MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLP...,235,SLYGASYPFHRPVLPIPPVGLYATPVGYGMYHLS
17,g.5.174729483T>C(p.Ile235Thr),MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLP...,235,STYGASYPFHRPVLPIPPVGLYATPVGYGMYHLS
18,g.5.174729491G>A(p.Ala238Thr),MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLP...,238,SIYGTSYPFHRPVLPIPPVGLYATPVGYGMYHLS
19,g.5.174729501C>T(p.Pro241Leu),MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLP...,241,SIYGASYLFHRPVLPIPPVGLYATPVGYGMYHLS
20,g.5.174729505C>A(p.Phe242Leu),MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLP...,242,SIYGASYPLHRPVLPIPPVGLYATPVGYGMYHLS
21,g.5.174729515G>C(p.Val246Leu),MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLP...,246,SIYGASYPFHRPLLPIPPVGLYATPVGYGMYHLS
22,g.5.174729524A>G(p.Ile249Val),MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLP...,249,SIYGASYPFHRPVLPVPPVGLYATPVGYGMYHLS
23,g.5.174729530C>T(p.Pro251Ser),MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLP...,251,SIYGASYPFHRPVLPIPSVGLYATPVGYGMYHLS
24,g.5.174729537G>T(p.Gly253Val),MASPSKGNDLFSPDEEGPAVVAGPGPGPGGAEGAAEERRVKVSSLP...,253,SIYGASYPFHRPVLPIPPVVLYATPVGYGMYHLS


'MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPFAFIHKIRPIAEQTGICKVRPPPDWQPPFACDVDKLHFTPRIQRLNELEAQTRVKLNFLDQIAKYWELQGSTLKIPHVERKILDLFQLNKLVAEEGGFAVVCKDRKWTKIATKMGFAPGKAVGSHIRGHYERILNPYNLFLSGDSLRCLQKPNLTTDTKDKEYKPHDIPQRQSVQPSETCPPARRAKRMRAEAMNIKIEPEETTEARTHNLRRRMGCPTPKCENEKEMKSSIKQEPIERKDYIVENEKEKPKSRSKKATNAVDLYVCLLCGSGNDEDRLLLCDGCDDSYHTFCLIPPLHDVPKGDWRCPKCLAQECSKPQEAFGFEQAARDYTLRTFGEMADAFKSDYFNMPVHMVPTELVEKEFWRLVSTIEEDVTVEYGADIASKEFGSGFPVRDGKIKLSPEEEEYLDSGWNLNNMPVMEQSVLAHITADICGMKLPWLYVGMCFSSFCWHIEDHWSYSINYLHWGEPKTWYGVPGYAAEQLENVMKKLAPELFVSQPDLLHQLVTIMNPNTLMTHEVPVYRTNQCAGEFVITFPRAYHSGFNQGFNFAEAVNFCTVDWLPLGRQCVEHYRLLHRYCVFSHDEMICKMASKADVLDVVVASTVQKDMAIMIEDEKALRETVRKLGVIDSERMDFELLPDDERQCVKCKTTCFMSAISCSCKPGLLVCLHHVKELCSCPPYKYKLRYRYTLDDLYPMMNALKLRAESYNEWALNVNEALEAKINKKKSLVSFKALIEESEMKKFPDNDLLRHLRLVTQDAEKCASVAQQLLNGKRQTRYRSGGGKSQNQLTVNELRQFVTQLYALPCVLSQTPLLKDLLNRVEDFQQHSQKLLSEETPSAAELQDLLDVSFEFDVELPQLAEMRIRLEQARWLEEVQQACLDPSSLTLDDMRRLIDLGVGLAPYSAVEKAMARLQELLTVSEHWDDKAKSLLKARPRHSLNSLATAVKEIEE

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
291,KDM5B,1302,1391,Q9UGL1,[['Q9UGL1']],ENST00000367265,Q9UGL1,DelRosso et al.,TF,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...,


1302
1391


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.1.202730918G>T(p.Ser1389Arg),MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPF...,1389,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...
1,g.1.202730928A>G(p.Val1386Ala),MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPF...,1386,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...
2,g.1.202730929C>T(p.Val1386Met),MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPF...,1386,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...
3,g.1.202730932G>T(p.Pro1385Thr),MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPF...,1385,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...
4,g.1.202730932G>A(p.Pro1385Ser),MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPF...,1385,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...
5,g.1.202730934G>A(p.Ser1384Leu),MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPF...,1384,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...
6,g.1.202730940C>T(p.Arg1382Gln),MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPF...,1382,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...
7,g.1.202730942G>C(p.Asp1381Glu),MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPF...,1381,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...
8,g.1.202730949T>A(p.Gln1379Leu),MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPF...,1379,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...
9,g.1.202730953G>C(p.Gln1378Glu),MEAATTLHPGPRPALPLGGPGPLGEFLPPPECPVFEPSWEEFADPF...,1378,NKVSQPPGTTSFSLPDDWDNRTSYLHSPFSTGRSCIPLHGVSPEVN...


'MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDLTNTEITATTSLPSFSTFMDNYSTGYDVKPPCLYQMPLSGQQSSIKVEDIQMHNYQQHSHLPPQSEEMMPHSGSVYYKPSSPPTPTTPGFQVQHSPMWDDPGSLHNFHQNYVATTHMIEQRKTPVSRLSLFSFKQSPPGTPVSSCQMRFDGPLHVPMNPEPAGSHHVVDGQTFAVPNPIRKPASMGFPGLQIGHASQLLDTQVPSPPSRGSPSNEGLCAVCGDNAACQHYGVRTCEGCKGFFKRTVQKNAKYVCLANKNCPVDKRRRNRCQYCRFQKCLAVGMVKEVVRTDSLKGRRGRLPSKPKSPQEPSPPSPPVSLISALVRAHVDSNPAMTSLDYSRFQANPDYQMSGDDTQHIQQFYDLLTGSMEIIRGWAEKIPGFADLPKADQDLLFESAFLELFVLRLAYRSNPVEGKLIFCNGVVLHRLQCVRGFGEWIDSIVEFSSNLQNMNIDISAFSCIAALAMVTERHGLKEPKRVEELQNKIVNCLKDHVTFNNGGLNRPNYLSKLLGKLPELRTLCTQGLQRIFYLKLEDLVPPPAIIDKLFLDTLPF*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
431,NR4A2,1,91,P43354,"[['P43354'], ['P43354']]",nan / ENST00000339562,P43354 / P43354,"PMID: 14672718, Soto / DelRosso et al.",TF,MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,
432,NR4A2,584,598,P43354,[['P43354']],,P43354,"PMID: 9572393, Soto",TF,PPAIIDKLFLDTLPF,


1
91


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
9,g.2.156329930T>C(p.Gln86Arg),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,86,MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...
10,g.2.156329942G>C(p.Pro82Arg),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,82,MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...
11,g.2.156329971G>T(p.Asp72Glu),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,72,MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...
12,g.2.156329979C>T(p.Gly70Ser),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,70,MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...
13,g.2.156329997T>C(p.Met64Val),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,64,MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...
14,g.2.156329999A>C(p.Phe63Cys),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,63,MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...
15,g.2.156330024T>C(p.Thr55Ala),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,55,MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...
16,g.2.156330090C>T(p.Asp33Asn),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSNFLTPEFVKFSMDL...,33,MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSNFLTPEFVKFSMDL...
17,g.2.156330117A>T(p.Tyr24Asn),MPCVQAQYGSSPQGASPASQSYSNHSSGEYSSDFLTPEFVKFSMDL...,24,MPCVQAQYGSSPQGASPASQSYSNHSSGEYSSDFLTPEFVKFSMDL...
18,g.2.156330138G>C(p.Pro17Ala),MPCVQAQYGSSPQGASAASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,17,MPCVQAQYGSSPQGASAASQSYSYHSSGEYSSDFLTPEFVKFSMDL...


584
598


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
22,g.2.156325772T>C(p.Lys590Arg),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,590,PPAIIDRLFLDTLPF
23,g.2.156325775T>C(p.Asp589Gly),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,589,PPAIIGKLFLDTLPF
24,g.2.156325781A>T(p.Ile587Lys),MPCVQAQYGSSPQGASPASQSYSYHSSGEYSSDFLTPEFVKFSMDL...,587,PPAKIDKLFLDTLPF


'MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALIIFNSTNKLFQYASTDMDKVLLKYTEYNEPHESRTNSDIVETLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQRLCAVPPPNFEMPVSIPVSSHNSLVYSNPVSSLGNPNLLPLAHPSLQRNSMSPGVTHRPPSAGNTGGLMGGDLTSGAGTSAGNGYGNPRNSPGLLVSPGNLNKNMQAKSPPPMNLGMNNRKPDLRVLIPPGSKNTMPSVSEDVDLLLNQRINNSQSAQSLATPVVSVATPTLPGQGMGGYPSAISTTYGTEYSLSSADLSSLSGFNTASALHLGSVTGWQQQHLHNMPPSALSQLGACTSTHLSQSSNLSLPSTQSLNIKSEPVSPPRDRTTTPSRYPQHTRHEAGRSPVDSLSSCSSSYDGSDREDHRNEFHSPIGLTRPSPDERESPSVKRMRLSEGWAT*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
329,MEF2C,87,473,Q06413,[['Q06413']],,Q06413,"PMID: 9891782, 11904443, Soto",TF,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...,


87
473


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
1,g.5.88722627G>A(p.Leu467Phe),MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALI...,467,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...
2,g.5.88722689G>T(p.Pro446His),MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALI...,446,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...
3,g.5.88722695T>G(p.His444Pro),MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALI...,444,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...
4,g.5.88722728C>T(p.Gly433Glu),MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALI...,433,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...
5,g.5.88722764A>G(p.Val421Ala),MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALI...,421,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...
6,g.5.88722779G>A(p.Ala416Val),MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALI...,416,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...
7,g.5.88722791G>A(p.Thr412Met),MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALI...,412,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...
8,g.5.88722819T>C(p.Thr403Ala),MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALI...,403,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...
9,g.5.88722824C>T(p.Arg401His),MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALI...,401,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...
10,g.5.88722833G>A(p.Pro398Leu),MGRKKIQITRIMDERNRQVTFTKRKFGLMKKAYELSVLCDCEIALI...,398,TLRKKGLNGCDSPDPDADDSVGHSPESEDKYRKINEDIDLMISRQR...


'MTGKLAEKLPVTMSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHYNQMATENVMDIGLTNEKPNPELSYSGSFQPAPGNKTVTYLGKFAFDSPSNWCQDNIISLMSAGILGVPPASGALSTQTSTASMVQPPQGDVEAMYPALPPYSNCGDLYSEPVSFHDPQGNPGLAYSPQDYQSAKPALDSNLFPMIPDYNLYHHPNDMGSIPEHKPFQGMDPIRVNPPPITPLETIKAFKDKQIHPGFGSLPQPPLTLKPIRPRKYPNRPSKTPLHERPHACPAEGCDRRFSRSDELTRHLRIHTGHKPFQCRICMRSFSRSDHLTTHIRTHTGEKPFACEFCGRKFARSDERKRHAKIHLKQKEKKAEKGGAPSASSAPPVSLAPVVTTCA*'

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
107,EGR3,13,214,Q06889,"[['Q06889'], ['Q06889'], ['Q06889']]",nan / ENST00000317216 / nan,Q06889 / Q06889 / Q06889,"PMID: 10987814, Soto / DelRosso et al. / PMID:...",TF,MSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHYNQMATENVMDIG...,


13
214


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.8.22691018G>C(p.Pro207Ala),MTGKLAEKLPVTMSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHY...,207,MSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHYNQMATENVMDIG...
1,g.8.22691019A>C(p.Ile206Met),MTGKLAEKLPVTMSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHY...,206,MSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHYNQMATENVMDIG...
2,g.8.22691021T>A(p.Ile206Phe),MTGKLAEKLPVTMSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHY...,206,MSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHYNQMATENVMDIG...
3,g.8.22691023G>A(p.Ser205Phe),MTGKLAEKLPVTMSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHY...,205,MSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHYNQMATENVMDIG...
4,g.8.22691024A>C(p.Ser205Ala),MTGKLAEKLPVTMSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHY...,205,MSSLLNQLPDNLYPEEIPSALNLFSGSSDSVVHYNQMATENVMDIG...
...,...,...,...,...
56,g.8.22692854T>C(p.Ser31Gly),MTGKLAEKLPVTMSSLLNQLPDNLYPEEIPGALNLFSGSSDSVVHY...,31,MSSLLNQLPDNLYPEEIPGALNLFSGSSDSVVHYNQMATENVMDIG...
57,g.8.22692866C>T(p.Glu27Lys),MTGKLAEKLPVTMSSLLNQLPDNLYPKEIPSALNLFSGSSDSVVHY...,27,MSSLLNQLPDNLYPKEIPSALNLFSGSSDSVVHYNQMATENVMDIG...
58,g.8.22692869G>A(p.Pro26Ser),MTGKLAEKLPVTMSSLLNQLPDNLYSEEIPSALNLFSGSSDSVVHY...,26,MSSLLNQLPDNLYSEEIPSALNLFSGSSDSVVHYNQMATENVMDIG...
59,g.8.22692891G>C(p.Asn18Lys),MTGKLAEKLPVTMSSLLKQLPDNLYPEEIPSALNLFSGSSDSVVHY...,18,MSSLLKQLPDNLYPEEIPSALNLFSGSSDSVVHYNQMATENVMDIG...


'MEVDTEEKRHRTRSKGVRVPVEPAIQELFSCPTPGCDGSGHVSGKYARHRSVYGCPLAKKRKTQDKQPQEPAPKRKPFAVKADSSSVDECDDSDGTEDMDEKEEDEGEEYSEDNDEPGDEDEEDEEGDREEEEEIEEEDEDDDEDGEDVEDEEEEEEEEEEEEEEEENEDHQMNCHNTRIMQDTEKDDNNNDEYDNYDELVAKSLLNLGKIAEDAAYRARTESEMNSNTSNSLEDDSDKNENLGRKSELSLDLDSDVVRETVDSLKLLAQGHGVVLSENMNDRNYADSMSQQDSRNMNYVMLGKPMNNGLMEKMVEESDEEVCLSSLECLRNQCFDLARKLSETNPQERNPQQNMNIRQHVRPEEDFPGRTPDRNYSDMLNLMRLEEQLSPRSRVFASCAKEDGCHERDDDTTSVNSDRSEEVFDMTKGNLTLLEKAIALETERAKAMREKMAMEAGRRDNMRSYEDQSPRQLPGEDRKPKSSDSHVKKPYYGKDPSRTEKKESKCPTPGCDGTGHVTGLYPHHRSLSGCPHKDRVPPEILAMHESVLKCPTPGCTGRGHVNSNRNSHRSLSGCPIAAAEKLAKAQEKHQSCDVSKSSQASDRVLRPMCFVKQLEIPQYGYRNNVPTTTPRSNLAKELEKYSKTSFEYNSYDNHTYGKRAIAPKVQTRDISPKGYDDAKRYCKDPSPSSSSTSSYAPSSSSNLSCGGGSSASSTCSKSSFDYTHDMEAAHMAATAILNLSTRCREMPQNLSTKPQDLCATRNPDMEVDENGTLDLSMNKQRPRDSCCPILTPLEPMSPQQQAVMNNRCFQLGEGDCWDLPVDYTKMKPRRIDEDESKDITPEDLDPFQEALEERRYPGEVTIPSPKPKYPQCKESKKDLITLSGCPLADKSIRSMLATSSQELKCPTPGCDGSGHITGNYASHRSLSGCPRAKKSGIRIAQSKEDKEDQEPIRCPVPGCDGQGHITGKYASHRSASGCPLAAKRQKDGYLNGSQFSWKS

Unnamed: 0,Gene,Start,End,uniprotID,Matching Isoforms,Canonical Transcript ID,orig_uniprotID,Reference,TileType,ProteinRegionSeq,Notes
367,MYT1L,152,425,Q9UL68,"[['Q9UL68-4'], ['Q9UL68-4'], ['Q9UL68', 'Q9UL6...",ENST00000399161 / ENST00000399161 / nan / ENST...,Q9UL68 / Q9UL68 / Q9UL68 / Q9UL68,DelRosso et al. / DelRosso et al. / PMID: 2929...,TF,EEEEEEEEEEEEEEEENEDHQMNCHNTRIMQDTEKDDNNNDEYDNY...,


152
425


Unnamed: 0,name,TF_seq,prot_pos,AD_seq
0,g.2.1922496C>T(p.Asp425Asn),MEVDTEEKRHRTRSKGVRVPVEPAIQELFSCPTPGCDGSGHVSGKY...,425,EEEEEEEEEEEEEEEENEDHQMNCHNTRIMQDTEKDDNNNDEYDNY...
1,g.2.1922502C>A(p.Val423Leu),MEVDTEEKRHRTRSKGVRVPVEPAIQELFSCPTPGCDGSGHVSGKY...,423,EEEEEEEEEEEEEEEENEDHQMNCHNTRIMQDTEKDDNNNDEYDNY...
2,g.2.1922548C>A(p.Glu407Asp),MEVDTEEKRHRTRSKGVRVPVEPAIQELFSCPTPGCDGSGHVSGKY...,407,EEEEEEEEEEEEEEEENEDHQMNCHNTRIMQDTEKDDNNNDEYDNY...
3,g.2.1922558C>A(p.Gly404Val),MEVDTEEKRHRTRSKGVRVPVEPAIQELFSCPTPGCDGSGHVSGKY...,404,EEEEEEEEEEEEEEEENEDHQMNCHNTRIMQDTEKDDNNNDEYDNY...
4,g.2.1922558C>G(p.Gly404Ala),MEVDTEEKRHRTRSKGVRVPVEPAIQELFSCPTPGCDGSGHVSGKY...,404,EEEEEEEEEEEEEEEENEDHQMNCHNTRIMQDTEKDDNNNDEYDNY...
...,...,...,...,...
64,g.2.1923226C>T(p.Met181Ile),MEVDTEEKRHRTRSKGVRVPVEPAIQELFSCPTPGCDGSGHVSGKY...,181,EEEEEEEEEEEEEEEENEDHQMNCHNTRIIQDTEKDDNNNDEYDNY...
65,g.2.1923227A>C(p.Met181Arg),MEVDTEEKRHRTRSKGVRVPVEPAIQELFSCPTPGCDGSGHVSGKY...,181,EEEEEEEEEEEEEEEENEDHQMNCHNTRIRQDTEKDDNNNDEYDNY...
66,g.2.1923240T>C(p.Asn177Asp),MEVDTEEKRHRTRSKGVRVPVEPAIQELFSCPTPGCDGSGHVSGKY...,177,EEEEEEEEEEEEEEEENEDHQMNCHDTRIMQDTEKDDNNNDEYDNY...
67,g.2.1923242T>C(p.His176Arg),MEVDTEEKRHRTRSKGVRVPVEPAIQELFSCPTPGCDGSGHVSGKY...,176,EEEEEEEEEEEEEEEENEDHQMNCRNTRIMQDTEKDDNNNDEYDNY...


In [8]:
# Spot-check of the ARNT2 (UniProt Q9HBZ2) per-nucleotide table: genomic
# position 80576921 sits 3 nt before the first listed variant
# (g.15.80576924, p.Gln524His). The recorded output maps it to predicted
# protein position 523, i.e. the residue just before the annotated
# 524-717 region.
arnt2_nt_table = save_variant_fasta("Q9HBZ2")
pos_before_region = 80576921
arnt2_nt_table.loc[pos_before_region]

'MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKRRSGMDFDDEDGEGPSKFSRENHSEIERRRRNKMTQYITELSDMVPTCSALARKPDKLTILRMAVSHMKSMRGTGNKSTDGAYKPSFLTEQELKHLILEAADGFLFVVAAETGRVIYVSDSVTPVLNQPQSEWFGSTLYEQVHPDDVEKLREQLCTSENSMTGRILDLKTGTVKKEGQQSSMRMCMGSRRSFICRMRCGNAPLDHLPLNRITTMRKRFRNGLGPVKEGEAQYAVVHCTGYIKAWPPAGMTIPEEDADVGQGSKYCLVAIGRLQVTSSPVCMDMNGMSVPTEFLSRHNSDGIITFVDPRCISVIGYQPQDLLGKDILEFCHPEDQSHLRESFQQVVKLKGQVLSVMYRFRTKNREWMLIRTSSFTFQNPYSDEIEYIICTNTNVKQLQQQQAELEVHQRDGLSSYDLSQVPVPNLPAGVHEAGKSVEKADAIFSQERDPRFAEMFAGISASEKKMMSSASAAGTQQIYSQGSPFPSGHSGKAFSSSVVHVPGVNDIQSSSSTGQNMSQISRQLNQSQVAWTGSRPPFPGQQIPSQSSKTQSSPFGIGTSHTYPADPSSYSPLSSPATSSPSGNAYSSLANRTPGFAESGQSSGQFQGRPSEVWSQWQSQHHGQQSGEQHSHQQPGQTEVFQDMLPMPGDPTQGTGNYNIEDFADLGMFPPFSE*'

nt                 C
pred_prot_pos    523
Name: 80576921, dtype: object

In [9]:
# Spot-check of the ARNT2 (UniProt Q9HBZ2) per-nucleotide table at genomic
# position 80593695. The recorded output maps it to predicted protein
# position 717, which is the End value of the annotated 524-717 region.
arnt2_nt_table = save_variant_fasta("Q9HBZ2")
pos_at_region_end = 80593695
arnt2_nt_table.loc[pos_at_region_end]

'MATPAAVNPPEMASDIPGSVTLPVAPMAATGQVRMAGAMPARGGKRRSGMDFDDEDGEGPSKFSRENHSEIERRRRNKMTQYITELSDMVPTCSALARKPDKLTILRMAVSHMKSMRGTGNKSTDGAYKPSFLTEQELKHLILEAADGFLFVVAAETGRVIYVSDSVTPVLNQPQSEWFGSTLYEQVHPDDVEKLREQLCTSENSMTGRILDLKTGTVKKEGQQSSMRMCMGSRRSFICRMRCGNAPLDHLPLNRITTMRKRFRNGLGPVKEGEAQYAVVHCTGYIKAWPPAGMTIPEEDADVGQGSKYCLVAIGRLQVTSSPVCMDMNGMSVPTEFLSRHNSDGIITFVDPRCISVIGYQPQDLLGKDILEFCHPEDQSHLRESFQQVVKLKGQVLSVMYRFRTKNREWMLIRTSSFTFQNPYSDEIEYIICTNTNVKQLQQQQAELEVHQRDGLSSYDLSQVPVPNLPAGVHEAGKSVEKADAIFSQERDPRFAEMFAGISASEKKMMSSASAAGTQQIYSQGSPFPSGHSGKAFSSSVVHVPGVNDIQSSSSTGQNMSQISRQLNQSQVAWTGSRPPFPGQQIPSQSSKTQSSPFGIGTSHTYPADPSSYSPLSSPATSSPSGNAYSSLANRTPGFAESGQSSGQFQGRPSEVWSQWQSQHHGQQSGEQHSHQQPGQTEVFQDMLPMPGDPTQGTGNYNIEDFADLGMFPPFSE*'

nt                 G
pred_prot_pos    717
Name: 80593695, dtype: object

In [10]:
# Compute the frame for UniProt ID Q03164 once and keep it in KMTA2_nt_df for
# the lookups in the following cells; the bare name on the last line renders it
# as the cell output. The rendered frame is indexed by `gen_pos` and has the
# columns `nt` and `pred_prot_pos`.
# NOTE(review): Q03164 is presumably KMT2A, so "KMTA2" looks like a
# transposition — left unchanged here because later cells reference this name.
KMTA2_nt_df = save_variant_fasta("Q03164")
KMTA2_nt_df

'MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPVGGGGPGAPPSPPAVAAAAAAAGSSGAGVPGGAAAASAASSSSASSSSSSSSSASSGPALLRVGPGFDAALQVSAAIGTNLRRFRAVFGESGGGGGSGEDEQFLGFGSDEEVRVRSPTRSPSVKTSPRKPRGRPRSGSDRNSAILSDPSVFSPLNKSETKSGDKIKKKDSKSIEKKRGRPPTFPGVKIKITHGKDISELPKGNKEDSLKKIKRTPSATFQQATKIKKLRAGKLSPLKSKFKTGKLQIGRKGVQIVRRRGRPPSTERIKTPSGLLINSELEKPQKVRKDKEGTPPLTKEDKTVVRQSPRRIKPVRIIPSSKRTDATIAKQLLQRAKKGAQKKIEKEAAQLQGRKVKTQVKNIRQFIMPVVSAISSRIIKTPRRFIEDEDYDPPIKIARLESTPNSRFSAPSCGSSEKSSAASQHSSQMSSDSSRSSSPSVDTSTDSQASEEIQVLPEERSDTPEVHPPLPISQSPENESNDRRSRRYSVSERSFGSRTTKKLSTLQSAPQQQTSSSPPPPLLTPPPPLQPASSISDHTPWLMPPTIPLASPFLPASTAPMQGKRKSILREPTFRWTSLKHSRSEPQYFSSAKYAKEGLIRKPIFDNFRPPPLTPEDVGFASGFSASGTAASARLFSPLHSGTRFDMHKRSPLLRAPRFTPSEAHSRIFESVTLPSNRTSAGTSSSGVSNRKRKRKVFSPIRSEPRSPSHSMRTRSGRLSSSELSPLTPPSSVSSSLSISVSPLATSALNPTFTFPSHSLTQSGESAEKNQRPRKQTSAPAEPFSSSSPTPLFPWFTPGSQTERGRNKDKAPEELSKDRDADKSVEKDKSRERDREREKENKRESRKEKRKKGSEIQSSSALYPVGRVSKEKVVGEDVATSSSAKKATGRKKSSSHDSGTDITSVTLGDTTAVKTKILIKKGRGNLEKTNLDLGPTAPSLEKEKTLCLSTPSSSTV

Unnamed: 0_level_0,nt,pred_prot_pos
gen_pos,Unnamed: 1_level_1,Unnamed: 2_level_1
118436513,A,1
118436514,T,1
118436515,G,1
118436516,G,2
118436517,C,2
...,...,...
118522168,A,3969
118522169,C,3969
118522170,T,3970
118522171,A,3970


In [11]:
# Show every row whose predicted protein position equals 2782, using an
# explicit .loc selection with a boolean mask.
KMTA2_nt_df.loc[KMTA2_nt_df["pred_prot_pos"].eq(2782)]

Unnamed: 0_level_0,nt,pred_prot_pos
gen_pos,Unnamed: 1_level_1,Unnamed: 2_level_1
118504245,A,2782
118504246,A,2782
118504247,C,2782


In [12]:
# Look up the `nt` / `pred_prot_pos` row stored under index label 118504676.
# NOTE(review): presumably one end of the domain interval being sanity-checked
# — confirm against the BED file.
KMTA2_nt_df.loc[118504676]

nt                  A
pred_prot_pos    2925
Name: 118504676, dtype: object

In [13]:
# Look up the `nt` / `pred_prot_pos` row stored under index label 118505096.
# NOTE(review): presumably the other end of the domain interval being
# sanity-checked — confirm against the BED file.
KMTA2_nt_df.loc[118505096]

nt                  G
pred_prot_pos    3065
Name: 118505096, dtype: object

In [14]:
# TODO: The AD BED-file coordinates appear to be wrong for this protein
# (presumably Q03164, inspected in the cells above); the cause is not yet known.
# Needs further investigation.

In [76]:
KMTA2_wt_seq = 'MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPVGGGGPGAPPSPPAVAAAAAAAGSSGAGVPGGAAAASAASSSSASSSSSSSSSASSGPALLRVGPGFDAALQVSAAIGTNLRRFRAVFGESGGGGGSGEDEQFLGFGSDEEVRVRSPTRSPSVKTSPRKPRGRPRSGSDRNSAILSDPSVFSPLNKSETKSGDKIKKKDSKSIEKKRGRPPTFPGVKIKITHGKDISELPKGNKEDSLKKIKRTPSATFQQATKIKKLRAGKLSPLKSKFKTGKLQIGRKGVQIVRRRGRPPSTERIKTPSGLLINSELEKPQKVRKDKEGTPPLTKEDKTVVRQSPRRIKPVRIIPSSKRTDATIAKQLLQRAKKGAQKKIEKEAAQLQGRKVKTQVKNIRQFIMPVVSAISSRIIKTPRRFIEDEDYDPPIKIARLESTPNSRFSAPSCGSSEKSSAASQHSSQMSSDSSRSSSPSVDTSTDSQASEEIQVLPEERSDTPEVHPPLPISQSPENESNDRRSRRYSVSERSFGSRTTKKLSTLQSAPQQQTSSSPPPPLLTPPPPLQPASSISDHTPWLMPPTIPLASPFLPASTAPMQGKRKSILREPTFRWTSLKHSRSEPQYFSSAKYAKEGLIRKPIFDNFRPPPLTPEDVGFASGFSASGTAASARLFSPLHSGTRFDMHKRSPLLRAPRFTPSEAHSRIFESVTLPSNRTSAGTSSSGVSNRKRKRKVFSPIRSEPRSPSHSMRTRSGRLSSSELSPLTPPSSVSSSLSISVSPLATSALNPTFTFPSHSLTQSGESAEKNQRPRKQTSAPAEPFSSSSPTPLFPWFTPGSQTERGRNKDKAPEELSKDRDADKSVEKDKSRERDREREKENKRESRKEKRKKGSEIQSSSALYPVGRVSKEKVVGEDVATSSSAKKATGRKKSSSHDSGTDITSVTLGDTTAVKTKILIKKGRGNLEKTNLDLGPTAPSLEKEKTLCLSTPSSSTVKHSTSSIGSMLAQADKLPMTDKRVASLLKKAKAQLCKIEKSKSLKQTDQPKAQGQESDSSETSVRGPRIKHVCRRAAVALGRKRAVFPDDMPTLSALPWEEREKILSSMGNDDKSSIAGSEDAEPLAPPIKPIKPVTRNKAPQEPPVKKGRRSRRCGQCPGCQVPEDCGVCTNCLDKPKFGGRNIKKQCCKMRKCQNLQWMPSKAYLQKQAKAVKKKEKKSKTSEKKDSKESSVVKNVVDSSQKPTPSAREDPAPKKSSSEPPPRKPVEEKSEEGNVSAPGPESKQATTPASRKSSKQVSQPALVIPPQPPTTGPPRKEVPKTTPSEPKKKQPPPPESGPEQSKQKKVAPRPSIPVKQKPKEKEKPPPVNKQENAGTLNILSTLSNGNSSKQKIPADGVHRIRVDFKEDCEAENVWEMGGLGILTSVPITPRVVCFLCASSGHVEFVYCQVCCEPFHKFCLEENERPLEDQLENWCCRRCKFCHVCGRQHQATKQLLECNKCRNSYHPECLGPNYPTKPTKKKKVWICTKCVRCKSCGSTTPGKGWDAQWSHDFSLCHDCAKLFAKGNFCPLCDKCYDDDDYESKMMQCGKCDRWVHSKCENLSDEMYEILSNLPESVAYTCVNCTERHPAEWRLALEKELQISLKQVLTALLNSRTTSHLLRYRQAAKPPDLNPETEESIPSRSSPEGPDPPVLTEVSKQDDQQPLDLEGVKRKMDQGNYTSVLEFSDDIVKIIQAAINSDGGQPEIKKANSMVKSFFIRQMERVFPWFSVKKSRFWEPNKVSSNSGMLPNAVLPPSLDHNYAQWQEREENSHTEQPPLMKKIIPAPKPKGPGEPDSPTPLHPPTPPILSTDRSREDSPELNPPPGIEDNRQCALCLTYGDDSANDAGRLLYIGQNEWTHVNCALWSAEVFEDDDGSLKNVHMAVIRGKQLRCEFCQKPGATVGCCLTSCTSNYHFMCSRAKNCVFLDDKKVYCQRHRDLIKGE
VVPENGFEVFRRVFVDFEGISLRRKFLNGLEPENIHMMIGSMTIDCLGILNDLSDCEDKLFPIGYQCSRVYWSTTDARKRCVYTCKIVECRPPVVEPDINSTVEHDENRTIAHSPTSFTESSSKESQNTAEIISPPSPDRPPHSQTSGSCYYHVISKVPRIRTPSYSPTQRSPGCRPLPSAGSPTPTTHEIVTVGDPLLSSGLRSIGSRRHSTSSLSPQRSKLRIMSPMRTGNTYSRNNVSSVSTTGTATDLESSAKVVDHVLGPLNSSTSLGQNTSTSSNLQRTVVTVGNKNSHLDGSSSSEMKQSSASDLVSKSSSLKGEKTKVLSSKSSEGSAHNVAYPGIPKLAPQVHNTTSRELNVSKIGSFAEPSSVSFSSKEALSFPHLHLRGQRNDRDQHTDSTQSANSSPDEDTEVKTLKLSGMSNRSSIINEHMGSSSRDRRQKGKKSCKETFKEKHSSKSFLEPGQVTTGEEGNLKPEFMDEVLTPEYMGQRPCNNVSSDKIGDKGLSMPGVPKAPPMQVEGSAKELQAPRKRTVKVTLTPLKMENESQSKNALKESSPASPLQIESTSPTEPISASENPGDGPVAQPSPNNTSCQDSQSNNYQNLPVQDRNLMLPDGPKPQEDGSFKRRYPRRSARARSNMFFGLTPLYGVRSYGEEDIPFYSSSTGKKRGKRSAEGQVDGADDLSTSDEDDLYYYNFTRTVISSGGEERLASHNLFREEEQCDLPKISQLDGVDDGTESDTSVTATTRKSSQIPKRNGKENGTENLKIDRPEDAGEKEHVTKSSVGHKNEPKMDNCHSVSRVKTQGQDSLEAQLSSLESSRRVHTSTPSDKNLLDTYNTELLKSDSDNNNSDDCGNILPSDIMDFVLKNTPSMQALGESPESSSSELLNLGEGLGLDSNREKDMGLFEVFSQQLPTTEPVDSSVSSSISAEEQFELPLELPSDLSVLTTRSPTVPSQNPSRLAVISDSGEKRVTITEKSVASSESDPALLSPGVDPTPEGHMTPDHFIQGHMDADHISSPPCGSVEQGHGNNQDLTRNSSTPGLQVPVSPTVPIQNQKYVPNSTDSPGPSQISNAAVQTTPPHLKPATEKLIVVNQNMQPLYVLQTLPNGVTQKIQLTSSVSSTPSVMETNTSVLGPMGGGLTLTTGLNPSLPTSQSLFPSASKGLLPMSHHQHLHSFPAATQSSFPPNISNPPSGLLIGVQPPPDPQLLVSESSQRTDLSTTVATPSSGLKKRPISRLQTRKNKKLAPSSTPSNIAPSDVVSNMTLINFTPSQLPNHPSLLDLGSLNTSSHRTVPNIIKRSKSSIMYFEPAPLLPQSVGGTAATAAGTSTISQDTSHLTSGSVSGLASSSSVLNVVSMQTTTTPTSSASVPGHVTLTNPRLLGTPDIGSISNLLIKASQQSLGIQDQPVALPPSSGMFPQLGTSQTPSTAAITAASSICVLPSTQTTGITAASPSGEADEHYQLQHVNQLLASKTGIHSSQRDLDSASGPQVSNFTQTVDAPNSMGLEQNKALSSAVQASPTSPGGSPSSPSSGQRSASPSVPGPTKPKPKTKRFQLPLDKGNGKKHKVSHLRTSSSEAHIPDQETTSLTSGTGTPGAEAEQQDTASVEQSSQKECGQPAGQVAVLPEVQVTQNPANEQESAEPKTVEEEESNFSSPLMLWLQQEQKRKESITEKKPKKGLVFEISSDDGFQICAESIEDAWKSLTDKVQEARSNARLKQLSFAGVNGLRMLGILHDAVVFLIEQLSGAKHCRNYKFRFHKPEEANEPPLNPHGSARAEVHLRKSAFDMFNFLASKHRQPPEYNPNDEEEEEVQLKSARRATSMDLPMPMRFRHLKKTSKEAVGVYRSPIHGRGLFCKRNIDAGEMVIEYAGNVIRSIQTDKREKYYDSKGIGCYMFRIDDSEVVDATMHGNAARFINHSCEPNCYSRVINIDGQKHIVIFAMRKIYRGEELTYDYKFPIEDASNKLPCNCGAKKCRKFLN*'
# Slice the wild-type sequence with 0-based, end-exclusive indices 2781:2921,
# i.e. the 140 characters at 1-based positions 2782 through 2921.
KMTA2_wt_seq[2781:2921]

'NCHSVSRVKTQGQDSLEAQLSSLESSRRVHTSTPSDKNLLDTYNTELLKSDSDNNNSDDCGNILPSDIMDFVLKNTPSMQALGESPESSSSELLNLGEGLGLDSNREKDMGLFEVFSQQLPTTEPVDSSVSSSISAEEQF'

In [38]:
# Length of the first entry in the `TF_seq` column of the frame returned for
# UniProt ID Q03164.
# NOTE(review): this cell indexes the return value by "TF_seq", whereas the
# In [10] cell displays the return value as an `nt` / `pred_prot_pos` frame —
# presumably save_variant_fasta was edited between those runs (the execution
# counts are out of order); confirm with Restart Kernel -> Run All.
len(save_variant_fasta("Q03164")["TF_seq"].iloc[0])

2782
2921


Unnamed: 0,name,TF_seq,prot_pos
0,g.118504710C>T(p.Arg2937Trp),MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPV...,2937
1,g.118504716C>T(p.Pro2939Ser),MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPV...,2939
2,g.118504723T>C(p.Val2941Ala),MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPV...,2941
3,g.118504732A>C(p.Gln2944Pro),MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPV...,2944
4,g.118504733G>A(p.Gln2944Pro),MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPV...,0
...,...,...,...
57,g.118505081C>T(p.Ser3057Phe),MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPV...,0
58,g.118505083A>G(p.Asn3061Ser),MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPV...,3061
59,g.118505084T>C(p.Asn3061Ser),MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPV...,0
60,g.118505087A>C(p.Asn3061Ser),MAHSCRWRFPARPGTTGGGGGGGRRGLGGAPRQRVPALLLPPGPPV...,0


Unnamed: 0,name,TF_seq,prot_pos


Unnamed: 0,name,TF_seq,prot_pos,AD_seq


3970