In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# Folders:
folder_datafiles = '/mbshome/nvelthuijs/Cofactors/20200402_Transcriptionregulationlist/Datafiles/'
folder_corrected_names = '/mbshome/nvelthuijs/Cofactors/20200402_Transcriptionregulationlist/Datafiles_corrected_names/'
folder_output = '/mbshome/nvelthuijs/Cofactors/20200402_Transcriptionregulationlist/Output_files/'

# Files:
biogrid_file = folder_corrected_names + 'biogrid_corrected_names.txt'
tf_file = folder_datafiles + 'tfs.txt'
tf_smallset_file = folder_datafiles + 'tfs_smallset.txt'
intact_file = folder_datafiles + 'intact.txt'

# hugo_uniprot_file = '/mbshome/nvelthuijs/Cofactors/List2/Datafiles/HUGO_to_uniprot.txt'
hugo_translate_file = folder_datafiles + 'hugo_ensg_uniprot_approved.txt'

In [3]:
# Create set of tfs:
with open(tf_file) as tf_txt:
    tfs = set([tf.strip() for tf in tf_txt])

# Read biogrid into pandas dataframe:
with open(biogrid_file) as biogrid_full:
    biogrid = pd.read_table(biogrid_full,
                            usecols = ['#BioGRID Interaction ID',
                                       'Official Symbol Interactor A',
                                      'Official Symbol Interactor B',
                                      'Publication Source',
                                      'Organism Interactor A',
                                      'Organism Interactor B'],
                            dtype = str)
biogrid.columns = ['Int_ID', 'A', 'B', 'PMID', 'Org_A', 'Org_B']

# Filter out only human-human interactions,
# at least one of which is in the set of TFs.
biogrid = biogrid.loc[((biogrid['Org_A'] == '9606') & (biogrid['Org_B'] == '9606'))
                      & (biogrid['A'].isin(tfs) | biogrid['B'].isin(tfs))].drop(columns = ['Org_A', 'Org_B'])

# Create new dataframe with only columns A, B,
# interaction ID and PMID. For each row protein
# A and protein B are sorted alphabetticaly.
new_rows = []
for index, row in biogrid.iterrows():
    a = biogrid.loc[index, 'A']
    b = biogrid.loc[index, 'B']
    int_id = biogrid.loc[index, 'Int_ID']
    pmid = biogrid.loc[index, 'PMID']
    if a > b:
        new_rows.append([b, a, int_id, pmid])
    else:
        new_rows.append([a, b, int_id, pmid])
biogrid = pd.DataFrame(new_rows, columns = ['A', 'B', 'Int_ID', 'PMID'])#.reset_index(drop = True) #Do we need the reset_index?

# Create seperate dataframes depending on whether
# A or B is a TF. Group duplicate entries and count
# the number of PMIDs for each interaction, then
# only keep the ones with at least 2 references.
biogrid_a_tf = biogrid.loc[biogrid['A'].isin(tfs)].copy()
biogrid_a_tf_copy = biogrid_a_tf.groupby(['A', 'B']).nunique()
biogrid_a_tf_copy = biogrid_a_tf_copy.loc[biogrid_a_tf_copy['PMID'] > 1].copy()
biogrid_b_tf = biogrid.loc[biogrid['B'].isin(tfs)].copy()
biogrid_b_tf_copy = biogrid_b_tf.groupby(['A', 'B']).nunique()
biogrid_b_tf_copy = biogrid_b_tf_copy.loc[biogrid_b_tf_copy['PMID'] > 1].copy()

# Write interaction pairs to lists.
tf_int_list_a = [[index[0], index[1]] for index, row in biogrid_a_tf_copy.iterrows()]
tf_int_list_b = [[index[0], index[1]] for index, row in biogrid_b_tf_copy.iterrows()]

# Create interaction dictionary where each interactor is a key
# and the value is a list of all tfs it interacts with. Each TF
# is itself a list, containing the TF, the PMIDs and interaction IDs.
int_dict = {}
for i in tf_int_list_a:
    tf = i[0]
    interactor = i[1]
    df = biogrid_a_tf.loc[(biogrid_a_tf['A'] == tf) & (biogrid_a_tf['B'] == interactor)]
    pmids = [str(x) for x in set(df['PMID'])]
    newlist = [tf, ', '.join(pmids), set(df['Int_ID'])]
    if not interactor in int_dict.keys():
        int_dict[interactor] = [newlist]
    else:
        int_dict[interactor].append(newlist)

for i in tf_int_list_b:
    tf = i[1]
    interactor = i[0]
    df = biogrid_b_tf.loc[(biogrid_b_tf['B'] == tf) & (biogrid_b_tf['A'] == interactor)]
    pmids = [str(x) for x in set(df['PMID'])]
    newlist = [tf, ', '.join(pmids), set(df['Int_ID'])]
    if not interactor in int_dict.keys():
        int_dict[interactor] = [newlist]
    else:
        int_dict[interactor].append(newlist)

# Create a dataframe where each row is consists of
# interactor, tf, pmids and interaction IDs.
constructlist = []
for interactor, interactions in int_dict.items():
    for interaction in interactions:
        tf, pmids, int_ids = (interaction[i] for i in range(len(interaction)))
        constructlist.append([interactor, tf, pmids, int_ids])

interactors_biogrid = pd.DataFrame(constructlist, columns = ['Interactor', 'TF', 'PMIDS', 'Interaction IDs'])#.set_index('Interactor')
interactors_biogrid = interactors_biogrid.drop(columns = 'Interaction IDs').sort_values(by = ['Interactor', 'TF'])

# Create dataframe where each row has an interactor in the first
# collumn and all interacting tfs in the second:
biogrid_interactors = set(interactors_biogrid['Interactor'])
newrows = []
for interactor in biogrid_interactors:
    df = interactors_biogrid.loc[interactors_biogrid['Interactor'] == interactor]
    interacting_tfs = list(df['TF'])
    newrow = [interactor, ', '.join(interacting_tfs)]
    newrows.append(newrow)
interactors_biogrid_simple = pd.DataFrame(newrows, columns = ['Interactor', 'TFs'])
interactors_biogrid_simple = interactors_biogrid_simple.sort_values(by = ['Interactor', 'TFs'])

with open(tf_smallset_file) as tfs_small:
    tfs_small_set = set([x.strip() for x in tfs_small])
    new_col = []
    for index, row in interactors_biogrid_simple.iterrows():
        tfs_current = row['TFs'].split(', ')
        
        if any(x in tfs_small_set for x in tfs_current):
            new_col.append(1)
        else:
            new_col.append(0)
    interactors_biogrid_simple['Interaction with small set of TFs'] = new_col
    
# Write both dataframes to a .csv file:
interactors_biogrid.to_csv(folder_output + '02_tf_interactors_biogrid.csv', index = False)
interactors_biogrid_simple.to_csv(folder_output + '03_tf_interactors_biogrid_simple.csv', index = False)

print(len(biogrid_interactors))
print(interactors_biogrid.head(20))
print('\n')
print(interactors_biogrid_simple.head(20))
interactors_biogrid_simple

2260
     Interactor       TF                                              PMIDS
3932      AANAT  BHLHE40                   PUBMED:25416956, PUBMED:25910212
3933       AAR2      MYC                   PUBMED:30415952, PUBMED:29467282
3934       AATF      MYC                   PUBMED:30415952, PUBMED:29467282
3935      ABCA1    NR1H2                   PUBMED:25838426, PUBMED:20951680
3936       ABI1     LHX4                   PUBMED:28514442, PUBMED:26186194
3937       ABI1   ZNF511                   PUBMED:28514442, PUBMED:26186194
3938       ABI2     LHX4                   PUBMED:28514442, PUBMED:26186194
3939       ABI2   ZNF511                   PUBMED:28514442, PUBMED:26186194
3940       ABL1      JUN                   PUBMED:18619508, PUBMED:19818398
3941       ABL1     TP73  PUBMED:19442657, PUBMED:25893286, PUBMED:10391...
3942   ABRAXAS2   THAP11                   PUBMED:28514442, PUBMED:26186194
3943   ABRAXAS2     TP53  PUBMED:28514442, PUBMED:26186194, PUBMED:25283148
3944   

Unnamed: 0,Interactor,TFs,Interaction with small set of TFs
1793,AANAT,BHLHE40,1
155,AAR2,MYC,1
2171,AATF,MYC,1
1010,ABCA1,NR1H2,1
95,ABI1,"LHX4, ZNF511",1
1007,ABI2,"LHX4, ZNF511",1
2027,ABL1,"JUN, TP73",1
1684,ABRAXAS2,"THAP11, TP53",1
1019,ACOT1,TFAP2A,1
1385,ACOX1,FOXS1,1


In [4]:
# Create dictionary for translating
# UniProtIDs to gene names:
hugo_uniprot_dict = {}
with open(hugo_translate_file) as hugo_translate:
    next(hugo_translate)
    for line in hugo_translate:
        line = line.strip().split('\t')
        if len(line) >= 3:
            gene_name = line[0]
            uniprot_ids = line[2]
            if not ',' in uniprot_ids:
                hugo_uniprot_dict[uniprot_ids] = gene_name
            else:
                uniprot_ids = uniprot_ids.split(', ')
                for single_id in uniprot_ids:
                    hugo_uniprot_dict[single_id] = gene_name
hugo_uniprot_dict.pop('')
                    
for item in hugo_uniprot_dict.items():
    print(item)

('P04217', 'A1BG')
('Q9NQ94', 'A1CF')
('P01023', 'A2M')
('A8K2U0', 'A2ML1')
('U3KPV4', 'A3GALT2')
('Q9NPC4', 'A4GALT')
('Q9UNA3', 'A4GNT')
('Q9NRG9', 'AAAS')
('Q86V21', 'AACS')
('P22760', 'AADAC')
('Q6P093', 'AADACL2')
('Q5VUY0', 'AADACL3')
('Q5VUY2', 'AADACL4')
('Q8N5Z0', 'AADAT')
('Q6PD74', 'AAGAB')
('Q2M2I8', 'AAK1')
('Q9H7C9', 'AAMDC')
('Q13685', 'AAMP')
('Q16613', 'AANAT')
('Q9Y312', 'AAR2')
('Q4LEZ3', 'AARD')
('P49588', 'AARS1')
('Q5JTZ9', 'AARS2')
('Q9BTE6', 'PTGES3L-AARSD1')
('Q4L235', 'AASDH')
('Q9NRN7', 'AASDHPPT')
('Q9UDR5', 'AASS')
('Q9NY61', 'AATF')
('Q6ZMQ8', 'AATK')
('P80404', 'ABAT')
('O95477', 'ABCA1')
('Q9BZC7', 'ABCA2')
('Q99758', 'ABCA3')
('P78363', 'ABCA4')
('Q8WWZ7', 'ABCA5')
('Q8N139', 'ABCA6')
('Q8IZY2', 'ABCA7')
('O94911', 'ABCA8')
('Q8IUA7', 'ABCA9')
('Q8WWZ4', 'ABCA10')
('Q4W5N1', 'ABCA11P')
('Q86UK0', 'ABCA12')
('Q86UQ4', 'ABCA13')
('P08183', 'ABCB1')
('P21439', 'ABCB4')
('Q2M3G0', 'ABCB5')
('Q9NP58', 'ABCB6')
('O75027', 'ABCB7')
('Q9NUT2', 'ABCB8')
('Q9NP78

('Q9NRL2', 'BAZ1A')
('Q9UIG0', 'BAZ1B')
('Q9UIF9', 'BAZ2A')
('Q9UIF8', 'BAZ2B')
('Q96PG8', 'BBC3')
('Q9BXH1', 'BBC3')
('A8MTZ0', 'BBIP1')
('Q8ND07', 'BBOF1')
('O75936', 'BBOX1')
('Q8NFJ9', 'BBS1')
('Q9BXC9', 'BBS2')
('Q96RK4', 'BBS4')
('Q8N3I7', 'BBS5')
('Q8IWZ6', 'BBS7')
('Q3SYG4', 'BBS9')
('Q8TAM1', 'BBS10')
('Q6ZW61', 'BBS12')
('Q8WY36', 'BBX')
('P50895', 'BCAM')
('Q96GW7', 'BCAN')
('Q9UHQ4', 'BCAP29')
('P51572', 'BCAP31')
('P56945', 'BCAR1')
('O75815', 'BCAR3')
('O75363', 'BCAS1')
('O75934', 'BCAS2')
('Q9H6U6', 'BCAS3')
('Q8TDM0', 'BCAS4')
('P54687', 'BCAT1')
('O15382', 'BCAT2')
('Q9P287', 'BCCIP')
('Q7Z5W3', 'BCDIN3D')
('P06276', 'BCHE')
('P12694', 'BCKDHA')
('P21953', 'BCKDHB')
('O14874', 'BCKDK')
('P10415', 'BCL2')
('Q16548', 'BCL2A1')
('Q07817', 'BCL2L1')
('Q92843', 'BCL2L2-PABPN1')
('Q9HD36', 'BCL2L10')
('O43521', 'BCL2L11')
('Q9HB09', 'BCL2L12')
('Q9BXK5', 'BCL2L13')
('Q9BZR8', 'BCL2L14')
('Q5TBC7', 'BCL2L15')
('P20749', 'BCL3')
('P41182', 'BCL6')
('Q8N143', 'BCL6B')
('Q4VC05

('A2IDD5', 'CCDC78')
('Q76M96', 'CCDC80')
('Q6ZN84', 'CCDC81')
('Q8N4S0', 'CCDC82')
('Q8IWF9', 'CCDC83')
('Q86UT8', 'CCDC84')
('Q96PX6', 'CCDC85A')
('Q15834', 'CCDC85B')
('A6NKD9', 'CCDC85C')
('Q9H6F5', 'CCDC86')
('Q9NVE4', 'CCDC87')
('Q3V6T2', 'CCDC88A')
('A6NC98', 'CCDC88B')
('Q9P219', 'CCDC88C')
('Q8N998', 'CCDC89')
('Q9GZT6', 'CCDC90B')
('Q7Z6B0', 'CCDC91')
('Q53HC0', 'CCDC92')
('Q567U6', 'CCDC93')
('Q2M329', 'CCDC96')
('Q96F63', 'CCDC97')
('Q96A19', 'CCDC102A')
('Q68D86', 'CCDC102B')
('Q8IW40', 'CCDC103')
('Q8IYK2', 'CCDC105')
('Q9BWC9', 'CCDC106')
('Q8WV48', 'CCDC107')
('Q8TBZ0', 'CCDC110')
('Q8NEF3', 'CCDC112')
('Q9H0I3', 'CCDC113')
('Q96M63', 'CCDC114')
('Q96NT0', 'CCDC115')
('Q8IYX3', 'CCDC116')
('Q8IWD4', 'CCDC117')
('Q96HB5', 'CCDC120')
('Q6ZUS5', 'CCDC121')
('Q5T0U0', 'CCDC122')
('Q96CT7', 'CCDC124')
('Q86Z20', 'CCDC125')
('Q96EE4', 'CCDC126')
('Q96BQ5', 'CCDC127')
('P13994', 'CCDC130')
('Q9H6E4', 'CCDC134')
('Q96JN2', 'CCDC136')
('Q6PK04', 'CCDC137')
('Q96M89', 'CCDC138')


('P15086', 'CPB1')
('Q96IY4', 'CPB2')
('O75976', 'CPD')
('P16870', 'CPE')
('Q9BZB8', 'CPEB1')
('Q7Z5Q1', 'CPEB2')
('Q8NE35', 'CPEB3')
('Q17RY0', 'CPEB4')
('A4D0V7', 'CPED1')
('A0A1W2PPM1', 'CPHXL')
('Q9H799', 'CPLANE1')
('Q9BU20', 'CPLANE2')
('O14810', 'CPLX1')
('Q6PUV4', 'CPLX2')
('Q8WVH0', 'CPLX3')
('Q7Z7G2', 'CPLX4')
('P14384', 'CPM')
('P15169', 'CPN1')
('P22792', 'CPN2')
('Q99829', 'CPNE1')
('Q96FN4', 'CPNE2')
('O75131', 'CPNE3')
('Q96A23', 'CPNE4')
('Q9HCH3', 'CPNE5')
('O95741', 'CPNE6')
('Q9UBL6', 'CPNE7')
('Q86YQ8', 'CPNE8')
('Q8IYJ1', 'CPNE9')
('Q8IVL8', 'CPO')
('P36551', 'CPOX')
('Q9BRF8', 'CPPED1')
('Q9Y646', 'CPQ')
('P31327', 'CPS1')
('Q10570', 'CPSF1')
('Q9P2I0', 'CPSF2')
('Q9UKF6', 'CPSF3')
('O95639', 'CPSF4')
('A6NMK7', 'CPSF4L')
('Q16630', 'CPSF6')
('Q8N684', 'CPSF7')
('P50416', 'CPT1A')
('Q92523', 'CPT1B')
('Q8TCG5', 'CPT1C')
('P23786', 'CPT2')
('Q5TA50', 'CPTP')
('Q9H3G5', 'CPVL')
('Q8N123', 'CPXCR1')
('Q96SM3', 'CPXM1')
('Q8N436', 'CPXM2')
('Q66K79', 'CPZ')
('P17927',

('Q96MC2', 'DRC1')
('Q9H069', 'DRC3')
('Q8IY82', 'DRC7')
('P21728', 'DRD1')
('P14416', 'DRD2')
('P35462', 'DRD3')
('P21917', 'DRD4')
('P21918', 'DRD5')
('Q9Y295', 'DRG1')
('P55039', 'DRG2')
('A6NNA5', 'DRGX')
('Q6PGQ1', 'DRICH1')
('Q9NRR4', 'DROSHA')
('Q13474', 'DRP2')
('Q08554', 'DSC1')
('Q02487', 'DSC2')
('Q14574', 'DSC3')
('O60469', 'DSCAM')
('Q8TD84', 'DSCAML1')
('Q9BVC3', 'DSCC1')
('P56555', 'DSCR4')
('Q96T75', 'DSCR8')
('P59020', 'DSCR9')
('P59022', 'DSCR10')
('Q9UL01', 'DSE')
('Q8IZU8', 'DSEL')
('Q02413', 'DSG1')
('Q14126', 'DSG2')
('P32926', 'DSG3')
('Q86SJ6', 'DSG4')
('Q9H410', 'DSN1')
('P15924', 'DSP')
('Q9NZW4', 'DSPP')
('Q03001', 'DST')
('P60981', 'DSTN')
('Q6XUX3', 'DSTYK')
('Q8TEA8', 'DTD1')
('Q96FN9', 'DTD2')
('Q6ZMT9', 'DTHD1')
('Q9NZJ0', 'DTL')
('Q9Y4J8', 'DTNA')
('O60941', 'DTNB')
('Q96EV8', 'DTNBP1')
('Q8N5C7', 'DTWD1')
('Q8NBA8', 'DTWD2')
('Q86Y01', 'DTX1')
('Q86UW9', 'DTX2')
('Q8N9I9', 'DTX3')
('Q8TDB6', 'DTX3L')
('Q9Y2E6', 'DTX4')
('P23919', 'DTYMK')
('Q9NRD9', 'D

('P26885', 'FKBP2')
('Q00688', 'FKBP3')
('Q02790', 'FKBP4')
('Q13451', 'FKBP5')
('O75344', 'FKBP6')
('Q9Y680', 'FKBP7')
('Q14318', 'FKBP8')
('O95302', 'FKBP9')
('Q75LS8', 'FKBP9P1')
('Q96AY3', 'FKBP10')
('Q9NYL4', 'FKBP11')
('Q9NWM8', 'FKBP14')
('Q5T1M5', 'FKBP15')
('Q9UIM3', 'FKBPL')
('Q9H9S5', 'FKRP')
('O75072', 'FKTN')
('Q96Q35', 'FLACC1')
('Q8NFF5', 'FLAD1')
('Q8NFG4', 'FLCN')
('P20930', 'FLG')
('Q5D862', 'FLG2')
('Q01543', 'FLI1')
('Q13045', 'FLII')
('P21333', 'FLNA')
('O75369', 'FLNB')
('Q14315', 'FLNC')
('O75955', 'FLOT1')
('Q14254', 'FLOT2')
('Q9NZU1', 'FLRT1')
('O43155', 'FLRT2')
('Q9NZU0', 'FLRT3')
('P17948', 'FLT1')
('P36888', 'FLT3')
('P49771', 'FLT3LG')
('P35916', 'FLT4')
('Q9Y5Y0', 'FLVCR1')
('Q8TAF5', 'FLVCR1-DT')
('Q9UPI3', 'FLVCR2')
('Q4VC44', 'FLYWCH1')
('Q96CP2', 'FLYWCH2')
('Q96HJ9', 'FMC1')
('Q68DA7', 'FMN1')
('Q9NZ56', 'FMN2')
('O95466', 'FMNL1')
('Q96PY5', 'FMNL2')
('Q8IVF7', 'FMNL3')
('Q01740', 'FMO1')
('Q99518', 'FMO2')
('P31513', 'FMO3')
('P31512', 'FMO4')
('P

('Q86UP8', 'GTF2IRD2')
('Q6EKJ0', 'GTF2IRD2B')
('Q92664', 'GTF3A')
('Q12789', 'GTF3C1')
('Q8WUA4', 'GTF3C2')
('Q9Y5Q9', 'GTF3C3')
('Q9UKN8', 'GTF3C4')
('Q9Y5Q8', 'GTF3C5')
('Q969F1', 'GTF3C6')
('O00178', 'GTPBP1')
('Q9BX10', 'GTPBP2')
('Q969Y2', 'GTPBP3')
('Q9BZE4', 'GTPBP4')
('O43824', 'GTPBP6')
('Q8N3Z3', 'GTPBP8')
('A4D1E9', 'GTPBP10')
('Q86UQ5', 'GTSCR1')
('Q9NYZ3', 'GTSE1')
('Q8WW33', 'GTSF1')
('Q9H1H1', 'GTSF1L')
('P43080', 'GUCA1A')
('Q9UMX6', 'GUCA1B')
('O95843', 'GUCA1C')
('Q02747', 'GUCA2A')
('Q16661', 'GUCA2B')
('Q96NT3', 'GUCD1')
('Q02108', 'GUCY1A1')
('P33402', 'GUCY1A2')
('Q02153', 'GUCY1B1')
('O75343', 'GUCY1B2')
('P25092', 'GUCY2C')
('Q02846', 'GUCY2D')
('P51841', 'GUCY2F')
('Q8N442', 'GUF1')
('Q16774', 'GUK1')
('Q9UBP9', 'GULP1')
('P08236', 'GUSB')
('Q15486', 'GUSBP1')
('Q6P575', 'GUSBP11')
('Q7Z2Y8', 'GVINP1')
('Q3ZCU0', 'GVQW3')
('Q4G148', 'GXYLT1')
('A0PJZ3', 'GXYLT2')
('P46976', 'GYG1')
('O15488', 'GYG2')
('P02724', 'GYPA')
('P06028', 'GYPB')
('P04921', 'GYPC')
('P

('Q15842', 'KCNJ8')
('Q92806', 'KCNJ9')
('P78508', 'KCNJ10')
('Q14654', 'KCNJ11')
('Q14500', 'KCNJ12')
('O60928', 'KCNJ13')
('Q9UNX9', 'KCNJ14')
('Q99712', 'KCNJ15')
('Q9NPI9', 'KCNJ16')
('B7U540', 'KCNJ18')
('O00180', 'KCNK1')
('O95069', 'KCNK2')
('O14649', 'KCNK3')
('Q9NYG8', 'KCNK4')
('O95279', 'KCNK5')
('Q9Y257', 'KCNK6')
('Q9Y2U2', 'KCNK7')
('Q9NPC2', 'KCNK9')
('P57789', 'KCNK10')
('Q9HB15', 'KCNK12')
('Q9HB14', 'KCNK13')
('Q9H427', 'KCNK15')
('Q96T55', 'KCNK16')
('Q96T54', 'KCNK17')
('Q7Z418', 'KCNK18')
('Q12791', 'KCNMA1')
('Q16558', 'KCNMB1')
('Q9Y691', 'KCNMB2')
('Q9NPA1', 'KCNMB3')
('Q86W47', 'KCNMB4')
('Q92952', 'KCNN1')
('Q9H2S1', 'KCNN2')
('Q9UGI6', 'KCNN3')
('O15554', 'KCNN4')
('P51787', 'KCNQ1')
('Q9H478', 'KCNQ1DN')
('O43526', 'KCNQ2')
('O43525', 'KCNQ3')
('P56696', 'KCNQ4')
('Q9NR82', 'KCNQ5')
('Q8N5I3', 'KCNRG')
('Q96KK3', 'KCNS1')
('Q9ULS6', 'KCNS2')
('Q9BQ31', 'KCNS3')
('Q5JUK3', 'KCNT1')
('Q6UVM3', 'KCNT2')
('A8MYU2', 'KCNU1')
('Q6PIU1', 'KCNV1')
('Q8TDN2', 'KCNV2'

('Q86X40', 'LRRC28')
('Q8WV35', 'LRRC29')
('A6NM36', 'LRRC30')
('Q6UY01', 'LRRC31')
('Q14392', 'LRRC32')
('Q8IZ02', 'LRRC34')
('Q1X8D7', 'LRRC36')
('A6NMS7', 'LRRC37A')
('A6NM11', 'LRRC37A2')
('O60309', 'LRRC37A3')
('Q49AS3', 'LRRC37A5P')
('Q96QE4', 'LRRC37B')
('Q5VT99', 'LRRC38')
('Q96DD0', 'LRRC39')
('Q9H9A6', 'LRRC40')
('Q15345', 'LRRC41')
('Q9Y546', 'LRRC42')
('Q8N309', 'LRRC43')
('Q96CN5', 'LRRC45')
('Q96FV0', 'LRRC46')
('Q8N1G4', 'LRRC47')
('Q8IUZ0', 'LRRC49')
('Q8N7C0', 'LRRC52')
('A6NM62', 'LRRC53')
('Q6ZSA7', 'LRRC55')
('Q8IYG6', 'LRRC56')
('Q8N9N7', 'LRRC57')
('Q96CX6', 'LRRC58')
('Q96AG4', 'LRRC59')
('Q9BV99', 'LRRC61')
('Q05C16', 'LRRC63')
('Q68CR7', 'LRRC66')
('Q6ZNQ3', 'LRRC69')
('Q7Z2Q7', 'LRRC70')
('Q8N4P6', 'LRRC71')
('A6NJI9', 'LRRC72')
('Q5JTD7', 'LRRC73')
('Q0VAA2', 'LRRC74A')
('Q6ZQY2', 'LRRC74B')
('Q8NAA5', 'LRRC75A')
('Q2VPJ9', 'LRRC75B')
('Q9C099', 'LRRCC1')
('A4D1F6', 'LRRD1')
('Q32MZ4', 'LRRFIP1')
('Q9Y608', 'LRRFIP2')
('Q96JM4', 'LRRIQ1')
('A6PVS8', 'LRRIQ3')

('O95182', 'NDUFA7')
('P51970', 'NDUFA8')
('Q16795', 'NDUFA9')
('O95299', 'NDUFA10')
('Q86Y39', 'NDUFA11')
('Q9UI09', 'NDUFA12')
('Q9P0J0', 'NDUFA13')
('O14561', 'NDUFAB1')
('Q9Y375', 'NDUFAF1')
('Q8N183', 'NDUFAF2')
('Q9BU61', 'NDUFAF3')
('Q9P032', 'NDUFAF4')
('Q5TEU4', 'NDUFAF5')
('Q330K2', 'NDUFAF6')
('Q7L592', 'NDUFAF7')
('A1L188', 'NDUFAF8')
('O75438', 'NDUFB1')
('O95178', 'NDUFB2')
('O43676', 'NDUFB3')
('O95168', 'NDUFB4')
('O43674', 'NDUFB5')
('O95139', 'NDUFB6')
('P17568', 'NDUFB7')
('O95169', 'NDUFB8')
('Q9Y6M9', 'NDUFB9')
('O96000', 'NDUFB10')
('Q9NX14', 'NDUFB11')
('O43677', 'NDUFC1')
('O95298', 'NDUFC2')
('E9PQ53', 'NDUFC2-KCTD14')
('P28331', 'NDUFS1')
('O75306', 'NDUFS2')
('O75489', 'NDUFS3')
('O43181', 'NDUFS4')
('O43920', 'NDUFS5')
('O75380', 'NDUFS6')
('O75251', 'NDUFS7')
('O00217', 'NDUFS8')
('P49821', 'NDUFV1')
('P19404', 'NDUFV2')
('P56181', 'NDUFV3')
('P20929', 'NEB')
('O76041', 'NEBL')
('Q8N987', 'NECAB1')
('Q7Z6G3', 'NECAB2')
('Q96P71', 'NECAB3')
('Q8NC96', 'NECAP

('Q9Y5F6', 'PCDHGC5')
('Q9H1Q7', 'PCED1A')
('Q96HM7', 'PCED1B')
('O94913', 'PCF11')
('Q9BSM1', 'PCGF1')
('P35227', 'PCGF2')
('Q3KNV8', 'PCGF3')
('Q86SE9', 'PCGF5')
('Q9BYE7', 'PCGF6')
('Q5JVF3', 'PCID2')
('Q9H4Z3', 'PCIF1')
('P35558', 'PCK1')
('Q16822', 'PCK2')
('Q15004', 'PCLAF')
('Q9Y6V0', 'PCLO')
('Q15154', 'PCM1')
('P22061', 'PCMT1')
('Q96MG8', 'PCMTD1')
('Q9NV79', 'PCMTD2')
('P12004', 'PCNA')
('Q8WW12', 'PCNP')
('O95613', 'PCNT')
('Q96RV3', 'PCNX1')
('A6NKB5', 'PCNX2')
('Q9H6A9', 'PCNX3')
('Q63HM2', 'PCNX4')
('Q15113', 'PCOLCE')
('Q9UKZ9', 'PCOLCE2')
('Q58A44', 'PCOTH')
('Q8IVA1', 'PCP2')
('P48539', 'PCP4')
('A6NKN8', 'PCP4L1')
('P29120', 'PCSK1')
('Q9UHG2', 'PCSK1N')
('P16519', 'PCSK2')
('Q6UW60', 'PCSK4')
('Q92824', 'PCSK5')
('P29122', 'PCSK6')
('Q16549', 'PCSK7')
('Q8NBP7', 'PCSK9')
('Q9UKL6', 'PCTP')
('Q9UHG3', 'PCYOX1')
('Q8NBM8', 'PCYOX1L')
('P49585', 'PCYT1A')
('Q9Y5K3', 'PCYT1B')
('Q99447', 'PCYT2')
('Q13442', 'PDAP1')
('P20941', 'PDC')
('Q15116', 'PDCD1')
('Q9BQ51', 'PDCD

('Q99873', 'PRMT1')
('P55345', 'PRMT2')
('O60678', 'PRMT3')
('O14744', 'PRMT5')
('Q96LA8', 'PRMT6')
('Q9NVM4', 'PRMT7')
('Q9NR22', 'PRMT8')
('Q6P2P2', 'PRMT9')
('Q9UKY0', 'PRND')
('F7VJQ1', 'PRNP')
('P04156', 'PRNP')
('Q86SH4', 'PRNT')
('E7EW31', 'PROB1')
('P04070', 'PROC')
('Q8NCQ7', 'PROCA1')
('Q9UNN8', 'PROCR')
('O43272', 'PRODH')
('Q9UF12', 'PRODH2')
('P58294', 'PROK1')
('Q9HC23', 'PROK2')
('Q8TCW9', 'PROKR1')
('Q8NFJ6', 'PROKR2')
('O43490', 'PROM1')
('Q8N271', 'PROM2')
('O75360', 'PROP1')
('O15091', 'PRORP')
('A6NEY8', 'PRORSD1P')
('Q9H606', 'PRORY')
('P07225', 'PROS1')
('Q86XN7', 'PROSER1')
('Q86WR7', 'PROSER2')
('Q2NL68', 'PROSER3')
('Q92786', 'PROX1')
('Q3B8N5', 'PROX2')
('P22891', 'PROZ')
('O43395', 'PRPF3')
('O43172', 'PRPF4')
('Q13523', 'PRPF4B')
('O94906', 'PRPF6')
('Q6P2Q9', 'PRPF8')
('Q99633', 'PRPF18')
('Q9UMS4', 'PRPF19')
('Q8WWY3', 'PRPF31')
('Q8NAV1', 'PRPF38A')
('Q5VTL8', 'PRPF38B')
('Q86UA1', 'PRPF39')
('O75400', 'PRPF40A')
('Q6NWY9', 'PRPF40B')
('P41219', 'PRPH')
(

('Q14151', 'SAFB2')
('P10523', 'SAG')
('Q9NXZ1', 'SAGE1')
('A6NJ88', 'SAGE2P')
('Q9NSC2', 'SALL1')
('Q9Y467', 'SALL2')
('Q9BXA9', 'SALL3')
('Q9UJQ4', 'SALL4')
('Q6SPF0', 'SAMD1')
('Q8N6K7', 'SAMD3')
('Q9UPU9', 'SAMD4A')
('Q5PRF9', 'SAMD4B')
('Q5TGI4', 'SAMD5')
('Q7Z3H4', 'SAMD7')
('Q96LT4', 'SAMD8')
('Q5K651', 'SAMD9')
('Q8IVG5', 'SAMD9L')
('Q9BYL1', 'SAMD10')
('Q96NU1', 'SAMD11')
('Q8N8I0', 'SAMD12')
('Q5VXD3', 'SAMD13')
('Q8IZD0', 'SAMD14')
('Q9P1V8', 'SAMD15')
('Q9Y3Z3', 'SAMHD1')
('Q9Y512', 'SAMM50')
('Q9NSI8', 'SAMSN1')
('O00422', 'SAP18')
('Q8TEE9', 'SAP25')
('O75446', 'SAP30')
('Q9UHR5', 'SAP30BP')
('Q9HAJ7', 'SAP30L')
('Q9H0E3', 'SAP130')
('Q5SSQ6', 'SAPCD1')
('Q86UD0', 'SAPCD2')
('Q9NR31', 'SAR1A')
('Q9Y6B6', 'SAR1B')
('Q96BY9', 'SARAF')
('Q9UL12', 'SARDH')
('Q6SZW1', 'SARM1')
('P82979', 'SARNP')
('P49591', 'SARS1')
('Q9NP81', 'SARS2')
('O43290', 'SART1')
('Q15020', 'SART3')
('O94885', 'SASH1')
('O75995', 'SASH3')
('Q6UVJ0', 'SASS6')
('P21673', 'SAT1')
('Q96F10', 'SAT2')
('Q01

('Q2TAY7', 'SMU1')
('Q53HV7', 'SMUG1')
('Q9HCE7', 'SMURF1')
('Q9HAU4', 'SMURF2')
('Q8NB12', 'SMYD1')
('Q9NRG4', 'SMYD2')
('Q9H7B4', 'SMYD3')
('Q8IYR2', 'SMYD4')
('Q6GMV2', 'SMYD5')
('O95863', 'SNAI1')
('O43623', 'SNAI2')
('Q3KNW1', 'SNAI3')
('O00161', 'SNAP23')
('P60880', 'SNAP25')
('O95721', 'SNAP29')
('Q5SQN1', 'SNAP47')
('O60641', 'SNAP91')
('Q16533', 'SNAPC1')
('Q13487', 'SNAPC2')
('Q92966', 'SNAPC3')
('Q5SXM2', 'SNAPC4')
('O75971', 'SNAPC5')
('O95295', 'SNAPIN')
('P37840', 'SNCA')
('Q9Y6H5', 'SNCAIP')
('Q16143', 'SNCB')
('O76070', 'SNCG')
('Q7KZF4', 'SND1')
('Q9HBX3', 'SND1-IT1')
('Q8TER0', 'SNED1')
('Q96H20', 'SNF8')
('Q9BXW3', 'SNHG12')
('P0DPA3', 'SNHG28')
('Q8TAD8', 'SNIP1')
('O75324', 'SNN')
('Q6UX34', 'SNORC')
('O15079', 'SNPH')
('Q9NRH2', 'SNRK')
('Q9BV90', 'SNRNP25')
('Q8WVK2', 'SNRNP27')
('Q16560', 'SNRNP35')
('Q96DI7', 'SNRNP40')
('Q6IEG0', 'SNRNP48')
('P08621', 'SNRNP70')
('O75643', 'SNRNP200')
('P09012', 'SNRPA')
('P09661', 'SNRPA1')
('P14678', 'SNRPB')
('P08579', 'SNR

('Q9H1K6', 'TLNRD1')
('Q15399', 'TLR1')
('O60603', 'TLR2')
('O15455', 'TLR3')
('O00206', 'TLR4')
('O60602', 'TLR5')
('Q9Y2C9', 'TLR6')
('Q9NYK1', 'TLR7')
('Q9NR97', 'TLR8')
('Q9NR96', 'TLR9')
('Q9BXR5', 'TLR10')
('P31314', 'TLX1')
('P0CAT3', 'TLX1NB')
('O43763', 'TLX2')
('O43711', 'TLX3')
('Q9BX74', 'TM2D1')
('Q9BX73', 'TM2D2')
('Q9BRN9', 'TM2D3')
('P30408', 'TM4SF1')
('P48230', 'TM4SF4')
('O14894', 'TM4SF5')
('Q96CE8', 'TM4SF18')
('Q96DZ7', 'TM4SF19')
('Q53R12', 'TM4SF20')
('Q9BZW5', 'TM6SF1')
('Q9BZW4', 'TM6SF2')
('O76062', 'TM7SF2')
('Q9NS93', 'TM7SF3')
('O15321', 'TM9SF1')
('Q99805', 'TM9SF2')
('Q9HD45', 'TM9SF3')
('Q92544', 'TM9SF4')
('Q9Y2S6', 'TMA7')
('Q96EY4', 'TMA16')
('Q969X1', 'TMBIM1')
('Q9HC24', 'TMBIM4')
('P55061', 'TMBIM6')
('Q8TDI8', 'TMC1')
('Q8TDI7', 'TMC2')
('Q7Z5M5', 'TMC3')
('Q7Z404', 'TMC4')
('Q6UXY8', 'TMC5')
('Q7Z403', 'TMC6')
('Q7Z402', 'TMC7')
('Q8IU68', 'TMC8')
('O94876', 'TMCC1')
('O75069', 'TMCC2')
('Q9ULS5', 'TMCC3')
('Q9UM00', 'TMCO1')
('Q7Z6W1', 'TMCO2')

('Q6R6M4', 'USP17L2')
('A6NCW0', 'USP17L3')
('A6NCW7', 'USP17L4')
('A8MUK1', 'USP17L5')
('Q6QN14', 'USP17L6P')
('P0C7H9', 'USP17L7')
('P0C7I0', 'USP17L8')
('C9JJH3', 'USP17L10')
('C9JVI0', 'USP17L11')
('C9JPN9', 'USP17L12')
('C9JLJ4', 'USP17L13')
('C9J2P7', 'USP17L15')
('D6RBQ6', 'USP17L17')
('D6R9N7', 'USP17L18')
('D6RCP7', 'USP17L19')
('D6RJB6', 'USP17L20')
('D6R901', 'USP17L21')
('D6RA61', 'USP17L22')
('D6RBM5', 'USP17L23')
('Q0WX57', 'USP17L30')
('Q9UMW8', 'USP18')
('O94966', 'USP19')
('Q9Y2K6', 'USP20')
('Q9UK80', 'USP21')
('Q9UPT9', 'USP22')
('Q9UPU5', 'USP24')
('Q9UHP3', 'USP25')
('Q9BXU7', 'USP26')
('A6NNY8', 'USP27X')
('Q96RU2', 'USP28')
('Q9HBJ7', 'USP29')
('Q70CQ3', 'USP30')
('Q70CQ4', 'USP31')
('Q8NFA0', 'USP32')
('Q8TEY7', 'USP33')
('Q70CQ2', 'USP34')
('Q9P2H5', 'USP35')
('Q9P275', 'USP36')
('Q86T82', 'USP37')
('Q8NB14', 'USP38')
('Q53GS9', 'USP39')
('Q9NVE5', 'USP40')
('Q3LFD5', 'USP41')
('Q9H9J4', 'USP42')
('Q70EL4', 'USP43')
('Q9H0E7', 'USP44')
('Q70EL2', 'USP45')
('P62

('Q6ZNA1', 'ZNF836')
('Q96EG3', 'ZNF837')
('A8K0R7', 'ZNF839')
('A6NDX5', 'ZNF840P')
('Q6ZN19', 'ZNF841')
('Q8N446', 'ZNF843')
('Q08AG5', 'ZNF844')
('Q96IR2', 'ZNF845')
('Q147U1', 'ZNF846')
('A8MQ14', 'ZNF850')
('Q6ZMS4', 'ZNF852')
('P0CG23', 'ZNF853')
('A6NHJ4', 'ZNF860')
('O60384', 'ZNF861P')
('O60290', 'ZNF862')
('P0CJ78', 'ZNF865')
('P10072', 'ZNF875')
('Q49A33', 'ZNF876P')
('C9JN71', 'ZNF878')
('B4DU55', 'ZNF879')
('Q6PDB4', 'ZNF880')
('P0CG24', 'ZNF883')
('P0CJ79', 'ZNF888')
('A8MT65', 'ZNF891')
('Q9P2E3', 'ZNFX1')
('O43257', 'ZNHIT1')
('Q9UHR6', 'ZNHIT2')
('Q15649', 'ZNHIT3')
('Q9NWK9', 'ZNHIT6')
('O60232', 'ZNRD2')
('Q8ND25', 'ZNRF1')
('Q8NHG8', 'ZNRF2')
('Q9ULT6', 'ZNRF3')
('Q8WWF5', 'ZNRF4')
('P60852', 'ZP1')
('Q05996', 'ZP2')
('P21754', 'ZP3')
('Q12836', 'ZP4')
('Q9BS86', 'ZPBP')
('Q6X784', 'ZPBP2')
('Q8TCW7', 'ZPLD1')
('O75312', 'ZPR1')
('Q9UGI0', 'ZRANB1')
('O95218', 'ZRANB2')
('Q5FWF4', 'ZRANB3')
('Q15696', 'ZRSR2')
('Q15695', 'ZRSR2P1')
('Q8NBB4', 'ZSCAN1')
('Q7Z7L9', 'Z

In [5]:
# Write usefull columns of intact
# file to dataframe:
with open(intact_file) as intact:
    intact_df = pd.read_table(intact,
                            usecols = ['#ID(s) interactor A',
                                       'ID(s) interactor B',
                                       'Alias(es) interactor A',
                                       'Alias(es) interactor B',
                                      'Publication Identifier(s)',
                                      'Confidence value(s)',
                                      'Taxid interactor A',
                                      'Taxid interactor B',
                                      'Type(s) interactor A',
                                      'Type(s) interactor B'],
                            dtype = str)
intact_df.columns = ['A_uni', 'B_uni', 'A', 'B', 'Ref', 'Tax_A', 'Tax_B', 'Conf_val', 'Type_A', 'Type_B']

# Select interactions that are
# human-human, protein-protein
# and have a confidence value > 0.4.
corr_taxon = 'taxid:9606(human)|taxid:9606(Homo sapiens)'
intact_df = intact_df.loc[(intact_df['Tax_A'] == corr_taxon) & (intact_df['Tax_B'] == corr_taxon)].drop_duplicates()

corr_type = 'psi-mi:"MI:0326"(protein)'
intact_df = intact_df.loc[(intact_df['Type_A'] == corr_type) & (intact_df['Type_B'] == corr_type)].drop_duplicates()

intact_df['Conf_val'] = intact_df['Conf_val'].map(lambda x: float(x[-4:]))
intact_df = intact_df.loc[intact_df['Conf_val'] > 0.40]

# 
def uniprot_hugo(some_str):
    method_obj = re.search(r'[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}', some_str)
    if method_obj == None:
        return np.NaN
    else:
        uni_id = method_obj.group(0)
        if uni_id in hugo_uniprot_dict.keys():
            return hugo_uniprot_dict[uni_id]
        else:
            return np.NaN

def filter_hugo(some_str):
    list_str = some_str.split('|')
    for index, element in enumerate(list_str):
        if '(gene name)' in element:
            gene_name = element[10:-11]
            break
        elif index + 1 == len(list_str):
            gene_name = np.NaN
    return gene_name
        
intact_df[['A', 'B']] = intact_df[['A', 'B']].applymap(filter_hugo)
intact_df[['A_uni', 'B_uni']] = intact_df[['A_uni', 'B_uni']].applymap(uniprot_hugo)

intact_df['A_'] = [x if (type(x) == str) else y if isinstance(y, str) else 'No gene name' for x, y in zip(list(intact_df['A_uni']), list(intact_df['A']))]
intact_df['B_'] = [x if (type(x) == str) else y if isinstance(y, str) else 'No gene name' for x, y in zip(list(intact_df['B_uni']), list(intact_df['B']))]

intact_df = intact_df.loc[:, ['A_', 'B_', 'Ref', 'Conf_val']].rename(columns = {'A_':'A', 'B_':'B'})

def filter_pmid(some_str):
    method_obj = re.search(r'(?<=pubmed:)[0-9]+', some_str)
    if method_obj == None:
        return np.NaN
    else:
        return method_obj.group(0)

intact_df['PMID'] = intact_df['Ref'].map(filter_pmid)
intact_df.drop(columns = 'Ref', inplace = True)

new_rows = []
for index, row in intact_df.iterrows():
    a = row['A']
    b = row['B']
    if a > b:
        row['A'] = b
        row['B'] = a
    new_rows.append(row)
intact_df = pd.DataFrame(new_rows)

intact_a_tf = intact_df.loc[intact_df['A'].isin(tfs)].copy()
intact_a_tf_copy = intact_a_tf.groupby(['A', 'B']).nunique()

intact_b_tf = intact_df.loc[intact_df['B'].isin(tfs)].copy()
intact_b_tf_copy = intact_b_tf.groupby(['A', 'B']).nunique()

tf_int_list_a = [[index[0], index[1]] for index, row in intact_a_tf_copy.iterrows()]
tf_int_list_b = [[index[0], index[1]] for index, row in intact_b_tf_copy.iterrows()]

int_dict = {}
for i in tf_int_list_a:
    tf = i[0]
    interactor = i[1]
    df = intact_a_tf.loc[(intact_a_tf['A'] == tf) & (intact_a_tf['B'] == interactor)]
    pmids = [str(x) for x in set(df['PMID']) if str(x) != 'nan']
    newlist = [tf, ', '.join(pmids), set(df['Conf_val'])]
    if not interactor in int_dict.keys():
        int_dict[interactor] = [newlist]
    else:
        int_dict[interactor].append(newlist)

for i in tf_int_list_b:
    tf = i[1]
    interactor = i[0]
    df = intact_b_tf.loc[(intact_b_tf['B'] == tf) & (intact_b_tf['A'] == interactor)]
    pmids = [str(x) for x in set(df['PMID']) if str(x) != 'nan']
    newlist = [tf, ', '.join(pmids), set(df['Conf_val'])]
    if not interactor in int_dict.keys():
        int_dict[interactor] = [newlist]
    else:
        int_dict[interactor].append(newlist)
        
constructlist = []
for interactor, interactions in int_dict.items():
    for interaction in interactions:
        tf, pmids, conf_val = (interaction[i] for i in range(len(interaction)))
        constructlist.append([interactor, tf, pmids, list(conf_val)[0]])

interactors_intact = pd.DataFrame(constructlist, columns = ['Interactor', 'TF', 'PMIDS', 'Confidence Value'])#.set_index('Interactor')
interactors_intact = interactors_intact.sort_values(by = ['Interactor', 'TF'])

intact_interactors = set(interactors_intact['Interactor'])
newrows = []
for interactor in intact_interactors:
    df = interactors_intact.loc[interactors_intact['Interactor'] == interactor]
    interacting_tfs = list(df['TF'])
    newrow = [interactor, ', '.join(interacting_tfs)]
    newrows.append(newrow)

interactors_intact_simple = pd.DataFrame(newrows, columns = ['Interactor', 'TFs'])
interactors_intact_simple = interactors_intact_simple.sort_values(by = ['Interactor', 'TFs'])

with open(tf_smallset_file) as tfs_small:
    tfs_small_set = set([x.strip() for x in tfs_small])
    new_col = []
    for index, row in interactors_intact_simple.iterrows():
        tfs = row['TFs'].split(', ')
        
        if any(x in tfs_small_set for x in tfs):
            new_col.append(1)
        else:
            new_col.append(0)
    interactors_intact_simple['Interaction with small set of TFs'] = new_col
    
interactors_intact.to_csv(folder_output + '04_tf_interactors_intact.csv', index = False)
interactors_intact_simple.to_csv(folder_output + '05_tf_interactors_intact_simple.csv', index = False)

print(len(set(interactors_intact['Interactor'])))
print(interactors_intact.head())
print('\n')
interactors_intact_simple

4262
                     Interactor       TF               PMIDS  Confidence Value
10849  "WUGSC:H_DJ0726N20.gs.b"    MEOX2                                  0.56
10850                      A1CF      REL            25416956              0.56
10851                      AAMP  BHLHE40            25416956              0.56
10852                     AANAT  BHLHE40  25910212, 25416956              0.72
10853                  AASDHPPT      CRX                                  0.56




Unnamed: 0,Interactor,TFs,Interaction with small set of TFs
653,"""WUGSC:H_DJ0726N20.gs.b""",MEOX2,1
4150,A1CF,REL,1
3061,AAMP,BHLHE40,1
3462,AANAT,BHLHE40,1
172,AASDHPPT,CRX,1
4125,AATF,"RELA, SP1",1
1933,ABCA1,NR1H2,1
1840,ABCC2,NFKB1,1
2342,ABCF3,THAP1,1
3727,ABHD11,"PATZ1, PITX1, PROP1, VENTX, ZIC1",1
