In [2]:
import pandas as pd
import numpy as np
import requests
import sys
import regex as re

In [3]:
# Load the combined cofactor mapping and collapse it to one row per Bgee ID;
# each remaining column becomes a list of the values seen for that ID.
cofactors = pd.read_csv("data/cofactors_mapped_combined.csv")
cofactors_unique = (
    cofactors.loc[
        :,
        [
            "Gene Name",
            "Complex",
            "Subcomplex or Module",
            "Own-complex paralog",
            "Other-complex Paralogues",
            "UniprotID",
            "Gene Names",
            "Bgee",
            "GeneID",
        ],
    ]
    .groupby("Bgee")
    .agg(list)
)
cofactors_unique.head()

Unnamed: 0_level_0,Gene Name,Complex,Subcomplex or Module,Own-complex paralog,Other-complex Paralogues,UniprotID,Gene Names,GeneID
Bgee,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ENSG00000005075,[POLR2J],[RNA Pol II],[nan],[nan],[nan],[P52435],[POLR2J POLR2J1],[5439.0]
ENSG00000005339,[CREBBP],[P300-CBP],[CBP],[nan],[nan],[Q92793],[CREBBP CBP],[1387.0]
ENSG00000006712,[PAF1],[Elongation],[nan],[nan],[nan],[Q8N7H5],[PAF1 PD2],[54623.0]
ENSG00000008838,[MED24],[Mediator],[[Tail]],[nan],[nan],[O75448],[MED24 ARC100 CRSP4 DRIP100 KIAA0130 THRAP4 TR...,[9862.0]
ENSG00000011007,[ELOA],[Elongation],[nan],[nan],[nan],[Q14241],[ELOA TCEB3 MSTP059],[6924.0]


In [4]:
def get_proteinatlas_specificity(ensg):
    """Query the Protein Atlas search_download API for RNA specificity/tau columns.

    Parameters
    ----------
    ensg : str
        Value substituted into the ``search=`` query parameter (the callers in
        this notebook pass Ensembl/Bgee gene IDs).

    Returns
    -------
    The decoded JSON payload on success. On any failure (connection error,
    timeout, HTTP error status, undecodable body, ...) the exception and
    ``ensg`` are printed and ``np.nan`` is returned instead of raising.
    """
    try:
        server = "https://www.proteinatlas.org/api/search_download.php?search={0}&format=json&columns=g,eg,rnats,rnatd,rnatss,rnatsm,rnascs,rnascd,rnascss,rnascsm,rnasnbs,rnasnbd,rnasnbss,rnasnbsm,t_RNA__tau,blood_RNA__tau,brain_RNA__tau,sc_RNA__tau,Brain_sn_RNA__tau&compress=no".format(ensg)

        # Without a timeout a single stalled connection would hang the whole batch.
        r = requests.get(server, headers={ "Content-Type" : "application/json"}, timeout=60)
        # Raises for 4xx/5xx responses (exactly the cases where r.ok is False);
        # the error is reported by the handler below.
        r.raise_for_status()
        return r.json()

    except Exception as e:
        print(e, ensg)
        # Lower-case np.nan: the np.NaN alias no longer exists in NumPy 2.x.
        return np.nan


In [5]:
# Single-cell RNA columns: every line pairs a human-readable column title with
# the API column identifier (the "sc_RNA_..." token at the end of the line).
labels = """
Single Cell Type RNA - Adipocytes [nTPM]	sc_RNA_Adipocytes
Single Cell Type RNA - Alveolar cells type 1 [nTPM]	sc_RNA_Alveolar_cells_type_1
Single Cell Type RNA - Alveolar cells type 2 [nTPM]	sc_RNA_Alveolar_cells_type_2
Single Cell Type RNA - Astrocytes [nTPM]	sc_RNA_Astrocytes
Single Cell Type RNA - B-cells [nTPM]	sc_RNA_B-cells
Single Cell Type RNA - Basal keratinocytes [nTPM]	sc_RNA_Basal_keratinocytes
Single Cell Type RNA - Basal prostatic cells [nTPM]	sc_RNA_Basal_prostatic_cells
Single Cell Type RNA - Basal respiratory cells [nTPM]	sc_RNA_Basal_respiratory_cells
Single Cell Type RNA - Basal squamous epithelial cells [nTPM]	sc_RNA_Basal_squamous_epithelial_cells
Single Cell Type RNA - Bipolar cells [nTPM]	sc_RNA_Bipolar_cells
Single Cell Type RNA - Breast glandular cells [nTPM]	sc_RNA_Breast_glandular_cells
Single Cell Type RNA - Breast myoepithelial cells [nTPM]	sc_RNA_Breast_myoepithelial_cells
Single Cell Type RNA - Cardiomyocytes [nTPM]	sc_RNA_Cardiomyocytes
Single Cell Type RNA - Cholangiocytes [nTPM]	sc_RNA_Cholangiocytes
Single Cell Type RNA - Ciliated cells [nTPM]	sc_RNA_Ciliated_cells
Single Cell Type RNA - Club cells [nTPM]	sc_RNA_Club_cells
Single Cell Type RNA - Collecting duct cells [nTPM]	sc_RNA_Collecting_duct_cells
Single Cell Type RNA - Cone photoreceptor cells [nTPM]	sc_RNA_Cone_photoreceptor_cells
Single Cell Type RNA - Cytotrophoblasts [nTPM]	sc_RNA_Cytotrophoblasts
Single Cell Type RNA - dendritic cells [nTPM]	sc_RNA_dendritic_cells
Single Cell Type RNA - Distal enterocytes [nTPM]	sc_RNA_Distal_enterocytes
Single Cell Type RNA - Distal tubular cells [nTPM]	sc_RNA_Distal_tubular_cells
Single Cell Type RNA - Ductal cells [nTPM]	sc_RNA_Ductal_cells
Single Cell Type RNA - Early spermatids [nTPM]	sc_RNA_Early_spermatids
Single Cell Type RNA - Endometrial stromal cells [nTPM]	sc_RNA_Endometrial_stromal_cells
Single Cell Type RNA - Endothelial cells [nTPM]	sc_RNA_Endothelial_cells
Single Cell Type RNA - Enteroendocrine cells [nTPM]	sc_RNA_Enteroendocrine_cells
Single Cell Type RNA - Erythroid cells [nTPM]	sc_RNA_Erythroid_cells
Single Cell Type RNA - Excitatory neurons [nTPM]	sc_RNA_Excitatory_neurons
Single Cell Type RNA - Exocrine glandular cells [nTPM]	sc_RNA_Exocrine_glandular_cells
Single Cell Type RNA - Extravillous trophoblasts [nTPM]	sc_RNA_Extravillous_trophoblasts
Single Cell Type RNA - Fibroblasts [nTPM]	sc_RNA_Fibroblasts
Single Cell Type RNA - Gastric mucus-secreting cells [nTPM]	sc_RNA_Gastric_mucus-secreting_cells
Single Cell Type RNA - Glandular and luminal cells [nTPM]	sc_RNA_Glandular_and_luminal_cells
Single Cell Type RNA - granulocytes [nTPM]	sc_RNA_granulocytes
Single Cell Type RNA - Granulosa cells [nTPM]	sc_RNA_Granulosa_cells
Single Cell Type RNA - Hepatocytes [nTPM]	sc_RNA_Hepatocytes
Single Cell Type RNA - Hofbauer cells [nTPM]	sc_RNA_Hofbauer_cells
Single Cell Type RNA - Horizontal cells [nTPM]	sc_RNA_Horizontal_cells
Single Cell Type RNA - Inhibitory neurons [nTPM]	sc_RNA_Inhibitory_neurons
Single Cell Type RNA - Intestinal goblet cells [nTPM]	sc_RNA_Intestinal_goblet_cells
Single Cell Type RNA - Ionocytes [nTPM]	sc_RNA_Ionocytes
Single Cell Type RNA - Kupffer cells [nTPM]	sc_RNA_Kupffer_cells
Single Cell Type RNA - Langerhans cells [nTPM]	sc_RNA_Langerhans_cells
Single Cell Type RNA - Late spermatids [nTPM]	sc_RNA_Late_spermatids
Single Cell Type RNA - Leydig cells [nTPM]	sc_RNA_Leydig_cells
Single Cell Type RNA - Lymphatic endothelial cells [nTPM]	sc_RNA_Lymphatic_endothelial_cells
Single Cell Type RNA - Macrophages [nTPM]	sc_RNA_Macrophages
Single Cell Type RNA - Melanocytes [nTPM]	sc_RNA_Melanocytes
Single Cell Type RNA - Mesothelial cells [nTPM]	sc_RNA_Mesothelial_cells
Single Cell Type RNA - Microglial cells [nTPM]	sc_RNA_Microglial_cells
Single Cell Type RNA - monocytes [nTPM]	sc_RNA_monocytes
Single Cell Type RNA - Mucus glandular cells [nTPM]	sc_RNA_Mucus_glandular_cells
Single Cell Type RNA - Muller glia cells [nTPM]	sc_RNA_Muller_glia_cells
Single Cell Type RNA - NK-cells [nTPM]	sc_RNA_NK-cells
Single Cell Type RNA - Oligodendrocyte precursor cells [nTPM]	sc_RNA_Oligodendrocyte_precursor_cells
Single Cell Type RNA - Oligodendrocytes [nTPM]	sc_RNA_Oligodendrocytes
Single Cell Type RNA - Oocytes [nTPM]	sc_RNA_Oocytes
Single Cell Type RNA - Ovarian stromal cells [nTPM]	sc_RNA_Ovarian_stromal_cells
Single Cell Type RNA - Pancreatic endocrine cells [nTPM]	sc_RNA_Pancreatic_endocrine_cells
Single Cell Type RNA - Paneth cells [nTPM]	sc_RNA_Paneth_cells
Single Cell Type RNA - Peritubular cells [nTPM]	sc_RNA_Peritubular_cells
Single Cell Type RNA - Plasma cells [nTPM]	sc_RNA_Plasma_cells
Single Cell Type RNA - Prostatic glandular cells [nTPM]	sc_RNA_Prostatic_glandular_cells
Single Cell Type RNA - Proximal enterocytes [nTPM]	sc_RNA_Proximal_enterocytes
Single Cell Type RNA - Proximal tubular cells [nTPM]	sc_RNA_Proximal_tubular_cells
Single Cell Type RNA - Rod photoreceptor cells [nTPM]	sc_RNA_Rod_photoreceptor_cells
Single Cell Type RNA - Salivary duct cells [nTPM]	sc_RNA_Salivary_duct_cells
Single Cell Type RNA - Schwann cells [nTPM]	sc_RNA_Schwann_cells
Single Cell Type RNA - Secretory cells [nTPM]	sc_RNA_Secretory_cells
Single Cell Type RNA - Serous glandular cells [nTPM]	sc_RNA_Serous_glandular_cells
Single Cell Type RNA - Sertoli cells [nTPM]	sc_RNA_Sertoli_cells
Single Cell Type RNA - Skeletal myocytes [nTPM]	sc_RNA_Skeletal_myocytes
Single Cell Type RNA - Smooth muscle cells [nTPM]	sc_RNA_Smooth_muscle_cells
Single Cell Type RNA - Spermatocytes [nTPM]	sc_RNA_Spermatocytes
Single Cell Type RNA - Spermatogonia [nTPM]	sc_RNA_Spermatogonia
Single Cell Type RNA - Squamous epithelial cells [nTPM]	sc_RNA_Squamous_epithelial_cells
Single Cell Type RNA - Suprabasal keratinocytes [nTPM]	sc_RNA_Suprabasal_keratinocytes
Single Cell Type RNA - Syncytiotrophoblasts [nTPM]	sc_RNA_Syncytiotrophoblasts
Single Cell Type RNA - T-cells [nTPM]	sc_RNA_T-cells
Single Cell Type RNA - Undifferentiated cells [nTPM]	sc_RNA_Undifferentiated_cells
"""

# Keep only the identifiers (from "sc_" to the end of each line) and join them
# with commas, ready to be placed in the ``columns=`` part of the request URL.
sclabels = ",".join(re.findall("(sc_.*)", labels))

def get_proteinatlas_scRNA(ensg):
    """Query the Protein Atlas search_download API for the single-cell RNA columns.

    The requested columns are ``g``, ``eg`` and every identifier held in the
    module-level ``sclabels`` string.

    Parameters
    ----------
    ensg : str
        Value substituted into the ``search=`` query parameter (the callers in
        this notebook pass Ensembl/Bgee gene IDs).

    Returns
    -------
    The decoded JSON payload on success. On any failure (connection error,
    timeout, HTTP error status, undecodable body, ...) the exception and
    ``ensg`` are printed and ``np.nan`` is returned instead of raising.
    """
    try:
        server = "https://www.proteinatlas.org/api/search_download.php?search={0}&format=json&columns=g,eg,{1}&compress=no".format(ensg, sclabels)

        # Without a timeout a single stalled connection would hang the whole batch.
        r = requests.get(server, headers={ "Content-Type" : "application/json"}, timeout=60)
        # Raises for 4xx/5xx responses (exactly the cases where r.ok is False);
        # the error is reported by the handler below.
        r.raise_for_status()
        return r.json()

    except Exception as e:
        print(e, ensg)
        # Lower-case np.nan: the np.NaN alias no longer exists in NumPy 2.x.
        return np.nan

In [6]:
def generate_proteinatlaslist(lst, apifunc, filename):
    """Call ``apifunc`` for every ID, stack the results and save them as CSV.

    Parameters
    ----------
    lst : iterable
        IDs passed one at a time to ``apifunc``.
    apifunc : callable
        Takes one ID and returns something ``pd.DataFrame`` accepts, or a bare
        NaN / ``None`` to signal that the lookup failed.
    filename : str
        Destination path handed to ``DataFrame.to_csv``.

    Returns
    -------
    pd.DataFrame
        Row-wise concatenation of the per-ID frames (each frame keeps its own
        index). Empty when ``lst`` is empty or every lookup failed.
    """
    frames = []
    for ensg in lst:
        result = apifunc(ensg)
        # A failed lookup comes back as a scalar NaN; pd.DataFrame(nan) would
        # raise and abort the whole batch, so skip that ID instead.
        if result is None or (isinstance(result, float) and pd.isna(result)):
            continue
        frames.append(pd.DataFrame(result))

    # Concatenate once instead of re-copying the accumulated frame on every
    # iteration; pd.concat rejects an empty list, hence the fallback.
    maindf = pd.concat(frames) if frames else pd.DataFrame({})
    maindf.to_csv(filename)
    return maindf

In [7]:
%%script echo
# Cofactor run: query both Protein Atlas endpoints for every Bgee ID in the index
lst = cofactors_unique.index.tolist()
cofactorspecificity = generate_proteinatlaslist(
    lst,
    get_proteinatlas_specificity,
    "helperdata/cofactor_specificity.csv",
)
cofactor_scRNA = generate_proteinatlaslist(
    lst,
    get_proteinatlas_scRNA,
    "helperdata/cofactor_scRNA.csv",
)





In [27]:
# Read the ADs mapping table as tab-separated text with quote handling switched
# off (quoting=3), then strip every ";" from the values of its "Bgee" column.
# NOTE(review): a cell holding several ";"-separated IDs would end up as one
# concatenated string — confirm the column never contains more than one ID.
ads = pd.read_csv("../TF_list/maps/ADs_mapped.tsv", sep="\t", quoting=3)
adsBgee = [str(bgee_id).replace(";", "") for bgee_id in ads["Bgee"]]

In [None]:
%%script echo
# AD run: the same two queries, keyed by the cleaned Bgee IDs in adsBgee
ADspecificity = generate_proteinatlaslist(
    adsBgee,
    get_proteinatlas_specificity,
    "helperdata/AD_specificity.csv",
)
ADscRNA = generate_proteinatlaslist(
    adsBgee,
    get_proteinatlas_scRNA,
    "helperdata/AD_scRNA.csv",
)


In [29]:
# NOTE(review): this reads the same ADs_mapped.tsv file as the AD cell; an
# RD-specific mapping file was presumably intended — confirm the correct path
# before relying on the RD_* outputs.
rds = pd.read_csv(
        "../TF_list/maps/ADs_mapped.tsv",
        sep='\t',  # Specify the delimiter
        quoting=3,  # Ignore quotes (quote style: 3 = None)
)
# Build the ID list from `rds` itself. Reading from `ads` here left `rds`
# unused and made this cell fail unless the AD cell had been run first.
rdsBgee = [str(x).replace(";","") for x in rds["Bgee"]]

In [30]:
%%script echo
# Run on all RDs
# Results get their own RD* names so that enabling this cell does not overwrite
# the AD results held in ADspecificity / ADscRNA.
RDspecificity = generate_proteinatlaslist(rdsBgee, get_proteinatlas_specificity, "helperdata/RD_specificity.csv")
RDscRNA = generate_proteinatlaslist(rdsBgee, get_proteinatlas_scRNA, "helperdata/RD_scRNA.csv")


---
# Genes with expression similar to MED12L

In [33]:
%%script echo
# Single-cell expression for every Ensembl ID listed in the cluster 34 table
cluster34 = pd.read_csv("data/proteinatlas-cluster34.tsv", sep="\t")
cluster34ensembl = cluster34.loc[:, "Ensembl"]
cluster34scRNA = generate_proteinatlaslist(
    cluster34ensembl,
    get_proteinatlas_scRNA,
    "helperdata/cluster34_scRNA.csv",
)


In [34]:
# Single-cell expression for every Ensembl ID in the Protein Atlas TF table.
# The helper is called once per row of the "Ensembl" column.
proteinatlasTFs = pd.read_csv("data/proteinatlas-TFs.tsv", sep="\t")
proteinatlasTFsensembl = proteinatlasTFs.loc[:, "Ensembl"]
proteinatlasTFsscRNA = generate_proteinatlaslist(
    proteinatlasTFsensembl,
    get_proteinatlas_scRNA,
    "helperdata/proteinatlasTF_scRNA.csv",
)


In [44]:
# Annotation column identifiers requested in addition to "g" and "eg".
annotationlabels = "gd,pc,upbp,up_mf,scl,ecsinglecell,interactions"

def get_proteinatlas_annotations(ensg):
    """Query the Protein Atlas search_download API for the annotation columns.

    The requested columns are ``g``, ``eg`` and the identifiers listed in the
    module-level ``annotationlabels`` string.

    Parameters
    ----------
    ensg : str
        Value substituted into the ``search=`` query parameter (the callers in
        this notebook pass Ensembl gene IDs).

    Returns
    -------
    The decoded JSON payload on success. On any failure (connection error,
    timeout, HTTP error status, undecodable body, ...) the exception and
    ``ensg`` are printed and ``np.nan`` is returned instead of raising.
    """
    try:
        server = "https://www.proteinatlas.org/api/search_download.php?search={0}&format=json&columns=g,eg,{1}&compress=no".format(ensg, annotationlabels)

        # Without a timeout a single stalled connection would hang the whole batch.
        r = requests.get(server, headers={ "Content-Type" : "application/json"}, timeout=60)
        # Raises for 4xx/5xx responses (exactly the cases where r.ok is False);
        # the error is reported by the handler below.
        r.raise_for_status()
        return r.json()

    except Exception as e:
        print(e, ensg)
        # Lower-case np.nan: the np.NaN alias no longer exists in NumPy 2.x.
        return np.nan

In [47]:
%%script echo
# Hard-coded Ensembl gene IDs for the "MED12L similar expression" set named by
# the section heading; how the set was selected is not shown in this notebook.
# NOTE(review): this cell starts with `%%script echo`, presumably so the
# download below is skipped on a normal run — confirm before re-enabling.
MED12L_similar = ['ENSG00000198846',
 'ENSG00000151623',
 'ENSG00000174306',
 'ENSG00000172379',
 'ENSG00000106536',
 'ENSG00000143190',
 'ENSG00000010803',
 'ENSG00000106459',
 'ENSG00000080298',
 'ENSG00000198105',
 'ENSG00000181827',
 'ENSG00000135365',
 'ENSG00000170485',
 'ENSG00000111783',
 'ENSG00000178764',
 'ENSG00000213096',
 'ENSG00000215421',
 'ENSG00000183621',
 'ENSG00000196597',
 'ENSG00000198795',
 'ENSG00000166432',
 'ENSG00000178662',
 'ENSG00000091656',
 'ENSG00000106571',
 'ENSG00000010818',
 'ENSG00000165495',
 'ENSG00000173041',
 'ENSG00000147421',
 'ENSG00000180357',
 'ENSG00000124496',
 'ENSG00000057935',
 'ENSG00000127124',
 'ENSG00000135164',
 'ENSG00000070476',
 'ENSG00000172733',
 'ENSG00000135457',
 'ENSG00000124440',
 'ENSG00000142611',
 'ENSG00000244405',
 'ENSG00000198815',
 'ENSG00000137871',
 'ENSG00000111249',
 'ENSG00000168916',
 'ENSG00000128000',
 'ENSG00000185238',
 'ENSG00000173258',
 'ENSG00000197008',
 'ENSG00000205683',
 'ENSG00000176165',
 'ENSG00000077092',
 'ENSG00000143614',
 'ENSG00000140396',
 'ENSG00000030419',
 'ENSG00000112242',
 'ENSG00000175322',
 'ENSG00000250312',
 'ENSG00000095951',
 'ENSG00000148200',
 'ENSG00000140382',
 'ENSG00000197037',
 'ENSG00000102908',
 'ENSG00000187605',
 'ENSG00000256223',
 'ENSG00000133794',
 'ENSG00000138311',
 'ENSG00000174197',
 'ENSG00000196268',
 'ENSG00000119042',
 'ENSG00000198169',
 'ENSG00000178177',
 'ENSG00000006468',
 'ENSG00000165259',
 'ENSG00000138738',
 'ENSG00000036549',
 'ENSG00000112182',
 'ENSG00000184486',
 'ENSG00000107249',
 'ENSG00000006576',
 'ENSG00000101493',
 'ENSG00000198911',
 'ENSG00000117000',
 'ENSG00000130856',
 'ENSG00000117625',
 'ENSG00000074657',
 'ENSG00000186020',
 'ENSG00000177853',
 'ENSG00000121390']

# Fetch the annotation columns for each ID, write the combined table to
# helperdata/TF-annotations.csv, and leave the frame as the cell's last expression.
generate_proteinatlaslist(MED12L_similar, get_proteinatlas_annotations, "helperdata/TF-annotations.csv")




In [49]:
# Hard-coded Ensembl gene IDs. Judging by the variable name these are TFs found
# only in the Lambert list — TODO confirm provenance; it is not shown here.
lambertonlyTFensembls = ['ENSG00000151500',
 'ENSG00000249961',
 'ENSG00000159905',
 'ENSG00000182348',
 'ENSG00000267179',
 'ENSG00000168916',
 'ENSG00000160352',
 'ENSG00000221994',
 'ENSG00000106624',
 'ENSG00000168769',
 'ENSG00000163320',
 'ENSG00000127445',
 'ENSG00000064489',
 'ENSG00000062194',
 'ENSG00000267041',
 'ENSG00000136169',
 'ENSG00000261221',
 'ENSG00000025293',
 'ENSG00000255192',
 'ENSG00000227124',
 'ENSG00000102189',
 'ENSG00000176723',
 'ENSG00000010244',
 'ENSG00000229809',
 'ENSG00000134874',
 'ENSG00000198783',
 'ENSG00000204946',
 'ENSG00000186416',
 'ENSG00000115163',
 'ENSG00000169057',
 'ENSG00000187772',
 'ENSG00000176542',
 'ENSG00000123636',
 'ENSG00000205683',
 'ENSG00000143033',
 'ENSG00000036549',
 'ENSG00000006704',
 'ENSG00000144747',
 'ENSG00000204060',
 'ENSG00000169955',
 'ENSG00000186918',
 'ENSG00000185420',
 'ENSG00000276644',
 'ENSG00000142409',
 'ENSG00000198715',
 'ENSG00000181638',
 'ENSG00000121413',
 'ENSG00000146285',
 'ENSG00000143379',
 'ENSG00000120832',
 'ENSG00000011332',
 'ENSG00000173825',
 'ENSG00000142528',
 'ENSG00000171467',
 'ENSG00000178917',
 'ENSG00000130254',
 'ENSG00000116539',
 'ENSG00000243678',
 'ENSG00000166987',
 'ENSG00000171604',
 'ENSG00000162664',
 'ENSG00000277258',
 'ENSG00000127989',
 'ENSG00000237765',
 'ENSG00000175550',
 'ENSG00000010803',
 'ENSG00000251369',
 'ENSG00000104976',
 'ENSG00000011243',
 'ENSG00000170515',
 'ENSG00000169689',
 'ENSG00000138380',
 'ENSG00000071655',
 'ENSG00000130711',
 'ENSG00000102901',
 'ENSG00000125482',
 'ENSG00000141258',
 'ENSG00000196233',
 'ENSG00000140632',
 'ENSG00000214575',
 'ENSG00000126733',
 'ENSG00000153975',
 'ENSG00000214022',
 'ENSG00000180346',
 'ENSG00000164296',
 'ENSG00000115041',
 'ENSG00000141579',
 'ENSG00000266265',
 'ENSG00000187595',
 'ENSG00000120963',
 'ENSG00000183340',
 'ENSG00000020256',
 'ENSG00000159140',
 'ENSG00000066697',
 'ENSG00000250312',
 'ENSG00000113761',
 'ENSG00000147183',
 'ENSG00000164299',
 'ENSG00000012223',
 'ENSG00000177125',
 'ENSG00000148300',
 'ENSG00000269343',
 'ENSG00000108509',
 'ENSG00000180357',
 'ENSG00000188779',
 'ENSG00000169989',
 'ENSG00000165061',
 'ENSG00000135365',
 'ENSG00000105127',
 'ENSG00000185670',
 'ENSG00000165655',
 'ENSG00000166454',
 'ENSG00000112511',
 'ENSG00000126705',
 'ENSG00000106948',
 'ENSG00000232040',
 'ENSG00000184271',
 'ENSG00000164334',
 'ENSG00000140993',
 'ENSG00000166432',
 'ENSG00000265763',
 'ENSG00000135100',
 'ENSG00000076108',
 'ENSG00000205189',
 'ENSG00000126464',
 'ENSG00000153207',
 'ENSG00000227059',
 'ENSG00000269067',
 'ENSG00000233757',
 'ENSG00000188313',
 'ENSG00000106554',
 'ENSG00000059122',
 'ENSG00000132604',
 'ENSG00000175809',
 'ENSG00000176182',
 'ENSG00000164631',
 'ENSG00000188070',
 'ENSG00000176407',
 'ENSG00000174428',
 'ENSG00000163516',
 'ENSG00000156374',
 'ENSG00000203326',
 'ENSG00000263001',
 'ENSG00000156469',
 'ENSG00000170903',
 'ENSG00000213588',
 'ENSG00000139793',
 'ENSG00000132024',
 'ENSG00000152217',
 'ENSG00000173894',
 'ENSG00000125817',
 'ENSG00000215356',
 'ENSG00000104885',
 'ENSG00000171735',
 'ENSG00000167962',
 'ENSG00000183779',
 'ENSG00000261787',
 'ENSG00000215474',
 'ENSG00000165244',
 'ENSG00000119403',
 'ENSG00000198824',
 'ENSG00000131116',
 'ENSG00000115816',
 'ENSG00000105991',
 'ENSG00000143157',
 'ENSG00000159592',
 'ENSG00000196275',
 'ENSG00000151657',
 'ENSG00000154655',
 'ENSG00000138073',
 'ENSG00000125520',
 'ENSG00000125826',
 'ENSG00000160007',
 'ENSG00000234616',
 'ENSG00000160633',
 'ENSG00000183434',
 'ENSG00000198945',
 'ENSG00000104221',
 'ENSG00000118418',
 'ENSG00000147601',
 'ENSG00000148411',
 'ENSG00000164684',
 'ENSG00000159588',
 'ENSG00000102984',
 'ENSG00000131381',
 'ENSG00000137947',
 'ENSG00000171634',
 'ENSG00000166268',
 'ENSG00000178177',
 'ENSG00000174446',
 'ENSG00000232237',
 'ENSG00000138311',
 'ENSG00000110777',
 'ENSG00000065526',
 'ENSG00000099364',
 'ENSG00000134046',
 'ENSG00000243660',
 'ENSG00000196767',
 'ENSG00000129071',
 'ENSG00000178928',
 'ENSG00000179886',
 'ENSG00000272602',
 'ENSG00000197579',
 'ENSG00000080603',
 'ENSG00000100207',
 'ENSG00000221944',
 'ENSG00000274529',
 'ENSG00000185238',
 'ENSG00000160336',
 'ENSG00000136936',
 'ENSG00000187605',
 'ENSG00000168772',
 'ENSG00000101457',
 'ENSG00000124203',
 'ENSG00000135148',
 'ENSG00000171169',
 'ENSG00000256229',
 'ENSG00000189308',
 'ENSG00000131914',
 'ENSG00000175279',
 'ENSG00000170396',
 'ENSG00000160062',
 'ENSG00000063438',
 'ENSG00000161642',
 'ENSG00000122085',
 'ENSG00000267281',
 'ENSG00000151789',
 'ENSG00000157933',
 'ENSG00000153814',
 'ENSG00000117505',
 'ENSG00000162601',
 'ENSG00000264668',
 'ENSG00000188981',
 'ENSG00000136603',
 'ENSG00000144331',
 'ENSG00000089094',
 'ENSG00000213793',
 'ENSG00000166349',
 'ENSG00000164185']

# This cell has no `%%script echo` guard, so it executes on every run: one HTTP
# request per ID above. The combined table is written to
# helperdata/lambertonlyTF-annotations.csv and, being the last expression,
# is also rendered as the cell output.
generate_proteinatlaslist(lambertonlyTFensembls, get_proteinatlas_annotations, "helperdata/lambertonlyTF-annotations.csv")

Unnamed: 0,Gene,Ensembl,Gene description,Protein class,Biological process,Molecular function,Subcellular location,Single cell expression cluster,Interactions
0,THYN1,ENSG00000151500,Thymocyte nuclear protein 1,[Predicted intracellular proteins],,,"[Nucleoplasm, Nucleoli fibrillar center, Cytosol]",Cluster 44: Oocytes - Unknown function,1
0,TERB1,ENSG00000249961,Telomere repeat binding bouquet formation prot...,"[Disease related genes, Human disease related ...",[Meiosis],,"[Nucleoplasm, Cell Junctions, Cytosol]",Cluster 79: Spermatocytes & Spermatogonia - Sp...,1
0,ZNF221,ENSG00000159905,Zinc finger protein 221,[Predicted intracellular proteins],"[Transcription, Transcription regulation]",[DNA-binding],[Nuclear bodies],Cluster 55: Non-specific - Transcription,3
0,ZNF804B,ENSG00000182348,Zinc finger protein 804B,[Predicted intracellular proteins],,,,Cluster 34: Neurons - Neuronal signaling,
0,ENSG00000267179,ENSG00000267179,Novel protein,[Predicted intracellular proteins],,,[Nucleoplasm],Cluster 7: T-cells - T-cell receptor,
...,...,...,...,...,...,...,...,...,...
0,ZNF385B,ENSG00000144331,Zinc finger protein 385B,[Predicted intracellular proteins],[Apoptosis],,[Nucleoli fibrillar center],Cluster 34: Neurons - Neuronal signaling,
0,KDM2B,ENSG00000089094,Lysine demethylase 2B,"[Enzymes, Predicted intracellular proteins]","[Transcription, Transcription regulation, Ubl ...","[Chromatin regulator, Dioxygenase, DNA-binding...",[Nucleoplasm],Cluster 14: Langerhans cells - Innate immune r...,26
0,ZNF888,ENSG00000213793,Zinc finger protein 888,[Predicted intracellular proteins],"[Transcription, Transcription regulation]",[DNA-binding],,Cluster 37: Glandular & Luminal cells - Unknow...,1
0,RAG1,ENSG00000166349,Recombination activating 1,"[Disease related genes, Enzymes, Human disease...","[DNA recombination, Ubl conjugation pathway]","[Chromatin regulator, DNA-binding, Endonucleas...","[Nucleoplasm, Nucleoli]",Cluster 41: Adipocytes & Endothelial cells - M...,12
