In [53]:
from rpy2.robjects.vectors import StrVector
import rpy2.robjects as robjects
import rpy2.robjects.packages as rpackages
from rpy2.robjects import r, pandas2ri
import pandas as pd

def cluster_profiler_KEGG(clusterProfiler, data):
    """Run a KEGG enrichment and return the significant terms as a DataFrame.

    Calls ``clusterProfiler.enrichKEGG`` with ``organism='hsa'``,
    ``keyType='kegg'``, ``pvalueCutoff=0.05`` and ``pAdjustMethod='BY'``, then
    copies selected columns of the returned object's ``result`` slot into a
    pandas DataFrame.

    Args:
        clusterProfiler: Handle to the R ``clusterProfiler`` package; only its
            ``enrichKEGG`` function is used here.
        data: Gene identifiers, forwarded unchanged to ``enrichKEGG``.

    Returns:
        pandas.DataFrame with the columns ``ID``, ``Description``,
        ``GeneRatio``, ``pvalue``, ``padjust``, ``GeneID`` and ``Count``,
        holding only the rows whose ``padjust`` is strictly below 0.05. Row
        labels are the row positions in the unfiltered result table.
    """
    enrichment = clusterProfiler.enrichKEGG(
        data, organism='hsa', keyType='kegg', pvalueCutoff=0.05,
        pAdjustMethod='BY')
    result = enrichment.slots['result']

    # Columns are fetched by their R name instead of by position so the
    # mapping does not depend on the column order of the result table.
    # NOTE(review): newer clusterProfiler releases appear to add columns to
    # this table, which would shift positional indices - confirm against the
    # installed version.
    column_names = (
        ('ID', 'ID'),
        ('Description', 'Description'),
        ('GeneRatio', 'GeneRatio'),
        ('pvalue', 'pvalue'),
        ('padjust', 'p.adjust'),
        ('GeneID', 'geneID'),
        ('Count', 'Count'),
    )

    df = pd.DataFrame(index=range(len(result.rx2('ID'))))
    for label, r_name in column_names:
        df[label] = result.rx2(r_name)

    return df[df['padjust'] < 0.05]

def cluster_profiler_GO_MF(clusterProfiler, data):
    """Run a GO enrichment with ``ont='MF'`` and return the significant terms.

    Calls ``clusterProfiler.enrichGO`` against ``'org.Hs.eg.db'`` with
    ``ont='MF'``, ``pvalueCutoff=0.05`` and ``pAdjustMethod='BY'``, then copies
    selected columns of the returned object's ``result`` slot into a pandas
    DataFrame.

    Args:
        clusterProfiler: Handle to the R ``clusterProfiler`` package; only its
            ``enrichGO`` function is used here.
        data: Gene identifiers, forwarded unchanged to ``enrichGO``.

    Returns:
        pandas.DataFrame with the columns ``ID``, ``Description``,
        ``GeneRatio``, ``pvalue``, ``padjust``, ``GeneID`` and ``Count``,
        holding only the rows whose ``padjust`` is strictly below 0.05. Row
        labels are the row positions in the unfiltered result table.
    """
    enrichment = clusterProfiler.enrichGO(
        data, 'org.Hs.eg.db', ont='MF', pvalueCutoff=0.05, pAdjustMethod='BY')
    result = enrichment.slots['result']

    # Columns are fetched by their R name instead of by position so the
    # mapping does not depend on the column order of the result table.
    # NOTE(review): newer clusterProfiler releases appear to add columns to
    # this table, which would shift positional indices - confirm against the
    # installed version.
    column_names = (
        ('ID', 'ID'),
        ('Description', 'Description'),
        ('GeneRatio', 'GeneRatio'),
        ('pvalue', 'pvalue'),
        ('padjust', 'p.adjust'),
        ('GeneID', 'geneID'),
        ('Count', 'Count'),
    )

    df_GO_MF = pd.DataFrame(index=range(len(result.rx2('ID'))))
    for label, r_name in column_names:
        df_GO_MF[label] = result.rx2(r_name)

    return df_GO_MF[df_GO_MF['padjust'] < 0.05]

def cluster_profiler_GO_CC(clusterProfiler, data):
    """Run a GO enrichment with ``ont='CC'`` and return the significant terms.

    Calls ``clusterProfiler.enrichGO`` against ``'org.Hs.eg.db'`` with
    ``ont='CC'``, ``pvalueCutoff=0.05`` and ``pAdjustMethod='BY'``, then copies
    selected columns of the returned object's ``result`` slot into a pandas
    DataFrame.

    Args:
        clusterProfiler: Handle to the R ``clusterProfiler`` package; only its
            ``enrichGO`` function is used here.
        data: Gene identifiers, forwarded unchanged to ``enrichGO``.

    Returns:
        pandas.DataFrame with the columns ``ID``, ``Description``,
        ``GeneRatio``, ``pvalue``, ``padjust``, ``GeneID`` and ``Count``,
        holding only the rows whose ``padjust`` is strictly below 0.05. Row
        labels are the row positions in the unfiltered result table.
    """
    enrichment = clusterProfiler.enrichGO(
        data, 'org.Hs.eg.db', ont='CC', pvalueCutoff=0.05, pAdjustMethod='BY')
    result = enrichment.slots['result']

    # Columns are fetched by their R name instead of by position so the
    # mapping does not depend on the column order of the result table.
    # NOTE(review): newer clusterProfiler releases appear to add columns to
    # this table, which would shift positional indices - confirm against the
    # installed version.
    column_names = (
        ('ID', 'ID'),
        ('Description', 'Description'),
        ('GeneRatio', 'GeneRatio'),
        ('pvalue', 'pvalue'),
        ('padjust', 'p.adjust'),
        ('GeneID', 'geneID'),
        ('Count', 'Count'),
    )

    df_GO_CC = pd.DataFrame(index=range(len(result.rx2('ID'))))
    for label, r_name in column_names:
        df_GO_CC[label] = result.rx2(r_name)

    return df_GO_CC[df_GO_CC['padjust'] < 0.05]

def cluster_profiler_GO_BP(clusterProfiler, data):
    """Run a GO enrichment with ``ont='BP'`` and return the significant terms.

    Calls ``clusterProfiler.enrichGO`` against ``'org.Hs.eg.db'`` with
    ``ont='BP'``, ``pvalueCutoff=0.05`` and ``pAdjustMethod='BY'``, then copies
    selected columns of the returned object's ``result`` slot into a pandas
    DataFrame.

    Args:
        clusterProfiler: Handle to the R ``clusterProfiler`` package; only its
            ``enrichGO`` function is used here.
        data: Gene identifiers, forwarded unchanged to ``enrichGO``.

    Returns:
        pandas.DataFrame with the columns ``ID``, ``Description``,
        ``GeneRatio``, ``pvalue``, ``padjust``, ``GeneID`` and ``Count``,
        holding only the rows whose ``padjust`` is strictly below 0.05. Row
        labels are the row positions in the unfiltered result table.
    """
    enrichment = clusterProfiler.enrichGO(
        data, 'org.Hs.eg.db', ont='BP', pvalueCutoff=0.05, pAdjustMethod='BY')
    result = enrichment.slots['result']

    # Columns are fetched by their R name instead of by position so the
    # mapping does not depend on the column order of the result table.
    # NOTE(review): newer clusterProfiler releases appear to add columns to
    # this table, which would shift positional indices - confirm against the
    # installed version.
    column_names = (
        ('ID', 'ID'),
        ('Description', 'Description'),
        ('GeneRatio', 'GeneRatio'),
        ('pvalue', 'pvalue'),
        ('padjust', 'p.adjust'),
        ('GeneID', 'geneID'),
        ('Count', 'Count'),
    )

    df_GO_BP = pd.DataFrame(index=range(len(result.rx2('ID'))))
    for label, r_name in column_names:
        df_GO_BP[label] = result.rx2(r_name)

    return df_GO_BP[df_GO_BP['padjust'] < 0.05]

def Reactome(ReactomePA, data):
    """Run a Reactome pathway enrichment and return the significant pathways.

    Calls ``ReactomePA.enrichPathway`` with ``pvalueCutoff=0.05``,
    ``readable=True``, ``pAdjustMethod='BY'`` and ``organism="human"``, copies
    selected columns of the returned object's ``result`` slot into a pandas
    DataFrame, keeps the rows whose ``padjust`` is strictly below 0.05, and
    translates each kept row's '/'-separated gene symbols to Entrez IDs with
    ``clusterProfiler.bitr``.

    Args:
        ReactomePA: Handle to the R ``ReactomePA`` package; only its
            ``enrichPathway`` function is used here.
        data: Gene identifiers, forwarded unchanged as ``gene=``.

    Returns:
        pandas.DataFrame with the columns ``ID``, ``Description``,
        ``GeneRatio``, ``pvalue``, ``padjust``, ``Count`` and ``GeneID``
        (Entrez IDs joined with '/'). Row labels are the row positions in the
        unfiltered result table.
    """
    enrichment = ReactomePA.enrichPathway(
        gene=data, pvalueCutoff=0.05, readable=True, pAdjustMethod='BY',
        organism="human")
    result = enrichment.slots['result']

    # Bug fix: the original body used a name `clusterProfiler` that is neither
    # a parameter nor defined at module level, so it only worked when a global
    # of that name happened to exist. Import the package here instead.
    clusterProfiler = rpackages.importr('clusterProfiler')

    # Columns are fetched by their R name instead of by position so the
    # mapping does not depend on the column order of the result table.
    column_names = (
        ('ID', 'ID'),
        ('Description', 'Description'),
        ('GeneRatio', 'GeneRatio'),
        ('pvalue', 'pvalue'),
        ('padjust', 'p.adjust'),
        ('Count', 'Count'),
    )

    df_Reactome = pd.DataFrame(index=range(len(result.rx2('ID'))))
    for label, r_name in column_names:
        df_Reactome[label] = result.rx2(r_name)

    # Plain Python list so it can be indexed with the frame's row labels.
    gene_symbols = list(result.rx2('geneID'))

    # Filter first: only the surviving rows need the (per-row) R call below.
    # The rows that are returned get exactly the same GeneID values as before.
    df_Reactome = df_Reactome[df_Reactome['padjust'] < 0.05].copy()

    entrez_list = []
    for row in df_Reactome.index:
        input_bitr = gene_symbols[row].split('/')
        eg = clusterProfiler.bitr(
            input_bitr, fromType="SYMBOL", toType="ENTREZID",
            OrgDb="org.Hs.eg.db")
        entrez_list.append("/".join(eg.rx2('ENTREZID')))

    df_Reactome['GeneID'] = entrez_list

    return df_Reactome

def main_enrichments_ACR(gene_list_path='/Users/user/Documents/Internship_LUMC/test_list_genes1.0.txt'):
    """Read a gene list and print the KEGG, GO (MF/CC/BP) and Reactome results.

    The file is read with R's ``scan`` (``what=''``, ``sep='\\n'``,
    ``skip=1``), i.e. one entry per line with the first line skipped. The
    resulting R vector is passed to each enrichment function in this file and
    every returned DataFrame is printed.

    Args:
        gene_list_path: Path of the gene list file. Defaults to the location
            that was previously hard-coded, so existing calls without
            arguments behave as before.
    """
    # Call scan() through rpy2 with real arguments rather than pasting the
    # path into R source text, so the path does not need R-level quoting.
    data = robjects.r['scan'](gene_list_path, what='', sep='\n', skip=1)

    clusterProfiler = rpackages.importr('clusterProfiler')
    ReactomePA = rpackages.importr('ReactomePA')

    print(cluster_profiler_KEGG(clusterProfiler, data))
    print(cluster_profiler_GO_MF(clusterProfiler, data))
    print(cluster_profiler_GO_CC(clusterProfiler, data))
    print(cluster_profiler_GO_BP(clusterProfiler, data))
    print(Reactome(ReactomePA, data))
    

# Run all enrichment analyses and print their result tables when this cell/script executes.
main_enrichments_ACR()   

           ID                                        Description GeneRatio  \
0    hsa04151                         PI3K-Akt signaling pathway    91/718   
1    hsa05215                                    Prostate cancer    44/718   
2    hsa04933  AGE-RAGE signaling pathway in diabetic complic...    42/718   
3    hsa05212                                  Pancreatic cancer    35/718   
4    hsa05220                           Chronic myeloid leukemia    35/718   
5    hsa05213                                 Endometrial cancer    29/718   
6    hsa04068                             FoxO signaling pathway    44/718   
7    hsa04510                                     Focal adhesion    56/718   
8    hsa05219                                     Bladder cancer    23/718   
9    hsa04211                       Longevity regulating pathway    34/718   
10   hsa05226                                     Gastric cancer    45/718   
11   hsa05210                                  Colorectal cancer

             ID                                        Description GeneRatio  \
0    GO:0033613            activating transcription factor binding   24/1150   
1    GO:0019838                              growth factor binding   33/1150   
2    GO:0000987    proximal promoter sequence-specific DNA binding   71/1150   
3    GO:0051087                                  chaperone binding   27/1150   
4    GO:0008307                   structural constituent of muscle   17/1150   
5    GO:0003682                                  chromatin binding   69/1150   
6    GO:0001085     RNA polymerase II transcription factor binding   30/1150   
7    GO:0031625                   ubiquitin protein ligase binding   50/1150   
8    GO:0003779                                      actin binding   60/1150   
9    GO:0031072                         heat shock protein binding   29/1150   
10   GO:0001228  DNA-binding transcription activator activity, ...   63/1150   
11   GO:0051082                         

            ID                                     Description GeneRatio  \
0   GO:0044449                          contractile fiber part   65/1166   
1   GO:0043292                               contractile fiber   67/1166   
2   GO:0030016                                       myofibril   63/1166   
3   GO:0030017                                       sarcomere   58/1166   
4   GO:0031674                                          I band   44/1166   
5   GO:0030018                                          Z disc   40/1166   
6   GO:0031983                                   vesicle lumen   73/1166   
7   GO:0060205                       cytoplasmic vesicle lumen   72/1166   
8   GO:0034774                         secretory granule lumen   64/1166   
9   GO:0005912                               adherens junction   82/1166   
10  GO:0030055                         cell-substrate junction   69/1166   
11  GO:0005925                                  focal adhesion   68/1166   
12  GO:00059

              ID                                        Description GeneRatio  \
0     GO:0060537                          muscle tissue development  101/1157   
1     GO:0007568                                              aging   90/1157   
2     GO:0014706                 striated muscle tissue development   98/1157   
3     GO:0003012                              muscle system process  107/1157   
4     GO:0006979                       response to oxidative stress   99/1157   
5     GO:0006936                                 muscle contraction   85/1157   
6     GO:0007517                           muscle organ development   91/1157   
7     GO:0031667                        response to nutrient levels   98/1157   
8     GO:0009991                 response to extracellular stimulus  101/1157   
9     GO:0048738                  cardiac muscle tissue development   64/1157   
10    GO:0042692                        muscle cell differentiation   83/1157   
11    GO:0006941            

                ID                                        Description  \
0    R-HSA-9614085                        FOXO-mediated transcription   
1    R-HSA-9006934             Signaling by Receptor Tyrosine Kinases   
2    R-HSA-5663202                    Diseases of signal transduction   
3    R-HSA-2219528                       PI3K/AKT Signaling in Cancer   
4    R-HSA-9615017  FOXO-mediated transcription of oxidative stres...   
5    R-HSA-8864260  Transcriptional regulation by the AP-2 (TFAP2)...   
6     R-HSA-390522                        Striated Muscle Contraction   
7    R-HSA-3371556                   Cellular response to heat stress   
8    R-HSA-2173782  Binding and Uptake of Ligands by Scavenger Rec...   
9    R-HSA-1474244                  Extracellular matrix organization   
10   R-HSA-3000171             Non-integrin membrane-ECM interactions   
11   R-HSA-3000480                    Scavenging by Class A Receptors   
12    R-HSA-397014                                 