In [1]:
import pandas as pd
from gprofiler import GProfiler
import ast

In [2]:
# Load the pathway table. Each `enriched_genes` value is read from the CSV as a
# string and turned into a Python object with ast.literal_eval (safe literal
# parsing, no arbitrary code execution).
df = pd.read_csv('intersting_kegg_pathways.csv')
gene_lists = [ast.literal_eval(raw_genes) for raw_genes in df['enriched_genes']]
df['enriched_genes'] = gene_lists
df

Unnamed: 0.1,Unnamed: 0,Pathway_ID,Pathway_name,p_value,enriched_genes
0,133,KEGG:04726,Serotonergic synapse,2.436566e-08,"[HTR1A, CACNA1A, HRAS, HTR1D, HTR2A, HTR3A, SL..."
1,197,KEGG:04724,Glutamatergic synapse,1.670683e-06,"[SLC1A1, GRIA3, GRIA4, GRIK1, GRIK2, GRIK3, GR..."
2,201,KEGG:04728,Dopaminergic synapse,1.960982e-06,"[DRD3, SLC6A3, DRD4, GRIA3, GRIA4, GRIN2B, DRD..."


In [3]:
# Map every gene to the set of pathway names whose `enriched_genes` list
# contains it. Despite the name, the values are sets of pathways, not counts.
gene_counts = {}
for pathway_name, pathway_genes in zip(df['Pathway_name'], df['enriched_genes']):
    for symbol in pathway_genes:
        gene_counts.setdefault(symbol, set()).add(pathway_name)

In [4]:
# g:Profiler client used for the gene-ID conversion in the export loop below.
# return_dataframe=True is passed so results can be read by column
# (the loop accesses `.converted` on the result).
gp = GProfiler(return_dataframe=True)

In [6]:
# Colour written next to each converted ID, keyed by the exact set of pathways
# the source gene belongs to. Genes present in three pathways are handled
# separately (always 'red').
MEMBERSHIP_COLORS = {
    frozenset({'Dopaminergic synapse'}): 'yellow',
    frozenset({'Glutamatergic synapse'}): 'yellow',
    frozenset({'Serotonergic synapse'}): 'yellow',
    frozenset({'Dopaminergic synapse', 'Glutamatergic synapse'}): 'green',
    frozenset({'Glutamatergic synapse', 'Serotonergic synapse'}): 'blue',
    frozenset({'Serotonergic synapse', 'Dopaminergic synapse'}): 'orange',
}


def membership_color(pathways):
    """Return the colour for a gene's set of pathways, or None if no rule matches.

    Parameters
    ----------
    pathways : set of str
        Pathway names the gene was found in (a value of `gene_counts`).

    Returns
    -------
    str or None
        'red' when the gene is in three pathways, otherwise the colour
        registered for that exact set in MEMBERSHIP_COLORS, else None.
    """
    if len(pathways) == 3:
        return 'red'
    return MEMBERSHIP_COLORS.get(frozenset(pathways))


# For every pathway, convert its gene symbols to UniProt/Swiss-Prot accessions
# and write one "<accession> <colour>" line per converted ID to
# "<Pathway_name>.txt".
for index, row in df.iterrows():
    old_genes = row.enriched_genes
    conversion = gp.convert(query=old_genes, organism='hsapiens',
                            target_namespace='uniprotswissprot_acc')
    new_genes = list(conversion.converted)
    file_name = row['Pathway_name'] + '.txt'
    # NOTE: re-created for every pathway, so after the loop `colors` only
    # describes the last file written.
    colors = set()
    # The `with` block closes the file; no explicit close() is needed.
    with open(file_name, 'w') as file:
        # Pair each converted ID with the query gene reported on the SAME
        # result row. The result can contain more than one row per input gene,
        # so zipping it against `old_genes` by position would attach IDs to the
        # wrong gene (and colour) and drop the trailing rows.
        for gene, ID in zip(conversion.incoming, conversion.converted):
            pathways = gene_counts[gene]
            color = membership_color(pathways)
            if color is None:
                # Unexpected pathway combination: report it and still emit a
                # well-formed (newline-terminated) line without a colour.
                print(pathways)
                file.write(f'{ID}\n')
                continue
            colors.add(color)
            file.write(f'{ID} {color}\n')
        
   

In [7]:
# `colors` is re-created for every pathway inside the export loop, so this
# shows only the colours written to the last pathway's file.
colors

{'green', 'red', 'yellow'}