In [3]:
import pandas as pd
import networkx as nx
import igraph as ig
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib.pyplot as plt
import re
from math import ceil  # Import ceil from math module
import math
from matplotlib import cm
from matplotlib.colors import Normalize
import gseapy as gp
def plot_enrichr_pathway_analysis(gene_list, keywords=None, gene_set='GO_Biological_Process_2023', organism='mouse', output_path='pathway_analysis_plot.png'):
    # Run Enrichr analysis
    try:
        # Run Enrichr analysis
        enrichr_results = gp.enrichr(
            gene_list=gene_list,
            gene_sets=gene_set,
            organism=organism
        )
    except Exception as e:
        # Handle failure gracefully
        print(f"Enrichr analysis failed: {e}")
        print("Skipping pathway analysis for this gene list.")
        return
    
    # Extract the results DataFrame
    enrichr_df = enrichr_results.results

    # Filter the DataFrame by the specific gene set 'GO_Biological_Process_2023'
    enrichr_df = enrichr_df[enrichr_df['Gene_set'] == gene_set]


    # Print the filtered DataFrame
    print("Number of confident pathways (start) : " + str(len(enrichr_df)))

    # Step 1: Filter pathways related to programmed cell death and apoptosis
    #filtered_enrichr_df = enrichr_df[enrichr_df['Term'].str.contains('|'.join(keywords), case=False, na=False)]
    def keyword_match(term, keywords):
        return any(keyword.lower() in term.lower() for keyword in keywords)

    if keywords:
        filtered_enrichr_df = enrichr_df[enrichr_df['Term'].apply(keyword_match, keywords=keywords)]
    else:
        filtered_enrichr_df = enrichr_df

    # Print the filtered DataFrame
    nconfpath = len(filtered_enrichr_df)
    if nconfpath == 0:  # Check if the filtered DataFrame is empty
        print("No pathways match the specified keywords. Exiting analysis.")
        return
    
    print("Number of confident pathways (of interest): " + str(nconfpath))

    def clean_pathway_name(name):
        # Remove "GO:" followed by numbers and the optional parentheses around GO IDs
        name = re.sub(r'\(GO:\d+\)', '', name)  # Removes "(GO:101010101)" style entries
        return name

    # Apply cleaning to the pathway names in the DataFrame
    #filtered_enrichr_df['Term'] = filtered_enrichr_df['Term'].apply(clean_pathway_name)
    filtered_enrichr_df.loc[:, 'Term'] = filtered_enrichr_df['Term'].apply(clean_pathway_name)

    # Step 2: Proceed with the graph creation using the filtered DataFrame
    vectorizer = TfidfVectorizer(stop_words='english')
    X = vectorizer.fit_transform(filtered_enrichr_df['Term'])

    # Step 3: Build Graph from Shared Genes
    G = nx.Graph()
    gene_counts = {}
    pathway_gene_map = {}

    for _, row in filtered_enrichr_df.iterrows():
        genes = set(row['Genes'].split(';'))  # Convert to set for intersection checks
        pathway_gene_map[row['Term']] = genes
        gene_counts[row['Term']] = len(genes)  # Count genes per term

    #filtered_enrichr_df['num_genes'] = filtered_enrichr_df['Term'].map(gene_counts)
    filtered_enrichr_df = filtered_enrichr_df.copy()
    filtered_enrichr_df.loc[:, 'num_genes'] = filtered_enrichr_df['Term'].map(gene_counts)

    # Filter pathways that have >= 3 genes, with fallback to >= 2 and >= 1
    #for threshold in [3, 2, 1]:
    for threshold in [3, 2]:
        idx = filtered_enrichr_df['num_genes'] >= threshold
        if idx.sum() > 3:
            print(f"Pathways with >= {threshold} genes found: {idx.sum()}")
            filtered_enrichr_df = filtered_enrichr_df[idx]
            break
    else:
        # If no pathways meet the minimum gene count, exit the function
        print("No pathways with at least 2 gene found. Skipping further analysis.")
        return

    filtered_enrichr_df = filtered_enrichr_df[idx]

    # Recalculate gene counts for the filtered pathways
    gene_counts = {term: gene_counts[term] for term in filtered_enrichr_df['Term']}

    # Add edges only for shared genes
    shared_gene_labels = {}
    non_shared_gene_labels = {}
    for pathway1, genes1 in pathway_gene_map.items():
        if pathway1 not in filtered_enrichr_df['Term'].values:
            continue
        for pathway2, genes2 in pathway_gene_map.items():
            if pathway1 != pathway2 and pathway2 in filtered_enrichr_df['Term'].values:
                shared_genes = genes1.intersection(genes2)
                if shared_genes:  # Add edge if there are shared genes
                    G.add_edge(pathway1, pathway2, weight=len(shared_genes))
                    shared_gene_labels[(pathway1, pathway2)] = '\n'.join(shared_genes)  # Insert newline

        # Non-shared genes for the pathway
        non_shared_genes = genes1 - set.union(*[genes2 for pathway2, genes2 in pathway_gene_map.items() if pathway2 != pathway1])
        non_shared_gene_labels[pathway1] = '\n'.join(non_shared_genes)

    # Convert NetworkX graph to iGraph
    ig_graph = ig.Graph.TupleList(G.edges(data=False), directed=False)

    # Run Leiden clustering
    partition = ig_graph.community_leiden(resolution=0.7)  # Experiment with lower values for flexibility

    # Assign cluster labels to nodes (pathways)
    term_cluster_map = {ig_graph.vs[idx]['name']: cluster for idx, cluster in enumerate(partition.membership)}

    # Map clusters to filtered_enrichr_df DataFrame
    filtered_enrichr_df['Cluster'] = filtered_enrichr_df['Term'].map(term_cluster_map)

    # Dynamically calculate the figure size
    nconfpath = len(filtered_enrichr_df)
    print("Final number of confident pathways : " + str(nconfpath))

    xsize = ceil(nconfpath * 1.25)
    ysize = ceil(nconfpath * 1.00)
    xsize = max(xsize, 25)
    ysize = max(ysize, 20)
    print("Canvas size " + str(xsize) + " " + str(ysize))
    # Step 4: Visualize Graph with Improved Scale
    plt.figure(figsize=(xsize, ysize))  # Larger figure size for better visualization

    k0 = 0.5
    kf = 4.0
    kval = round(k0 + kf * (xsize - ysize) / xsize, 2)
    print("Final number k (spacing) : " + str(kval))

    # Adjust layout spacing with spring_layout
    pos = nx.spring_layout(G, seed=42, k=kval, iterations=100)  # Increase 'k' for better spacing

    # Map node size to gene count (increase size proportionally)
    min_gene_count = min(gene_counts.values())
    max_gene_count = max(gene_counts.values())

    # Dynamic scaling using min-max normalization with an adjustable range
    size_min = 1000  # Minimum node size
    size_max = 10000  # Maximum node size

    # Apply scaling
    if min_gene_count == max_gene_count:
        node_size = [size_max] * len(G.nodes())  # Use maximum size if counts are uniform
    else:
        node_size = [
            size_min + ((gene_counts[node] - min_gene_count) / (max_gene_count - min_gene_count)) * (size_max - size_min)
            for node in G.nodes()
        ]

    # Map node colors to clusters (color by cluster index using a colormap)
    if max(term_cluster_map.values()) == 0:
        print("All pathways belong to the same cluster. Assigning a default color.")
        color_map = ['red' for _ in term_cluster_map.values()]
    else:
        color_map = [plt.cm.hsv(cluster / max(term_cluster_map.values())) for cluster in term_cluster_map.values()]
        
    # Function to split text into groups of two words separated by '\n'
    def split_pathway_name(name):
        words = name.split()
        return '\n'.join([' '.join(words[i:i+2]) for i in range(0, len(words), 2)])

    # Adjust node labels to show each term with breaks every two words and number of genes
    labels = {node: f"{split_pathway_name(node)}\n({gene_counts.get(node, 0)} genes)\n{non_shared_gene_labels.get(node, '')}" for node in G.nodes()}
    
    npt_font = max(14, min(20, ceil(12 * kval)))  # Cap font size between 14 and 20
    print("Final npt_font (pathways) : " + str(npt_font))

    # Draw the graph
    nx.draw(
        G, pos,
        with_labels=True,
        labels=labels,  # Use updated labels with gene count and non-shared genes
        node_color=color_map,  # Color nodes by cluster with distinct colors
        node_size=node_size,
        font_size=npt_font,
        edge_color='gray',
        cmap=plt.cm.Set3
    )

    # Create a ScalarMappable object for colorbar reference
    sm = plt.cm.ScalarMappable(cmap=plt.cm.hsv, norm=Normalize(vmin=0, vmax=max(term_cluster_map.values())))
    sm.set_array([])  # Empty array for colorbar reference

    # Add a thinner colorbar using the 'fraction' and 'pad' arguments
    plt.colorbar(sm, ax=plt.gca(), label='Cluster Index', fraction=0.02, pad=0.02)

    if len(G.edges()) > 50:
        print("Too many edges. Skipping shared gene labels.")
    else:
        nx.draw_networkx_edge_labels(
            G, pos,
            edge_labels=shared_gene_labels,
            font_size=6  # Adjust font size for shared genes
        )

    plt.title('Pathway Analysis (P-value < 0.05) - Gene Overlap Graph', fontsize=18)

    # Save the plot to the specified output path
    plt.savefig(output_path, format='png', bbox_inches='tight')  # Save the plot as a PNG file
    print(f"Graph saved to {output_path}")

    plt.close()  # Close the plot to free up memory


In [None]:
import os
import pandas as pd

def open_file(fname):
    """Open a file and filter genes based on the type (DV or DE)."""
    df = pd.read_excel(fname)

    # Get the filename from the full path
    filename = os.path.basename(fname)

    if "dv" in filename.lower():
        # Open DV summary spreadsheet
        print(f"Processing DV file: {filename}")
        # Keep important and confident genes
        idx = (df["pval"] < 0.05) & (df["dist_diff"] > 0)
        df = df[idx]
        gene_list = df["gene"]
        print(f"Number of confident genes (DV): {len(gene_list)}")
        return df, gene_list, "dv"

    else:
        # Open DE summary spreadsheet
        print(f"Processing DE file: {filename}")
        idx = (df['p_val_adj'] < 0.05) & (df['abs_log2FC'] >= 0.7)
        df = df[idx]
        gene_list = df['gene']
        print(f"Number of confident genes (DE): {len(gene_list)}")
        return df, gene_list, "de"

# Define keywords for filtering pathways
keywords = ['apoptosis', 'programmed cell death', 'cell cycle regulation', 
            'tumor necrosis factor (TNF) signaling', 'caspase activation', 
            'DNA damage response', 'autophagy', 'cell survival pathways', 
            'Bcl-2 family proteins', 'NF-kB signaling', 'P53 signaling pathway', 
            'mitochondrial apoptosis signaling', 'inflammatory response', 'Wnt signaling pathway', 
            'MAPK/ERK pathway', 'PI3K-Akt signaling pathway', 'TGF-beta signaling', 'Fas signaling', 
            'endoplasmic reticulum stress', 'angiogenesis', 'epigenetic regulation of apoptosis']

main_path = r"Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells"

# Walk through all subdirectories
for root, dirs, files in os.walk(main_path):
    for file in files:
        print("=============================================")
        print(f"Working on {root}")
        print("=============================================")
        if file.endswith(".xls"):
            # Construct the file path
            fname = os.path.join(root, file)

            # Open the file to get the dataframe, gene list, and analysis type
            df, gene_list, analysis_type = open_file(fname)

            # If the file is a DE file, split into upregulated and downregulated
            if analysis_type == "de":
                # Upregulated genes
                idx_up = df['avg_log2FC'] > 0
                df_up = df[idx_up]
                gene_list_up = df_up['gene']

                # Downregulated genes
                idx_dn = df['avg_log2FC'] < 0
                df_dn = df[idx_dn]
                gene_list_dn = df_dn['gene']

                # Generate plots for DE, DE-up, and DE-downregulated
                out_name_de = os.path.join(root, f"de_pathways_{os.path.basename(fname).replace('.xls', '')}.png")
                plot_enrichr_pathway_analysis(gene_list, keywords=keywords, gene_set='GO_Biological_Process_2023', organism='mouse', output_path=out_name_de)
                #print(f"Processed {fname} and saved plot to {out_name_de}")

                if not df_up.empty:
                    print(f"Number op UP-genes {len(gene_list_up)}")
                    out_name_up = os.path.join(root, f"de_up_pathways_{os.path.basename(fname).replace('.xls', '')}.png")
                    plot_enrichr_pathway_analysis(gene_list_up, keywords=keywords, gene_set='GO_Biological_Process_2023', organism='mouse', output_path=out_name_up)
                    #print(f"Processed {fname} (upregulated) and saved plot to {out_name_up}")

                if not df_dn.empty:
                    print(f"Number op DN-genes {len(gene_list_dn)}")
                    out_name_dn = os.path.join(root, f"de_dn_pathways_{os.path.basename(fname).replace('.xls', '')}.png")
                    plot_enrichr_pathway_analysis(gene_list_dn, keywords=keywords, gene_set='GO_Biological_Process_2023', organism='mouse', output_path=out_name_dn)
                    #print(f"Processed {fname} (downregulated) and saved plot to {out_name_dn}")

            # If the file is a DV file, generate a plot
            elif analysis_type == "dv":
                out_name_dv = os.path.join(root, f"dv_pathways_{os.path.basename(fname).replace('.xls', '')}.png")
                plot_enrichr_pathway_analysis(gene_list, keywords=keywords, gene_set='GO_Biological_Process_2023', organism='mouse', output_path=out_name_dv)
                #print(f"Processed {fname} and saved plot to {out_name_dv}")


Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\B_cells\ACKG_577_18w_NR4ACKG_136_18w
Processing DE file: ACKG_577_18w_NR4ACKG_136_18w_de.xls
Number of confident genes (DE): 25
Number of confident pathways (start) : 397
Number of confident pathways (of interest): 16
No pathways with at least 2 gene found. Skipping further analysis.
Number op UP-genes 15
Number of confident pathways (start) : 264
Number of confident pathways (of interest): 11
No pathways with at least 2 gene found. Skipping further analysis.
Number op DN-genes 10
Number of confident pathways 

  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\B_cells\ACKG_577_18w_NR4ACKG_136_18w\dv_pathways_ACKG_577_18w_NR4ACKG_136_18w_dv.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\B_cells\ACKG_577_18w_NR4ACKG_136_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w
Processing DE file: ACKG_574_10w_ACKG_577_18w_de.xls
Number of confident genes (DE): 1076
Number of confident pathways (start) : 3414
Number of confident pathways (of interest): 70
Pathways with >= 3 genes found: 34
Final number of confident pathways : 34
Canvas size 43 34
Final number k (spacing) : 1.34
Final npt_font (pathways) : 17
Too many edges. Skipping shared gene labels.


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w\de_pathways_ACKG_574_10w_ACKG_577_18w_de.png
Number op UP-genes 129
Number of confident pathways (start) : 876
Number of confident pathways (of interest): 26
Pathways with >= 2 genes found: 5
Final number of confident pathways : 5
Canvas size 25 20
Final number k (spacing) : 1.3
All pathways belong to the same cluster. Assigning a default color.
Final npt_font (pathways) : 16


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w\de_up_pathways_ACKG_574_10w_ACKG_577_18w_de.png
Number op DN-genes 947
Number of confident pathways (start) : 3215
Number of confident pathways (of interest): 67
Pathways with >= 3 genes found: 31
Final number of confident pathways : 31
Canvas size 39 31
Final number k (spacing) : 1.32
Final npt_font (pathways) : 16
Too many edges. Skipping shared gene labels.


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w\de_dn_pathways_ACKG_574_10w_ACKG_577_18w_de.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w
Processing DV file: ACKG_574_10w_ACKG_577_18w_dv.xls
Number of confident genes (DV): 300
Number of confident pathways (start) : 1880
Number of confident pathways (of interest): 56
Pathways with >= 3 genes found: 17
Final number of confident pathways : 17
Canvas size 25 20
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w\dv_pathways_ACKG_574_10w_ACKG_577_18w_dv.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_127_10w
Processing DE file: ACKG_574_10w_NR4ACKG_127_10w_de.xls
Number of confident genes (DE): 355
Number of confident pathways (start) : 1912
Nu

  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_127_10w\de_pathways_ACKG_574_10w_NR4ACKG_127_10w_de.png
Number op UP-genes 225
Number of confident pathways (start) : 1602
Number of confident pathways (of interest): 32
Pathways with >= 3 genes found: 12
Final number of confident pathways : 12
Canvas size 25 20
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_127_10w\de_up_pathways_ACKG_574_10w_NR4ACKG_127_10w_de.png
Number op DN-genes 130
Number of confident pathways (start) : 654
Number of confident pathways (of interest): 17
Pathways with >= 2 genes found: 4
Final number of confident pathways : 4
Canvas size 25 20
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_127_10w\de_dn_pathways_ACKG_574_10w_NR4ACKG_127_10w_de.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_127_10w
Processing DV file: ACKG_574_10w_NR4ACKG_127_10w_dv.xls
Number of confident genes (DV): 167
Number of confident pathways (start) : 1407
Number of confident pathways (of interest): 49
Pathways with >= 3 genes found: 14
Final number of confident pathways : 14
Canvas size 25 20
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_127_10w\dv_pathways_ACKG_574_10w_NR4ACKG_127_10w_dv.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_127_10w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_127_10w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_127_10w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_127_10w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_136_18w
Processing DE file: ACKG_574_10w_NR4ACKG_136_18w_de.xls
Number of confident genes (DE): 1262
Number of confident pathway

  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_136_18w\de_pathways_ACKG_574_10w_NR4ACKG_136_18w_de.png
Number op UP-genes 216
Number of confident pathways (start) : 1161
Number of confident pathways (of interest): 22
Pathways with >= 3 genes found: 6
Final number of confident pathways : 6
Canvas size 25 20
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_136_18w\de_up_pathways_ACKG_574_10w_NR4ACKG_136_18w_de.png
Number op DN-genes 1046
Number of confident pathways (start) : 3520
Number of confident pathways (of interest): 76
Pathways with >= 3 genes found: 38
Final number of confident pathways : 38
Canvas size 48 38
Final number k (spacing) : 1.33
Final npt_font (pathways) : 16
Too many edges. Skipping shared gene labels.


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_136_18w\de_dn_pathways_ACKG_574_10w_NR4ACKG_136_18w_de.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_136_18w
Processing DV file: ACKG_574_10w_NR4ACKG_136_18w_dv.xls
Number of confident genes (DV): 236
Number of confident pathways (start) : 1759
Number of confident pathways (of interest): 49
Pathways with >= 3 genes found: 17
Final number of confident pathways : 17
Canvas size 25 20
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16
Too many edges. Skipping shared gene labels.


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_136_18w\dv_pathways_ACKG_574_10w_NR4ACKG_136_18w_dv.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_136_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_136_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_136_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_NR4ACKG_136_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_127_10w
Processing DE file: ACKG_577_18w_NR4ACKG_127_10w_de.xls
Number of confident genes (DE): 1608
Number of confident pathway

  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_127_10w\de_pathways_ACKG_577_18w_NR4ACKG_127_10w_de.png
Number op UP-genes 1377
Number of confident pathways (start) : 3794
Number of confident pathways (of interest): 72
Pathways with >= 3 genes found: 40
Final number of confident pathways : 40
Canvas size 50 40
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16
Too many edges. Skipping shared gene labels.


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_127_10w\de_up_pathways_ACKG_577_18w_NR4ACKG_127_10w_de.png
Number op DN-genes 231
Number of confident pathways (start) : 1107
Number of confident pathways (of interest): 21
Pathways with >= 3 genes found: 6
Final number of confident pathways : 6
Canvas size 25 20
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_127_10w\de_dn_pathways_ACKG_577_18w_NR4ACKG_127_10w_de.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_127_10w
Processing DV file: ACKG_577_18w_NR4ACKG_127_10w_dv.xls
Number of confident genes (DV): 287
Number of confident pathways (start) : 1802
Number of confident pathways (of interest): 40
Pathways with >= 3 genes found: 17
Final number of confident pathways : 17
Canvas size 25 20
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16
Too many edges. Skipping shared gene labels.


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_127_10w\dv_pathways_ACKG_577_18w_NR4ACKG_127_10w_dv.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_127_10w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_127_10w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_127_10w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_127_10w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_136_18w
Processing DE file: ACKG_577_18w_NR4ACKG_136_18w_de.xls
Number of confident genes (DE): 618
Enrichr analysis failed: Err

  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_136_18w\de_up_pathways_ACKG_577_18w_NR4ACKG_136_18w_de.png
Number op DN-genes 431
Number of confident pathways (start) : 2186
Number of confident pathways (of interest): 59
Pathways with >= 3 genes found: 22
Final number of confident pathways : 22
Canvas size 28 22
Final number k (spacing) : 1.36
Final npt_font (pathways) : 17
Too many edges. Skipping shared gene labels.


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_136_18w\de_dn_pathways_ACKG_577_18w_NR4ACKG_136_18w_de.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_136_18w
Processing DV file: ACKG_577_18w_NR4ACKG_136_18w_dv.xls
Number of confident genes (DV): 212
Enrichr analysis failed: Error sending gene list, try again later
Skipping pathway analysis for this gene list.
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_136_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_136_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_577_18w_NR4ACKG_136_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study1

  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\NR4ACKG_127_10w_NR4ACKG_136_18w\de_pathways_NR4ACKG_127_10w_NR4ACKG_136_18w_de.png
Number op UP-genes 272
Number of confident pathways (start) : 1192
Number of confident pathways (of interest): 35
Pathways with >= 3 genes found: 5
Final number of confident pathways : 5
Canvas size 25 20
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\NR4ACKG_127_10w_NR4ACKG_136_18w\de_up_pathways_NR4ACKG_127_10w_NR4ACKG_136_18w_de.png
Number op DN-genes 1388
Number of confident pathways (start) : 3931
Number of confident pathways (of interest): 77
Pathways with >= 3 genes found: 40
Final number of confident pathways : 40
Canvas size 50 40
Final number k (spacing) : 1.3
Final npt_font (pathways) : 16
Too many edges. Skipping shared gene labels.


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\NR4ACKG_127_10w_NR4ACKG_136_18w\de_dn_pathways_NR4ACKG_127_10w_NR4ACKG_136_18w_de.png
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\NR4ACKG_127_10w_NR4ACKG_136_18w
Processing DV file: NR4ACKG_127_10w_NR4ACKG_136_18w_dv.xls
Number of confident genes (DV): 292
Enrichr analysis failed: Error sending gene list, try again later
Skipping pathway analysis for this gene list.
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\NR4ACKG_127_10w_NR4ACKG_136_18w
Working on Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Fibroblasts\ACKG_577_18w_NR4ACKG_136_18w
Processing DE file: ACKG_577_18w_NR4ACKG_136_18w_de.xls
Number of confident genes (DE): 109


In [56]:
fname = r"Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w\ACKG_574_10w_ACKG_577_18w_dv.xls"
df, gene_list,analysis_type = open_file(fname)
out_name = r"Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w\pathway_plot_ACKG_574_10w_ACKG_577_18w_dv.png"
plot_enrichr_pathway_analysis(gene_list, keywords=keywords, gene_set='GO_Biological_Process_2023', organism='mouse', output_path=out_name)


Processing DV file: ACKG_574_10w_ACKG_577_18w_dv.xls
Number of confident genes (DV): 300
Number of confident pathways (start) : 1880
Number of confident pathways (of interest): 56
Pathways with >= 3 genes found: 17
Final number of confident pathways : 17
Canvas size 25 20
Final number k (spacing) : 1.3
Final npt_font (pathways) : 13


  filtered_enrichr_df = filtered_enrichr_df[idx]
  node_collection = ax.scatter(


Graph saved to Z:\selim_working_dir\2024_NR4ACKG_Study10_Sc_data\analysis\dv_de_analysis_broad_cells\Colonocytes\ACKG_574_10w_ACKG_577_18w\pathway_plot_ACKG_574_10w_ACKG_577_18w_dv.png
