In [1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

from google.colab import drive
drive.mount('/content/drive', force_remount=True)
root_dir = "/content/drive/My Drive/"
project_folder = "Thesis_MSc"
os.chdir(root_dir + project_folder)
!pwd

Mounted at /content/drive
/content/drive/My Drive/Thesis_MSc


In [2]:
import pandas as pd

enrichr_df = pd.read_csv('data/Enrichr-KG overt (2).csv')  # or .tsv
print(enrichr_df.columns)


Index(['Term', 'Library', 'p-value', 'q-value', 'z-score', 'combined score',
       'overlaps'],
      dtype='object')


In [3]:
!pip install kaleido==0.2.1




In [None]:
# See the first 5 rows with all columns
print(enrichr_df.head())

# If you spot a column that looks like it might be the library/category:
print(enrichr_df["Library"].unique())   # replace "Library" with the actual column name



                                          Term           Library  \
0   Generic Transcription Pathway R-HSA-212436     Reactome_2022   
1  RNA Polymerase II Transcription R-HSA-73857     Reactome_2022   
2  Gene Expression (Transcription) R-HSA-74160     Reactome_2022   
3             Herpes simplex virus 1 infection   KEGG_2021_Human   
4            BCL6 human tf ARCHS4 coexpression  ARCHS4_TFs_Coexp   

        p-value       q-value  z-score  combined score  \
0  1.073000e-32  1.021000e-29    2.653           195.3   
1  1.303000e-32  1.021000e-29    2.561           188.0   
2  7.555000e-32  3.943000e-29    2.452           175.7   
3  1.800000e-25  5.636000e-23    3.365           191.7   
4  1.459000e-18  2.516000e-15    3.616           148.5   

                                            overlaps  
0  SPI1;EHMT2;HDAC11;EHMT1;RBPJ;LGALS3;TRIM28;ZNF...  
1  SPI1;EHMT2;HDAC11;EHMT1;RBPJ;LGALS3;TRIM28;ZNF...  
2  SPI1;EHMT2;HDAC11;EHMT1;RBPJ;RRP8;LGALS3;TRIM2...  
3  ZNF573;ZNF571;ZNF57

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG overt (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)

# Rename columns for convenience
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

# Sort by Adjusted P-value and take top N
df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()

# Compute -log10 Adjusted P-value
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])

# Count number of genes in overlap for dot size
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Barplot --------
fig_bar = px.bar(
    df_top,
    x="neglog10adjP",
    y="Term",
    orientation="h",
    text="Combined Score",
    hover_data=["Overlap", "Adjusted P-value", "P-value", "Combined Score"],
    labels={"neglog10adjP": "-log10(Adjusted P-value)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms (Overt)"
)
fig_bar.update_layout(yaxis=dict(autorange="reversed"))

# Save HTML
bar_html = os.path.join(OUTPUT_DIR, "overt_barplot.html")
fig_bar.write_html(bar_html)

# Save PNG if Kaleido is installed
bar_png = os.path.join(OUTPUT_DIR, "overt_barplot.png")
try:
    pio.write_image(fig_bar, bar_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for barplot.")

fig_bar.show()

# -------- Dotplot --------
fig_dot = px.scatter(
    df_top,
    x="Combined Score",
    y="Term",
    size="Overlap_count",
    color="neglog10adjP",
    hover_data=["P-value", "Adjusted P-value", "Overlap"],
    labels={"Combined Score": "Combined Score", "neglog10adjP": "-log10(Adj P)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms (Dotplot, Overt)"
)
fig_dot.update_layout(yaxis=dict(autorange="reversed"))

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "overt_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "overt_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG overt (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)

# Rename columns for convenience
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

# Sort by Adjusted P-value and take top N
df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()

# Compute -log10 Adjusted P-value
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])

# Count number of genes in overlap for reference (optional)
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Barplot --------
fig_bar = px.bar(
    df_top,
    x="neglog10adjP",
    y="Term",
    orientation="h",
    text="Combined Score",
    hover_data=["Overlap", "Adjusted P-value", "P-value", "Combined Score", "Library"],
    labels={"neglog10adjP": "-log10(Adjusted P-value)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms (Overt)"
)
fig_bar.update_layout(yaxis=dict(autorange="reversed"))

# Save HTML
bar_html = os.path.join(OUTPUT_DIR, "overt_barplot.html")
fig_bar.write_html(bar_html)

# Save PNG if Kaleido is installed
bar_png = os.path.join(OUTPUT_DIR, "overt_barplot.png")
try:
    pio.write_image(fig_bar, bar_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for barplot.")

fig_bar.show()

# -------- Dotplot (Adjusted P-value as dot size) --------
fig_dot = px.scatter(
    df_top,
    x="Combined Score",
    y="Term",
    size="neglog10adjP",    # dot size = significance
    color="Library",        # color = gene-set library
    hover_data=["P-value", "Adjusted P-value", "Overlap", "Library"],
    labels={
        "Combined Score": "Combined Score",
        "neglog10adjP": "-log10(Adjusted P-value)",
        "Term": "Enriched Term",
        "Library": "Gene-set Library"
    },
    title=f"Top {TOP_N} Enriched Terms (Dotplot, Overt)"
)
fig_dot.update_layout(yaxis=dict(autorange="reversed"))

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "overt_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "overt_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG overt (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Barplot --------
fig_bar = px.bar(
    df_top,
    x="neglog10adjP",
    y="Term",
    orientation="h",
    text="Combined Score",
    hover_data=["Overlap", "Adjusted P-value", "P-value", "Combined Score", "Library"],
    labels={"neglog10adjP": "-log10(Adjusted P-value)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms (Overt)"
)
fig_bar.update_layout(yaxis=dict(autorange="reversed"))
fig_bar.show()

# -------- Dotplot with size legend --------
fig_dot = px.scatter(
    df_top,
    x="Combined Score",
    y="Term",
    size="neglog10adjP",
    color="Library",
    hover_data=["P-value", "Adjusted P-value", "Overlap", "Library"],
    labels={
        "Combined Score": "Combined Score",
        "neglog10adjP": "-log10(Adjusted P-value)",
        "Term": "Enriched Term",
        "Library": "Gene-set Library"
    },
    title=f"Top {TOP_N} Enriched Terms (Dotplot, Overt)"
)
fig_dot.update_layout(yaxis=dict(autorange="reversed"))

# Add dummy traces to show size legend
for val in [2, 5, 10, 15]:  # example –log10(padj) values
    fig_dot.add_trace(go.Scatter(
        x=[None],
        y=[None],
        mode='markers',
        marker=dict(size=val*2),  # scale factor to match your dotplot
        name=f"-log10(Adj P) = {val}"
    ))

fig_dot.show()



In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG overt (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Dotplot with proper legends --------
fig_dot = px.scatter(
    df_top,
    x="Combined Score",
    y="Term",
    size="neglog10adjP",      # size = significance
    color="Library",          # color = library
    hover_data=["P-value", "Adjusted P-value", "Overlap", "Library"],
    labels={
        "Combined Score": "Combined Score",
        "neglog10adjP": "-log10(Adjusted P-value)",
        "Term": "Enriched Term",
        "Library": "Gene-set Library"
    },
    title=f"Top {TOP_N} Enriched Terms (Dotplot, Overt)",
    size_max=40  # maximum dot size
)

# Ensure size legend is visible
fig_dot.update_traces(marker=dict(sizemode='area', sizeref=2.*df_top["neglog10adjP"].max()/(40.**2), sizemin=5))

# Layout tweaks
fig_dot.update_layout(
    yaxis=dict(autorange="reversed"),
    legend_title=dict(text="Gene-set Library"),
    legend=dict(title_font_size=14, font=dict(size=12))
)

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "overt_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "overt_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()


In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG overt (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Dotplot with proper legends --------
fig_dot = px.scatter(
    df_top,
    x="Combined Score",
    y="Term",
    size="neglog10adjP",      # size = significance
    color="Library",          # color = library
    hover_data=["P-value", "Adjusted P-value", "Overlap", "Library"],
    labels={
        "Combined Score": "Combined Score",
        "neglog10adjP": "-log10(Adjusted P-value)",
        "Term": "Enriched Term",
        "Library": "Gene-set Library"
    },
    size_max=40  # maximum dot size
)

# Ensure size legend is visible
fig_dot.update_traces(
    marker=dict(
        sizemode='area',
        sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
        sizemin=5
    ),
    hoverlabel=dict(font_size=16)  # bigger hover labels
)

# Layout tweaks (big fonts + centered title)
fig_dot.update_layout(
    title=dict(
        text=f"Top {TOP_N} Enriched Terms (Dotplot, Overt)",
        x=0.5,                # center title
        xanchor="center",
        font=dict(size=28)    # title font size
    ),
    yaxis=dict(
        autorange="reversed",
        title=dict(font=dict(size=22)),   # y-axis title font
        tickfont=dict(size=18)            # y-axis tick font
    ),
    xaxis=dict(
        title=dict(font=dict(size=22)),   # x-axis title font
        tickfont=dict(size=18)            # x-axis tick font
    ),
    legend_title=dict(
        text="Gene-set Library",
        font=dict(size=20)
    ),
    legend=dict(
        font=dict(size=18)
    )
)

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "overt_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "overt_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()


Kaleido not installed; skipping PNG for dotplot.


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG overt (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Create figure with size legend --------
fig_dot = go.Figure()

# Get unique libraries for colors
libraries = df_top["Library"].unique()
colors = px.colors.qualitative.Set1[:len(libraries)]

# Add traces for each library (for color legend)
for i, library in enumerate(libraries):
    library_data = df_top[df_top["Library"] == library]

    fig_dot.add_trace(go.Scatter(
        x=library_data["Combined Score"],
        y=library_data["Term"],
        mode='markers',
        name=library,
        marker=dict(
            size=library_data["neglog10adjP"],
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color=colors[i],
            line=dict(width=0.5, color="DarkSlateGrey")
        ),
        customdata=np.column_stack((
            library_data["P-value"],
            library_data["Adjusted P-value"],
            library_data["Overlap"],
            library_data["Library"]
        )),
        hovertemplate="<b>%{y}</b><br>" +
                      "Combined Score: %{x}<br>" +
                      "P-value: %{customdata[0]}<br>" +
                      "Adjusted P-value: %{customdata[1]}<br>" +
                      "Overlap: %{customdata[2]}<br>" +
                      "Library: %{customdata[3]}<br>" +
                      "<extra></extra>"
    ))

# Create size legend by adding invisible traces with different sizes
size_values = [df_top["neglog10adjP"].min(),
               df_top["neglog10adjP"].quantile(0.25),
               df_top["neglog10adjP"].quantile(0.5),
               df_top["neglog10adjP"].quantile(0.75),
               df_top["neglog10adjP"].max()]

size_labels = [f"{val:.1f}" for val in size_values]

# Add size legend traces
for i, (size_val, size_label) in enumerate(zip(size_values, size_labels)):
    fig_dot.add_trace(go.Scatter(
        x=[None], y=[None],  # Invisible points
        mode='markers',
        marker=dict(
            size=size_val,
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color='rgba(0,0,0,0.6)',
            line=dict(width=0.5, color="black")
        ),
        showlegend=True,
        name=f"Size: {size_label}",
        legendgroup="size",
        legendgrouptitle_text="-log10(Adj P-value)"
    ))

# Update layout
fig_dot.update_layout(
    title=f"Top {TOP_N} Enriched Terms (Dotplot, Overt)",
    xaxis_title="Combined Score",
    yaxis_title="Enriched Term",
    yaxis=dict(autorange="reversed"),
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.02,
        font=dict(size=10),
        tracegroupgap=10
    ),
    width=1200,
    height=800
)

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "overt_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "overt_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()

In [4]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG overt (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Create figure with size legend --------
fig_dot = go.Figure()

# Get unique libraries for colors
libraries = df_top["Library"].unique()
colors = px.colors.qualitative.Set1[:len(libraries)]

# Add traces for each library (for color legend)
for i, library in enumerate(libraries):
    library_data = df_top[df_top["Library"] == library]

    fig_dot.add_trace(go.Scatter(
        x=library_data["Combined Score"],
        y=library_data["Term"],
        mode='markers',
        name=library,
        marker=dict(
            size=library_data["neglog10adjP"],
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color=colors[i],
            line=dict(width=0.5, color="DarkSlateGrey")
        ),
        customdata=np.column_stack((
            library_data["P-value"],
            library_data["Adjusted P-value"],
            library_data["Overlap"],
            library_data["Library"]
        )),
        hovertemplate="<b>%{y}</b><br>" +
                      "Combined Score: %{x}<br>" +
                      "P-value: %{customdata[0]}<br>" +
                      "Adjusted P-value: %{customdata[1]}<br>" +
                      "Overlap: %{customdata[2]}<br>" +
                      "Library: %{customdata[3]}<br>" +
                      "<extra></extra>"
    ))

# Create size legend values
size_values = [df_top["neglog10adjP"].min(),
               df_top["neglog10adjP"].quantile(0.25),
               df_top["neglog10adjP"].quantile(0.5),
               df_top["neglog10adjP"].quantile(0.75),
               df_top["neglog10adjP"].max()]

size_labels = [f"{val:.1f}" for val in size_values]

# Add size legend traces
for i, (size_val, size_label) in enumerate(zip(size_values, size_labels)):
    fig_dot.add_trace(go.Scatter(
        x=[None], y=[None],  # Invisible points
        mode='markers',
        marker=dict(
            size=size_val,
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color='rgba(0,0,0,0.6)',
            line=dict(width=0.5, color="black")
        ),
        showlegend=True,
        name=f"Size: {size_label}",
        legendgroup="size",
        legendgrouptitle_text="-log10(Adj P-value)"
    ))

# -------- Layout tweaks (big fonts + centered title) --------
fig_dot.update_layout(
    title=dict(
        text=f"Top {TOP_N} Enriched Terms (Dotplot, Overt)",
        x=0.5,                # center title
        xanchor="center",
        font=dict(size=28)    # bigger title font
    ),
    xaxis=dict(
        title="Combined Score",
        titlefont=dict(size=22),
        tickfont=dict(size=18)
    ),
    yaxis=dict(
        title="Enriched Term",
        autorange="reversed",
        titlefont=dict(size=22),
        tickfont=dict(size=18)   # larger term labels
    ),
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.02,
        font=dict(size=16),      # bigger legend font
        tracegroupgap=10
    ),
    width=1200,
    height=800
)

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "overt_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "overt_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG sign (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Create figure with size legend --------
fig_dot = go.Figure()

# Get unique libraries for colors
libraries = df_top["Library"].unique()
colors = px.colors.qualitative.Set1[:len(libraries)]

# Add traces for each library (for color legend)
for i, library in enumerate(libraries):
    library_data = df_top[df_top["Library"] == library]

    fig_dot.add_trace(go.Scatter(
        x=library_data["Combined Score"],
        y=library_data["Term"],
        mode='markers',
        name=library,
        marker=dict(
            size=library_data["neglog10adjP"],
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color=colors[i],
            line=dict(width=0.5, color="DarkSlateGrey")
        ),
        customdata=np.column_stack((
            library_data["P-value"],
            library_data["Adjusted P-value"],
            library_data["Overlap"],
            library_data["Library"]
        )),
        hovertemplate="<b>%{y}</b><br>" +
                      "Combined Score: %{x}<br>" +
                      "P-value: %{customdata[0]}<br>" +
                      "Adjusted P-value: %{customdata[1]}<br>" +
                      "Overlap: %{customdata[2]}<br>" +
                      "Library: %{customdata[3]}<br>" +
                      "<extra></extra>"
    ))

# Create size legend by adding invisible traces with different sizes
size_values = [df_top["neglog10adjP"].min(),
               df_top["neglog10adjP"].quantile(0.25),
               df_top["neglog10adjP"].quantile(0.5),
               df_top["neglog10adjP"].quantile(0.75),
               df_top["neglog10adjP"].max()]

size_labels = [f"{val:.1f}" for val in size_values]

# Add size legend traces
for i, (size_val, size_label) in enumerate(zip(size_values, size_labels)):
    fig_dot.add_trace(go.Scatter(
        x=[None], y=[None],  # Invisible points
        mode='markers',
        marker=dict(
            size=size_val,
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color='rgba(0,0,0,0.6)',
            line=dict(width=0.5, color="black")
        ),
        showlegend=True,
        name=f"Size: {size_label}",
        legendgroup="size",
        legendgrouptitle_text="-log10(Adj P-value)"
    ))

# Update layout
fig_dot.update_layout(
    title=f"Top {TOP_N} Enriched Terms (Dotplot, Sign)",
    xaxis_title="Combined Score",
    yaxis_title="Enriched Term",
    yaxis=dict(autorange="reversed"),
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.02,
        font=dict(size=10),
        tracegroupgap=10
    ),
    width=1200,
    height=800
)

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "sign_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "sign_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()

In [5]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG sign (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Create figure with size legend --------
fig_dot = go.Figure()

# Get unique libraries for colors
libraries = df_top["Library"].unique()
colors = px.colors.qualitative.Set1[:len(libraries)]

# Add traces for each library (for color legend)
for i, library in enumerate(libraries):
    library_data = df_top[df_top["Library"] == library]

    fig_dot.add_trace(go.Scatter(
        x=library_data["Combined Score"],
        y=library_data["Term"],
        mode='markers',
        name=library,
        marker=dict(
            size=library_data["neglog10adjP"],
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color=colors[i],
            line=dict(width=0.5, color="DarkSlateGrey")
        ),
        customdata=np.column_stack((
            library_data["P-value"],
            library_data["Adjusted P-value"],
            library_data["Overlap"],
            library_data["Library"]
        )),
        hovertemplate="<b>%{y}</b><br>" +
                      "Combined Score: %{x}<br>" +
                      "P-value: %{customdata[0]}<br>" +
                      "Adjusted P-value: %{customdata[1]}<br>" +
                      "Overlap: %{customdata[2]}<br>" +
                      "Library: %{customdata[3]}<br>" +
                      "<extra></extra>"
    ))

# Create size legend by adding invisible traces with different sizes
size_values = [df_top["neglog10adjP"].min(),
               df_top["neglog10adjP"].quantile(0.25),
               df_top["neglog10adjP"].quantile(0.5),
               df_top["neglog10adjP"].quantile(0.75),
               df_top["neglog10adjP"].max()]

size_labels = [f"{val:.1f}" for val in size_values]

for i, (size_val, size_label) in enumerate(zip(size_values, size_labels)):
    fig_dot.add_trace(go.Scatter(
        x=[None], y=[None],
        mode='markers',
        marker=dict(
            size=size_val,
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color='rgba(0,0,0,0.6)',
            line=dict(width=0.5, color="black")
        ),
        showlegend=True,
        name=f"Size: {size_label}",
        legendgroup="size",
        legendgrouptitle_text="-log10(Adj P-value)"
    ))

# -------- Layout tweaks (center title + big fonts) --------
fig_dot.update_layout(
    title=dict(
        text=f"Top {TOP_N} Enriched Terms (Dotplot, Sign)",
        x=0.5,
        xanchor="center",
        font=dict(size=28)
    ),
    xaxis=dict(
        title="Combined Score",
        titlefont=dict(size=22),
        tickfont=dict(size=18)
    ),
    yaxis=dict(
        title="Enriched Term",
        autorange="reversed",
        titlefont=dict(size=22),
        tickfont=dict(size=18)
    ),
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.02,
        font=dict(size=16),
        tracegroupgap=10
    ),
    width=1200,
    height=800
)

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "sign_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "sign_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG hidden (3).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Create figure with size legend --------
fig_dot = go.Figure()

# Get unique libraries for colors
libraries = df_top["Library"].unique()
colors = px.colors.qualitative.Set1[:len(libraries)]

# Add traces for each library (for color legend)
for i, library in enumerate(libraries):
    library_data = df_top[df_top["Library"] == library]

    fig_dot.add_trace(go.Scatter(
        x=library_data["Combined Score"],
        y=library_data["Term"],
        mode='markers',
        name=library,
        marker=dict(
            size=library_data["neglog10adjP"],
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color=colors[i],
            line=dict(width=0.5, color="DarkSlateGrey")
        ),
        customdata=np.column_stack((
            library_data["P-value"],
            library_data["Adjusted P-value"],
            library_data["Overlap"],
            library_data["Library"]
        )),
        hovertemplate="<b>%{y}</b><br>" +
                      "Combined Score: %{x}<br>" +
                      "P-value: %{customdata[0]}<br>" +
                      "Adjusted P-value: %{customdata[1]}<br>" +
                      "Overlap: %{customdata[2]}<br>" +
                      "Library: %{customdata[3]}<br>" +
                      "<extra></extra>"
    ))

# Create size legend by adding invisible traces with different sizes
size_values = [df_top["neglog10adjP"].min(),
               df_top["neglog10adjP"].quantile(0.25),
               df_top["neglog10adjP"].quantile(0.5),
               df_top["neglog10adjP"].quantile(0.75),
               df_top["neglog10adjP"].max()]

size_labels = [f"{val:.1f}" for val in size_values]

# Add size legend traces
for i, (size_val, size_label) in enumerate(zip(size_values, size_labels)):
    fig_dot.add_trace(go.Scatter(
        x=[None], y=[None],  # Invisible points
        mode='markers',
        marker=dict(
            size=size_val,
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color='rgba(0,0,0,0.6)',
            line=dict(width=0.5, color="black")
        ),
        showlegend=True,
        name=f"Size: {size_label}",
        legendgroup="size",
        legendgrouptitle_text="-log10(Adj P-value)"
    ))

# Update layout
fig_dot.update_layout(
    title=f"Top {TOP_N} Enriched Terms (Dotplot, Hidden)",
    xaxis_title="Combined Score",
    yaxis_title="Enriched Term",
    yaxis=dict(autorange="reversed"),
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.02,
        font=dict(size=10),
        tracegroupgap=10
    ),
    width=1200,
    height=800
)

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "hidden_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "hidden_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()

In [6]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG hidden (3).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Create figure with size legend --------
fig_dot = go.Figure()

# Get unique libraries for colors
libraries = df_top["Library"].unique()
colors = px.colors.qualitative.Set1[:len(libraries)]

# Add traces for each library (for color legend)
for i, library in enumerate(libraries):
    library_data = df_top[df_top["Library"] == library]

    fig_dot.add_trace(go.Scatter(
        x=library_data["Combined Score"],
        y=library_data["Term"],
        mode='markers',
        name=library,
        marker=dict(
            size=library_data["neglog10adjP"],
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color=colors[i],
            line=dict(width=0.5, color="DarkSlateGrey")
        ),
        customdata=np.column_stack((
            library_data["P-value"],
            library_data["Adjusted P-value"],
            library_data["Overlap"],
            library_data["Library"]
        )),
        hovertemplate="<b>%{y}</b><br>" +
                      "Combined Score: %{x}<br>" +
                      "P-value: %{customdata[0]}<br>" +
                      "Adjusted P-value: %{customdata[1]}<br>" +
                      "Overlap: %{customdata[2]}<br>" +
                      "Library: %{customdata[3]}<br>" +
                      "<extra></extra>"
    ))

# Create size legend by adding invisible traces with different sizes
size_values = [df_top["neglog10adjP"].min(),
               df_top["neglog10adjP"].quantile(0.25),
               df_top["neglog10adjP"].quantile(0.5),
               df_top["neglog10adjP"].quantile(0.75),
               df_top["neglog10adjP"].max()]

size_labels = [f"{val:.1f}" for val in size_values]

for i, (size_val, size_label) in enumerate(zip(size_values, size_labels)):
    fig_dot.add_trace(go.Scatter(
        x=[None], y=[None],
        mode='markers',
        marker=dict(
            size=size_val,
            sizemode='area',
            sizeref=2.*df_top["neglog10adjP"].max()/(40.**2),
            sizemin=5,
            color='rgba(0,0,0,0.6)',
            line=dict(width=0.5, color="black")
        ),
        showlegend=True,
        name=f"Size: {size_label}",
        legendgroup="size",
        legendgrouptitle_text="-log10(Adj P-value)"
    ))

# -------- Layout tweaks (center title + big fonts) --------
fig_dot.update_layout(
    title=dict(
        text=f"Top {TOP_N} Enriched Terms (Dotplot, Hidden)",
        x=0.5,
        xanchor="center",
        font=dict(size=28)
    ),
    xaxis=dict(
        title="Combined Score",
        titlefont=dict(size=22),
        tickfont=dict(size=18)
    ),
    yaxis=dict(
        title="Enriched Term",
        autorange="reversed",
        titlefont=dict(size=22),
        tickfont=dict(size=18)
    ),
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.02,
        font=dict(size=16),
        tracegroupgap=10
    ),
    width=1200,
    height=800
)

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "hidden_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "hidden_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()


In [7]:
from PIL import Image
import os

# -------- Paths --------
OUTPUT_DIR = "enrichr_plots"
images = [
    os.path.join(OUTPUT_DIR, "overt_dotplot.png"),
    os.path.join(OUTPUT_DIR, "sign_dotplot.png"),
    os.path.join(OUTPUT_DIR, "hidden_dotplot.png")
]

# -------- Open images --------
imgs = [Image.open(img) for img in images]

# -------- Determine size of final image --------
width = max(img.width for img in imgs)
total_height = sum(img.height for img in imgs)

# -------- Create new blank image --------
combined_img = Image.new('RGB', (width, total_height), color=(255, 255, 255))

# -------- Paste images one below the other --------
y_offset = 0
for img in imgs:
    combined_img.paste(img, (0, y_offset))
    y_offset += img.height

# -------- Save final multi-panel image --------
output_path = os.path.join(OUTPUT_DIR, "multi_panel_dotplots.png")
combined_img.save(output_path)
print(f"Saved multi-panel image to: {output_path}")


Saved multi-panel image to: enrichr_plots/multi_panel_dotplots.png


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG sign (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)

# Rename columns for convenience
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

# Sort by Adjusted P-value and take top N
df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()

# Compute -log10 Adjusted P-value
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])

# Count number of genes in overlap for dot size
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Barplot --------
fig_bar = px.bar(
    df_top,
    x="neglog10adjP",
    y="Term",
    orientation="h",
    text="Combined Score",
    hover_data=["Overlap", "Adjusted P-value", "P-value", "Combined Score"],
    labels={"neglog10adjP": "-log10(Adjusted P-value)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms (Sign)"
)
fig_bar.update_layout(yaxis=dict(autorange="reversed"))

# Save HTML
bar_html = os.path.join(OUTPUT_DIR, "sign_barplot.html")
fig_bar.write_html(bar_html)

# Save PNG if Kaleido is installed
bar_png = os.path.join(OUTPUT_DIR, "sign_barplot.png")
try:
    pio.write_image(fig_bar, bar_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for barplot.")

fig_bar.show()

# -------- Dotplot --------
fig_dot = px.scatter(
    df_top,
    x="Combined Score",
    y="Term",
    size="Overlap_count",
    color="neglog10adjP",
    hover_data=["P-value", "Adjusted P-value", "Overlap"],
    labels={"Combined Score": "Combined Score", "neglog10adjP": "-log10(Adj P)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms (Dotplot, Sign)"
)
fig_dot.update_layout(yaxis=dict(autorange="reversed"))

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "sign_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "sign_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG sign (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)

# Rename columns for convenience
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

# Sort by Adjusted P-value and take top N
df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()

# Compute -log10 Adjusted P-value
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])

# Count number of genes in overlap for dot size
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Barplot --------
fig_bar = px.bar(
    df_top,
    x="neglog10adjP",
    y="Term",
    orientation="h",
    text="Combined Score",
    hover_data=["Overlap", "Adjusted P-value", "P-value", "Combined Score"],
    labels={"neglog10adjP": "-log10(Adjusted P-value)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms (Sign)"
)
fig_bar.update_layout(yaxis=dict(autorange="reversed"))

# Save HTML
bar_html = os.path.join(OUTPUT_DIR, "sign_barplot.html")
fig_bar.write_html(bar_html)

# Save PNG if Kaleido is installed
bar_png = os.path.join(OUTPUT_DIR, "sign_barplot.png")
try:
    pio.write_image(fig_bar, bar_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for barplot.")

fig_bar.show()

# -------- Dotplot with proper legends --------
fig_dot = go.Figure()

# Add main scatter plot
fig_dot.add_trace(go.Scatter(
    x=df_top["Combined Score"],
    y=df_top["Term"],
    mode='markers',
    name="Enriched Terms",
    marker=dict(
        size=df_top["Overlap_count"],
        sizemode='area',
        sizeref=2.*df_top["Overlap_count"].max()/(40.**2),
        sizemin=5,
        color=df_top["neglog10adjP"],
        colorscale='Viridis',
        showscale=True,
        colorbar=dict(
            title="-log10(Adj P-value)",
            titleside="right"
        ),
        line=dict(width=0.5, color="DarkSlateGrey")
    ),
    customdata=np.column_stack((
        df_top["P-value"],
        df_top["Adjusted P-value"],
        df_top["Overlap"]
    )),
    hovertemplate="<b>%{y}</b><br>" +
                  "Combined Score: %{x}<br>" +
                  "P-value: %{customdata[0]}<br>" +
                  "Adjusted P-value: %{customdata[1]}<br>" +
                  "Overlap: %{customdata[2]}<br>" +
                  "<extra></extra>",
    showlegend=False  # Hide this trace from legend since we'll create custom size legend
))

# Create size legend by adding invisible traces with different sizes
size_values = [df_top["Overlap_count"].min(),
               df_top["Overlap_count"].quantile(0.25),
               df_top["Overlap_count"].quantile(0.5),
               df_top["Overlap_count"].quantile(0.75),
               df_top["Overlap_count"].max()]

size_labels = [f"{int(val)}" for val in size_values]

# Add size legend traces
for i, (size_val, size_label) in enumerate(zip(size_values, size_labels)):
    fig_dot.add_trace(go.Scatter(
        x=[None], y=[None],  # Invisible points
        mode='markers',
        marker=dict(
            size=size_val,
            sizemode='area',
            sizeref=2.*df_top["Overlap_count"].max()/(40.**2),
            sizemin=5,
            color='rgba(0,0,0,0.6)',
            line=dict(width=0.5, color="black")
        ),
        showlegend=True,
        name=f"Size: {size_label}",
        legendgroup="size",
        legendgrouptitle_text="Overlap Count"
    ))

# Update layout
fig_dot.update_layout(
    title=f"Top {TOP_N} Enriched Terms (Dotplot, Sign)",
    xaxis_title="Combined Score",
    yaxis_title="Enriched Term",
    yaxis=dict(autorange="reversed"),
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.02,
        font=dict(size=10),
        tracegroupgap=10
    )
)

# Save HTML
dot_html = os.path.join(OUTPUT_DIR, "sign_dotplot.html")
fig_dot.write_html(dot_html)

# Save PNG if Kaleido is installed
dot_png = os.path.join(OUTPUT_DIR, "sign_dotplot.png")
try:
    pio.write_image(fig_dot, dot_png, width=1200, height=800)
except ValueError:
    print("Kaleido not installed; skipping PNG for dotplot.")

fig_dot.show()

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG overt (2).csv"
TOP_N = 40

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)

# Rename columns for convenience
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

# Sort by Adjusted P-value and take top 20
df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()

# Compute -log10 Adjusted P-value
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])

# Count number of genes in overlap for dot size
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Barplot (Top 20) --------
fig_bar = px.bar(
    df_top,
    x="neglog10adjP",
    y="Term",
    orientation="h",
    text="Combined Score",
    hover_data=["Overlap", "Adjusted P-value", "P-value", "Combined Score"],
    labels={"neglog10adjP": "-log10(Adjusted P-value)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms from Enrichr"
)
fig_bar.update_layout(yaxis=dict(autorange="reversed"))
fig_bar.show()

# -------- Dotplot (Top 20) --------
fig_dot = px.scatter(
    df_top,
    x="Combined Score",
    y="Term",
    size="Overlap_count",
    color="neglog10adjP",
    hover_data=["P-value", "Adjusted P-value", "Overlap"],
    labels={"Combined Score": "Combined Score", "neglog10adjP": "-log10(Adj P)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms (Dotplot)"
)
fig_dot.update_layout(yaxis=dict(autorange="reversed"))
fig_dot.show()


In [None]:
pip install -U kaleido


Collecting kaleido
  Downloading kaleido-1.0.0-py3-none-any.whl.metadata (5.6 kB)
Collecting choreographer>=1.0.5 (from kaleido)
  Downloading choreographer-1.0.10-py3-none-any.whl.metadata (5.6 kB)
Collecting logistro>=1.0.8 (from kaleido)
  Downloading logistro-1.1.0-py3-none-any.whl.metadata (2.6 kB)
Downloading kaleido-1.0.0-py3-none-any.whl (51 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.5/51.5 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading choreographer-1.0.10-py3-none-any.whl (51 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.3/51.3 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading logistro-1.1.0-py3-none-any.whl (7.9 kB)
Installing collected packages: logistro, choreographer, kaleido
Successfully installed choreographer-1.0.10 kaleido-1.0.0 logistro-1.1.0


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
import os

# -------- Parameters --------
INPUT_FILE = "data/Enrichr-KG overt (2).csv"
TOP_N = 40
OUTPUT_DIR = "enrichr_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------- Load Enrichr results --------
df = pd.read_csv(INPUT_FILE)

# Rename columns for convenience
df.rename(columns={
    "q-value": "Adjusted P-value",
    "combined score": "Combined Score",
    "overlaps": "Overlap",
    "p-value": "P-value"
}, inplace=True)

# Sort by Adjusted P-value and take top N
df_top = df.sort_values("Adjusted P-value").head(TOP_N).copy()

# Compute -log10 Adjusted P-value
df_top["neglog10adjP"] = -np.log10(df_top["Adjusted P-value"])

# Count number of genes in overlap for dot size
df_top["Overlap_count"] = df_top["Overlap"].apply(lambda x: len(str(x).split(";")))

# -------- Barplot --------
fig_bar = px.bar(
    df_top,
    x="neglog10adjP",
    y="Term",
    orientation="h",
    text="Combined Score",
    hover_data=["Overlap", "Adjusted P-value", "P-value", "Combined Score"],
    labels={"neglog10adjP": "-log10(Adjusted P-value)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms (Overt)"
)
fig_bar.update_layout(yaxis=dict(autorange="reversed"))

# Save barplot
bar_html = os.path.join(OUTPUT_DIR, "overt_barplot.html")
bar_png = os.path.join(OUTPUT_DIR, "overt_barplot.png")
fig_bar.write_html(bar_html)
pio.write_image(fig_bar, bar_png, width=1200, height=800)

fig_bar.show()

# -------- Dotplot --------
fig_dot = px.scatter(
    df_top,
    x="Combined Score",
    y="Term",
    size="Overlap_count",
    color="neglog10adjP",
    hover_data=["P-value", "Adjusted P-value", "Overlap"],
    labels={"Combined Score": "Combined Score", "neglog10adjP": "-log10(Adj P)", "Term": "Enriched Term"},
    title=f"Top {TOP_N} Enriched Terms (Dotplot, Overt)"
)
fig_dot.update_layout(yaxis=dict(autorange="reversed"))

# Save dotplot
dot_html = os.path.join(OUTPUT_DIR, "overt_dotplot.html")
dot_png = os.path.join(OUTPUT_DIR, "overt_dotplot.png")
fig_dot.write_html(dot_html)
pio.write_image(fig_dot, dot_png, width=1200, height=800)

fig_dot.show()






This means that static image generation (e.g. `fig.write_image()`) will not work.

Please upgrade Plotly to version 6.1.1 or greater, or downgrade Kaleido to version 0.2.1.




ValueError: 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido


In [None]:
!pip install kaleido==0.2.1

Collecting kaleido==0.2.1
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaleido
  Attempting uninstall: kaleido
    Found existing installation: kaleido 1.0.0
    Uninstalling kaleido-1.0.0:
      Successfully uninstalled kaleido-1.0.0
Successfully installed kaleido-0.2.1
