In [3]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from statannotations.Annotator import Annotator

In [None]:
# Load the expression matrix and the sample metadata.
# gene_id selects the single row of the TPM table that is analysed below.
gene_id = "ENSG00000XXXXXX"

# TPM table: the first CSV column (read in as "Unnamed: 0") becomes the row index.
tpm_table = pd.read_csv("20250603_Anh_HCC/final_products/data/TPM_NASH_FLD.csv")
tpm_table = tpm_table.set_index("Unnamed: 0")

# Keep only the row for gene_id, wrapped as a DataFrame.
expression = pd.DataFrame(tpm_table.loc[gene_id].T)

# Sample metadata, indexed by the "Run" column.
metadata = pd.read_csv("salmon_results/NASH_NAFLD/human/metadata_GSE162694.csv")
metadata = metadata.set_index("Run")

In [None]:
# Map a numeric score onto a category label using fixed thresholds
def assign_level(x):
    """Return the category label for score ``x``.

    * ``x == 0``      -> "normal"
    * ``x <= 2``      -> "NAFLD" (any other value not above 2)
    * ``4 < x <= 7``  -> "NASH"
    * everything else -> "ambiguous" (this includes values in (2, 4],
      values above 7, and NaN, for which every comparison is false)
    """
    if x == 0:
        return "normal"
    if x <= 2:
        return "NAFLD"
    return "NASH" if 4 < x <= 7 else "ambiguous"
    

# Label every sample by passing its "nas_score" through assign_level.
metadata["category"] = list(map(assign_level, metadata["nas_score"]))

In [None]:
# Combine metadata and expression column-wise, add a log2(TPM + 1) column,
# then discard every row that has a missing value in any column.
joined = (
    pd.concat([metadata, expression], axis=1)
    .assign(log_TPM=lambda frame: np.log2(frame[gene_id] + 1))
    .dropna()
)

joined

## Two-factor plotting

In [None]:
def plot_TPM(joined, condition_1, condition_2, pairs=None, y=None):
    """Draw a two-factor bar plot with overlaid points and significance stars.

    A new 5x5 inch figure is created. Bars (with standard-error bars) are
    grouped by ``condition_1`` on the x-axis and split by ``condition_2`` as
    hue; individual samples are overlaid as a dodged strip plot. The given
    pairs are then compared with an independent t-test and annotated with
    stars above the plot via statannotations.

    Parameters
    ----------
    joined : pandas.DataFrame
        Table containing the y column and both condition columns.
    condition_1 : str
        Column used for the x-axis groups.
    condition_2 : str
        Column used for the hue (sub-groups within each x group).
    pairs : list of tuple, optional
        Comparisons to annotate, each written as
        ``((x_level, hue_level_a), (x_level, hue_level_b))``.
        Defaults to LFD vs HFD at ZT0, ZT4, ZT8, ZT12, ZT16 and ZT20.
    y : str, optional
        Column plotted on the y-axis. Defaults to the notebook-level
        ``gene_id`` variable, looked up when the function is called.
    """
    if y is None:
        # Fall back to the gene selected in the data-loading cell.
        y = gene_id
    if pairs is None:
        # format is ((main_cond, sub_cond_1), (main_cond, sub_cond_2))
        pairs = [
            ((timepoint, "LFD"), (timepoint, "HFD"))
            for timepoint in ("ZT0", "ZT4", "ZT8", "ZT12", "ZT16", "ZT20")
        ]

    plt.figure(figsize=(5, 5))

    # Set Seaborn style to white for a clean background
    sns.set_style("white")

    # Create barplot with error bars
    ax = sns.barplot(
        data=joined,
        x=condition_1,
        y=y,
        errorbar="se",  # standard error for error bars
        palette="pastel",
        edgecolor="black",
        linewidth=1.5,
        capsize=0.1,
        hue=condition_2,
        legend=True,
    )

    # Overlay the individual samples, dodged so they sit on their own bar
    sns.stripplot(
        data=joined,
        x=condition_1,
        y=y,
        color="#333333",
        size=6,
        jitter=True,
        hue=condition_2,
        legend=False,
        dodge=True,
        ax=ax,
    )

    # Remove the top and right spines (clean look)
    sns.despine(top=True, right=True)

    # Customize spines to be a bit thicker (like GraphPad)
    ax.spines["bottom"].set_linewidth(1.5)
    ax.spines["left"].set_linewidth(1.5)

    # Customize ticks
    ax.tick_params(axis="both", which="major", length=8, width=1.5)
    ax.tick_params(axis="both", which="minor", length=4, width=1)

    # Small tick labels
    plt.xticks(fontsize=8)
    plt.yticks(fontsize=8)

    # Axis titles in a larger font
    ax.set_xlabel("Condition", fontsize=16)
    ax.set_ylabel("TPM Expression", fontsize=16)

    # Add gridlines on y-axis only, with light grey color and thin lines
    ax.yaxis.grid(True, linestyle="--", linewidth=0.7, color="grey", alpha=0.5)
    ax.xaxis.grid(False)

    # Run the pairwise tests and draw the star annotations above the axes
    annotator = Annotator(
        ax, pairs, data=joined, x=condition_1, y=y, hue=condition_2
    )
    annotator.configure(test="t-test_ind", text_format="star", loc="outside")
    annotator.apply_and_annotate()

    # Tighten layout so labels don't get cut off
    plt.tight_layout()

In [None]:
plot_TPM(
    joined,
    condition_1="timepoint",
    condition_2="Diet"
)
# Save BEFORE showing: once plt.show() has displayed the figure it is no
# longer the current figure, so a savefig() issued afterwards would write an
# empty page instead of the plot.
plt.savefig("result.pdf")
plt.show()

## One-factor plotting

In [None]:
def plot_TPM(joined, y_axis, x_axis, pairs=None, xlabel=None):
    """Draw a one-factor bar plot with overlaid points and significance stars.

    A new 5x5 inch figure is created. Bars (with standard-error bars) show
    ``y_axis`` for each level of ``x_axis``; individual samples are overlaid
    as a strip plot. The given pairs of x levels are compared with an
    independent t-test and annotated with stars above the plot via
    statannotations.

    NOTE: this definition reuses the name ``plot_TPM`` of the two-factor
    function defined earlier in the notebook and replaces it once this cell
    has run; re-run the earlier definition cell before calling the two-factor
    version again.

    Parameters
    ----------
    joined : pandas.DataFrame
        Table containing the ``y_axis`` and ``x_axis`` columns.
    y_axis : str
        Column plotted on the y-axis.
    x_axis : str
        Column whose levels form the bars on the x-axis.
    pairs : list of tuple, optional
        Pairs of ``x_axis`` levels to compare. Defaults to
        ``[("normal", "NAFLD"), ("normal", "NASH")]``.
    xlabel : str, optional
        X-axis title. Defaults to the ``x_axis`` column name, so the label
        always matches what is actually plotted.
    """
    if pairs is None:
        pairs = [("normal", "NAFLD"), ("normal", "NASH")]
    if xlabel is None:
        xlabel = x_axis

    plt.figure(figsize=(5, 5))

    # Set Seaborn style to white for a clean background
    sns.set_style("white")

    # Create barplot with error bars; hue repeats x so each bar gets its
    # own palette colour
    ax = sns.barplot(
        data=joined,
        x=x_axis,
        y=y_axis,
        errorbar="se",  # standard error for error bars
        palette="pastel",
        edgecolor="black",
        linewidth=1.5,
        capsize=0.1,
        hue=x_axis,
        legend=False,
    )

    # Overlay the individual samples
    sns.stripplot(
        data=joined,
        x=x_axis,
        y=y_axis,
        color="#333333",
        size=5,
        jitter=True,
        legend=False,
        ax=ax,
    )

    # Remove the top and right spines (clean look)
    sns.despine(top=True, right=True)

    # Customize spines to be a bit thicker (like GraphPad)
    ax.spines["bottom"].set_linewidth(1.5)
    ax.spines["left"].set_linewidth(1.5)

    # Customize ticks
    ax.tick_params(axis="both", which="major", length=8, width=1.5)
    ax.tick_params(axis="both", which="minor", length=4, width=1)

    # Small tick labels
    plt.xticks(fontsize=8)
    plt.yticks(fontsize=8)

    # Axis titles in a larger font
    ax.set_xlabel(xlabel, fontsize=16)
    ax.set_ylabel("TPM expression", fontsize=16)

    # Add gridlines on y-axis only, with light grey color and thin lines
    ax.yaxis.grid(True, linestyle="--", linewidth=0.7, color="grey", alpha=0.5)
    ax.xaxis.grid(False)

    # Run the pairwise tests and draw the star annotations above the axes
    annotator = Annotator(ax, pairs, data=joined, x=x_axis, y=y_axis)
    annotator.configure(test="t-test_ind", text_format="star", loc="outside")
    annotator.apply_and_annotate()

    # Tighten layout so labels don't get cut off
    plt.tight_layout()

In [None]:
# One-factor plot of the selected gene across the "category" labels;
# the figure is written to disk before it is displayed.
plot_TPM(joined, y_axis=gene_id, x_axis="category")
plt.savefig("liver_disease.pdf")
plt.show()

## Line plot

In [None]:
# Columns used for the x-axis groups and for the hue split
condition_1 = "timepoint"
condition_2 = "Diet"

plt.figure(figsize=(5, 5))

# White seaborn style for a clean background
sns.set_style("white")

# Point plot: one connected line per hue level across the x groups
ax = sns.pointplot(
    data=joined, x=condition_1, y=gene_id, hue=condition_2,
    palette="Greys", capsize=0.15, linewidth=1.5, legend=True,
)

# Individual samples on top of the lines (not dodged)
sns.stripplot(
    data=joined, x=condition_1, y=gene_id, hue=condition_2,
    palette="Greys", size=3, jitter=True, legend=False,
)

# Drop the top/right spines and thicken the remaining two (GraphPad-like)
sns.despine(top=True, right=True)
ax.spines["left"].set_linewidth(1.5)
ax.spines["bottom"].set_linewidth(1.5)

# Tick marks: long and thick for major ticks, shorter and thinner for minor
ax.tick_params(axis="both", which="major", length=8, width=1.5)
ax.tick_params(axis="both", which="minor", length=4, width=1)

# Small tick labels
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)

# Axis titles in a larger font
ax.set_xlabel("Zeitgeber Time (ZT)", fontsize=16)
ax.set_ylabel("TPM expression", fontsize=16)

# Dashed light-grey horizontal gridlines only
ax.xaxis.grid(False)
ax.yaxis.grid(True, linestyle="--", linewidth=0.7, color="grey", alpha=0.5)

# Comparisons to annotate, written as
# ((x_level, hue_level_a), (x_level, hue_level_b)): control vs NAFLD per time point
pairs = [
    ((timepoint, "control"), (timepoint, "NAFLD"))
    for timepoint in ("ZT2", "ZT8", "ZT14", "ZT20")
]

# Independent t-test per pair, shown as stars above the axes
annotator = Annotator(
    ax, pairs, data=joined, x=condition_1, y=gene_id, hue=condition_2
)
annotator.configure(test="t-test_ind", text_format="star", loc="outside")
annotator.apply_and_annotate()

# Tighten layout so labels don't get cut off
plt.tight_layout()

# Write the figure to disk before displaying it
plt.savefig("timepoint.pdf")
plt.show()