In [None]:
import os
import math

import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt


# Project label used in generated file names.
PROJECT_NAME = "CropSeq-19"

# Every location below is derived from the kernel's current working directory.
BASE_DIR = os.getcwd()
NOTEBOOK_DIR = os.path.join(BASE_DIR, "notebooks")
DATA_DIR = os.path.join(BASE_DIR, "data")

# Sub-folders of the data directory.
RAW_DATA_DIR, CHECKPOINT_DIR, PROCESSED_DIR = (
    os.path.join(DATA_DIR, _sub) for _sub in ("raw", "checkpoints", "processed")
)

# Per-format folders below the processed-data directory.
PDF_DIR, CSV_DIR = (
    os.path.join(PROCESSED_DIR, _fmt) for _fmt in ("pdf", "csv")
)

def sfile(filename):
    """Return the output path for a figure file and announce it.

    The path is ``PDF_DIR/<PROJECT_NAME>_merged_<filename>``. The PDF
    directory is created on demand so that a subsequent save does not fail
    with ``FileNotFoundError`` on a fresh checkout.

    Parameters
    ----------
    filename : str
        File name (including extension) to append to the project prefix.

    Returns
    -------
    str
        Full path of the file inside ``PDF_DIR``.
    """
    # matplotlib's savefig does not create missing parent directories.
    os.makedirs(PDF_DIR, exist_ok=True)

    _fname = os.path.join(PDF_DIR, f"{PROJECT_NAME}_merged_{filename}")
    print(f"File save at '{_fname}'")
    return _fname



### Load and plot data

In [None]:
# Target names; each one is substituted into a per-target CSV file name.
all_targets = ("IL-23R", "IRF4", "Rorc")

### Downregulated pathways

In [None]:
# Significance cut-off applied to the `log_pval` column (natural log, unchanged
# from the previous version of this cell).
# NOTE(review): if `log_pval` holds -log10(p) values, the matching cut-off
# would be -math.log10(0.05) (~1.30) rather than ~3.00 -- confirm with the
# tool that produced the CSV files.
significance_threshold = -math.log(0.05)

# Pathway names must START with one of these prefixes (`str.match` anchors at
# the beginning of the string).
pathway_pattern = r"(Th17|IL-17|IL-23|HIF|GM-CSF|MAPK|JAK|IL-6|T Cell|TNFR2|Toll-like|IL-1)"

# One colormap shared by the scatter points and the colorbar.
cmap = sns.color_palette("ch:start=.1,rot=-.3", as_cmap=True)

for target in all_targets:

    print(target)

    csv_path = os.path.join(
        CSV_DIR,
        f"CropSeq_all_NTN_diffExp_withEnsGene_{target}_canonical_pathways_converted.csv",
    )

    # Open explicitly so undecodable bytes are dropped, and close the handle
    # as soon as the table has been parsed.
    with open(csv_path, encoding="utf-8", errors="ignore") as handle:
        df = pd.read_csv(
            handle,
            decimal=",",
            delimiter=",",
            skiprows=1,
            names=["pathway", "log_pval", "ratio", "zscore", "genes"],
        )

    # The z-score column may arrive as text (decimal commas and markers such
    # as "#ZAHL!"). Convert it to float; anything non-numeric becomes NaN and
    # is dropped by the `zscore < 0` filter below.
    if not pd.api.types.is_numeric_dtype(df["zscore"]):
        df["zscore"] = pd.to_numeric(
            df["zscore"].astype(str).str.replace(",", ".", regex=False),
            errors="coerce",
        )

    # Keep significant, negatively scored pathways whose name matches the
    # prefixes of interest.
    keep = (
        (df["log_pval"] > significance_threshold)
        & df["pathway"].str.match(pathway_pattern, na=False)
        & (df["zscore"] < 0)
    )

    # Number of comma-separated genes per pathway; drives the marker size.
    df = df[keep].assign(counts=lambda d: d["genes"].str.split(",").str.len())

    if df.empty:
        print(f"No regulated pathways for {target}")
        continue

    # Only the ten most negative z-scores are drawn.
    top = df.sort_values(by="zscore", ascending=True).head(10)

    # Build the colour normalisation from the rows that are actually plotted
    # and hand the same object to seaborn and to the colorbar, so the colorbar
    # scale always matches the point colours.
    norm = plt.Normalize(top["log_pval"].min(), top["log_pval"].max())

    fig, ax = plt.subplots(figsize=(8, 6))

    sns.scatterplot(
        data=top,
        x="zscore",
        y="pathway",
        hue="log_pval",
        hue_norm=norm,
        size="counts",
        sizes=(20, 200),
        palette=cmap,
        ax=ax,
    )

    ax.set_title(f"Downregulated canonical pathways of {target}")

    # The hue entries are replaced by the colorbar, so keep only the part of
    # the legend that starts at the "counts" (marker size) section.
    handles, labels = ax.get_legend_handles_labels()
    if "counts" in labels:
        first = labels.index("counts")
        handles, labels = handles[first:], labels[first:]
    ax.legend(
        handles=handles,
        labels=labels,
        bbox_to_anchor=(1.2, 1.0),
        loc="upper right",
    )

    # Run the layout BEFORE adding the colorbar axes: tight_layout ignores
    # axes created with add_axes, so the colorbar has to be positioned
    # relative to the final location of the main axes.
    fig.tight_layout()

    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])

    pos = ax.get_position()
    cax = fig.add_axes([pos.x1, pos.y0, 0.03, pos.height / 2])
    cax.set_title("-log(p-value)")
    fig.colorbar(sm, cax=cax)

    # bbox_inches="tight" keeps the legend and colorbar, which sit outside the
    # main axes, inside the saved page.
    fig.savefig(
        sfile(f"Canonical-pathway-downregulated_{target}.pdf"),
        bbox_inches="tight",
    )

    # Render this figure now (next to its printed target name) and let the
    # backend release it before the next iteration.
    plt.show()