# DeSeq2 analysis

13-12-2024
Updated

In [None]:
import os
import pandas as pd
import scanpy as sc
import anndata

from goatools.base import download_go_basic_obo
from goatools.base import gunzip
from goatools.obo_parser import GODag
from goatools.anno.genetogo_reader import Gene2GoReader
from goatools.goea.go_enrichment_ns import GOEnrichmentStudyNS

from genes_ncbi_homo_sapiens_proteincoding import GENEID2NT as GeneID2nt_hs

from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats
from pydeseq2.default_inference import DefaultInference

from pathlib import Path
from datetime import datetime

from adjustText import adjust_text
from typing import Union
from anndata import AnnData

In [None]:
import seaborn as sns
import textwrap
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.figure as figure
from matplotlib.patches import Patch

rc = {"figure.dpi": 150}
sns.set_theme(
    context="notebook",
    style="white",
    palette="deep",
    font="sans-serif",
    font_scale=4,
    color_codes=True,
    rc=rc,
)

pydeseq2 is not supporting numpy.v2, thereby important to keep downgraded numpy

In [None]:
import numpy as np

print(np.__version__)

In [None]:
%load_ext autoreload

Saving resuting figures and csv files in /home/t.afanasyeva/MAT_rnaseq/<yearmonthday>_output

In [None]:
p = Path().cwd().parent.parent
date = datetime.today().strftime("%Y%m%d")[2:]  # Get shortened year
RESULTS_PATH = p / f"{date}_output"
RESULTS_PATH.mkdir(parents=True, exist_ok=True)


def save_csv(table, table_name):
    """Save table in csv."""
    path = RESULTS_PATH / f"{table_name}.csv"
    table.to_csv(path)

### 1. Prepare data

In [None]:
class DataProcessor:
    def __init__(
        self,
        raw_counts: pd.DataFrame,
        classes: list[str],
        n_cpus: int,
        batches: list[str] = None,
    ):
        self.raw_counts = raw_counts
        self.classes = classes
        self.batches = batches
        self.n_cpus = n_cpus

    def prepare_metadata(self) -> pd.DataFrame:
        """
        A method that checks if the list of batches was provided.
        If there are no batches, it checks the number of classes and subsets samples that belong to the classes,
        then it will prepare filtered counts and metadata. Otherwise, it will subset by class and batch and check
        if only one class was provided. Multiple batch comparisons are implemented here as only per one class!

        Returns tuple of counts and metadata

        """
        if not self.batches and len(self.classes) != 2:
            raise ValueError(
                "prepare_metadata() takes a list with two classes (str) as a condition."
            )
        if self.batches and len(self.classes) != 2:
            raise ValueError(
                "prepare_metadata() takes a list with one class (str) when batches provided."
            )

        samples_to_compare = list(
            self.raw_counts[
                self.raw_counts.index.get_level_values("label").isin(self.classes)
            ].index
        )

        number_of_unique_classes = len(
            (
                self.raw_counts.loc[samples_to_compare]
                .index.get_level_values("label")
                .unique()
            )
        )
        if number_of_unique_classes <= 1:
            raise ValueError(
                f"Provided counts for {number_of_unique_classes} class(es). Please provide counts for at least two classes."
            )

        metadata = pd.DataFrame(samples_to_compare, columns=["index", "condition"])
        metadata.set_index("index", inplace=True)

        if self.batches:
            metadata["batches"] = metadata.index.map(
                lambda sample: (
                    sample.split("_")[1]
                    if sample.split("_")[1] in self.batches
                    else None
                )
            )  # Retrieve batch number if the sample name as TA001_batch_class_replica
            metadata = metadata.dropna(subset=["batches"])
            if len(metadata["batches"].unique()) <= 1:
                raise ValueError(
                    f"Provided counts for {len(metadata['batches'].unique())} batch(es). Please provide counts for at least two batches."
                )

        return metadata

    def make_dds(self) -> AnnData:
        metadata = self.prepare_metadata()
        counts = self.raw_counts.loc[metadata.index]

        # Filtering genes that have a total sum of counts lower than 10
        counts = counts[counts.columns[counts.sum(axis=0) >= 10]]

        # Convert the multi-indexed DataFrame to a single-indexed one by resetting the index
        counts.reset_index(level="label", drop=True, inplace=True)

        dds = DeseqDataSet(
            counts=counts,
            metadata=metadata,
            design_factors=metadata.columns,
            refit_cooks=True,
            inference=DefaultInference(self.n_cpus),
        )
        dds.deseq2()
        return dds

    def make_statistics(
        self,
        padj_value: float = 0.05,
        log2foldchange_value: int = 2,
    ) -> tuple[AnnData, pd.DataFrame, pd.DataFrame]:

        dds = self.make_dds()

        if not self.batches:
            conditions_to_contrast = self.classes
            design_factor = "condition"
        else:
            conditions_to_contrast = self.batches
            design_factor = "group"

        stat_res = DeseqStats(
            dds,
            contrast=(design_factor, *conditions_to_contrast),
            inference=DefaultInference(self.n_cpus),
            quiet=True,
        )

        stat_res.summary()

        res = stat_res.results_df
        res = res[res.baseMean >= 10]

        # LImit the p value to minimal pvalue detected

        sigs = res[
            (res.padj < padj_value) & (abs(res.log2FoldChange) > log2foldchange_value)
        ]

        try:
            if sigs.empty:
                raise ValueError("No significant gene found.")
        except ValueError as e:
            print(e)

        return dds, res, sigs

### 2. Make plots

In [None]:
class Plotter:
    geneid_symbol_mapper_human = None
    goeaobj = None
    total_go_terms = None
    is_initialized = False

    def __init__(
        self,
        dds: AnnData,
        res: pd.DataFrame,
        sigs: pd.DataFrame,
        analysis_name: str,
    ):
        self.dds = dds
        self.res = res
        self.sigs = sigs
        self.analysis_name = analysis_name

        if not Plotter.is_initialized:
            Plotter._initialise_go()
            Plotter.is_initialized = True

    def check_de_gene_list_not_empty(self):
        """Check if differentially expressed gene list is empty."""
        if self.sigs.empty:
            print("Warning: No significant genes found.")
            return False
        return True

    def make_figure(self, plot_type: str):
        if plot_type not in ["volcano", "histogram", "pca", "go"]:
            raise ValueError(
                "plot() takes 'volcano', 'histogram', 'pca' or 'go' as a condition."
            )
        fig_extension = "png"
        fname = RESULTS_PATH / f"{self.analysis_name}_{plot_type}.{fig_extension}"
        fontsize = 12

        if plot_type == "volcano":
            try:
                if self.sigs.empty:
                    raise ValueError("No significant gene found.")
            except ValueError as e:
                print(e)
                return

            print("Plotting volcano plot...")
            fig = self.plot_volcano()
            plt.title(
                f"{self.analysis_name} Differentially Expressed Genes",
                fontsize=fontsize,
                fontweight="bold",
            )

        if plot_type == "histogram":
            try:
                if self.sigs.empty:
                    raise ValueError("No significant gene found.")
            except ValueError as e:
                print(e)
                return

            print("Plotting histogram...")
            fig = self.plot_historgram()
            fig.ax_col_dendrogram.set_title(
                f"{self.analysis_name} Differentially Expressed Genes",
                fontsize=fontsize,
                fontweight="bold",
                pad=2,
            )

        if plot_type == "pca":
            fig = self.plot_pca()
            fig.axes[0].set_title(
                f"{self.analysis_name}", fontsize=fontsize, fontweight="bold"
            )

        if plot_type == "go":
            try:
                if self.sigs.empty:
                    raise ValueError("No significant gene found.")
            except ValueError as e:
                print(e)
                return

            print("Plotting GO terms...")
            fig = self.plot_go()
            plt.title(
                f"{self.analysis_name} Top 20 Significant GO Terms",
                fontsize=fontsize,
                fontweight="bold",
            )
            go_df = self.generate_go_table()
            save_csv(go_df, f"{self.analysis_name}_go_terms")

        fig.figure.savefig(
            fname,
            format=fig_extension,
            dpi=300,
            bbox_inches="tight",
        )

    def plot_volcano(self, log2foldchange=2):
        """Plot volcano."""
        # Add a small number to zeros to avoid inf in padj_log, taking log(0) error
        grapher = self.res.assign(
            padj_log=self.res["padj"].apply(
                lambda x: -np.log10(x) if x != 0 else -np.log10(x + 1e-300)
            ),
            color="no_expression_change",
        )

        grapher.loc[grapher["log2FoldChange"] > log2foldchange, "color"] = (
            "overexpressed"
        )
        grapher.loc[grapher["log2FoldChange"] < -log2foldchange, "color"] = (
            "underexpressed"
        )
        # Subset grapher to only overexpressed and underexpressed
        grapher_subset = grapher[
            grapher["color"].isin(["overexpressed", "underexpressed"])
        ]

        print(f"Number of DE genes: {len(grapher_subset)}")

        # Sort by padj_log (descending) and log2FoldChange (ascending)
        sorted_grapher_padj_log = grapher_subset.sort_values(
            by="padj_log", ascending=False
        )
        sorted_grapher_log2foldchange = grapher_subset.sort_values(
            by="log2FoldChange", ascending=True
        )
        print(f"Finished sorting DE genes.")

        # Select 20 genes for annotation (10 highest and 10 lowest)
        annotation_subset = pd.concat(
            [
                sorted_grapher_padj_log.head(20),
                sorted_grapher_log2foldchange.head(10),
                sorted_grapher_log2foldchange.tail(10),
            ]
        ).drop_duplicates()
        print(f"Selected top and bottom 20 DE genes for annotation.")

        g = plt.figure(figsize=(8, 10))
        rc = {
            "axes.spines.right": False,
            "axes.spines.top": False,
            "axes.titlepad": 20,
            "font.size": 10,
            "font.family": "sans-serif",
            "legend.frameon": "False",
            "legend.loc": "upper right",
            "lines.linestyle": "--",
            "lines.linewidth": 1,
            "lines.color": "k",
            "axes.facecolor": "white",
        }

        sns.set_theme(rc=rc)

        ax = sns.scatterplot(
            data=grapher,
            x="log2FoldChange",
            y="padj_log",
            hue="color",
            hue_order=["no_expression_change", "overexpressed", "underexpressed"],
            palette=["grey", "orange", "purple"],
            size="baseMean",
            sizes=(20, 50),
            alpha=0.7,
        )
        # Draw lines showing the threshold values
        ax.axhline(1.3, zorder=1)
        ax.axvline(2, zorder=1)
        ax.axvline(-2, zorder=1)
        # Add DE gene names (only for top 10% of sorted values)
        texts = []
        for i, row in annotation_subset.iterrows():
            texts.append(
                plt.text(
                    x=row.log2FoldChange,
                    y=row.padj_log,
                    s=row.name,
                    weight="bold",
                    size=8,
                )
            )

        adjust_text(texts, arrowprops=dict(arrowstyle="-", color="k"))
        plt.legend(bbox_to_anchor=(1.4, 1), prop={"size": 10, "weight": "bold"})
        plt.xticks(size=10, weight="bold")
        plt.yticks(size=10, weight="bold")
        plt.xlabel("$log_{2}$ fold change")
        plt.ylabel("-$log_{10}$ FDR")
        plt.ylim(-2, grapher["padj_log"].max() + 5)
        return g

    def plot_historgram(
        self, num_top_sig: Union[int, str] = 50
    ) -> tuple[figure.Figure, plt.Axes]:
        """Plot histogram of gene expression of genes with significant Benjamini-Hochberg adjusted p-values."""
        if num_top_sig != "all":
            sigs = self.sigs.sort_values("padj")[:num_top_sig]
        dds_sigs = self.dds[:, sigs.index]
        dds_sigs.layers["log1p"] = np.log1p(dds_sigs.layers["normed_counts"])
        sns.set_theme(rc={"ytick.labelsize": 8})
        grapher = pd.DataFrame(
            dds_sigs.layers["log1p"].T,
            index=dds_sigs.var_names,
            columns=dds_sigs.obs.condition,
        )

        lut = dict(zip(set(dds_sigs.obs.condition), "rgb"))
        col_colors = list(dds_sigs.obs.condition.map(lut))

        plt.figure(figsize=(8, 10), layout="tight")
        ax = sns.clustermap(
            figsize=(8, 10),
            data=grapher,
            cmap="RdYlBu_r",
            z_score=None,
            dendrogram_ratio=(0.1, 0.1),
            cbar_pos=(0.93, 0.2, 0.03, 0.45),
            cbar_kws=dict(
                location="left",
                orientation="vertical",
                pad=2,
            ),
            col_colors=col_colors,
        )

        handles = [Patch(facecolor=lut[name]) for name in lut]
        plt.legend(
            handles,
            lut,
            bbox_transform=plt.gcf().transFigure,
            bbox_to_anchor=(1, 1),
            loc="upper right",
            fontsize=10,
            fancybox=True,
            frameon=True,
            facecolor="white",
            edgecolor="black",
        )
        ax.ax_heatmap.set_xticklabels([])
        ax.ax_heatmap.set(xlabel=None)
        plt.subplots_adjust(hspace=0.01)
        return ax

    def plot_pca(self, w_text=False):
        dds = self.dds.copy()
        sc.tl.pca(dds, n_comps=2)
        pca = dds.obsm["X_pca"]
        sample_names = list(sc.get.obs_df(dds).index)
        rc = {
            "axes.facecolor": "white",
            "axes.edgecolor": "black",
        }
        sns.set_theme(rc=rc)
        plt.figure(figsize=(8, 10))
        ax = sc.pl.pca(
            dds, color="condition", size=300, show=False, title=" ", return_fig=True
        )  # color can also be group

        if w_text:
            texts = [
                plt.text(pca[i][0], pca[i][1], sample_names[i], ha="left", va="bottom")
                for i in range(len(pca))
            ]

        return ax

    @classmethod
    def _initialise_go(cls):
        """Get GO terms and initiate gotools classes."""
        print("Initializing GO terms...")

        path_to_supporting_files = Path().cwd() / "deseq2"

        """
        download_ncbi_associations() needed for gene2go returned critical connection error, 
        likely was not handled well by Sanquin's firewall. 
        I have downloaded the gene2go data from 
        ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2go.gz on 27-02-2025 and unzipped.
        """
        # gunzip(path_to_supporting_files / "gene2go.gz")
        genes = Gene2GoReader(
            path_to_supporting_files / "gene2go", taxids=[9606], namespaces={"BP"}
        )
        ns2assoc = genes.get_ns2assc()

        """Downloaded ontologies on 27-02-2025"""
        # obo_fname = download_go_basic_obo() # saves them to the same folder as this script
        obodag = GODag(path_to_supporting_files / "go-basic.obo")

        cls.goeaobj = GOEnrichmentStudyNS(
            GeneID2nt_hs.keys(),  # List of human protein-coding genes
            ns2assoc,  # Geneid/GO associations
            obodag,  # Ontologies
            propagate_counts=False,
            alpha=0.05,  # Default significance cut-off
            methods=["fdr_bh"],  # Default correction method for multiple testing
        )

        cls.geneid_symbol_mapper_human = {
            GeneID2nt_hs[key].Symbol: GeneID2nt_hs[key].GeneID for key in GeneID2nt_hs
        }

    def generate_go_table(self):
        """Retrieve GO terms per condition"""
        sigs_ids = [
            Plotter.geneid_symbol_mapper_human[gene]
            for gene in self.sigs.index
            if gene in Plotter.geneid_symbol_mapper_human
        ]
        print(
            f"Mapped {len(sigs_ids)/len(self.sigs.index)*100:.2f}% of",
            "GeneIDs of significantly differentially expressed genes symbols to gene IDs.",
        )

        goea_results = Plotter.goeaobj.run_study(sigs_ids, prt=None)
        goea_results_sig = [r for r in goea_results if r.p_fdr_bh < 0.05]

        inverted_mapping_dictionary = {
            v: k for k, v in Plotter.geneid_symbol_mapper_human.items()
        }  # Map study item IDs to gene symbols

        go_df = pd.DataFrame(
            list(
                map(
                    lambda x: [
                        x.GO,
                        x.goterm.name,
                        x.goterm.namespace,
                        x.p_uncorrected,
                        x.p_fdr_bh,
                        x.ratio_in_study[0],
                        x.ratio_in_study[1],
                        x.ratio_in_study[0] / x.ratio_in_study[1],
                        list(
                            map(lambda y: inverted_mapping_dictionary[y], x.study_items)
                        ),
                    ],
                    goea_results_sig,
                )
            ),
            columns=[
                "GO",
                "term",
                "class",
                "raw_pvalue",
                "fdr",
                "n_genes",
                "n_study",
                "ratio_in_study",
                "gene_symbols",
            ],
        )
        go_df["gene_symbols"] = go_df["gene_symbols"].apply(lambda x: ", ".join(x))
        go_df = go_df.sort_values("fdr", ascending=True)
        return go_df

    def plot_go(self):
        """Plot Go terms."""
        go_terms = self.generate_go_table()
        go_terms = go_terms[:20]
        go_terms = go_terms.sort_values("ratio_in_study", ascending=False)

        norm = mpl.colors.Normalize(vmin=go_terms.fdr.min(), vmax=go_terms.fdr.max())
        color_mapper = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.bwr_r)
        rc = {"axes.edgecolor": "black"}
        sns.set_theme(rc=rc)

        g = plt.figure(figsize=(10, 12))

        ax = sns.barplot(
            data=go_terms,
            x=go_terms["n_genes"] / go_terms["n_study"],
            y="term",
            palette=list(color_mapper.to_rgba(go_terms.fdr.values)),
        )

        ax.set_yticklabels(
            [textwrap.fill(term, 40) for term in go_terms["term"]],
            fontsize=2,  # Set font size 3 times larger (default is 12)
        )
        cbar = g.colorbar(
            color_mapper, ax=ax, orientation="vertical", pad=0.01, format="{x:.2f}"
        )
        cbar.ax.set_position([0.8, 0.5, 0.2, 0.3])
        cbar.ax.set_title("padj", loc="left", pad=4.0)

        return g

### 6. Run analysis

In [None]:
file_path_multiindex = p / "resources/250624_training_multindex_108_7128.csv"
mi_counts = pd.read_csv(file_path_multiindex, index_col=[0, 1])
mi_counts.head()

In [None]:
mi_counts.rename(index={"IMDM": "nc"}, level="label", inplace=True)

In [None]:
# Create pairs with "negative control"
def make_pairs_with_negative_control(class_list):
    class_comparison_pair_list = []
    for my_class in class_list:
        class_comparison_pair_list.append([my_class, "nc"])
    return class_comparison_pair_list


class_list = ["Fla-PA", "LPS", "PGN", "R848", "Pam3", "PGN"]

# list_of_classes_to_include = [[my_class, "nc"] for my_class in class_list]
# for i, class_x in enumerate(class_list):
#     for class_y in class_list[i + 1 :]:
#         if class_x != class_y:
#             list_of_classes_to_include.append([class_x, class_y])

list_of_classes_to_include = make_pairs_with_negative_control(class_list)

In [None]:
# list_of_de_genes = []

combined_res = pd.DataFrame()

for class_pair in list_of_classes_to_include:
    analysis_name = f"{class_pair[0]}"
    print(f"Running analysis for {analysis_name}")

    dataprocessor = DataProcessor(raw_counts=mi_counts, classes=class_pair, n_cpus=42)
    dds, res, sigs = dataprocessor.make_statistics()
    save_csv(res, f"{analysis_name}_results")

# combined_res now contains all results with prefixed column names

set_of_de_genes = set(list_of_de_genes)

with open(RESULTS_PATH / "de_genes.txt", "w") as f:
    for gene in set_of_de_genes:
        f.write(f"{gene}\n")

Generatin DE gene lists for additional TLRS and HK bacteria samples

In [None]:
file_path_complete_data = p / "resources/250624_300.csv"
complete_data = pd.read_csv(file_path_complete_data, index_col=0)
complete_data["label"] = [label.split("_")[2] for label in complete_data.index]
complete_data.reset_index(inplace=True)
complete_data.set_index(["samples", "label"], inplace=True)
complete_data.rename(index={"IMDM": "nc"}, level="label", inplace=True)

In [None]:
class_list_additional_tlrs = ["LTA", "MPLA", "Pam2", "HKEB", "HKSA"]
# Create pairs with "negative control"
list_of_classes_to_include = make_pairs_with_negative_control(
    class_list_additional_tlrs
)
list_of_classes_to_include

In [None]:
list_of_de_genes = []
for class_pair in list_of_classes_to_include:
    analysis_name = f"{class_pair[0]}"
    print(f"Running analysis for {analysis_name}")

    dataprocessor = DataProcessor(
        raw_counts=complete_data, classes=class_pair, n_cpus=42
    )
    dds, res, sigs = dataprocessor.make_statistics()

    save_csv(res, f"{analysis_name}_results")
    # save_csv(sigs, f"{analysis_name}_statistically_significant_results")
    # list_of_de_genes.extend([gene for gene in sigs.index])

    # plotter = Plotter(dds, res, sigs, analysis_name)

    # plotter.make_figure("pca")
    # plotter.make_figure("histogram")
    # plotter.make_figure("volcano")
    # plotter.make_figure("go")

In [7]:
del merged_df

In [None]:
filelist = list(RESULTS_PATH.glob("*.csv"))
merged_df = pd.read_csv(filelist[0], index_col=0)
merged_df.rename(
    columns=lambda x: f'{x}_{filelist[0].stem.split("_")[0]}', inplace=True
)

for file in filelist[1:]:
    print(f"Processing file: {file.stem}")
    class_name = file.stem.split("_")[0]
    df = pd.read_csv(file, index_col=0)
    merged_df = pd.merge(
        merged_df,
        df,
        left_index=True,
        right_index=True,
        how="outer",
        suffixes=("", f"_{class_name}"),
        validate="one_to_one",
    )

print(merged_df.shape)

(RESULTS_PATH / "merged").mkdir(exist_ok=True)
merged_df.to_csv(RESULTS_PATH / "merged" / "merged_results.csv")

Processing file: LPS_results
Processing file: PGN_results
Processing file: R848_results
Processing file: Pam3_results
Processing file: LTA_results
Processing file: MPLA_results
Processing file: Pam2_results
Processing file: HKEB_results
Processing file: HKSA_results
(12759, 60)


In [None]:
from pathlib import Path
import pandas as pd

path = Path("/home/t.afanasyeva/MAT_rnaseq/250625_output")
filelist = list(path.glob("*_go_terms.csv"))
merged_df = pd.read_csv(filelist[0], index_col=0)
merged_df.insert(loc=0, column="Ligand", value=filelist[0].stem.split("_")[0])

In [None]:
for file in filelist[1:]:
    print(f"Processing file: {file.stem}")
    class_name = file.stem.split("_")[0]
    df = pd.read_csv(file, index_col=0)
    df["Ligand"] = file.stem.split("_")[0]
    merged_df = pd.concat([merged_df, df], axis=0)

print(merged_df.shape)

Processing file: LPS_go_terms
Processing file: PGN_go_terms
Processing file: R848_go_terms
Processing file: Pam3_go_terms
(240, 10)


In [35]:
RESULTS_PATH = Path("/home/t.afanasyeva/MAT_rnaseq/250710_output")

In [36]:
merged_df.to_csv(RESULTS_PATH / "GO_DE_genes.csv")

In [31]:
merged_df

Unnamed: 0,Ligand,GO,term,class,raw_pvalue,fdr,n_genes,n_study,ratio_in_study,gene_symbols
0,Fla-PA,GO:0070098,chemokine-mediated signaling pathway,biological_process,1.171771e-06,0.014155,3,8,0.375,"CXCL10, CXCL11, CCL8"
1,Fla-PA,GO:0031640,killing of cells of another organism,biological_process,4.585781e-06,0.02769812,3,8,0.375,"CXCL10, CXCL11, CCL8"
2,Fla-PA,GO:0006935,chemotaxis,biological_process,9.523999e-06,0.03334517,3,8,0.375,"CXCL10, CXCL11, CCL8"
3,Fla-PA,GO:0061844,antimicrobial humoral immune response mediated...,biological_process,1.16362e-05,0.03334517,3,8,0.375,"CXCL10, CXCL11, CCL8"
4,Fla-PA,GO:0010818,T cell chemotaxis,biological_process,1.380181e-05,0.03334517,2,8,0.25,"CXCL10, CXCL11"
0,LPS,GO:0006954,inflammatory response,biological_process,4.85855e-13,5.869129e-09,12,32,0.375,"IL1A, ADORA2A, CXCL2, CCL3, IL36G, CCL3L3, CCL..."
1,LPS,GO:0006955,immune response,biological_process,2.628192e-12,1.587428e-08,11,32,0.34375,"CSF3, IL1A, PRG4, CXCL2, IL36G, TNFSF15, CCL4,..."
2,LPS,GO:0071222,cellular response to lipopolysaccharide,biological_process,1.85225e-10,7.458393e-07,8,32,0.25,"CSF3, IL1A, IL36G, PTGS2, IL6, TNIP3, IL36RN, ..."
3,LPS,GO:0019221,cytokine-mediated signaling pathway,biological_process,1.365441e-09,4.12363e-06,7,32,0.21875,"CSF3, IL1A, GREM2, INHBA, F3, IL36G, IL6"
4,LPS,GO:0007267,cell-cell signaling,biological_process,1.836917e-09,4.437992e-06,8,32,0.25,"ADORA2A, INHBA, CCL3, IL36G, CCL4, GJB2, TNFAI..."
