In [2]:
import pandas as pd
import numpy as np

import altair as alt
from altair_saver import save as alt_save

import seaborn as sns
from matplotlib import pyplot as plt
from adjustText import adjust_text
from pyensembl import EnsemblRelease, find_species_by_name, genome_for_reference_name

import json
import requests

In [3]:
# Workflow parameters. `snakemake` is neither imported nor defined in this
# notebook — presumably it is injected by Snakemake's notebook integration
# (TODO confirm against the workflow rule).
metmap_tissue = snakemake.params['metmap_tissue']  # interpolated into both plot titles below
fold = snakemake.wildcards["fold"]  # interpolated into both plot titles below

In [4]:
# Load the table from the `deseq` workflow input; the first CSV column becomes
# the index. Later cells read the `padj` and `log2FoldChange` columns and treat
# the index as dot-versioned gene IDs (presumably DESeq2 output — confirm).
df = pd.read_csv(snakemake.input['deseq'], index_col=0)

In [5]:
# Volcano-plot columns, computed column-wise rather than through per-element /
# per-row Python lambdas: same values, no Python-level loop, and no failure
# when the table is empty.
df["minuslog10p"] = -np.log10(df["padj"])

# A gene is "significant" when padj < 0.05 AND |log2FoldChange| > 2.
# Comparisons against NaN evaluate to False, so genes with a missing padj or
# fold change are never flagged.
df["significant"] = (df["padj"] < 0.05) & (df["log2FoldChange"].abs() > 2)

In [6]:
# Genes to annotate on the volcano plot: -log10(padj) above -log10(5e-8) AND
# |log2FoldChange| > 3.5. Evaluated column-wise instead of with a row-wise
# apply; NaN comparisons are False, so incomplete rows are never labelled.
df["should_label"] = (df["minuslog10p"] > -np.log10(0.00000005)) & (df["log2FoldChange"].abs() > 3.5)

In [7]:
# Ensembl release 100 annotation for homo_sapiens, used to map gene IDs to names.
er = EnsemblRelease(100, species=find_species_by_name("homo_sapiens"))


def get_gene_name(ens_vid):
    """Translate a (possibly versioned) Ensembl gene ID into a gene name.

    Parameters
    ----------
    ens_vid : str
        Gene ID, optionally carrying a ".<version>" suffix; everything from
        the first "." onwards is dropped before the lookup.

    Returns
    -------
    str
        The name reported by the module-level ``er`` annotation, or
        ``ens_vid`` unchanged when the lookup raises ``ValueError`` or yields
        an empty name, so every gene keeps a usable, non-blank identifier.
    """
    ens_id = ens_vid.split(".")[0]
    try:
        name = er.gene_name_of_gene_id(ens_id)
    except ValueError:
        # Lookup failed (presumably the ID is absent from this release):
        # keep the original identifier.
        return ens_vid
    # Fall back to the ID if the annotation has no name for this gene;
    # otherwise the gene would get a blank plot label and a blank line in the
    # gene list sent to Enrichr.
    return name or ens_vid


df["gene_name"] = df.index.to_series().apply(get_gene_name)

In [8]:
# Rows flagged by `should_label`; the plotting cell draws one text label per row.
label_df = df.loc[df["should_label"]]

In [9]:
# Volcano plot: log2 fold change (x) against -log10 adjusted p-value (y),
# coloured by the `significant` flag.
fig, ax = plt.subplots(figsize=(8, 8))
sns.scatterplot(data=df, x="log2FoldChange", y="minuslog10p", hue="significant", ax=ax)

# One text label per flagged gene, anchored at that gene's own coordinates.
texts = [
    ax.text(x, y, name, ha='center', va='center')
    for x, y, name in zip(
        label_df["log2FoldChange"], label_df["minuslog10p"], label_df["gene_name"]
    )
]
# Nudge labels apart to reduce overlap; nothing to adjust when no gene is flagged.
if texts:
    adjust_text(texts, ax=ax)

# padj = 0.05 threshold. axhline spans the full axes width whatever the
# fold-change range is, instead of a segment hard-coded to x in [-6, 6].
ax.axhline(y=-np.log10(0.05), color='black', linestyle='--', lw=2)

ax.set_title(f"Differentially expressed genes for {metmap_tissue} metastasis, fold {fold}")
ax.set_xlabel("log_2(fold change)")
ax.set_ylabel("-log_10(p)")
fig.savefig(snakemake.output["deseq_plot"])

## Use Enrichr to analyze the set of differentially expressed genes

Reference: https://maayanlab.cloud/Enrichr/help#api

In [10]:
# Re-index by gene name so the index can be used directly as the gene list
# sent to Enrichr. Skipped when the frame is already indexed by gene_name, so
# re-running this cell is a no-op instead of a KeyError; a genuinely missing
# gene_name column still raises.
if df.index.name != "gene_name":
    df = df.set_index("gene_name")

In [11]:
# Split the index labels (gene names) into two plain Python lists according to
# the boolean `significant` column.
is_significant = df["significant"]
met_gene_set = df[is_significant].index.tolist()
nonmet_gene_set = df[~is_significant].index.tolist()

In [12]:
def get_enrichr_df(gene_set, gene_set_desc, gene_set_library='KEGG_2019_Human', timeout=60):
    """Run an Enrichr enrichment analysis for a gene list and return it as a DataFrame.

    Two HTTP calls are made: the gene list is uploaded to ``addList``, then the
    returned ``userListId`` is used to query ``enrich`` for one gene-set library.

    Parameters
    ----------
    gene_set : iterable of str
        Gene identifiers; sent newline-separated.
    gene_set_desc : str
        Free-text description stored with the uploaded list.
    gene_set_library : str, optional
        Enrichr library to test against (sent as ``backgroundType``).
        Defaults to ``'KEGG_2019_Human'``.
    timeout : float, optional
        Seconds to wait on each request, so a stalled server fails the call
        instead of hanging the workflow. Defaults to 60.

    Returns
    -------
    pandas.DataFrame
        One row per entry returned for the library, with columns ``rank``,
        ``term_name``, ``pval``, ``zscore``, ``combined_score``,
        ``overlapping_genes``, ``padj``, ``pold``, ``poldadj``.

    Raises
    ------
    Exception
        If either request comes back with a non-OK HTTP status.
    requests.exceptions.RequestException
        On connection failures or when ``timeout`` is exceeded.
    """
    # HTTPS, matching the API reference linked from this notebook.
    base_url = 'https://maayanlab.cloud/Enrichr'

    # Step 1: upload the gene list. The (None, value) tuples passed via
    # `files=` make requests send them as multipart form fields.
    upload = requests.post(
        f'{base_url}/addList',
        files={
            'list': (None, '\n'.join(gene_set)),
            'description': (None, gene_set_desc),
        },
        timeout=timeout,
    )
    if not upload.ok:
        raise Exception('Error analyzing gene list')
    user_list_id = upload.json()['userListId']

    # Step 2: fetch enrichment results for the uploaded list. `params=` lets
    # requests build and URL-encode the query string.
    enrichment = requests.get(
        f'{base_url}/enrich',
        params={'userListId': user_list_id, 'backgroundType': gene_set_library},
        timeout=timeout,
    )
    if not enrichment.ok:
        raise Exception('Error fetching enrichment results')
    results = enrichment.json()

    # The response is keyed by library name; each entry is a positional record
    # that is labelled here with explicit column names.
    return pd.DataFrame(
        data=results[gene_set_library],
        columns=[
            "rank", "term_name", "pval", "zscore", "combined_score",
            "overlapping_genes", "padj", "pold", "poldadj",
        ],
    )

In [13]:
# Display how many genes were flagged significant, i.e. the size of the list
# sent to Enrichr in the next cell.
len(met_gene_set)

In [14]:
# Query Enrichr (network call) for the significant gene list; get_enrichr_df
# tests against the KEGG_2019_Human library.
met_enrichr_df = get_enrichr_df(met_gene_set, 'Differentially significant genes')

In [15]:
# Preview the first rows of the enrichment table.
met_enrichr_df.head()

In [16]:
# Write the complete enrichment table as tab-separated text to the `enrichr`
# workflow output; this happens before the padj filter applied further down.
met_enrichr_df.to_csv(snakemake.output["enrichr"], sep='\t')

In [17]:
# Keep only terms whose adjusted p-value is below 0.999913.
# NOTE(review): the cutoff 0.999913 is unexplained — presumably it drops terms
# with effectively no signal from the bar chart; confirm the intent and
# consider naming it as a constant.
met_enrichr_df = met_enrichr_df.loc[met_enrichr_df["padj"] < 0.999913]

In [18]:
# Derive the plotting columns on a fresh frame. `assign` returns a new
# DataFrame instead of writing columns onto the result of the earlier boolean
# filter, which avoids the chained-assignment pattern (and the
# SettingWithCopyWarning it can trigger). Both columns are computed
# column-wise rather than with a per-element lambda.
met_enrichr_df = met_enrichr_df.assign(
    minuslog10p=-np.log10(met_enrichr_df["padj"]),
    significant=met_enrichr_df["padj"] < 0.05,
)

In [19]:
# Explicit y-axis ordering: term names in the table's current row order, so the
# bars are not re-sorted alphabetically.
TERM_SORT = list(met_enrichr_df["term_name"])

# Only the first 30 rows of the table are drawn.
top_terms = met_enrichr_df.head(30)

# Encodings are built separately to keep the chart expression short.
x_encoding = alt.X("minuslog10p:Q", axis=alt.Axis(title="-log_10(p)"))
y_encoding = alt.Y("term_name:N", sort=TERM_SORT, axis=alt.Axis(title="KEGG 2019 term"))
color_encoding = alt.Color("significant:N", legend=alt.Legend(title="Significant at 0.05"))
chart_title = f"Top 30 pathways enriched in differentially expressed gene set for {metmap_tissue} metastasis, fold {fold}"

plot = (
    alt.Chart(top_terms)
    .mark_bar()
    .encode(x=x_encoding, y=y_encoding, color=color_encoding)
    .properties(title=chart_title)
)

plot

In [20]:
# Save the Altair chart to the `enrichr_plot` workflow output via altair_saver
# (the output format is presumably inferred from the file extension — confirm).
alt_save(plot, snakemake.output["enrichr_plot"])