# Expression of FOXA2 and other candidate TFs

Use violin plots instead of the heatmap.


```{Reviewer Comment}
Fig. 2E: Replace the current visualization with violin plots, showing each sample as a distinct data point,
to improve interpretability.
```


## Rendition 1.

- As a grid of violin plots

## Rendition 2.

- Multi-panel: each panel is a TF, X is the compartment, points are the average expression per compartment per patient

In [None]:
# Project-local helpers from common_utils.
# NOTE(review): later cells use `sc`, `os`, `pd`, `sns`, `plt`, `filter_genes` and
# `OUTDIR_COMPARTMENTS`, none of which are imported in the visible cells — confirm
# they are brought into scope elsewhere before running this notebook top to bottom.
from common_utils import setup_dirs, find_arial_font

# Presumably locates/registers the Arial font for figures — confirm in common_utils.
find_arial_font()

In [None]:
# Root output directory for this analysis (OUTDIR_COMPARTMENTS is defined outside the visible cells).
outDir = OUTDIR_COMPARTMENTS
# setup_dirs returns three values, used below as the figures / data / tables directories.
figuresDir, dataDir, tablesDir = setup_dirs(outDir)

# Make scanpy write its own figures into figuresDir, saved at 300 dpi.
sc.settings.figdir = figuresDir
sc.set_figure_params(dpi_save=300, vector_friendly=True)

In [None]:
# Shell command (not Python): copy the generated figures matching *.p* from the
# remote host `iris` to the local checkout. With --relative, only the part of the
# source path after the `/./` marker is recreated under the destination directory.
# NOTE(review): run from a terminal, or prefix with `!` / use `%%bash` in a notebook cell.
rsync -azvp --relative \
    iris:/data1/shahs3/users/salehis/sclc/./results//rebuttal/nat_methods/figures/*.p* \
    /Users/salehis/Projects/sclc/rebuttal_code/SCLC_MET/

In [None]:
# Load the filtered cells
# Only the obs index (cell identifiers) of this object is used, as the keep-list below.
cdata_path = '/data1/shahs3/users/salehis/sclc/results/patient_met/primary_umaps/data/primary_scrublet_qc_harmony_final.h5ad'
cdata = sc.read_h5ad(cdata_path)
keep_cells = cdata.obs.index.tolist()

# Load the raw counts
# X is overwritten with a copy of the 'counts' layer, then the object is subset
# to the cells present in the filtered object above.
adata = sc.read_h5ad(os.path.join('/data1/shahs3/users/salehis/sclc/results/patient_met/primary_umaps/data', 'primary_scrublet_qc.h5ad'))
adata.X = adata.layers['counts'].copy()
adata = adata[keep_cells, :].copy()

# Attach the compartment annotation ('predicted.ann_level_1') to each cell by
# joining on the obs index. DataFrame.join is a left join by default, so cells
# absent from the mapping file end up with NaN in that column.
compartment_mapping_path = '/data1/shahs3/users/salehis/sclc/results/patient_met/primary_umaps/primary_ann_level_1.csv.gz'
compartment_mapping = pd.read_csv(compartment_mapping_path, index_col=0)
adata.obs = adata.obs.join(compartment_mapping[['predicted.ann_level_1']])


# Rendition 1.

- As a grid of violin plots


- Compute the average expression of each TF per patient per compartment

In [None]:
# Compute the mean log-normalized expression of each candidate TF per sample and
# per compartment, and save it as a long-format table.
TFs = ['MEOX1', 'ARX', 'FOXD4', 'RAX2', 'FOXL1', 'FOXS1', 'PRRX2', 'FOXA2']

# Work on a copy so the raw-count object `adata` is left untouched.
bdata = adata.copy()
assert bdata.X.max() > 100, 'Data is not counts'
# Filter genes and cells
sc.pp.filter_genes(bdata, min_cells=3)
sc.pp.filter_cells(bdata, min_genes=200)
# Project-level gene filter defined outside this cell.
bdata = filter_genes(bdata)

# The filters above can drop lowly expressed genes; fail early with a readable
# message instead of a bare KeyError from the per-TF indexing further down.
missing_tfs = [tf for tf in TFs if tf not in bdata.var_names]
if missing_tfs:
    raise KeyError(f"TFs missing after gene filtering: {missing_tfs}")

# Library-size normalize to 1e4 counts per cell, then log1p-transform.
sc.pp.normalize_total(bdata, target_sum=1e4)
sc.pp.log1p(bdata)
assert bdata.X.max() < 50, 'Data is not normalized'

# One row per cell: compartment, sample, and one column per TF.
df = bdata.obs[['predicted.ann_level_1', 'sample']].copy()
for tf in TFs:
    # assumes bdata.X is a scipy sparse matrix (hence .toarray()) — TODO confirm
    df[tf] = bdata[:, tf].X.toarray().flatten()

# Compute average per sample
# observed=False keeps every sample x compartment combination for categorical
# keys, so combinations without cells appear as NaN rows.
df = df.groupby(['sample', 'predicted.ann_level_1'], observed=False).mean().reset_index()

# Melt to long format: one row per (sample, compartment, TF).
df = df.melt(id_vars=['sample', 'predicted.ann_level_1'], var_name='TF', value_name='expression')

# Print min and max expression
print(f"Min expression: {df['expression'].min()}")
print(f"Max expression: {df['expression'].max()}")

# Save this
df.to_csv(os.path.join(tablesDir, 'TF_expression_per_sample.csv.gz'), index=False)

In [None]:
# Plot input: reload the per-sample TF table from disk and expose the
# annotation column under the friendlier name "Compartment".
df = pd.read_csv(
    os.path.join(tablesDir, "TF_expression_per_sample.csv.gz")
).rename(columns={"predicted.ann_level_1": "Compartment"})

# The plotting functions below draw seaborn violin plots from this table:
#   - one panel per TF
#   - one point per sample
#   - X axis: Compartment (formerly predicted.ann_level_1)


In [None]:
def plot_violin_compartment(figsize=(12, 8)):
    """
    Draw a 2x2 grid of violin plots, one panel per compartment.

    Within each panel the X axis is the TF and the Y axis is the expression
    value; individual observations are drawn as points inside the violins.
    Reads the module-level ``df`` (columns "Compartment", "TF", "expression")
    and writes ``TF_expression_per_sample_compartment.pdf`` into ``figuresDir``.

    Parameters
    ----------
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    """
    tf_order = ['MEOX1', 'ARX', 'FOXA2', 'FOXD4', 'RAX2', 'FOXL1', 'FOXS1', 'PRRX2']
    compartments = ['Endothelial', 'Epithelial', 'Immune', 'Stroma']
    plt.clf()
    fig, axes = plt.subplots(
        nrows=2, ncols=2, figsize=figsize, sharey=True
    )
    for i, compartment in enumerate(compartments):
        ax = axes.flatten()[i]
        df_tf = df[df["Compartment"] == compartment].copy()
        sns.violinplot(
            data=df_tf,
            x="TF",
            y="expression",
            hue="TF",
            # Pin the TF order (and matching colors) so every panel uses the
            # same layout instead of whatever order the rows happen to be in.
            order=tf_order,
            hue_order=tf_order,
            ax=ax,
            split=False,
            inner="points",
            linewidth=0.5,
            density_norm="width",
        )
        _ = ax.tick_params(axis='x', rotation=90)
        ax.set_title(compartment.capitalize())
        ax.set_xlabel("")
        ax.set_ylabel("Normalized Expression")
        ax.grid(False)
        # hue duplicates the X variable, so a legend would carry no information.
        ax.legend().remove()
    plt.tight_layout()
    plt.savefig(
        os.path.join(figuresDir, "TF_expression_per_sample_compartment.pdf"), bbox_inches="tight"
    )
    plt.close(fig)


def plot_violin(figsize=(12, 8)):
    """
    Draw a 2x4 grid of violin plots, one panel per TF.

    Within each panel the X axis is the compartment and the Y axis is the
    expression value; individual observations are drawn as points.
    Reads the module-level ``df`` (columns "Compartment", "TF", "expression")
    and writes ``TF_expression_per_sample.pdf`` into ``figuresDir``.

    Parameters
    ----------
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    """
    panel_tfs = ['MEOX1', 'ARX', 'FOXA2', 'FOXD4', 'RAX2', 'FOXL1', 'FOXS1', 'PRRX2']
    # Options shared by every panel.
    violin_kwargs = dict(
        x="Compartment",
        y="expression",
        hue="Compartment",
        split=False,
        inner="points",
        linewidth=0.5,
        linecolor=None,
        density_norm="width",
    )
    out_path = os.path.join(figuresDir, "TF_expression_per_sample.pdf")

    plt.clf()
    fig, axes = plt.subplots(nrows=2, ncols=4, figsize=figsize, sharey=True)
    # Panels are filled row by row in the order of panel_tfs.
    for panel, gene in zip(axes.ravel(), panel_tfs):
        per_tf = df.loc[df["TF"] == gene].copy()
        sns.violinplot(data=per_tf, ax=panel, **violin_kwargs)
        panel.tick_params(axis='x', rotation=90)
        panel.set_title(gene)
        panel.set_xlabel("")
        panel.set_ylabel("Normalized Expression")
        panel.grid(False)
        # hue duplicates the X variable, so drop the legend.
        panel.legend().remove()

    plt.tight_layout()
    plt.savefig(out_path, bbox_inches="tight")
    plt.close(fig)

In [None]:

# Render both renditions to PDF in figuresDir, overriding the default figure sizes.
plot_violin(figsize=(8, 7))
plot_violin_compartment(figsize=(8, 6))