In [None]:
# Import packages
import warnings
from pathlib import Path

import anndata as ad
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import seaborn as sns
import skimage
import yaml
from matplotlib.colors import LinearSegmentedColormap
from phenoscapes.feature_extraction import extract_features
from phenoscapes.montage import generate_overview_montage
from phenoscapes.sc import convert_to_h5ad, plot_summary
from phenoscapes.utils import annotate_img, get_metadata, scale_image
from skimage import io
from skimage.color import label2rgb
from tqdm import tqdm

# Silence every warning category (Warning is the base class of all warnings)
# for the rest of the session.
# NOTE(review): this also hides deprecation and pandas/anndata warnings raised
# by later cells — consider narrowing the filter to specific categories.
warnings.simplefilter(action="ignore", category=Warning)
import os

import matplotlib

# Font type 42 for PDF/PS output; per matplotlib's rcParams reference this
# selects TrueType embedding — presumably so text in the saved figures stays
# editable (TODO confirm intent).
matplotlib.rcParams["pdf.fonttype"] = 42
matplotlib.rcParams["ps.fonttype"] = 42

In [None]:
# Base output directory; every sub-directory below is resolved against it.
dir_output = "/cluster/project/treutlein/DATA/imaging/4i_Data/Brain_ECM_4i/Brain_ECM_4i_dapi/"

_root = Path(dir_output)
ann_dir = _root / "anndata"
tables_dir = _root / "feature_tables"  # per-sample CSV tables read by the loading cells
dir_bg_subtracted = _root / "bg_subtracted"
morpho_dir = _root / "feature_tables_morphometrics"


# Stain names to keep: the loading cells subset `adata` to the variables whose
# name appears in this list (all other stains are dropped). Names must match
# the column names of the feature tables after the "intensity_<stat>_" prefix
# has been stripped, including suffixes such as ".1" / ".3".
good_stains = [
    "IGFBP2",
    "GPC3",
    "Col4A1",
    "Col2A1",
    "CDH2",
    "HAPLN1",
    "ITGA6",
    "LHX9",
    "RSPO3",
    "SOX10",
    "RAX",
    "WNT5a",
    "Pax6",
    "Nes",
    "CDH1",
    "GPRWLS",
    "IRX3",
    "Vinculin",
    "VCAN",
    "Arl13b.1",
    "n-Cad.1",
    "HES1",
    "ITGB5",
    "JAG1",
    "SIX3",
    "DLL1",
    "Yap1.3",
    "Fibronectin",
    "B-cat.1",
]


# Sample annotation table, keyed by block id. For each block:
#   "Range"     - "first-last" sample numbers; the loading cells compare the
#                 numeric part of each sample name against it with
#                 Series.between, whose default includes both end points.
#   "Condition" - label copied to adata.obs["Condition"].
#   "Day"       - label copied to adata.obs["Day"] (kept as a string).
# Blocks are applied in the order listed, so a later block overwrites an
# earlier one wherever their ranges overlap.
table_data = {
    "3": {"Range": "227-237", "Condition": "Matrigel", "Day": "11"},
    "4.1": {"Range": "238-245", "Condition": "No Matrix", "Day": "11"},
    "4.2": {"Range": "246-251", "Condition": "No Matrix", "Day": "11"},
    "4.3": {"Range": "252-265", "Condition": "No Matrix", "Day": "11"},
    "4.4": {"Range": "266-271", "Condition": "No Matrix", "Day": "7"},
    "5.1": {"Range": "272-282", "Condition": "Matrigel", "Day": "7"},
    "5.2": {"Range": "283-289", "Condition": "Matrigel", "Day": "7"},
    "5.3": {"Range": "290-296", "Condition": "Matrigel", "Day": "7"},
    "5.4": {"Range": "297-303", "Condition": "Matrigel", "Day": "7"},
    "6.1": {"Range": "304-308", "Condition": "No Matrix", "Day": "7"},
    "6.2": {"Range": "309-317", "Condition": "No Matrix", "Day": "7"},
    "6.3": {"Range": "318-326", "Condition": "No Matrix", "Day": "7"},
    "6.4": {"Range": "327-334", "Condition": "No Matrix", "Day": "7"},
    "7.1": {"Range": "335-345", "Condition": "Matrigel", "Day": "16"},
    "7.2": {"Range": "346-356", "Condition": "Matrigel", "Day": "16"},
    # NOTE(review): sample number 360 is covered by both "7.3" and "7.4";
    # with inclusive matching it ends up in "7.4". One of the two bounds is
    # probably off by one — confirm against the plate layout.
    "7.3": {"Range": "357-360", "Condition": "Matrigel", "Day": "16"},
    "7.4": {"Range": "360-364", "Condition": "Matrigel", "Day": "16"},
    "8": {"Range": "365-371", "Condition": "No Matrix", "Day": "16"},
}

# NOTE(review): this directory listing is replaced by a hard-coded sample list
# at the top of the next cell before `samples` is read, so it only matters if
# that cell is skipped (and it raises if the directory does not exist).
samples = os.listdir(dir_bg_subtracted)

In [None]:
# Samples to load, named "R<number>_<suffix>". The numeric part is matched
# against the "Range" entries of `table_data` further down to annotate each
# observation with Block / Condition / Day.
samples = [
    "R294_0",
    "R277_0",
    "R300_0",
    "R284_0",
    "R315_0",
    "R268_0",
    "R304_0",
    "R232_0",
    "R249_0",
    "R253_0",
    "R347_0",
    "R347_1",
    "R338_0",
    "R338_1",
    "R365_0",
    "R365_1",
    "R365_2",
    "R365_3",
]

# Which per-object intensity statistic to read from the feature tables
# (columns named "intensity_<stat>_<stain>"), and whether to log1p it.
stat = "median"
log_transform = True

anndatas = []
for sample in samples:
    file = Path(tables_dir, f"{sample}.csv")
    if not file.is_file():
        # Skip missing tables explicitly. Appending unconditionally would
        # re-add the previous sample's AnnData (or raise NameError when the
        # very first table is missing). print() is used because warnings are
        # globally silenced in the import cell.
        print(f"Skipping {sample}: feature table not found at {file}")
        continue

    df = pd.read_csv(file).rename(columns={"ID": "label"})

    # Select the columns of the chosen statistic and strip the prefix so the
    # variable names are the bare stain names; sort them so every sample has
    # the same column order.
    stat_columns = df.columns[df.columns.str.contains(stat, regex=False)]
    df_counts = df[stat_columns].rename(
        columns=lambda name: name.replace(f"intensity_{stat}_", "")
    )
    colnames_sorted = sorted(df_counts.columns)
    counts = df_counts[colnames_sorted].to_numpy()
    if log_transform:
        counts = np.log1p(counts)

    sample_adata = sc.AnnData(X=counts)
    sample_adata.var_names = colnames_sorted
    sample_adata.obs["ID"] = df["label"].to_numpy()
    sample_adata.obs["sample"] = sample
    sample_adata.obs["area"] = df["area"].to_numpy()
    # "centroid-1" is stored as X and "centroid-0" as Y.
    sample_adata.obs["X"] = df["centroid-1"].to_numpy()
    sample_adata.obs["Y"] = df["centroid-0"].to_numpy()
    anndatas.append(sample_adata)

if not anndatas:
    raise FileNotFoundError(
        f"No feature tables found in {tables_dir} for the requested samples"
    )

adata = ad.concat(anndatas)
adata.obs_names_make_unique()

# Keep only the stains listed in `good_stains`. Copy the subset so the obs
# columns added below are written to a real AnnData object, not to a view.
stain_vector = [x in good_stains for x in adata.var_names]
adata = adata[:, stain_vector].copy()

# "R294_0" -> 294
adata.obs["sample_num"] = (
    adata.obs["sample"]
    .str.replace("R", "", regex=False)
    .str.replace(r"_\d+", "", regex=True)
    .astype(int)
)

# Start the annotation columns as object dtype (they receive strings) with
# NaN for observations that fall into no block.
for column in ("Block", "Condition", "Day"):
    adata.obs[column] = np.full(adata.n_obs, np.nan, dtype=object)

# Annotate with a single .loc assignment per column. The former chained form
# (obs["Day"][mask] = ...) writes through an intermediate Series, which pandas
# does not guarantee to propagate (and never does under Copy-on-Write).
for block, info in table_data.items():
    first, last = (int(bound) for bound in info["Range"].split("-"))
    in_block = adata.obs["sample_num"].between(first, last)
    adata.obs.loc[in_block, "Block"] = block
    adata.obs.loc[in_block, "Condition"] = info["Condition"]
    adata.obs.loc[in_block, "Day"] = info["Day"]

In [None]:
# Split violin plot of the Yap1.3 values in `adata` per day, one half per
# condition, saved as a PDF in the working directory.
color_pallete_perturbation = {"Matrigel": "#17ad97", "No Matrix": "#4d4d4d"}

# Long-format table with one row per observation of `adata`.
yap_wls_time_course = pd.DataFrame(
    {
        "Log Expression": adata[:, "Yap1.3"].X[:, 0],
        "Condition": np.array(adata.obs["Condition"]),
        "Day": np.array(adata.obs["Day"]),
        "protein": "YAP1",
    }
)

# Pin the category order instead of relying on the row order of `adata`:
# days ascending numerically (they are stored as strings), conditions in
# palette order.
day_order = sorted(yap_wls_time_course["Day"].dropna().unique(), key=int)
condition_order = list(color_pallete_perturbation)

fig, ax = plt.subplots(figsize=(4, 3))
sns.despine(ax=ax, left=True, bottom=True, right=True)

sns.violinplot(
    data=yap_wls_time_course,
    x="Day",
    y="Log Expression",
    hue="Condition",
    order=day_order,
    hue_order=condition_order,
    split=True,
    palette=color_pallete_perturbation,
    gap=0.05,
    cut=0,
    ax=ax,
)
ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5), fontsize=16)
ax.set(ylim=(0, 8))

# NOTE(review): the file name says "day_16" but the figure shows every day
# present in the table — rename if that is not intended.
fig.savefig("violin_plot_Yap1_log_median_expression_day_16.pdf", bbox_inches="tight")
plt.close(fig)

In [None]:
# Samples to load, named "R<number>_<suffix>". The numeric part is matched
# against the "Range" entries of `table_data` further down to annotate each
# observation with Block / Condition / Day.
samples = [
    "R294_0",
    "R277_0",
    "R300_0",
    "R284_0",
    "R315_0",
    "R268_0",
    "R304_0",
    "R232_0",
    "R249_0",
    "R253_0",
    "R347_0",
    "R347_1",
    "R338_0",
    "R338_1",
    "R365_0",
    "R365_1",
    "R365_2",
    "R365_3",
]

# Which per-object intensity statistic to read from the feature tables
# (columns named "intensity_<stat>_<stain>"), and whether to log1p it.
stat = "mean"
log_transform = True

anndatas = []
for sample in samples:
    file = Path(tables_dir, f"{sample}.csv")
    if not file.is_file():
        # Skip missing tables explicitly. Appending unconditionally would
        # re-add the previous sample's AnnData, or whatever `adata` is left
        # over from an earlier cell. print() is used because warnings are
        # globally silenced in the import cell.
        print(f"Skipping {sample}: feature table not found at {file}")
        continue

    df = pd.read_csv(file).rename(columns={"ID": "label"})

    # Select the columns of the chosen statistic and strip the prefix so the
    # variable names are the bare stain names; sort them so every sample has
    # the same column order.
    stat_columns = df.columns[df.columns.str.contains(stat, regex=False)]
    df_counts = df[stat_columns].rename(
        columns=lambda name: name.replace(f"intensity_{stat}_", "")
    )
    colnames_sorted = sorted(df_counts.columns)
    counts = df_counts[colnames_sorted].to_numpy()
    if log_transform:
        counts = np.log1p(counts)

    sample_adata = sc.AnnData(X=counts)
    sample_adata.var_names = colnames_sorted
    sample_adata.obs["ID"] = df["label"].to_numpy()
    sample_adata.obs["sample"] = sample
    sample_adata.obs["area"] = df["area"].to_numpy()
    # "centroid-1" is stored as X and "centroid-0" as Y.
    sample_adata.obs["X"] = df["centroid-1"].to_numpy()
    sample_adata.obs["Y"] = df["centroid-0"].to_numpy()
    anndatas.append(sample_adata)

if not anndatas:
    raise FileNotFoundError(
        f"No feature tables found in {tables_dir} for the requested samples"
    )

adata = ad.concat(anndatas)
adata.obs_names_make_unique()

# Keep only the stains listed in `good_stains`. Copy the subset so the obs
# columns added below are written to a real AnnData object, not to a view.
stain_vector = [x in good_stains for x in adata.var_names]
adata = adata[:, stain_vector].copy()

# "R294_0" -> 294
adata.obs["sample_num"] = (
    adata.obs["sample"]
    .str.replace("R", "", regex=False)
    .str.replace(r"_\d+", "", regex=True)
    .astype(int)
)

# Start the annotation columns as object dtype (they receive strings) with
# NaN for observations that fall into no block.
for column in ("Block", "Condition", "Day"):
    adata.obs[column] = np.full(adata.n_obs, np.nan, dtype=object)

# Annotate with a single .loc assignment per column. The former chained form
# (obs["Day"][mask] = ...) writes through an intermediate Series, which pandas
# does not guarantee to propagate (and never does under Copy-on-Write).
for block, info in table_data.items():
    first, last = (int(bound) for bound in info["Range"].split("-"))
    in_block = adata.obs["sample_num"].between(first, last)
    adata.obs.loc[in_block, "Block"] = block
    adata.obs.loc[in_block, "Condition"] = info["Condition"]
    adata.obs.loc[in_block, "Day"] = info["Day"]

In [None]:
# Palette is (re)defined here as in the plotting cell; it is not read below.
color_pallete_perturbation = {"Matrigel": "#17ad97", "No Matrix": "#4d4d4d"}

# One row per observation of `adata`: Yap1.3 value plus its annotations.
yap_wls_time_course = pd.DataFrame(
    {
        "Log Expression": adata[:, "Yap1.3"].X[:, 0],
        "Condition": np.array(adata.obs["Condition"]),
        "Day": np.array(adata.obs["Day"]),
        "protein": "YAP1",
    }
)

# Keep the day-11 rows only ("Day" holds strings) and write them, index
# included, to the working directory.
is_day_11 = yap_wls_time_course["Day"] == "11"
yap_wls_time_course = yap_wls_time_course.loc[is_day_11]
yap_wls_time_course.to_csv("yap_mean_day_11.csv")