## scRNAseq: Epithelial-Mesenchymal Transition in Cancer
Dataset 3 from: [Cook and Vanderhyden 2020](https://www.nature.com/articles/s41467-020-16066-2)

In [None]:
import os
# Remember the directory the notebook was started from; a later cell
# calls os.chdir(currDir) to return here before running the helper notebook.
currDir = os.getcwd()

In [None]:
# Cook conditions available for analysis, one label per run.
all_runs = [
    "A549-TGFB1",
    "DU145-TGFB1",
    "OVCA420-EGF",
    "OVCA420-TGFB1",
    "OVCA420-TNF",
]

# Dataset identifier handed to the helper functions together with run_name.
dataset_name = "3-Cook"

# Pick the condition to analyse; change the index to switch runs.
run_name = all_runs[4]

In [None]:
# Keep the selected run label under a second name, file_name.
file_name = run_name
# Bare expression as the last line of the cell: echoes the selected run as cell output.
run_name

## Run Settings

Import Cook functions

In [None]:
# Return to the start-up directory so the relative path to the helper notebook resolves.
os.chdir(currDir)
# Execute the helper notebook. The helpers used throughout this notebook
# (setDirectories, importSettings, importData, ...) and the sc / plt names
# are presumably defined there -- verify against Cook_functions.ipynb.
%run Cook_functions.ipynb
# NOTE(review): setDirectories is not visible here; it may change the working
# directory, which would explain the chdir above on re-runs -- confirm.
setDirectories(dataset_name, run_name)

In [None]:
# Default figure appearance: 150 dpi, 5 x 5 figures.
# The size is set both through the sc settings call and directly in plt.rcParams.
sc.settings.set_figure_params(figsize=[5, 5], dpi=150)
plt.rcParams["figure.figsize"] = (5, 5)

Import settings for the current run

In [None]:
# Load the per-run settings; the values are looked up by setting name below.
run_settings = importSettings(run_name)


def _parse_markers(raw):
    """Turn a comma-separated marker entry into a list of gene names.

    Whitespace around each name is dropped, so "A, B" and "A,B" parse to the
    same list (splitting on the exact string ", " used to leave "A,B" as a
    single bogus name).

    A missing entry -- one whose str() is "nan", presumably an empty settings
    cell; confirm against importSettings -- yields "" exactly as before, so
    any downstream check against the empty string keeps working.
    """
    if str(raw) == "nan":
        return ""
    return [gene.strip() for gene in raw.split(",")]


# Filtering thresholds and clustering resolution for this run.
total_counts_cutoff = run_settings["total_counts_cutoff"]
mito_cutoff = run_settings["mito_cutoff"]
ribo_cutoff = run_settings["ribo_cutoff"]
leiden_resolution = run_settings["leiden_resolution"]

# Comma-separated cluster ids -> list of ints (int() ignores surrounding spaces).
EMT_clusters_in_order = [
    int(i) for i in run_settings["EMT_clusters_in_order"].split(",")
]

# Diffusion-component settings passed to pseudotime_rootNodes in Part 3.
pseudotime_DC = run_settings["pseudotime_DC"]
pseudotime_DC_rootNodes = run_settings["pseudotime_DC_rootNodes"]

# Add markers for visualization, if wanted
run_E_markers = _parse_markers(run_settings["run_E_markers"])
run_M_markers = _parse_markers(run_settings["run_M_markers"])

## Part 1: IMPORT DATA, FILTERING, AND NORMALIZATION

In [None]:
# Part 1 pipeline: load the data for this run and pass it through the helpers
# in order; every step rebinds adata to the helper's return value.
# NOTE(review): the helper bodies are not visible here (presumably defined in
# Cook_functions.ipynb); what each step does is inferred from its name -- confirm.
adata = importData(dataset_name, run_name)
adata = filterData(adata)
adata = find_Mito(adata)
# The three cutoffs come from the per-run settings loaded above.
adata = filter_MitoRibo(adata, total_counts_cutoff, mito_cutoff, ribo_cutoff, run_name, dataset_name)
saveFilteredData(adata, run_name)
# The result is kept under a separate name, adataHiVar; adata itself is not rebound here.
adataHiVar = normalize_and_hiVar_Data(adata, run_name)

In [None]:
# NOTE(review): called with only the run name, so it presumably reloads the
# saved data for this run itself rather than using adata -- confirm.
check_cell_cycle(run_name)

## Part 2: EMT SCORE AND CLUSTERING

In [None]:
# Reload the processed data for this run; the helper returns the data object
# and a second value kept as df_normalized (used by leiden_marker_genes_highlight).
# NOTE(review): presumably this lets Part 2 run without repeating Part 1 -- confirm.
adataHiVar, df_normalized = importProcessedData(dataset_name, run_name)
adataHiVar = Cook_unique_filter(adataHiVar, run_name)
# PCA and UMAP here are project helpers (not visible in this file); each
# returns the updated object, which is rebound to adataHiVar.
adataHiVar = PCA(adataHiVar)
adataHiVar = UMAP(adataHiVar)

In [None]:
# Cluster with the per-run resolution and cluster order from the settings.
# NOTE(review): how leiden uses EMT_clusters_in_order is not visible here -- confirm.
adataHiVar = leiden(adataHiVar, leiden_resolution, EMT_clusters_in_order, run_name)
adataHiVar = leiden_marker_genes(adataHiVar, dataset_name, run_name)
# Takes the run name and df_normalized rather than adataHiVar; returns nothing that is kept.
leiden_marker_genes_highlight(run_name, df_normalized)

In [None]:
# Export DE genes and log2FC for each cluster
# One helper call per distinct value of the "leiden_label" column in adataHiVar.obs.
# NOTE(review): "leiden_label" is presumably added by the leiden helper above -- confirm.
for currCluster in adataHiVar.obs["leiden_label"].unique():
    cluster_DE_genes_export(adataHiVar, dataset_name, run_name, EMT_clusters_in_order, currCluster)

In [None]:
# EMTscore from UCell
adataHiVar = UCell_export_import(adataHiVar, run_name)
sc.pl.violin(adataHiVar, keys=["UCell_EMTscore"])

In [None]:
# Save the clustered data for this run; Part 3 starts by calling importClusteredData.
saveClusteredData(adataHiVar, run_name)

## Part 3: PSEUDOTIME

In [None]:
# Reload the clustered data for this run, then run the pseudotime helpers in order.
adataHiVar = importClusteredData(dataset_name, run_name)
adataHiVar = diffmap(adataHiVar)
# Root nodes are chosen using the two pseudotime_DC* values from the run settings.
# NOTE(review): the selection rule lives in pseudotime_rootNodes, not visible here.
root_nodes = pseudotime_rootNodes(adataHiVar, pseudotime_DC, pseudotime_DC_rootNodes, run_name)
# Returns the updated object plus a second value kept as df_pseudotime.
adataHiVar, df_pseudotime = pseudotime_mean(adataHiVar, root_nodes)
savePseudotimeData(adataHiVar, run_name)

In [None]:
# Per-cluster pseudotime step, using the cluster order from the run settings and
# df_pseudotime from the previous cell; the return value (if any) is not kept.
pseudotime_per_cluster(adataHiVar, EMT_clusters_in_order, run_name, df_pseudotime)