# B2. Downstream analysis (paper figures)

- Authors: Marcos Malumbres & Agustín Sánchez-Belmonte
- Project: miR-203 controls developmental timing and early fate restriction during preimplantation embryogenesis
- Experiment: single cell RNAseq in early embryos (E3.5 and E4.5) in KO, KI and WT conditions.
- Part: B2. Downstream analysis (with paper figures)

This notebook uses the output h5ad file from Part B1.

It is very similar to Part B1, but no new analysis is performed here: it contains only visualization tasks and the final paper figures.

### Content

0. Set up
1. Initial Exploratory Analysis
2. Markers and Gene Signature Scores
3. Clustering
4. Classification of cells
5. MERVL and other markers
6. miR203_KO vs. Control
7. miR203_KI vs. Control
8. Velocity and Pseudotime
9. Save modified .h5ad file


# 0. Set up

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns

import scvelo as scv
scv.set_figure_params()

from matplotlib import rcParams

In [None]:
# Settings
sc.settings.verbosity = 0
sc.logging.print_header()
# Configure all figure parameters in ONE call. Each call to
# sc.set_figure_params() re-applies its own defaults (dpi=80, dpi_save=150)
# for every argument that is not passed, so a second call with only
# `figsize` would silently undo the dpi/dpi_save values set here.
sc.set_figure_params(dpi=120, dpi_save=300, color_map='viridis', figsize=[5, 4])

# Input data, gene-signature resources and figure output folder.
DATA = '/Users/mmalumbres/Library/CloudStorage/OneDrive-VHIO/BioInformatics/BioProjects/miR203 & early embryos/data/'
signature_folder = "/Users/mmalumbres/Library/CloudStorage/OneDrive-VHIO/BioInformatics/BioProjects/miR203 & early embryos/resources/Signatures/"
DESKTOP = '/Users/mmalumbres/Desktop/'
# Figures written through scanpy's `save=` argument go to this folder.
sc.settings.figdir = DESKTOP

In [None]:
import warnings   
warnings.filterwarnings("ignore")

I have uninstalled seaborn 0.13 and installed 0.11.2, so scattermaps now work (231211).  
Note that other plots may not work as well with this older version.

In [None]:
sns.__version__

In [None]:
# Lists and filters
pal1 = ["lightblue", "deepskyblue", "dodgerblue", "navajowhite", "darkorange", "orangered"]
blues_greens = {"E3.5": "DEEPSKYBLUE", "E3.5_KO": "DARKBLUE", "E3.5_dox": "BLUE",
                "E4.5": "GREENYELLOW", "E4.5_KO": "DARKGREEN", "E4.5_dox": "LIMEGREEN"}

## Load all data ("231215_mir203_all.h5ad")

In [None]:
adata = sc.read(DATA + "231215_mir203_all.h5ad")
adata

In [None]:
adata.obs.head(2)

In [None]:
adata.obs.tail(3)

In [None]:
adata.obs.Sample.unique()

In [None]:
sc.pl.umap(adata, color=["Stage", "Treatment", "Sample"], save="_Stage_Treat_Samples.png")

In [None]:
sc.pl.umap(adata, color=["Sample"], palette=blues_greens, save="_Samples.png")

# 1. Initial Exploratory Analysis

In [None]:
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts'], vmax=[8000, 10000], jitter=0.4, multi_panel=True)

In [None]:
fig, ax = plt.subplots()
sc.pl.violin(adata, ['total_counts'], rotation=90, jitter=0.4, ax=ax, show=False)
ax.set_ylim(10, 1000)
plt.show()

In [None]:
adata.obs.total_counts.min()

In [None]:
sc.pl.umap(adata, color=["n_genes", "Sample", ])

In [None]:
adata.obs.Sample.value_counts()

In [None]:
sns.countplot(data=adata.obs, x="Sample")
plt.xticks(rotation=45)
plt.savefig(DESKTOP + "bar-plot_Sample.png", dpi=300)

In [None]:
sns.countplot(data=adata.obs, x="Stage")

In [None]:
sns.countplot(data=adata.obs, x="Treatment")

In [None]:
#Plot distribution of mitochondrial and ribosomal genes
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts', 'pct_counts_MT','pct_counts_RB'], groupby='Experiment',
             jitter=0.4, multi_panel=True, rotation=90)

In [None]:
# Plot mitochondrial genes expressed
sc.pl.scatter(adata, x='total_counts', y='pct_counts_MT', size=100)
# Plot total counts
sc.pl.scatter(adata, x='total_counts', y='pct_counts_RB', size=100)
sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts', size=100)

In [None]:
sc.pl.pca(adata, color=["Stage", 'Treatment', "Sample"], save="_Stage_Treat_Sample.png")

In [None]:
sc.pl.umap(adata, color=['Sample'], s=30, palette=blues_greens, save="_Sample_30.png")

In [None]:
sc.pl.umap(adata, color=['total_counts', 'pct_counts_MT','pct_counts_RB'], palette="Set2", color_map= plt.cm.Purples)

# 2. Markers  
231207 - reevaluation MM

Initial exploration
- ICM/Epiblast markers: `['Pou5f1', 'Klf4', 'Nanog']`
- Hypoblast markers: `['Gata6', 'Gata4', 'Sox17']`  
- Trophectoderm markers: `['Cdx2', 'Gata3', 'Krt8']`


In [None]:
sc.pl.umap(adata, size=100,color=['Pou5f1', 'Klf4', 'Nanog'],palette="Set2", color_map= plt.cm.Purples, 
           save = '_Epiblast_ICM_markers.png')

In [None]:
sc.pl.umap(adata, size=100,color=['Cdx2', 'Gata3', 'Krt8'],palette="Set2", color_map= plt.cm.Purples,
           save = '_TE_markers.png') 

In [None]:
# Hypoblast (primitive endoderm) markers on the UMAP.
# File name fixed: "hipoblast" -> "hypoblast", matching the marker list above.
sc.pl.umap(adata, size=100, color=['Gata6', 'Gata4', 'Sox17'], palette="Set2", color_map=plt.cm.Purples,
           save='_hypoblast_markers.png')

## Gene Signature Scores

In [None]:
sc.pl.umap(adata, size=100, color=['2-cell-like', '8-cell-like',],
           palette="Set2", color_map= plt.cm.Purples, save = '_2-8-cell_scores.png') 

In [None]:
sc.pl.umap(adata, size=100, color=['EPI', 'TE', 'prE'],
           palette="Set2", color_map= plt.cm.Purples, save = '_EPI-TE-prE_scores.png')

Select:
- `2-cell-like`: very specific to 2-cell/totipotent cells
- `two_cell_282`: more general early signature (2-cell + 8-cell?); very similar to `8-cell-like` and to `eight_cell`
- `Primed`: general E4.5 markers
- `EPI`: epiblast and ICM lineage
- `TE`: trophectoderm; very similar to `TE_3`
- `prE`: primitive endoderm


In [None]:
two_cell_markers = ["Spz1", "Naalad2", "Sp110", "Fgf1", "Bex6", "Zfp352", "Foxa1"]

In [None]:
sc.pl.umap(adata, color=two_cell_markers, palette="Set2", color_map= plt.cm.Purples,
           save="_2-cell-markers.png")

In [None]:
sc.pl.umap(adata, color="Spz1", vmax=0.6, palette="Set2", color_map= plt.cm.Purples,
           save="_2-cell-marker_Spz1.png")

In [None]:
sc.pl.umap(adata, color="Zfp352", vmax=0.5, palette="Set2", color_map= plt.cm.Purples,
           save="_2-cell-marker_Zfp352.png")

In [None]:
sc.pl.umap(adata, color="Bex6", vmax=1.5, palette="Set2", color_map= plt.cm.Purples,
           save="_2-cell-marker_Bex6.png")

# 3. Clustering

In [None]:
sc.pl.umap(adata, color=["leiden_groups"], save="_leiden_groups.png")

In [None]:
sc.pl.umap(adata, color='leiden_groups', legend_loc='on data', title='', frameon=True, save="_leiden_groups_ON.png") 

# 4. Classify based on developmental markers

In [None]:
sc.pl.umap(adata, color=["Subpop_scaled_scores"], s=40, 
palette={"2-cell-like": "DARKRED","8-cell-like": "ORANGE","Epi-like": "YELLOWGREEN",
         "TE-like": "SEAGREEN","prE-like":"CORNFLOWERBLUE"},
          save="_Subpop_scaled_scores.png")

In [None]:
sc.pl.dotplot(adata, ['2-cell-like','8-cell-like','EPI','TE', "prE"], groupby="Subpop_scaled_scores", 
              standard_scale='var', save="_Subpop_scaled_scores_lineages.png") 

#### Count cells in each group

In [None]:
sns.countplot(data=adata.obs, x="leiden_groups", hue="Treatment", 
              order=["E3.5_Mixed", "E3.5_TE", "E3.5_ICM", "E3.5_PrE", "E4.5_TE", "E4.5_Epi", "E4.5_PrE"])
plt.xticks(rotation=45)
#plt.ylim(0,800)
plt.savefig(DESKTOP + "bar-plot_leiden_groups_genotype.png", dpi=300)

In [None]:
# Number of cells per assigned subpopulation, split by treatment.
sns.countplot(data=adata.obs, x="Subpop_scaled_scores", hue="Treatment")
plt.xticks(rotation=45)
# Draw the legend outside the axes so it does not cover the bars.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
# Dedicated file name: this plot previously reused
# "bar-plot_leiden_groups_genotype.png" and therefore overwrote the
# leiden_groups bar plot saved by the preceding cell.
plt.savefig(DESKTOP + "bar-plot_Subpop_scaled_scores_genotype.png", dpi=300)

In [None]:
# Fraction of cells in each subpopulation within every Stage x Treatment group.
ratios = adata.obs.groupby(["Stage", "Treatment"])["Subpop_scaled_scores"].value_counts(normalize=True)
# Name the value column explicitly: the plotting cell reads "proportion",
# a name that pandas only assigns by itself from version 2.0 onwards (older
# versions keep the original column name, which clashes on reset_index()).
ratios = ratios.rename("proportion").reset_index()
ratios.head(2)

In [None]:
sns.catplot(data=ratios, x="Subpop_scaled_scores", y="proportion", hue="Treatment", kind="bar",
            ci=None, legend=False, legend_out=True, aspect=1.5)
#plt.xticks(rotation=45)
#plt.ylim(0,800)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.savefig(DESKTOP + "bar-plot_Subpop_scaled_scores.png", dpi=300)

In [None]:
# Fraction of cells in each subpopulation within every Sample.
ratios2 = adata.obs.groupby(["Sample"])["Subpop_scaled_scores"].value_counts(normalize=True)
# Name the value column explicitly: the plotting cell reads "proportion",
# a name that pandas only assigns by itself from version 2.0 onwards (older
# versions keep the original column name, which clashes on reset_index()).
ratios2 = ratios2.rename("proportion").reset_index()
ratios2.head(2)

In [None]:
# Subpopulation proportions within each sample (one bar per subpopulation).
# The automatic legend is disabled and drawn manually below, outside the axes.
sns.catplot(data=ratios2, x="Sample", y="proportion", hue="Subpop_scaled_scores", kind="bar",
            ci=None, legend=False, legend_out=True, aspect=1.5)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
# Dedicated file name: this plot previously reused
# "bar-plot_Subpop_scaled_scores.png" and therefore overwrote the
# per-treatment proportion plot saved a few cells earlier.
plt.savefig(DESKTOP + "bar-plot_Subpop_scaled_scores_by_Sample.png", dpi=300)

In [None]:
subpop_markers = {"2-cell-like": ["Gm8300", "Zfp352", "Plk2", "Fgf1"],
                  "8-cell-like": ["Alppl2", "Rnf7", "Gm12617", "Sugt1"],
                  "ICM/Epiblast": ["Sox2", "Klf4", "Utf1", "Nanog", "Esrrb"],
                  "Trophectoderm": ["Krt8", "Krt18", "Tspan8", "Id2", "Dppa1"],
                  "Pr. Endoderm": ["Gata4", "Gata6", "Runx1", "Pdgfra", "Creb3l2"],
                 }

In [None]:
sc.pl.dotplot(adata, subpop_markers, groupby="Subpop_scaled_scores", standard_scale="var",
             save="_subpop_markers.png")

# 5. MERVL and other markers
Compute MERVL sequences from publication below using bash

In [None]:
sc.pl.umap(adata, color=["MERVL", "MERVL_E3.5", "Treatment"], palette="Set2", color_map= plt.cm.Purples)
sc.pl.umap(adata, color=["MERVL", "MERVL_E3.5", "Treatment"])

In [None]:
sc.pl.umap(adata, color=["MERVL", "Treatment"], vmin=1, vmax=6, palette="Set2", color_map= plt.cm.Purples,
           save="_MERVL_Treat.png")

In [None]:
sc.pl.violin(adata, ["MERVL"], groupby="Sample", rotation=90, jitter=0.35,
             save="_MERVL_Sample.png") 

In [None]:
sc.pl.dotplot(adata, ["MERVL"], groupby="Sample", save="_MERVL.png")  #, standard_scale="var"

In [None]:
adata

In [None]:
adata.obs.Sample.unique()

In [None]:
adata_KO = adata[adata.obs.Treatment == "KO"]

In [None]:
sc.pl.violin(adata_KO, ["MERVL"], groupby="Subpop_scaled_scores", rotation=90, jitter=0.35,
             save="_MERVL_KO_Subpop.png")  

In [None]:
adata_E3_5_KO = adata[adata.obs.Sample == "E3.5_KO"]

In [None]:
sc.pl.umap(adata_E3_5_KO, color=["MERVL", "Subpop_scaled_scores"], s=200,
          save="_MERVL_E3.5_KO_Subpop.png")

In [None]:
sc.pl.violin(adata, ["Nanog", "Pou5f1", "Gata6"], groupby="Sample", rotation=90, jitter=0.35,
             save="_Nanog_Oct4_Gata6_Sample.png") 

In [None]:
sc.pl.violin(adata, ["Otx2"], groupby="Sample", rotation=90, jitter=0.35,
             save="_Otx2_Sample.png") 

In [None]:
sc.pl.violin(adata, ["Otx2"], groupby="Subpop_scaled_scores", rotation=90, jitter=0.35,
             save="_Otx2_Subpop.png") 

In [None]:
# Combined "<leiden group>_<treatment>" label for every cell, stored as a
# categorical so it can be used as a `groupby` key in the plots below.
# The label is built from plain Python strings (`astype(str)`). The previous
# version used the pandas nullable "string" extension dtype, which becomes the
# dtype of the categories; that is the likely reason the .h5ad file could not
# be saved with this column present -- TODO confirm by writing the file.
adata.obs['leiden_groups_genotype'] = (
    adata.obs['leiden_groups'].astype(str) + '_' + adata.obs['Treatment'].astype(str)
).astype("category")

In [None]:
# Subsets (views) of the full dataset used by the violin plots below.
adata_E3_5 = adata[adata.obs.Stage == "E3.5"]
adata_E4_5 = adata[adata.obs.Stage == "E4.5"]
adata_Epi = adata[adata.obs.Subpop_scaled_scores == "Epi-like"]
# Epi-like cells at E4.5 only. The previous filter tested the stage alone,
# which made this subset identical to adata_E4_5 (all E4.5 cells).
adata_Epi_E4_5 = adata[(adata.obs.Subpop_scaled_scores == "Epi-like") & (adata.obs.Stage == "E4.5")]

In [None]:
sc.pl.violin(adata, ["Nanog", "Pou5f1", "Sox2"], groupby="Sample", rotation=90, jitter=0.35,
             save="_Nanog_OCt4_Sox2_Sample.png") 

In [None]:
sc.pl.violin(adata_E3_5, ["Nanog", "Pou5f1", "Sox2"], groupby="Sample", rotation=90, jitter=0.35,
             save="_E3.5_Nanog_OCt4_Sox2_Sample.png") 

In [None]:
sc.pl.violin(adata_E4_5, ["Nanog", "Pou5f1", "Sox2"], groupby="Sample", rotation=90, jitter=0.35,
             save="_E4.5_Nanog_OCt4_Sox2_Sample.png") 

In [None]:
# TE (Cdx2, Gata3) and PrE (Gata4, Gata6) transcription factors per sample at E3.5.
# File name now lists the genes actually plotted (it used to say "Gata2_4_6").
sc.pl.violin(adata_E3_5, ["Cdx2", "Gata3", "Gata4", "Gata6"], groupby="Sample", rotation=90, jitter=0.35,
             save="_E3.5_Cdx2_Gata3_4_6_Sample.png")

In [None]:
# TE (Cdx2, Gata3) and PrE (Gata4, Gata6) transcription factors per sample at E4.5.
# File name now lists the genes actually plotted (it used to say "Gata2_4_6").
sc.pl.violin(adata_E4_5, ["Cdx2", "Gata3", "Gata4", "Gata6"], groupby="Sample", rotation=90, jitter=0.35,
             save="_E4.5_Cdx2_Gata3_4_6_Sample.png")

In [None]:
sc.pl.violin(adata_E3_5, ["Nanog", "Pou5f1", "Sox2", "Cdx2", "Gata6"], groupby="Sample", rotation=90, jitter=0.35,
             save="_E3.5_Pluri_Cdx2_Gata6_Sample.png") 

In [None]:
sc.pl.violin(adata_E4_5, ["Nanog", "Pou5f1", "Sox2", "Cdx2", "Gata6"], groupby="Sample", rotation=90, jitter=0.35,
             save="_E4.5_Pluri_Cdx2_Gata6_Sample.png") 

In [None]:
sc.pl.violin(adata_E3_5, ["Nanog", "Pou5f1", "Sox2", "Cdx2", "Gata6"], groupby="leiden_groups_genotype", rotation=90, jitter=0.35,
             order=["E3.5_ICM_Control", "E3.5_Mixed_KO", "E3.5_ICM_dox", "E3.5_TE_Control", "E3.5_TE_KO", "E3.5_TE_dox"],
             save="_E3.5_Pluri_Cdx2_Gata6_leiden_groups_genotype.png") 

In [None]:
sc.pl.violin(adata_E4_5, ["Nanog", "Pou5f1", "Sox2", "Cdx2", "Gata6"], groupby="leiden_groups_genotype", rotation=90, jitter=0.35,
             order=["E4.5_Epi_Control", "E4.5_Epi_KO", "E4.5_Epi_dox", "E4.5_TE_Control", "E4.5_TE_KO", "E4.5_TE_dox", 
                         "E4.5_PrE_Control", "E4.5_PrE_KO", "E4.5_PrE_dox"],
             save="_E4.5_Pluri_Cdx2_Gata6_leiden_groups_genotype.png"
            ) 

In [None]:
sc.pl.violin(adata_E4_5, ["Otx2"], groupby="leiden_groups_genotype", rotation=90, jitter=0.35,
             order=["E4.5_Epi_Control", "E4.5_Epi_KO", "E4.5_Epi_dox", "E4.5_TE_Control", "E4.5_TE_KO", "E4.5_TE_dox", 
                         "E4.5_PrE_Control", "E4.5_PrE_KO", "E4.5_PrE_dox"],
             save="_E4.5_Otx2_leiden_groups_genotype.png"
            ) 

In [None]:
sc.pl.violin(adata_Epi, ["Otx2"], groupby="Sample", rotation=90, jitter=0.35,
             save="_Epi-like_Otx2_Sample.png") 

In [None]:
sc.pl.violin(adata_Epi, ["Nanog", "Pou5f1", "Gata6"], groupby="Sample", rotation=90, jitter=0.35,
             save="_Epi_Nanog-Oct-Gata6_Sample.png") 

In [None]:
# Gata3 / Gata4 / Gata6 expression per sample within the Epi-like cells.
# File name fixed: it was copied from the Nanog/Pou5f1/Gata6 plot
# ("__Nanog-Oct-Gata6_Sample.png") and did not describe the genes shown here.
sc.pl.violin(adata_Epi, ["Gata3", "Gata4", "Gata6"], groupby="Sample", rotation=90, jitter=0.35,
             save="_Epi_Gata3-4-6_Sample.png")

In [None]:
sc.pl.violin(adata_Epi_E4_5, ["Nanog", "Pou5f1", "Gata6"], groupby="Sample", rotation=90, jitter=0.35,
             save="_Epi_E4_5_Nanog-Oct-Gata6_Sample.png") 

In [None]:
sc.pl.violin(adata_Epi_E4_5, ["Nanog", "Pou5f1", "Gata6"], groupby="leiden_groups_genotype", rotation=90, jitter=0.35,
             save="_Epi_E4_5_Nanog-Oct-Gata6_leiden.png") 

# 6. miR-203 KO versus Control

In [None]:
adata.obs.Treatment.unique()

In [None]:
# NOTE: this rebinds `adata_KO`. From here on it holds every cell whose
# Treatment is not 'dox' (i.e. the KO-vs-Control comparison set), whereas the
# MERVL section assigned the same name to the Treatment == "KO" cells only.
# Re-running cells out of order will therefore change what `adata_KO` contains.
adata_KO = adata[adata.obs.Treatment != 'dox']
adata_KO

In [None]:
sc.pl.umap(adata_KO, color=["Sample"], palette=blues_greens, save="_KO_Samples.png")

### Check KO_E3.5

In [None]:
# E3.5 KO cells only. Take an explicit copy rather than a view of `adata`:
# the velocity section below runs in-place scvelo steps
# (filter_and_normalize, moments, velocity, ...) on this object, and those
# should operate on an independent AnnData instead of a view.
adata_KO_3_5 = adata[adata.obs.Sample == 'E3.5_KO'].copy()

In [None]:
sc.pl.umap(adata_KO_3_5, color=["leiden_groups"], s=200, )

In [None]:
sc.pl.umap(adata_KO_3_5, color=["Subpop_scaled_scores"], s=200, save="_KO-E3.5_Subpop_scaled_scores.png")

In [None]:
sc.pl.umap(adata_KO_3_5, color=["Subpop_scaled_scores"], groups=["2-cell-like"], s=200, save="_KO-E3.5_Subpop_scaled_scores_2-cell.png")

In [None]:
sc.pl.umap(adata_KO_3_5, color=["Zfp352"], groups=["2-cell-like"], s=200, save="_KO-E3.5_Subpop_scaled_scores_2lcell_Zfp352.png")

In [None]:
sc.pl.dotplot(adata_KO_3_5, ['2-cell-like','8-cell-like','EPI','TE', "prE"], groupby="Subpop_scaled_scores", 
              standard_scale='var', save="_KO-E3.5_Subpop_scaled_scores_lineages.png") 

# 7. miR-203 KI versus Control

In [None]:
adata_KI = adata[adata.obs.Treatment != 'KO']
adata_KI

In [None]:
adata

In [None]:
sc.pl.umap(adata_KI, color=["Sample"], palette=blues_greens, save="_KI_Samples.png")

In [None]:
sc.pl.violin(adata_KI, ["2-cell-like", "8-cell-like"], groupby="Sample", jitter=0.3)

In [None]:
sc.pl.violin(adata_KI, ["2-cell-like", "8-cell-like"], groupby="leiden_groups_genotype", jitter=0.3,
            rotation=90)

# 8. Velocity and Pseudotime

## Only in KO E3.5 samples

In [None]:
adata.layers

In [None]:
# Proportions plot for the E3.5 KO cells, grouped by an obs column.
# NOTE(review): groupby uses 'scaled_scores', while every other cell in this
# notebook groups/colours by 'Subpop_scaled_scores' -- confirm that a
# 'scaled_scores' column exists in adata.obs and is the intended one.
scv.pl.proportions(adata_KO_3_5, groupby='scaled_scores', save= DESKTOP + "scv.pl.KO-E3.5_scaled_scores.png")

In [None]:
# pre-process
scv.pp.filter_and_normalize(adata_KO_3_5)
scv.pp.moments(adata_KO_3_5)

In [None]:
#compute velocity
scv.tl.velocity(adata_KO_3_5, mode='stochastic')
scv.tl.velocity_graph(adata_KO_3_5)

In [None]:
scv.pl.velocity_embedding(adata_KO_3_5, basis='umap',frameon=False, save= DESKTOP + "velocity_embedding_KO-E3.5_scaled_scores.png")

In [None]:
scv.pl.velocity_embedding_grid(adata_KO_3_5, basis='umap',
                               frameon=False, color=['Subpop_scaled_scores'],title='',
                               scale=0.25,
                               save= DESKTOP + "velocity_embedding_grid_KO-E3.5_scaled_scores.png"
                              )

In [None]:
palette2 = {"2-cell-like": "DARKRED","8-cell-like": "ORANGE","Epi-like": "YELLOWGREEN",
            "TE-like": "SEAGREEN","prE-like":"CORNFLOWERBLUE", "Other":"GRAY"}

In [None]:
scv.pl.velocity_embedding_grid(adata_KO_3_5, basis='umap',frameon=False, color='Subpop_scaled_scores',title='',scale=0.25,
                               palette=palette2)

In [None]:
scv.tl.velocity_confidence(adata_KO_3_5)
keys = 'velocity_length','velocity_confidence'
scv.pl.scatter(adata_KO_3_5,c=keys,cmap='coolwarm',perc=[5,95], save= DESKTOP + "velocity_confidence_KO-E3.5_scaled_scores.png")

In [None]:
scv.pl.velocity_graph(adata_KO_3_5,threshold=0.1,color='Subpop_scaled_scores',
                      save= DESKTOP + "velocity_graph_KO-E3.5_scaled_scores.png")

In [None]:
scv.pl.velocity_graph(adata_KO_3_5,threshold=0.1,color='Subpop_scaled_scores',
                      alpha=0.5,
                      save= DESKTOP + "velocity_graph_KO-E3.5_scaled_scores_a0.5.png"
                     )

In [None]:
adata_KO_3_5.obs.Subpop_scaled_scores.dtype

In [None]:
scv.pl.velocity_embedding_stream(adata_KO_3_5, basis='umap',color='Subpop_scaled_scores',frameon=False,
                                 save= DESKTOP + "velocity_embed_stream_KO-E3.5_scaled_scores.png")

In [None]:
# Velocity pseudotime for the E3.5 KO cells, then plotted on the embedding.
# NOTE(review): root_key=45 and end_key=1 are hard-coded; presumably they are
# cell indices picked by inspection for this particular subset -- confirm, and
# re-check them whenever the subset or its cell order changes.
scv.tl.velocity_pseudotime(adata_KO_3_5,root_key=45, end_key=1)
scv.pl.scatter(adata_KO_3_5, color='velocity_pseudotime', cmap='gnuplot', size=150,
               save= DESKTOP + "velocity_pseudotime_KO-E3.5_scaled_scores.png")

In [None]:
# Copy the neighbour graphs from .obsp into .uns['neighbors'] before running
# PAGA (presumably scv.tl.paga looks them up there -- TODO confirm against the
# installed scvelo version).
adata_KO_3_5.uns['neighbors']['distances'] = adata_KO_3_5.obsp['distances']
adata_KO_3_5.uns['neighbors']['connectivities'] = adata_KO_3_5.obsp['connectivities']
# Velocity-based PAGA between the assigned subpopulations.
scv.tl.paga(adata_KO_3_5, groups='Subpop_scaled_scores')
# Transition-confidence matrix (transposed) shown as a colour-graded table.
df = scv.get_df(adata_KO_3_5, 'paga/transitions_confidence').T
df.style.background_gradient(cmap='Blues').format('{:.2g}')

In [None]:
scv.pl.paga(adata_KO_3_5, basis='umap', color='Subpop_scaled_scores', size=100, alpha=.6, 
            min_edge_width=2, node_size_scale=1.5, save= DESKTOP + "velocity_paga_KO-E3.5_scaled_scores.png")

In [None]:
adata