# Import libraries

In [3]:
import os
from re import search
from dfply import *
import igraph
import fa2

# Load settings

In [4]:
# Locate the project checkout that belongs to the machine running this notebook.
# Keys are patterns searched for in the hostname (checked in this order); values
# are the corresponding checkout directories.
# NOTE(review): the "ricard" entry previously read settings.py from /Users/argelagr
# but utils.py from /Users/ricard; both now come from /Users/ricard, assuming the
# mixed paths were a copy-paste slip — confirm.
project_dirs = {
    "ricard": "/Users/ricard/gastrulation10x",
    "BI2404M": "/Users/argelagr/gastrulation10x",
}
hostname = os.uname()[1]
project_dir = next(
    (path for pattern, path in project_dirs.items() if search(pattern, hostname)),
    None,
)
if project_dir is None:
    # Raise instead of calling exit(): the message then appears in the cell
    # traceback and the remaining cells are not run against missing settings.
    raise RuntimeError("Computer not recognised")

# Both scripts are executed in the notebook namespace; the rest of the notebook
# uses names that are not defined in any cell (io, opts, sc, pd, load_adata) and
# therefore presumably come from these two files.
for script in ("settings.py", "utils.py"):
    # The context manager closes the file handle once the source has been read.
    with open(os.path.join(project_dir, script)) as handle:
        exec(handle.read())

## Define I/O

In [5]:
# Output directory of this notebook, nested under the project base directory;
# it is also used as the scanpy figure directory further down.
io["outdir"] = f'{io["basedir"]}/results/trajectories/nmp_scanpy'

## Define options 

Scanpy options: figure output directory and default figure appearance.

In [6]:
# %%capture
# sc.settings.verbosity = 3
# sc.logging.print_versions()
# Send figures saved by scanpy to the notebook's output directory.
sc.settings.figdir = io["outdir"]

# Global figure defaults shared by every plot below.
sc.settings.set_figure_params(
    dpi=80,
    frameon=False,
    figsize=(8, 7),
    facecolor="white",
)

In [7]:
# Stages to keep; cells from any other stage are dropped when the metadata is filtered.
opts["stages"] = [
    "E6.5", "E6.75", "E7.0", "E7.25", "E7.5",
    "E7.75", "E8.0", "E8.25", "E8.5",
    "mixed_gastrulation",
]

# Cell types to keep; cells with any other label are dropped when the metadata is filtered.
opts["celltypes"] = [
    "Epiblast",
    "Primitive_Streak",
    "Caudal_epiblast",
    "Mixed_mesoderm",
    "Caudal_Mesoderm",
    "Somitic_mesoderm",
    "NMP",
    "Spinal_cord",
]

## Load cell metadata

In [8]:
# Read the per-cell metadata table and keep only the cells used in this analysis:
#   1. rows where both the `stripped` and `doublet` flags are False,
#   2. rows whose stage and cell type appear in the option lists defined above.
# The explicit `== False` comparisons are kept on purpose so that the selection
# does not depend on the columns having a strict boolean dtype.
metadata = (
    pd.read_table(io["metadata"])
    .loc[lambda df: (df["stripped"] == False) & (df["doublet"] == False)]
    .loc[lambda df: df["stage"].isin(opts["stages"]) & df["celltype"].isin(opts["celltypes"])]
)
# Display (rows, columns) of the filtered table.
metadata.shape

(33314, 12)

In [9]:
# Show the first rows of the filtered metadata to check the columns and the filters.
metadata.head()

Unnamed: 0,cell,barcode,sample,stage,sequencing.batch,doublet,stripped,celltype,umapX,umapY,nFeature_RNA,nCount_RNA
0,cell_1,AAAGGCCTCCACAA,1,E6.5,1,False,False,Epiblast,-10.227546,-2.881687,2547,8963
1,cell_10,AACTGTCTTCGCAA,1,E6.5,1,False,False,Epiblast,-11.243532,-0.87611,1933,5643
2,cell_100,CACAGATGGGGACA,1,E6.5,1,False,False,Epiblast,-10.904429,-0.963977,4278,24947
3,cell_1000,GCCACTACCCGCTT,3,E7.5,1,False,False,Caudal_epiblast,-2.9437,-0.782023,2829,9294
7,cell_100002,GACTGATGTCCCAC,29,E8.5,3,False,False,Spinal_cord,1.99558,-4.249367,3477,17315


# Load anndata object

In [None]:
# Build the AnnData object for the cells that passed the metadata filters.
# NOTE(review): load_adata is not defined in this notebook (presumably it comes
# from utils.py); the two flags presumably request normalisation and removal of
# lowly expressed genes — confirm against the helper.
adata = load_adata(
    adata_file=io["anndata"],
    metadata_file=io["metadata"],
    cells=metadata["cell"].values,
    normalise=True,
    filter_lowly_expressed_genes=True,
)
# Display the object summary.
adata


This is where adjacency matrices should go now.
  warn(

This is where adjacency matrices should go now.
  warn(


In [None]:
# Number of cells per cell type in the loaded object.
adata.obs["celltype"].value_counts()

In [None]:
# Plot the UMAP coordinates already present in the object, coloured by cell type.
# NOTE(review): no UMAP has been computed in this notebook at this point, so the
# coordinates presumably come from load_adata (e.g. the umapX/umapY metadata
# columns) — confirm.
sc.pl.scatter(
    adata,
    basis="umap",
    color="celltype",
    size=5,
    legend_loc="on data",
    legend_fontsize=7,
)

## Feature selection

In [None]:
# Select the 1000 most variable genes; the selection is recorded in
# adata.var["highly_variable"].
sc.pp.highly_variable_genes(
    adata,
    n_top_genes=1000,
)

In [None]:
# Count the genes flagged in adata.var["highly_variable"] by the selection above.
adata.var["highly_variable"].sum()

## Dimensionality reduction

### PCA

In [None]:
# Reduce the expression matrix to 15 principal components; the neighbour graph
# below is built on the resulting "X_pca" representation.
sc.tl.pca(
    adata,
    n_comps=15,
)

In [None]:
# PCA scatter coloured by cell type, also saved into the scanpy figure directory.
# NOTE(review): the "blood_" file prefix does not match this NMP analysis —
# presumably inherited from another notebook; confirm before renaming.
sc.pl.scatter(
    adata,
    basis="pca",
    color="celltype",
    size=10,
    legend_loc="on data",
    legend_fontsize=7,
    save="blood_pca_celltype.pdf",
)

## Compute neighbors

In [None]:
# Build the neighbourhood graph (25 neighbours per cell) on the PCA representation;
# the UMAP, force-directed layout and PAGA steps below all start from this graph.
sc.pp.neighbors(
    adata,
    use_rep="X_pca",
    n_neighbors=25,
)

### UMAP

In [None]:
# Compute a UMAP embedding from the neighbour graph built in this notebook.
sc.tl.umap(
    adata,
    spread=1.0,
    min_dist=0.3,
)

In [None]:
# UMAP coloured by cell type, also saved into the scanpy figure directory.
# NOTE(review): the "blood_" file prefix does not match this NMP analysis —
# presumably inherited from another notebook; confirm before renaming.
sc.pl.scatter(
    adata,
    basis="umap",
    color="celltype",
    size=10,
    legend_loc="on data",
    legend_fontsize=7,
    save="blood_umap_celltype.pdf",
)

In [None]:
# List the keys currently stored in adata.obsm (e.g. the "X_pca" representation used above).
adata.obsm.keys()

### Force-directed layout

In [None]:
# Force-directed layout of the neighbour graph using the 'fa' layout, with no
# precomputed starting positions (init_pos=None).
sc.tl.draw_graph(
    adata,
    layout="fa",
    init_pos=None,
)

In [None]:
# sc.pl.draw_graph(adata, color='celltype', legend_loc='on data')
# Force-directed layout coloured by cell type, also saved into the scanpy figure directory.
# NOTE(review): the "blood_" file prefix does not match this NMP analysis —
# presumably inherited from another notebook; confirm before renaming.
sc.pl.draw_graph(
    adata,
    color="celltype",
    size=12,
    legend_loc="on data",
    legend_fontsize=7,
    save="blood_fa2_celltype.pdf",
)

In [None]:
# Same layout coloured by sample, to see how the samples are distributed over it.
# NOTE(review): the "blood_" file prefix does not match this NMP analysis —
# presumably inherited from another notebook; confirm before renaming.
sc.pl.draw_graph(
    adata,
    color="sample",
    size=12,
    legend_loc="on data",
    legend_fontsize=7,
    save="blood_fa2_sample.pdf",
)

## Graph abstraction


In [None]:
# Run PAGA with the cell-type labels as groups, i.e. one node per cell type.
sc.tl.paga(
    adata,
    groups="celltype",
)

In [None]:
# Show the UMAP embedding and the PAGA graph together, both coloured by cell type.
sc.pl.paga_compare(
    adata,
    basis="umap",
    color="celltype",
    threshold=0.23,
    edge_width_scale=0.1,
    fontsize=3,
    legend_loc="on data",
)

In [None]:
# Plot the PAGA graph on its own, with a slightly stricter edge threshold.
# NOTE(review): the PAGA-initialised layout computed afterwards presumably relies on
# the node positions stored by this plotting call, so keep this cell before it — confirm.
sc.pl.paga(
    adata,
    color="celltype",
    threshold=0.3,
    edge_width_scale=0.15,
    node_size_power=0.5,
    fontsize=3,
)

Recompute the force-directed layout, this time initialised from the PAGA graph.

In [None]:
# Recompute the 'fa' layout, this time initialised from PAGA (init_pos="paga")
# instead of running without starting positions; this overwrites the earlier layout.
sc.tl.draw_graph(
    adata,
    layout="fa",
    init_pos="paga",
)

In [None]:
# Display the PAGA-initialised layout coloured by cell type (not saved to disk).
sc.pl.draw_graph(
    adata,
    color="celltype",
    legend_loc="on data",
)

## Save

In [None]:
# Table of the force-directed layout coordinates, one row per cell (indexed by
# cell name) with the two layout dimensions as columns FA1 and FA2.
trajectory = pd.DataFrame(
    data=adata.obsm["X_draw_graph_fa"],
    index=adata.obs_names,
    columns=["FA1", "FA2"],
)
# trajectory.to_csv(io["outdir"] + "/blood_trajectory.txt.gz", sep='\t')

In [None]:
# adata.write(io["outdir"] + "/blood_anndata.h5ad")
# metadata.to_csv(io["outdir"] + "/blood_sample_metadata.txt.gz", sep='\t')