# Prepare input Jak2

In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
import scvelo as scv

In [None]:
# Read the annotated dataset (AnnData, .h5ad) as published by Isobe et al.
adata_path = './data_input/Jak2_Hom_WT_Ery.h5ad'
adata = sc.read(adata_path)

In [None]:
# The embedding coordinates (UMAP, as published by Isobe et al.) live in a
# separate file. Select its cells by the names in `adata` and use the
# resulting obsm as the obsm of `adata` (any previous obsm is replaced).
adata_coor = sc.read('./data_input/PMCA_Jak2.h5ad')
coor_matched = adata_coor[adata.obs_names]
adata.obsm = coor_matched.obsm.copy()

In [None]:
# Load the six loom files (they contain the unspliced reads) and stack them
# into a single AnnData.
# NOTE(review): presumably loom_M* are the Jak2 mutant samples and loom_W*
# the wild-type samples -- confirm against the sample sheet.


def _read_loom(path):
    """Read one loom file and make its gene (var) names unique."""
    loom = sc.read(path)
    loom.var_names_make_unique()
    return loom


loom_MA = _read_loom('./data_input/SLX11516_SIGAB2.loom')
loom_MB = _read_loom('./data_input/SLX11525_SIGAB3.loom')
loom_MC = _read_loom('./data_input/SLX12969_SIGAB8.loom')

loom_WA = _read_loom('./data_input/SLX11516_SIGAC2.loom')
loom_WB = _read_loom('./data_input/SLX11525_SIGAH2.loom')
loom_WC = _read_loom('./data_input/SLX12969_SIGAA8.loom')

# Keep this argument order: `concatenate` numbers the batches by position.
looms_after_first = (loom_WB, loom_WC, loom_MA, loom_MB, loom_MC)
adata_loom = loom_WA.concatenate(*looms_after_first)

In [None]:
# Rename the loom cells so that they use the same naming scheme as `adata`,
# then bring the loom data into the cell order of `adata`.


def _to_adata_cell_name(loom_name):
    """Rebuild a loom cell name as '<barcode>-1_<sample>'.

    The sample is the text before the first ':'; the barcode is the text
    after that ':' and before the first 'x-'.
    Example (assumed input shape): 'SAMPLE:ACGTx-0' -> 'ACGT-1_SAMPLE'.
    """
    sample = loom_name.split(':')[0]
    barcode = loom_name.split('x-')[0].split(':')[1]
    return barcode + '-1_' + sample


list_names = adata_loom.obs_names.copy()
list_names2 = [_to_adata_cell_name(loom_name) for loom_name in list_names]
adata_loom.obs_names = list_names2

# Select the cells of `adata`, in the order of `adata`.
adata_loom = adata_loom[adata.obs_names].copy()

# Put the published per-cell annotations next to the loom annotations.
adata_loom.obs = pd.concat([adata_loom.obs, adata.obs], axis=1)

In [None]:
# Restrict to the cells flagged 'Yes' in Ery_trajectory (the erythropoietic
# trajectory). The flag is read from `adata`; this relies on `adata_loom`
# having been put into the cell order of `adata` beforehand.
on_ery_trajectory = adata.obs['Ery_trajectory'] == 'Yes'
adata_ery = adata_loom[on_ery_trajectory].copy()

In [None]:
# Keep an untouched copy of the erythroid subset. The HVG selection below
# filters and normalizes `adata_ery` in place; the backup is what the scVelo
# preprocessing is later restarted from.

adata_ery_bu = adata_ery.copy()

In [None]:
# Genes that are differentially expressed on this trajectory, as published by
# Isobe et al. Only the row labels of the tab-separated table (its first
# column) are kept -- presumably the gene names.
de_table = pd.read_csv(
    'data_input/Jak2_conditionTest_Ery_lfc0.5.txt',
    sep='\t',
    index_col=0,
)
de_genes = de_table.index

In [None]:
# Highly variable genes (HVGs): scVelo filters and normalizes `adata_ery`
# in place, so afterwards its var_names are the genes that were retained.
hvg_settings = {'min_shared_counts': 20, 'n_top_genes': 1000}
scv.pp.filter_and_normalize(adata_ery, **hvg_settings)
hvg = adata_ery.var_names

In [None]:
# Final gene list: the top 1000 HVGs combined with the differentially
# expressed genes, with duplicates removed (np.unique also sorts the names).
list_def = np.unique(hvg.append(de_genes))

In [None]:
# scvelo preprocessing

In [None]:
# Start again from the untouched backup and keep only the genes in
# `list_def`; the backup itself is left unchanged.
adata_ery = adata_ery_bu[:, list_def].copy()

In [None]:
# Normalize the selected genes.
# NOTE(review): min_shared_counts=0 presumably keeps every gene of the
# selection (no count-based filtering) -- confirm in the scVelo docs.
scv.pp.filter_and_normalize(adata_ery, min_shared_counts=0)

# Compute the moments; the 'Ms' and 'Mu' layers exported further down are
# read from `adata_ery` after this call.
N_NEIGHBORS, N_PCS = 30, 50
scv.pp.moments(adata_ery, n_neighbors=N_NEIGHBORS, n_pcs=N_PCS)

In [None]:
# Export the spliced ('Ms') and unspliced ('Mu') matrices, split by
# condition, together with the list of genes.


def _layer_frame(layer_key):
    """Return a layer of `adata_ery` as a cells x genes DataFrame.

    Columns are the gene names; the row index is the dpt_pseudotime of
    each cell.
    """
    frame = pd.DataFrame(adata_ery.layers[layer_key])
    frame.columns = adata_ery.var_names
    frame.index = adata_ery.obs.dpt_pseudotime
    return frame


def _rows_for_condition(frame, condition):
    """Return a copy of the rows of `frame` whose cell has this Condition."""
    is_condition = (adata_ery.obs.Condition == condition).to_numpy()
    positions = np.flatnonzero(is_condition)
    return frame.iloc[positions, :].copy()


dfs = _layer_frame('Ms')
dfs_m = _rows_for_condition(dfs, 'Jak2_Homo')
dfs_w = _rows_for_condition(dfs, 'Jak2_WT')

dfu = _layer_frame('Mu')
dfu_m = _rows_for_condition(dfu, 'Jak2_Homo')
dfu_w = _rows_for_condition(dfu, 'Jak2_WT')

# One CSV per condition and layer: mutant ('Jak2_Homo') first, then wild type.
csv_exports = (
    (dfs_m, './data_input/erythropoiesis_mut_spl.csv'),
    (dfu_m, './data_input/erythropoiesis_mut_unspl.csv'),
    (dfs_w, './data_input/erythropoiesis_wt_spl.csv'),
    (dfu_w, './data_input/erythropoiesis_wt_unspl.csv'),
)
for frame, csv_path in csv_exports:
    frame.to_csv(csv_path)

# Gene names, in the column order of the matrices written above.
genes_frame = pd.DataFrame(adata_ery.var_names)
genes_frame.to_csv('./data_input/list_genes.csv')