# Data: Cheng et al.

In this notebook, we combine the count table with the metadata and keep only the cells from embryonic day 5.5 (E5.5), which the metadata encodes as the string "5,5".

In [2]:
import scanpy as sc
import anndata

  from louvain.VertexPartition import MutableVertexPartition


In [3]:
# Metadata table: semicolon-separated, every field read as a string so that
# comma-decimal values such as "5,5" are kept verbatim.
meta_cheng = sc.read_csv(
    '../data/rnaseq_mouse_e55/Meta.CellRep_cheng2019.csv',
    delimiter=";",
    dtype="str",
)

# Raw count table, transposed right after reading.
adata_cheng = sc.read_text(
    "../data/rnaseq_mouse_e55/Cheng_raw_counts.txt"
).T

# Print both summaries, one per line.
print(meta_cheng, adata_cheng, sep="\n")


AnnData object with n_obs × n_vars = 1724 × 4
AnnData object with n_obs × n_vars = 1724 × 18126


In [4]:
# Inspect the embryonic-day annotation; the values are comma-decimal strings (e.g. "5,5").
meta_cheng.to_df()["EmbryonicDay"]

EB_130    6,25
EB_132    6,25
EB_131    6,25
EB_137    6,25
EB_133    6,25
          ... 
EB_994     5,5
EB_997     5,5
EB_988     5,5
EB_996     5,5
EB_995     5,5
Name: EmbryonicDay, Length: 1724, dtype: object

In [5]:
# Show the count matrix as a DataFrame (rows: EB_* observations, columns: genes and ERCC entries).
adata_cheng.to_df()

Unnamed: 0,Adora1,Prim2,Bai3,Cflar,Gpr39,Obsl1,Syt14,Pld5,Lmx1a,Lactb2,...,ERCC_3.66210938:mix1_7.32421875:mix2.1,ERCC_3.66210938:mix1_7.32421875:mix2.2,ERCC_1.83105469:mix1_3.66210938:mix2,ERCC_0.91552734:mix1_1.83105469:mix2.1,ERCC_0.91552734:mix1_1.83105469:mix2.2,ERCC_0.45776367:mix1_0.91552734:mix2,ERCC_0.22888184:mix1_0.45776367:mix2.1,ERCC_0.22888184:mix1_0.45776367:mix2.2,ERCC_0.11444092:mix1_0.22888184:mix2,ERCC_0.05722046:mix1_0.11444092:mix2
EB_130,52.0,142.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,73.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
EB_132,0.0,23.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,37.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
EB_131,0.0,190.0,0.0,0.0,0.0,105.0,0.0,0.0,0.0,39.0,...,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
EB_137,0.0,5.0,0.0,0.0,0.0,49.0,0.0,0.0,0.0,19.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
EB_133,0.0,160.0,0.0,26.0,0.0,17.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
EB_994,0.0,1.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,30.0,...,0.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
EB_997,9.0,70.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,62.0,...,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
EB_988,0.0,59.0,0.0,0.0,0.0,119.0,13.0,0.0,0.0,49.0,...,38.0,0.0,71.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
EB_996,0.0,236.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,...,0.0,0.0,0.0,0.0,0.0,53.0,0.0,0.0,0.0,0.0


In [6]:
# Copy every metadata column into `adata_cheng.obs`. pandas aligns each column
# on the index labels (the EB_* names) when it is assigned.
# The metadata frame is built once up front instead of on every iteration.
meta_cheng_df = meta_cheng.to_df()
for var_name in meta_cheng.var_names:
    adata_cheng.obs[var_name] = meta_cheng_df[var_name]

In [29]:
# Store the variable names as an explicit `gene_symbol` column in `var`, alongside the index.
adata_cheng.var["gene_symbol"] = adata_cheng.var_names

In [30]:
# Display the object summary to confirm the obs and var columns that were added.
adata_cheng

AnnData object with n_obs × n_vars = 1724 × 18126
    obs: 'EmbryonicDay', 'Lineage', 'Antrior/Transition/Posterior', 'batch'
    var: 'gene_symbol'

In [7]:
# Show the per-cell annotations that were copied over from the metadata table.
adata_cheng.obs[:]

Unnamed: 0,EmbryonicDay,Lineage,Antrior/Transition/Posterior,batch
EB_130,625,EPI,transition,Cheng
EB_132,625,EPI,posterior,Cheng
EB_131,625,EPI,transition,Cheng
EB_137,625,EPI,transition,Cheng
EB_133,625,EPI,transition,Cheng
...,...,...,...,...
EB_994,55,EPI,anterior,Cheng
EB_997,55,VE,,Cheng
EB_988,55,EPI,anterior,Cheng
EB_996,55,ExE,,Cheng


In [33]:
# Keep only the cells whose embryonic day is "5,5" (a comma-decimal string,
# exactly as it appears in the metadata).
adata_cheng_E55 = adata_cheng[
    adata_cheng.obs["EmbryonicDay"].eq("5,5")
]

In [50]:
# Show the per-cell annotations of the E5.5 subset.
# NOTE(review): this cell carries execution count 50, i.e. it was last run after
# the rename cell (In [49]) further down; that is why the stored output already
# shows the renamed column headers. On a fresh top-to-bottom run the original
# column names would be displayed here.
adata_cheng_E55.obs[:]

Unnamed: 0,embryonic_day,cell_type,atp,batch
EB_1006,55,VE,,Cheng
EB_1007,55,EPI,anterior,Cheng
EB_1010,55,VE,,Cheng
EB_1009,55,VE,,Cheng
EB_1011,55,VE,,Cheng
...,...,...,...,...
EB_994,55,EPI,anterior,Cheng
EB_997,55,VE,,Cheng
EB_988,55,EPI,anterior,Cheng
EB_996,55,ExE,,Cheng


### Filter ERCC genes

In [42]:
# Drop every variable whose name starts with "ERCC".
# A single boolean mask over the variable axis replaces the former
# copy -> transpose -> row-subset -> copy -> transpose round trip.
is_ercc = adata_cheng_E55.var_names.str.startswith('ERCC')

# Kept as a plain list of the retained names, as before.
gene_symbols_without_erccs = adata_cheng_E55.var_names[~is_ercc].tolist()

# `.copy()` turns the subset into an independent object rather than a view.
adata_cheng_E55 = adata_cheng_E55[:, ~is_ercc].copy()

In [44]:
# Show the filtered matrix; the trailing ERCC_* columns should no longer appear.
adata_cheng_E55.to_df()

Unnamed: 0,Adora1,Prim2,Bai3,Cflar,Gpr39,Obsl1,Syt14,Pld5,Lmx1a,Lactb2,...,Emx2,Rab11fip2,Fam204a,Prlhr,Eif3a,Nanos1,Prdx3,Sfxn4,Gm7102,Csf2ra
EB_1006,0.0,179.0,0.0,0.0,0.0,42.0,0.0,0.0,0.0,86.0,...,0.0,0.0,89.0,0.0,1047.0,1.0,146.0,0.0,2.0,28.0
EB_1007,0.0,59.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,63.0,0.0,399.0,0.0,200.0,0.0,0.0,0.0
EB_1010,0.0,49.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,9.0,...,0.0,0.0,37.0,0.0,549.0,0.0,139.0,0.0,0.0,9.0
EB_1009,44.0,164.0,0.0,10.0,0.0,200.0,0.0,0.0,0.0,203.0,...,1.0,0.0,104.0,0.0,949.0,0.0,206.0,39.0,8.0,0.0
EB_1011,4.0,190.0,0.0,0.0,0.0,452.0,0.0,0.0,0.0,196.0,...,5.0,0.0,1.0,0.0,733.0,1.0,351.0,16.0,2.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
EB_994,0.0,1.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,30.0,...,0.0,0.0,0.0,0.0,176.0,0.0,13.0,12.0,0.0,1.0
EB_997,9.0,70.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,62.0,...,0.0,0.0,14.0,0.0,79.0,0.0,49.0,0.0,0.0,7.0
EB_988,0.0,59.0,0.0,0.0,0.0,119.0,13.0,0.0,0.0,49.0,...,0.0,0.0,0.0,0.0,1082.0,1.0,765.0,97.0,9.0,0.0
EB_996,0.0,236.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,...,0.0,0.0,42.0,0.0,1963.0,0.0,1532.0,0.0,14.0,243.0


### Rename observation columns

In [49]:
# Give the obs columns short snake_case names.
# The renamed frame is assigned back instead of mutating `obs` in place, so the
# cell reads as an explicit "old obs -> new obs" step.
adata_cheng_E55.obs = adata_cheng_E55.obs.rename(
    columns={
        "Lineage": "cell_type",
        "EmbryonicDay": "embryonic_day",
        "Antrior/Transition/Posterior": "atp",
    }
)

In [51]:
# Write the filtered E5.5 object to disk in h5ad format.
adata_cheng_E55.write_h5ad("../data/rnaseq_mouse_e55/Cheng_E55.h5ad")

## Test the result

In [52]:
# Reload the file that was just written and check that the round trip kept
# what the preceding steps produced.
saved_file = sc.read_h5ad('../data/rnaseq_mouse_e55/Cheng_E55.h5ad')

# Same number of cells and genes as the in-memory object.
assert saved_file.shape == adata_cheng_E55.shape, "saved object has a different shape"
# Only E5.5 cells were kept ...
assert (saved_file.obs["embryonic_day"] == "5,5").all(), "non-E5.5 cells in saved file"
# ... and no ERCC entries remain among the variables.
assert not saved_file.var_names.str.startswith("ERCC").any(), "ERCC entries in saved file"

# Display the reloaded matrix for a visual check.
saved_file.to_df()

Unnamed: 0,Adora1,Prim2,Bai3,Cflar,Gpr39,Obsl1,Syt14,Pld5,Lmx1a,Lactb2,...,Emx2,Rab11fip2,Fam204a,Prlhr,Eif3a,Nanos1,Prdx3,Sfxn4,Gm7102,Csf2ra
EB_1006,0.0,179.0,0.0,0.0,0.0,42.0,0.0,0.0,0.0,86.0,...,0.0,0.0,89.0,0.0,1047.0,1.0,146.0,0.0,2.0,28.0
EB_1007,0.0,59.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,63.0,0.0,399.0,0.0,200.0,0.0,0.0,0.0
EB_1010,0.0,49.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,9.0,...,0.0,0.0,37.0,0.0,549.0,0.0,139.0,0.0,0.0,9.0
EB_1009,44.0,164.0,0.0,10.0,0.0,200.0,0.0,0.0,0.0,203.0,...,1.0,0.0,104.0,0.0,949.0,0.0,206.0,39.0,8.0,0.0
EB_1011,4.0,190.0,0.0,0.0,0.0,452.0,0.0,0.0,0.0,196.0,...,5.0,0.0,1.0,0.0,733.0,1.0,351.0,16.0,2.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
EB_994,0.0,1.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,30.0,...,0.0,0.0,0.0,0.0,176.0,0.0,13.0,12.0,0.0,1.0
EB_997,9.0,70.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,62.0,...,0.0,0.0,14.0,0.0,79.0,0.0,49.0,0.0,0.0,7.0
EB_988,0.0,59.0,0.0,0.0,0.0,119.0,13.0,0.0,0.0,49.0,...,0.0,0.0,0.0,0.0,1082.0,1.0,765.0,97.0,9.0,0.0
EB_996,0.0,236.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,...,0.0,0.0,42.0,0.0,1963.0,0.0,1532.0,0.0,14.0,243.0
