# Concatenate anndatas

For each pair of consecutive time points, the AnnData objects are concatenated and the representation from the Seurat integration is added.

In [1]:
import numpy as np
import anndata
import pandas as pd
import scanpy as sc
import scipy

In [2]:
# Root directory holding the per-time-point anndatas and the Seurat representations.
Path = "/home/icb/manuel.gander/moscotTime_Reproducibility/Data"

# Time point labels in the order in which consecutive pairs are built.
ts = [
    'E3.5', 'E4.5', 'E5.25', 'E5.5', 'E6.25',
    'E6.5', 'E6.75', 'E7.0', 'E7.25', 'E7.5',
    'E7.75', 'E8.0', 'E8.25', 'E8.5a', 'E8.5b',
    'E9.5', 'E10.5', 'E11.5', 'E12.5', 'E13.5',
]

In [3]:
def load_and_concatenate(ts0, ts1):
    """Read the anndatas of two time points and concatenate them.

    Parameters
    ----------
    ts0, ts1
        Time point labels; each is inserted into the file name
        ``adata_<label>.h5ad`` under ``{Path}/anndatas``.

    Returns
    -------
    A copy of the inner-joined concatenation of the ``ts0`` object followed
    by the ``ts1`` object.

    Notes
    -----
    Calling this function switches off Python warnings and sets the scanpy
    verbosity to 0 for the rest of the session.
    """
    import warnings

    warnings.filterwarnings('ignore')
    sc.settings.verbosity = 0

    # 'E8.5a' as the earlier time point needs two extra adjustments below.
    patch_e85a = (ts0 == 'E8.5a')

    earlier = sc.read(f"{Path}/anndatas/adata_{ts0}.h5ad")
    later = sc.read(f"{Path}/anndatas/adata_{ts1}.h5ad")

    if patch_e85a:
        # NOTE(review): presumably 'day' is missing or inconsistent for this
        # data set; the original code overwrites it with 0 - confirm.
        earlier.obs['day'] = 0

    merged = earlier.concatenate(
        later,
        join='inner',
        index_unique=None,
        batch_key=None,
    )
    adata = merged.copy()

    if patch_e85a:
        # Store 'group' as plain strings after the concatenation.
        adata.obs['group'] = adata.obs['group'].astype('str')

    return adata

In [4]:
def add_representation(adata, ts0, ts1):
    """Attach the Seurat-integration PCA and 3D-UMAP tables to ``adata.obsm``.

    Parameters
    ----------
    adata
        Concatenated object for the time pair; ``adata.obs['cellID']`` is used
        to select and order the rows of the CSV tables.
    ts0, ts1
        Time point labels of the pair; they determine the CSV file names.

    Returns
    -------
    The same ``adata`` object with ``obsm['X_pcaS']`` and ``obsm['X_umap3']``
    set.
    """
    # For E8.5b to E9.5, use the recomputed (better) integration
    # (uses more hvgs=features), stored with a '_new' suffix.
    k = '_new' if ts0 == 'E8.5b' else ''

    # Row labels looked up in both tables, in the order of adata.obs.
    cell_ids = list(adata.obs['cellID'])

    # The first (unnamed) CSV column is used as the row index.
    pca_table = pd.read_csv(
        f"{Path}/Seurat_Representations/{ts0}_{ts1}_pca{k}.csv",
        sep=",",
        index_col='Unnamed: 0',
    )
    adata.obsm['X_pcaS'] = pca_table.loc[cell_ids].values

    umap_table = pd.read_csv(
        f"{Path}/Seurat_Representations/{ts0}_{ts1}_umap3{k}.csv",
        sep=",",
        index_col='Unnamed: 0',
    )
    adata.obsm['X_umap3'] = umap_table.loc[cell_ids].values

    return adata

In [5]:
def fix_adata_var(adata):
    """Put the gene names into ``adata.var.index`` for scoring genes later.

    If ``adata.var`` holds the two suffixed columns ``gene_names-0`` and
    ``gene_names-1`` (added when the genes of the two time points are not the
    same), they are collapsed into a single ``gene_names`` column, provided
    both hold the same values in the same order.

    Parameters
    ----------
    adata
        Object whose ``var`` table has either a ``gene_names`` column or the
        ``gene_names-0`` / ``gene_names-1`` pair. It is modified in place.

    Returns
    -------
    The same ``adata`` object with the stringified gene names as ``var``
    index (made unique via ``var_names_make_unique``), or ``np.nan`` after
    printing an error message if the two suffixed columns disagree.
    """
    if 'gene_names-0' in adata.var.columns:
        names_0 = list(adata.var['gene_names-0'])
        names_1 = list(adata.var['gene_names-1'])
        if names_0 != names_1:
            print('Fatal Error: Gene names are not the same!!!')
            # Use np.nan: the np.NaN alias was removed in NumPy 2.0 and would
            # raise AttributeError here instead of returning the sentinel.
            return np.nan

        # Both columns are identical, so keep one under the plain name.
        adata.var['gene_names'] = adata.var['gene_names-1']
        del adata.var['gene_names-0']
        del adata.var['gene_names-1']

    # Build the new index from the string form of the gene names.
    adata.var['index'] = [str(a) for a in adata.var['gene_names']]
    adata.var = adata.var.set_index('index')
    adata.var_names_make_unique()

    return adata

# Looping over all time pairs

In [None]:
# Build and save the concatenated anndata for every pair of consecutive time
# points. Pairing ts with ts[1:] stops at the last valid pair; the previous
# range(20) indexed ts[i+1] one past the end of the 20-element list and
# raised IndexError on the final iteration.
for ts0, ts1 in zip(ts, ts[1:]):
    print(ts0)

    adata = load_and_concatenate(ts0, ts1)
    adata = add_representation(adata, ts0, ts1)
    adata = fix_adata_var(adata)

    adata.write(f"{Path}/anndatas/adata_{ts0}_{ts1}.h5ad", compression='gzip')

E3.5
E4.5
E5.25
E5.5
E6.25
E6.5
E6.75
E7.0
E7.25
E7.5
E7.75
E8.0
E8.25
E8.5a


In [None]:
# Process only the pairs whose earlier time point is ts[15] ('E9.5') up to
# ts[18] ('E12.5'), each combined with its successor in ts.
for i, ts0 in enumerate(ts[15:19], start=15):
    ts1 = ts[i + 1]
    print(ts0)

    # Load both time points, attach the Seurat representation, fix the
    # gene index.
    adata = load_and_concatenate(ts0, ts1)
    adata = add_representation(adata, ts0, ts1)
    adata = fix_adata_var(adata)

    adata.write(
        f"{Path}/anndatas/adata_{ts0}_{ts1}.h5ad",
        compression='gzip',
    )

E9.5
E10.5


In [12]:
# Write the anndata currently held in `adata` for the current (ts0, ts1) pair.
adata.write(
    f"{Path}/anndatas/adata_{ts0}_{ts1}.h5ad",
    compression='gzip',
)