In [1]:
import scanpy as sc
import anndata as ad
import pandas as pd
import os
import numpy as np

## LOADING DATA

In [2]:
# Directory holding the reannotation CSVs read by the cells below.
# NOTE(review): absolute, machine-specific path — `data_path` is reassigned
# again further down, so the value in effect depends on which cell ran last.
data_path = '/data/hadjantalab/lucas/atlas/data/reannotation'

In [3]:
# Read the 'reanno_endo_cohort55-775' NN-transfer table, indexed by its 'cell'
# column, and discard every row that contains a missing value.
file_path = os.path.join(data_path, 'reanno_endo_cohort55-775_nn-transfer.csv')
reanno_endo_cohort55_775_nn_transfer = (
    pd.read_csv(file_path, index_col='cell')
    .dropna()
)

In [4]:
# Read the 'reanno_endo_cohort80-95' NN-transfer table, indexed by its 'cell'
# column, and discard every row that contains a missing value.
file_path = os.path.join(data_path, 'reanno_endo_cohort80-95_nn-transfer.csv')
reanno_endo_cohort80_95_nn_transfer = (
    pd.read_csv(file_path, index_col='cell')
    .dropna()
)

In [5]:
# Read the 'reanno_meso_cohort55-775' NN-transfer table, indexed by its 'cell'
# column, and discard every row that contains a missing value.
file_path = os.path.join(data_path, 'reanno_meso_cohort55-775_nn-transfer.csv')
reanno_meso_cohort55_775_nn_transfer = (
    pd.read_csv(file_path, index_col='cell')
    .dropna()
)

In [6]:
# Read the 'reanno_meso_cohort80-95' NN-transfer table, indexed by its 'cell'
# column, and discard every row that contains a missing value.
file_path = os.path.join(data_path, 'reanno_meso_cohort80-95_nn-transfer.csv')
reanno_meso_cohort80_95_nn_transfer = (
    pd.read_csv(file_path, index_col='cell')
    .dropna()
)

In [42]:
# Load the atlas first. The relabel table built below reads `adata.obs`, so on
# a fresh kernel (Restart & Run All) the AnnData object has to exist before it.
data_path = '/data/hadjantalab/lucas/atlas/data'
file_path = os.path.join(data_path, 'adata.h5ad')
adata = ad.read_h5ad(file_path)

# Build a one-column table that relabels every cell whose 'celltype_nowotschin'
# is 'Blood' as 'Erythroid'. The column is called 'nn_transfer' because that is
# the column later read from the combined reannotation table.
is_nowotschin_blood = (adata.obs['celltype_nowotschin'] == 'Blood').to_numpy()
nowotschin_blood = pd.DataFrame(index=adata.obs.index[is_nowotschin_blood])
nowotschin_blood['nn_transfer'] = 'Erythroid'

## ADDING TO ADATA

In [43]:
# Stack the four NN-transfer tables and the Blood -> 'Erythroid' relabel table
# into a single frame keyed by cell.
# NOTE(review): pd.concat does not check for repeated index labels by default,
# so a cell present in more than one input would appear more than once here —
# TODO confirm the inputs are disjoint.
reanno = pd.concat(
    [
        reanno_endo_cohort55_775_nn_transfer,
        reanno_endo_cohort80_95_nn_transfer,
        reanno_meso_cohort55_775_nn_transfer,
        reanno_meso_cohort80_95_nn_transfer,
        nowotschin_blood,
    ]
)

In [45]:
## CREATING NEW ANNOTATION COLUMN AND FILLING IT WITH VALUES

# Start every cell with a placeholder label, stored as plain Python objects.
adata.obs['annotation'] = 'to_be_determined'
adata.obs['annotation'] = adata.obs['annotation'].astype('object')

# Copy labels over from each source column wherever that column is not
# 'unknown'. Order matters: 'celltype_rosshandler' is applied last, so it wins
# for any cell that has a non-'unknown' value in both columns.
for source_column in ('celltype_nowotschin', 'celltype_rosshandler'):
    has_label = adata.obs[source_column] != 'unknown'
    adata.obs.loc[has_label, 'annotation'] = adata.obs.loc[has_label, source_column]


In [46]:

# Per-label cell counts (missing values included), ordered by label name.
annotation_counts = adata.obs['annotation'].value_counts(dropna=False).sort_index()

# Lift the row limit only for this print so that every label is listed.
with pd.option_context('display.max_rows', None):
    print(annotation_counts)


Allantois                                          8515
Allantois endothelium                              2689
Amniotic ectoderm                                  3140
Anterior Primitive Streak                          1787
Anterior cardiopharyngeal progenitors              1698
Anterior somitic tissues                           3784
Blood                                              1898
Blood progenitors                                  2642
Branchial arch neural crest                        7303
Cardiomyocytes FHF 1                               2619
Cardiomyocytes FHF 2                               2505
Cardiomyocytes SHF 1                               2023
Cardiomyocytes SHF 2                               1990
Cardiopharyngeal progenitors                       1530
Cardiopharyngeal progenitors FHF                   3058
Cardiopharyngeal progenitors SHF                   5151
Caudal epiblast                                    3674
Caudal mesoderm                                 

In [47]:
# Number of distinct annotation labels before the new labels are transferred.
np.unique(adata.obs['annotation']).size

118

In [48]:
## TRANSFERRING THE NEW LABELS
# Overwrite 'annotation' for every cell listed in `reanno` with that cell's
# 'nn_transfer' label; cells not listed in `reanno` keep their current value.
reannotated_cells = reanno.index
adata.obs.loc[reannotated_cells, 'annotation'] = reanno['nn_transfer'].copy()


In [49]:
# Number of distinct annotation labels after the new labels were transferred.
np.unique(adata.obs['annotation']).size

109

In [50]:
# Per-label cell counts (missing values included), ordered by label name.
annotation_counts = adata.obs['annotation'].value_counts(dropna=False).sort_index()

# Lift the row limit only for this print so that every label is listed.
with pd.option_context('display.max_rows', None):
    print(annotation_counts)

Allantois                                          8515
Allantois endothelium                              2689
Amniotic ectoderm                                  3140
Anterior cardiopharyngeal progenitors              1912
Anterior somitic tissues                           3878
Blood progenitors                                  2642
Branchial arch neural crest                        7303
Cardiomyocytes FHF 1                               3445
Cardiomyocytes FHF 2                               2540
Cardiomyocytes SHF 1                               2777
Cardiomyocytes SHF 2                               2140
Cardiopharyngeal progenitors                       1629
Cardiopharyngeal progenitors FHF                   3662
Cardiopharyngeal progenitors SHF                   9275
Caudal epiblast                                    3674
Caudal mesoderm                                    3404
Chorioallantoic-derived erythroid progenitors       918
Cranial mesoderm                                

In [51]:
# Show the AnnData summary to confirm 'annotation' is present in `obs`.
adata

AnnData object with n_obs × n_vars = 535935 × 8342
    obs: 'batch', 'stage', 'sample', 'covariate_composite', 'celltype_nowotschin', 'celltype_rosshandler', 'celltype_pijuansala', 'nowotschin_cluster', 'rosshandler_somite_count', 'rosshandler_anatomy', 'rosshandler_phase', 'rosshandler_S_score', 'rosshandler_G2M_score', 'rosshandler_louvain', 'rosshandler_leiden', 'mt-Atp6', 'mt-Atp8', 'mt-Co1', 'mt-Co2', 'mt-Co3', 'mt-Cytb', 'mt-Nd1', 'mt-Nd2', 'mt-Nd3', 'mt-Nd4', 'mt-Nd4l', 'mt-Nd5', 'mt-Nd6', 'S_score', 'G2M_score', 'phase', '_scvi_batch', '_scvi_labels', 'annotation_old', 'annotation'
    var: 'highly_variable'
    uns: '_scvi_manager_uuid', '_scvi_uuid', 'colormap_annotation', 'colormap_batch', 'colormap_rosshandler', 'colormap_stage', 'log1p', 'neighbors_scVI', 'phase_colors', 'symbolmap_batch', 'symbolmap_plotly_batch', 'symbolmap_plotly_stage', 'symbolmap_stage', 'umap'
    obsm: 'X_scVI', 'X_umap', 'X_umap_scVI', '_scvi_extra_categorical_covs', '_scvi_extra_continuous_covs'
 

## SAVE DATA 

In [52]:
# Write the updated AnnData object to disk.
# NOTE(review): this is the same directory and file name ('adata.h5ad') that
# `adata` is read from earlier in this notebook, so the input file is
# overwritten in place and a re-run starts from the already-modified file —
# confirm this is intended.
data_path = '/data/hadjantalab/lucas/atlas/data'
file_path = os.path.join(data_path, 'adata.h5ad')
adata.write_h5ad(file_path)

In [53]:
# Save the combined reannotation table (cell index included) as
# 'reannotation.csv' alongside the input tables. Comma separator and index
# output are the `to_csv` defaults, so they are not spelled out.
data_path = '/data/hadjantalab/lucas/atlas/data/reannotation'
file_path = os.path.join(data_path, 'reannotation.csv')
reanno.to_csv(file_path)

In [None]:
# Export the final per-cell 'annotation' column as a standalone one-column
# CSV, keyed by the obs index.
anno = adata.obs[['annotation']].copy()
data_path = '/data/hadjantalab/lucas/atlas/data'
file_path = os.path.join(data_path, 'annotation.csv')
anno.to_csv(file_path)