# BDSO annotations

## Import dependencies

In [None]:
## Import dependencies

import pandas as pd
import scanpy as sc 
import anndata as ad
import numpy as np
from scipy.sparse import csr_matrix

In [None]:
## Load the h5ad file into `adata`; the later cells read and modify `adata.obs`

adata = sc.read(
    filename="h5ad-downloads/an-integrated-transcriptomic-cellxgene.h5ad",
)

## Explore the Data

In [None]:
## List the column names (keys) available in adata.obs

adata.obs.columns

In [None]:
## Look at a few specific keys side by side - useful for quick viewing
## (e.g. to see how many categories of CL terms are mapped)

adata.obs[
    [
        'BICCN_class_label',
        'BICCN_subclass_label',
        'BICCN_cluster_label',
        'BICCN_ontology_term_id',
        'cell_type_ontology_term_id',
    ]
]

In [None]:
## Show only the rows whose BICCN_cluster_label equals a specific string
adata.obs[adata.obs['BICCN_cluster_label'] == 'L6 CT Grp_1']

In [None]:
## Gives an overview table of all the data in adata.obs.
## Setting display.max_columns to None makes pandas show every column instead of
## truncating wide tables. Use the documented top-level pd.set_option rather than
## pd.pandas.set_option, which only works because the pandas package happens to
## bind its own name inside its namespace.
pd.set_option('display.max_columns', None)
adata.obs

## Writing a CSV
Usually not needed, but might be useful for other purposes

In [None]:
## Start from an empty dataframe; only the columns of interest are added to it
## in the next cell before it is written out

a_obs = pd.DataFrame(data=None)

In [None]:
## Copy the columns of interest from adata.obs into a_obs.
## Each pair is (column name in a_obs, column name in adata.obs).

for target_column, source_column in (
    ('author_cell_type', 'BICCN_cluster_label'),
    ('author_subclass_type', 'BICCN_subclass_label'),
    ('cell_type_ontology_term_id', 'cell_type_ontology_term_id'),
):
    a_obs[target_column] = adata.obs[source_column]

In [None]:
## Write the selected columns out as a CSV file

a_obs.to_csv(path_or_buf="output/a_obs_layer.csv")

## Simple Replacements
This method uses a simple replacement by mapping BICCN_subclass_label to a CL term.
For bigger scale replacements, use the next section.

In [None]:
## BICCN_subclass_label -> CL term ID pairs used for the simple replacement
dct = {
    'L6 CT': 'CL:4023042',
    'Lamp5': 'CL:4023011',
}

In [None]:
## Change the CL term based on the dictionary above: rows whose
## BICCN_subclass_label is a key in dct get the mapped term, and all other rows
## keep their current cell_type_ontology_term_id (via fillna).

adata.obs = adata.obs.assign(
    cell_type_ontology_term_id=(
        adata.obs['BICCN_subclass_label']
        .map(dct)
        .fillna(adata.obs['cell_type_ontology_term_id'])
    )
)

In [None]:
## Check the changes by displaying the updated column

adata.obs.cell_type_ontology_term_id

## Use tsv to Replace
This method reads a tab-separated file, "mappings.tsv", whose first column is a BICCN_subclass_label and whose second column is the CL term to assign, and uses it to replace cell_type_ontology_term_id.

In [None]:
## Load mappings.tsv as a dictionary.
## Each non-blank line must hold exactly two tab-separated columns: the key
## (looked up against BICCN_subclass_label in the next cell) and the value
## (the replacement cell_type_ontology_term_id). Surrounding whitespace is
## stripped from both. Raises ValueError on a line with any other column count.

dct = {}
# The context manager closes the file even if a malformed line raises below;
# the explicit encoding keeps the result independent of the platform default.
with open('mappings.tsv', 'r', encoding='utf-8') as mappings:
    for line_no, line in enumerate(mappings, start=1):
        # Skip blank lines (e.g. a trailing empty line at the end of the file),
        # which would otherwise break the two-column unpacking.
        if not line.strip():
            continue
        fields = line.split('\t')
        if len(fields) != 2:
            raise ValueError(
                f"mappings.tsv line {line_no}: expected 2 tab-separated "
                f"columns, found {len(fields)}"
            )
        key, value = (field.strip() for field in fields)
        dct[key] = value
print(dct)

In [None]:
## Change the CL term based on the dictionary above: rows whose
## BICCN_subclass_label is a key in dct get the mapped term, and all other rows
## keep their current cell_type_ontology_term_id (via fillna).

adata.obs = adata.obs.assign(
    cell_type_ontology_term_id=(
        adata.obs['BICCN_subclass_label']
        .map(dct)
        .fillna(adata.obs['cell_type_ontology_term_id'])
    )
)

In [None]:
## Check the changes by displaying the updated column

adata.obs.cell_type_ontology_term_id

## Write out

In [None]:
## Create a new CSV for checking: the author labels next to the (updated) CL term.
## Keys are the CSV column names, values are the adata.obs columns they come from.

new_obs = pd.DataFrame()
for csv_column, obs_column in {
    'author_cell_type': 'BICCN_cluster_label',
    'author_subclass_type': 'BICCN_subclass_label',
    'cell_type_ontology_term_id': 'cell_type_ontology_term_id',
}.items():
    new_obs[csv_column] = adata.obs[obs_column]
new_obs.to_csv(path_or_buf="output/new_obs_layer.csv")

In [None]:
## Save adata, including the modified obs table, to a new h5ad file

adata.write(filename="output/updated-file.h5ad")