# Download

- RNA (10X) dataset 
    - counts https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE113576
    - annotations https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6482113/bin/NIHMS1024025-supplement-Table_S1.xlsx


- MERFISH dataset https://datadryad.org/stash/dataset/doi:10.5061/dryad.8t8s248

# Preprocess

In [1]:
import scanpy as sc
import pandas as pd
import numpy as np

In [2]:
adata_rna = sc.read_10x_mtx('../datasets/raw/Preoptic_RNA_MERFISH/RNA')
sc.pp.filter_cells(adata_rna, min_genes = 200)
sc.pp.filter_genes(adata_rna, min_cells = 3)

meta_rna = pd.read_excel('../datasets/raw/Preoptic_RNA_MERFISH/NIHMS1024025-supplement-Table_S1.xlsx',
                         index_col = 0, header = 1)
adata_rna.obs = meta_rna.loc[adata_rna.obs.index, ['Replicate number', 'Cell class (determined from clustering of all cells)']]
adata_rna.obs.columns = ['replicate', 'celltype']
adata_rna.obs['batch'] = 'RNA'

data_merfish = pd.read_csv('../datasets/raw/Preoptic_RNA_MERFISH/MERFISH/Moffitt_and_Bambah-Mukku_et_al_merfish_all_cells.csv',
                           index_col = 0)
adata_merfish = sc.AnnData(data_merfish.iloc[:, 8:])
adata_merfish = adata_merfish[:, True ^ np.isnan(adata_merfish.X.sum(axis = 0))]
adata_merfish.obs = data_merfish[['Animal_ID', 'Cell_class']]
adata_merfish.obs.index = data_merfish.index
adata_merfish.obs.columns = ['animal', 'celltype']
adata_merfish.obs['batch'] = 'MERFISH'

adata = sc.concat((adata_rna, adata_merfish[adata_merfish.obs.animal == 1]))
adata = adata[adata.obs.celltype != 'Ambiguous']
adata = adata[adata.obs.celltype != 'Unstable']
adata = adata[adata.X.sum(axis = 1) > 0]

adata.obs.celltype[adata.obs.celltype == 'Astrocytes'] = 'Astrocyte'
adata.obs.celltype[adata.obs.celltype == 'Endothelial 1'] = 'Endothelial'
adata.obs.celltype[adata.obs.celltype == 'Endothelial 2'] = 'Endothelial'
adata.obs.celltype[adata.obs.celltype == 'Endothelial 3'] = 'Endothelial'
adata.obs.celltype[adata.obs.celltype == 'Immature oligodendrocyte'] = 'OD Immature'
adata.obs.celltype[adata.obs.celltype == 'Mature oligodendrocyte'] = 'OD Mature'
adata.obs.celltype[adata.obs.celltype == 'Newly formed oligodendrocyte'] = 'Newly formed'
adata.obs.celltype[adata.obs.celltype == 'OD Immature 1'] = 'OD Immature'
adata.obs.celltype[adata.obs.celltype == 'OD Immature 2'] = 'OD Immature'
adata.obs.celltype[adata.obs.celltype == 'OD Mature 1'] = 'OD Mature'
adata.obs.celltype[adata.obs.celltype == 'OD Mature 2'] = 'OD Mature'
adata.obs.celltype[adata.obs.celltype == 'OD Mature 3'] = 'OD Mature'
adata.obs.celltype[adata.obs.celltype == 'OD Mature 4'] = 'OD Mature'

adata.write('../datasets/preprocessed/Preoptic_RNA_MERFISH.h5ad')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adata_merfish.obs['batch'] = 'MERFISH'
Trying to set attribute `.obs` of view, copying.
... storing 'celltype' as categorical
Trying to set attribute `.obs` of view, copying.
... storing 'batch' as categorical


In [3]:
# Run the following code in R to convert h5ad to h5seurat which Seurat handles
# Check current directory first
# library(SeuratDisk)
# Convert('Preoptic_RNA_MERFISH.h5ad', 'h5seurat')