# Human Primary Cortex by 10X

- https://portal.brain-map.org/atlases-and-data/rnaseq/human-m1-10x
- This data set includes single-nucleus transcriptomes from 76,533 total nuclei derived from 2 post-mortem human brain specimens, to survey cell type diversity in the primary motor cortex (M1C or M1). In total, 127 transcriptomic cell types were identified. This research was supported by the National Institute Of Mental Health of the National Institutes of Health under Award Number U01MH114812. The content on this page is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health.

In [1]:
import scanpy as sc
import os,sys,glob
import pandas as pd
import numpy as np
import anndata as ad

In [2]:
meta = pd.read_csv('Allen_M1_10X/metadata.csv', sep=',')
meta.shape

(76533, 39)

In [3]:
# https://www.nature.com/articles/s41586-021-03465-8/tables/1
meta.loc[meta.external_donor_name_label=='H18.30.001', 'sampleID'] = 'H18.30.001'
meta.loc[meta.external_donor_name_label=='H18.30.002', 'sampleID'] = 'H18.30.002'

meta.loc[meta.external_donor_name_label=='H18.30.001', 'Age'] = (60 * 365) + 365
meta.loc[meta.external_donor_name_label=='H18.30.002', 'Age'] = (50 * 365) + 365

meta.loc[meta.external_donor_name_label=='H18.30.001', 'Race'] = 'Unknown'
meta.loc[meta.external_donor_name_label=='H18.30.002', 'Race'] = 'Unknown'

meta.loc[meta.external_donor_name_label=='H18.30.001', 'Hemisphere'] = 'R'
meta.loc[meta.external_donor_name_label=='H18.30.002', 'Hemisphere'] = 'R'

meta.loc[meta.external_donor_name_label=='H18.30.001', 'PMI'] = 18
meta.loc[meta.external_donor_name_label=='H18.30.002', 'PMI'] = 10

meta.loc[meta.external_donor_name_label=='H18.30.001', 'RIN'] = 7.9
meta.loc[meta.external_donor_name_label=='H18.30.002', 'RIN'] = 8.2

In [4]:
cols_to_use = ['sample_name', 'batch', 'sampleID', 'Age', 'Assay', 'Stage', 'Race', 'PMI', 'Brain_Region', 'Hemisphere', 'Sex', 'Library',
               'cluster_original', 'cluster_main', 'Dataset']


In [5]:
meta['Sex'] = meta['donor_sex_label']
meta['Brain_Region'] = meta['region_label']
meta['cluster_original'] = meta['subclass_label']
meta['batch'] = 'Unknown'
meta['Assay'] = "10x Chromium 3' v3"
meta['Library'] = "snRNA"
meta['Stage'] = 'Adult'
meta['PMI'] = 'Unknown'
meta['Dataset'] = 'AllenM1'


In [6]:
# Update the cell type
meta['cluster_main'] = 'Others'
meta.loc[meta['class_label'] == 'Glutamatergic', 'cluster_main'] = 'Ext'
meta.loc[meta['class_label'] == 'GABAergic', 'cluster_main'] = 'IN'
meta.loc[meta['cluster_original'] == 'Astrocyte', 'cluster_main'] = 'Astro'
meta.loc[meta['cluster_original'] == 'Microglia', 'cluster_main'] = 'MG'
meta.loc[meta['cluster_original'] == 'OPC', 'cluster_main'] = 'OPC'
meta.loc[meta['cluster_original'] == 'Oligodendrocyte', 'cluster_main'] = 'OD'

In [7]:
meta.groupby(['cluster_original', 'cluster_main']).size()

cluster_original  cluster_main
Astro             Others            568
Endo              Others             64
L2/3 IT           Ext             24231
L5 ET             Ext               858
L5 IT             Ext             13834
L5/6 NP           Ext              1487
L6 CT             Ext              3734
L6 IT             Ext              1829
L6 IT Car3        Ext               327
L6b               Ext              2236
Lamp5             IN               4454
Micro-PVM         Others            108
OPC               OPC               283
Oligo             Others           2942
Pvalb             IN               7782
Sncg              IN                895
Sst               IN               5936
Sst Chodl         IN                 67
VLMC              Others             40
Vip               IN               4858
dtype: int64

In [9]:
adata = sc.read('Allen_M1_10X/matrix.csv.gz')

In [10]:
adata.var.index

Index(['DDX11L1', 'WASH7P', 'MIR6859-1', 'MIR1302-2', 'FAM138A',
       'LOC105379212', 'OR4G4P', 'OR4G11P', 'OR4F5', 'LOC105379213',
       ...
       'ND4', 'TRNH', 'TRNS2', 'TRNL2', 'ND5', 'ND6', 'TRNE', 'CYTB', 'TRNT',
       'TRNP'],
      dtype='object', length=50281)

In [11]:
adata

AnnData object with n_obs × n_vars = 76533 × 50281

In [12]:
sum(adata.obs.index == meta.sample_name)

76533

In [13]:
adata.obs = meta[cols_to_use]

In [14]:
adata.obs

Unnamed: 0,sample_name,batch,sampleID,Age,Assay,Stage,Race,PMI,Brain_Region,Hemisphere,Sex,Library,cluster_original,cluster_main,Dataset
0,AAACCCAAGGATTTCC-LKTX_190129_01_A01,Unknown,H18.30.001,22265.0,10x Chromium 3' v3,Adult,Unknown,Unknown,M1,R,F,snRNA,Sst,IN,AllenM1
1,AAACCCAAGTATGGCG-LKTX_190129_01_A01,Unknown,H18.30.001,22265.0,10x Chromium 3' v3,Adult,Unknown,Unknown,M1,R,F,snRNA,L5/6 NP,Ext,AllenM1
2,AAACCCACAAAGTGTA-LKTX_190129_01_A01,Unknown,H18.30.001,22265.0,10x Chromium 3' v3,Adult,Unknown,Unknown,M1,R,F,snRNA,L5 IT,Ext,AllenM1
3,AAACCCACACTACTTT-LKTX_190129_01_A01,Unknown,H18.30.001,22265.0,10x Chromium 3' v3,Adult,Unknown,Unknown,M1,R,F,snRNA,L2/3 IT,Ext,AllenM1
4,AAACCCACAGTGAGCA-LKTX_190129_01_A01,Unknown,H18.30.001,22265.0,10x Chromium 3' v3,Adult,Unknown,Unknown,M1,R,F,snRNA,Oligo,Others,AllenM1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76528,TTTGTTGAGATGGCGT-LKTX_190130_01_H01,Unknown,H18.30.001,22265.0,10x Chromium 3' v3,Adult,Unknown,Unknown,M1,R,F,snRNA,Oligo,Others,AllenM1
76529,TTTGTTGCACAGCCAC-LKTX_190130_01_H01,Unknown,H18.30.001,22265.0,10x Chromium 3' v3,Adult,Unknown,Unknown,M1,R,F,snRNA,L5 IT,Ext,AllenM1
76530,TTTGTTGCAGAGACTG-LKTX_190130_01_H01,Unknown,H18.30.001,22265.0,10x Chromium 3' v3,Adult,Unknown,Unknown,M1,R,F,snRNA,L2/3 IT,Ext,AllenM1
76531,TTTGTTGCATAATGAG-LKTX_190130_01_H01,Unknown,H18.30.001,22265.0,10x Chromium 3' v3,Adult,Unknown,Unknown,M1,R,F,snRNA,Oligo,Others,AllenM1


In [15]:
adata.obs = adata.obs.set_index('sample_name')

In [16]:
adata.write('anndata/AllenM1.h5ad')