In [1]:
import scanpy as sc
import os,sys,glob
import pandas as pd
import numpy as np
import anndata as ad
import matplotlib.pyplot as plt

import seaborn as sns
from matplotlib.colors import ListedColormap

In [2]:
sc.settings.n_jobs = 96
sc.set_figure_params(figsize=(4,4), vector_friendly = True)
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
%config InlineBackend.figure_format='retina'

In [3]:
import scrublet as scr
import multiprocessing

# Data processing

In [14]:
adata = sc.read("Data/Input_230330_adata_outer_minGenes_50.h5ad") # 3m

In [None]:
adata

AnnData object with n_obs × n_vars = 1918696 × 49133
    obs: 'batch', 'sampleID', 'Age', 'Assay', 'Stage', 'Race', 'PMI', 'Hemisphere', 'Library', 'Brain_Region', 'Dataset', 'Sex', 'Diagnosis', 'DF_classification', 'cluster_original', 'cluster_main', 'n_genes'

In [None]:
# Update stage
# https://www.nature.com/articles/nature10523/tables/1
adata.obs['Stage2'] = 'Unknown'

# Set the value of 'Stage2' to 'First trimester' if 'Age' is between 7 and 30
adata.obs.loc[(adata.obs['Age'] <= 7 * 14), 'Stage2'] = 'Fetal (1st trimester)'
adata.obs.loc[(adata.obs['Age'] > 7 * 14) & (adata.obs['Age'] <= 7 * 24), 'Stage2'] = 'Fetal (2nd trimester)'
adata.obs.loc[(adata.obs['Age'] > 7 * 24) & (adata.obs['Age'] <= 365), 'Stage2'] = 'Fetal (3rd trimester)'
adata.obs.loc[(adata.obs['Age'] > 365) & (adata.obs['Age'] <= 545), 'Stage2'] = 'Neonatal'
adata.obs.loc[(adata.obs['Age'] > 545) & (adata.obs['Age'] <= 2555), 'Stage2'] = 'Childhood (1-6Y)'
adata.obs.loc[(adata.obs['Age'] > 2555) & (adata.obs['Age'] <= 4745), 'Stage2'] = 'Childhood (6-12Y)'
adata.obs.loc[(adata.obs['Age'] > 4745) & (adata.obs['Age'] <= 7665), 'Stage2'] = 'Adolescence (12-20Y)'
adata.obs.loc[(adata.obs['Age'] >= 7665) & (adata.obs['Age'] <= 14965), 'Stage2'] = 'Adult (20-40Y)'
adata.obs.loc[(adata.obs['Age'] > 14965) & (adata.obs['Age'] <= 22265), 'Stage2'] = 'Adult (40-60Y)'
adata.obs.loc[(adata.obs['Age'] > 22265) & (adata.obs['Age'] <= 29565), 'Stage2'] = 'Adult (60-80Y)'
adata.obs.loc[(adata.obs['Age'] > 29565), 'Stage2'] = 'Adult (>80Y)'

In [None]:
adata.obs[["Dataset", "Stage2"]].value_counts()

Dataset           Stage2               
Braun             Fetal (1st trimester)    499780
ROSMAP2           Adult (>80Y)             420229
Cao               Fetal (2nd trimester)    214395
Turecki           Adult (20-40Y)            78886
AllenM1           Adult (40-60Y)            76533
Cao               Fetal (1st trimester)     69930
Eze               Fetal (2nd trimester)     60945
Trevino           Fetal (2nd trimester)     57868
ZhangPD           Adult (60-80Y)            54843
ROSMAP2           Adult (60-80Y)            52010
AllenMultiCortex  Adult (40-60Y)            49417
Morabito          Adult (>80Y)              44596
Herring           Neonatal                  38204
                  Childhood (1-6Y)          31252
                  Adult (20-40Y)            24564
ZhangPD           Adult (>80Y)              23140
Herring           Fetal (2nd trimester)     19832
Cameron           Fetal (2nd trimester)     18091
Eze               Fetal (1st trimester)     17494
Morabito  

In [None]:
adata = adata[adata.obs["Dataset"] != "Eze"]
adata = adata[adata.obs["Dataset"] != "Trevino"]

In [None]:
adata.obs["Diagnosis"].value_counts()

nan          910055
normal       275029
AD_dcfdx1    166415
AD_dcfdx4    151202
AD_dcfdx2    121484
MDD           43674
PD            42716
AD            38676
AD_dcfdx5     19671
AD_dcfdx6      9233
AD_dcfdx3      4234
Name: Diagnosis, dtype: int64

In [None]:
adata = adata[adata.obs["Diagnosis"].isin(["nan", "normal"])]

In [None]:
adata.obs[["Diagnosis", "Dataset"]].value_counts()

Diagnosis  Dataset         
nan        Braun               499780
           Cao                 284325
normal     Herring             149035
nan        AllenM1              76533
           AllenMultiCortex     49417
normal     ZhangPD              35267
           Turecki              35212
           Morabito             22796
           Cameron              18091
           Hardwick             14628
dtype: int64

In [None]:
adata.obs["Assay"].value_counts()

10x Chromium 3' v3    569060
sci-RNA-seq3          284325
10x Chromium 3' v2    282282
SMART-Seq v4           49417
Name: Assay, dtype: int64

In [None]:
adata = adata[adata.obs["Assay"].isin(["10x Chromium 3' v3", "10x Chromium 3' v2"])]

In [None]:
adata

View of AnnData object with n_obs × n_vars = 851342 × 49133
    obs: 'batch', 'sampleID', 'Age', 'Assay', 'Stage', 'Race', 'PMI', 'Hemisphere', 'Library', 'Brain_Region', 'Dataset', 'Sex', 'Diagnosis', 'DF_classification', 'cluster_original', 'cluster_main', 'n_genes', 'Stage2'

In [None]:
samples = adata.obs["sampleID"].unique()
samples

['H18.30.001', 'H18.30.002', '10X119_2', '10X132_7', '10X123_4', ..., 'HSDG10HC', 'hsDG101HC', 'hsDG13HC', 'hsDG30HC', 'hsDG99HC']
Length: 406
Categories (406, object): ['H18.30.001', 'H18.30.002', '10X119_2', '10X132_7', ..., 'hsDG101HC', 'hsDG13HC', 'hsDG30HC', 'hsDG99HC']

In [None]:
def process_sample(sample):
    print(sample)
    adata1 = adata[adata.obs["sampleID"] == sample]
    
    # Minimum filtering
    # 500 Genes per cell
    sc.pp.filter_cells(adata1, min_genes=500)
    
    # Mitochondria < 50%
    adata1.var['mt'] = adata1.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
    sc.pp.calculate_qc_metrics(adata1, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True) 
    adata1 = adata1[adata1.obs.pct_counts_mt < 50, :]
    
    # Doublet finding
    scrub = scr.Scrublet(adata1.X)
    doublet_scores, predicted_doublets = scrub.scrub_doublets(verbose=False, n_prin_comps = 10)
    adata1.obs['doublet_scores'] = doublet_scores
    adata1.obs['predicted_doublets'] = predicted_doublets    
    adata1.obs['predicted_doublets'] = adata1.obs['predicted_doublets'].astype(str)

    # doublet_scores < 0.2
    adata1 = adata1[adata1.obs.doublet_scores < 0.2, :]

    return adata1

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
if __name__ == '__main__':
        num_processes = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(processes=num_processes)

        with pool as p:
            list_data = p.map(process_sample, samples)

adata2 = ad.concat(list_data, join="outer") # 15m - 20m

H18.30.001
10X116_110X118_710X165_410X188_310X156_7
10X126_710X186_310X163_410X211_410X116_210X114_410X196_210X170_110X288_410X288_610X132_510X185_2
10X119_210X200_110X198_510X132_8


10X116_410X163_810X178_510X170_2
10X109_4
10X211_110X116_510X123_410X89_5

10X254_2
10X110_6


10X198_4
10X126_2


10X126_1


10X152_8


10X118_110X288_110X164_210X169_810X212_410X163_210X210_310X157_110X148_110X208_810X122_610X102_2
10X102_1
10X177_610X188_110X167_210X122_210X169_710X200_210X212_110X207_210X186_410X187_610X125_310X110_5

10X287_410X177_510X89_410X169_1

10X157_2
10X122_3
10X185_810X112_110X104_7
10X104_310X115_810X147_110X118_810X170_410X125_1

10X156_5
10X152_210X126_410X104_410X207_3
10X124_1

10X89_7




10X123_7



10X123_510X254_8

10X101_210X185_4


10X199_6
10X167_7







10X124_610X169_5
















10X288_5





10X89_2






10X170_3



10X116_6
10X122_1
10X187_5
10X116_7
10X101_3
10X124_8
10X156_8
10X186_1
10X125_5
10X92_3
10X119_7
10X165_1
10X92_4
10X132_7
10X148_2
10X92

In [None]:
adata

View of AnnData object with n_obs × n_vars = 851342 × 49133
    obs: 'batch', 'sampleID', 'Age', 'Assay', 'Stage', 'Race', 'PMI', 'Hemisphere', 'Library', 'Brain_Region', 'Dataset', 'Sex', 'Diagnosis', 'DF_classification', 'cluster_original', 'cluster_main', 'n_genes', 'Stage2'

In [None]:
adata2

AnnData object with n_obs × n_vars = 806217 × 49133
    obs: 'batch', 'sampleID', 'Age', 'Assay', 'Stage', 'Race', 'PMI', 'Hemisphere', 'Library', 'Brain_Region', 'Dataset', 'Sex', 'Diagnosis', 'DF_classification', 'cluster_original', 'cluster_main', 'n_genes', 'Stage2', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_scores', 'predicted_doublets'

In [None]:
adata2

AnnData object with n_obs × n_vars = 806217 × 49133
    obs: 'batch', 'sampleID', 'Age', 'Assay', 'Stage', 'Race', 'PMI', 'Hemisphere', 'Library', 'Brain_Region', 'Dataset', 'Sex', 'Diagnosis', 'DF_classification', 'cluster_original', 'cluster_main', 'n_genes', 'Stage2', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_scores', 'predicted_doublets'
    uns: 'log1p'
    layers: 'counts', 'logcounts', 'scaled'

In [44]:
adata2.obs["Dataset"].value_counts()

Braun       481261
Herring     143846
AllenM1      74482
ZhangPD      33103
Turecki      22124
Morabito     20720
Cameron      17033
Hardwick     13648
Name: Dataset, dtype: int64

# Balancing

In [7]:
adata.obs[["Stage2"]].value_counts()

Stage2               
Fetal (1st trimester)    481261
Adult (40-60Y)            74482
Adult (60-80Y)            55502
Adult (20-40Y)            45656
Neonatal                  37254
Fetal (2nd trimester)     35183
Childhood (1-6Y)          30726
Adolescence (12-20Y)      14010
Childhood (6-12Y)         13739
Adult (>80Y)              11969
Fetal (3rd trimester)      6435
dtype: int64

In [9]:
adata.obs[["Brain_Region"]].value_counts()

Brain_Region       
BA9                    155743
M1                      74482
Forebrain               61723
Midbrain                56977
Cortex                  42399
Cerebellum              41211
BA8                     37888
Medulla                 37103
Thalamus                31783
Subcortex               28994
Striatum                27635
Pons                    27504
Brain                   24783
prefrontal cortex       20720
Hindbrain               18173
Hypothalamus            13374
Midbrain dorsal         12421
BA10                    11211
Diencephalon            10032
Frontal Cortex           9396
BA46                     7879
Ganglionic Eminence      7637
Cortex frontal           6437
Cortex temporal          5218
Midbrain ventral         4879
Hippocampus              4471
Cortex occipital         3874
Cortex parietal          3815
Cortex hemisphere B      3767
Caudate+Putamen          3741
Cortex entorhinal        3108
Telencephalon            2038
Cortex hemisphere A 

In [10]:
adata_sub = adata[adata.obs["Dataset"] == "Braun"].copy()

In [14]:
braun_sample = adata_sub.obs["sampleID"].unique()
braun_sample

['10X119_2', '10X132_7', '10X123_4', '10X124_8', '10X211_4', ..., '10X210_7', '10X302_3', '10X252_4', '10X211_5', '10X209_2']
Length: 340
Categories (340, object): ['10X119_2', '10X132_7', '10X123_4', '10X124_8', ..., '10X302_3', '10X252_4', '10X211_5', '10X209_2']

In [22]:
import random
random.seed(0)
# Create an empty list to store the randomly chosen values
random_selection = []

# Randomly choose 100 values and add them to the list
for _ in range(50):
    random_value = random.choice(braun_sample)
    random_selection.append(random_value)

# Print the randomly selected values
random_selection

['10X188_2',
 '10X168_5',
 '10X163_4',
 '10X169_1',
 '10X168_2',
 '10X254_4',
 '10X110_4',
 '10X99_5',
 '10X252_5',
 '10X119_5',
 '10X187_2',
 '10X124_3',
 '10X252_3',
 '10X101_7',
 '10X185_8',
 '10X101_7',
 '10X288_6',
 '10X198_3',
 '10X287_4',
 '10X169_2',
 '10X156_2',
 '10X99_6',
 '10X89_2',
 '10X178_5',
 '10X115_7',
 '10X187_5',
 '10X185_6',
 '10X115_5',
 '10X255_2',
 '10X101_3',
 '10X287_5',
 '10X122_1',
 '10X212_5',
 '10X302_1',
 '10X102_1',
 '10X177_4',
 '10X252_5',
 '10X152_5',
 '10X199_2',
 '10X124_5',
 '10X212_6',
 '10X167_8',
 '10X116_6',
 '10X168_1',
 '10X213_1',
 '10X125_2',
 '10X119_2',
 '10X199_8',
 '10X152_7',
 '10X152_2']

In [23]:
adata_sub = adata_sub[adata_sub.obs["sampleID"].isin(random_selection)]
adata_sub

View of AnnData object with n_obs × n_vars = 68108 × 49133
    obs: 'batch', 'sampleID', 'Age', 'Assay', 'Stage', 'Race', 'PMI', 'Hemisphere', 'Library', 'Brain_Region', 'Dataset', 'Sex', 'Diagnosis', 'DF_classification', 'cluster_original', 'cluster_main', 'n_genes', 'Stage2', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_scores', 'predicted_doublets'

In [11]:
adata = adata[adata.obs["Dataset"] != "Braun"]

In [24]:
list_data = [adata_sub, adata]

In [25]:
adata2 = ad.concat(list_data, join="outer") # 15m - 20m

In [26]:
adata2

AnnData object with n_obs × n_vars = 393064 × 49133
    obs: 'batch', 'sampleID', 'Age', 'Assay', 'Stage', 'Race', 'PMI', 'Hemisphere', 'Library', 'Brain_Region', 'Dataset', 'Sex', 'Diagnosis', 'DF_classification', 'cluster_original', 'cluster_main', 'n_genes', 'Stage2', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_scores', 'predicted_doublets'

# Metadata

In [5]:
adata2

AnnData object with n_obs × n_vars = 393064 × 49133
    obs: 'batch', 'sampleID', 'Age', 'Assay', 'Stage', 'Race', 'PMI', 'Hemisphere', 'Library', 'Brain_Region', 'Dataset', 'Sex', 'Diagnosis', 'DF_classification', 'cluster_original', 'cluster_main', 'n_genes', 'Stage2', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_scores', 'predicted_doublets'

In [6]:
adata2.layers["counts"] = adata2.X.copy()
sc.pp.normalize_total(adata2)
sc.pp.log1p(adata2)
adata2.layers["logcounts"] = adata2.X.copy()

In [7]:
np.max(adata2.layers["counts"])

17016.0

In [8]:
adata2.layers["scaled"] = sc.pp.scale(adata2, copy = True).X

In [None]:
# Group your AnnData object by the "Dataset" column
grouped = adata.obs.groupby("Dataset")

# Initialize an empty dictionary to store the maximum values for each group
max_values_by_group = {}

# Loop through each group and calculate the maximum value for the gene "ESR1" in the "scaled" layer
for group, group_data in grouped:
    # Extract the subset of AnnData for the current group
    subset_adata = adata[group_data.index]
    
    # Find the maximum value for the gene "ESR1" in the "scaled" layer
    max_value = np.max(subset_adata.layers["counts"][:, subset_adata.var_names == "ESR1"])
    
    # Store the result in the max_values_by_group dictionary with the group name as the key
    max_values_by_group[group] = max_value

# The max_values_by_group dictionary now contains the maximum values for each group
print(max_values_by_group)

{'AllenM1': 15.0, 'Braun': 75.0, 'Cameron': 4.0, 'Hardwick': 10.0, 'Herring': 32.0, 'Morabito': 17.0, 'Trevino': 1.0, 'Turecki': 5.0, 'ZhangPD': 44.0}


In [None]:
# Group your AnnData object by the "Dataset" column
grouped = adata.obs.groupby("Dataset")

# Initialize an empty dictionary to store the maximum values for each group
max_values_by_group = {}

# Loop through each group and calculate the maximum value for the gene "ESR1" in the "scaled" layer
for group, group_data in grouped:
    # Extract the subset of AnnData for the current group
    subset_adata = adata[group_data.index]
    
    # Find the maximum value for the gene "ESR2" in the "scaled" layer
    max_value = np.max(subset_adata.layers["counts"][:, subset_adata.var_names == "ESR2"])
    
    # Store the result in the max_values_by_group dictionary with the group name as the key
    max_values_by_group[group] = max_value

# The max_values_by_group dictionary now contains the maximum values for each group
print(max_values_by_group)

{'AllenM1': 10.0, 'Braun': 56.0, 'Cameron': 15.0, 'Hardwick': 6.0, 'Herring': 11.0, 'Morabito': 13.0, 'Trevino': 3.0, 'Turecki': 5.0, 'ZhangPD': 10.0}


In [9]:
region_mapping = {
    "M1"                     :  "Forebrain",
    "Midbrain"               :  "Midbrain",
    "Forebrain"              :  "Forebrain",
    "Pons"                   :  "Hindbrain",
    "Medulla"                :  "Hindbrain",
    "Subcortex"              :  "Uncategorized",
    "Head"                   :  "Uncategorized",
    "Hindbrain"              :  "Hindbrain",
    "Cerebellum"             :  "Hindbrain",
    "Striatum"               :  "Forebrain",
    "Midbrain dorsal"        :  "Midbrain",
    "Cortex"                 :  "Forebrain",
    "Diencephalon"           :  "Forebrain",
    "Brain"                  :  "Uncategorized",
    "Hippocampus"            :  "Forebrain",
    "Thalamus"               :  "Forebrain",
    "Cortex temporal"        :  "Forebrain",
    "Cortex parietal"        :  "Forebrain",
    "Caudate+Putamen"        :  "Forebrain",
    "Hypothalamus"           :  "Forebrain",
    "Cortex frontal"         :  "Forebrain",
    "Cortical hem"           :  "Uncategorized",
    "Midbrain ventral"       :  "Midbrain",
    "Telencephalon"          :  "Forebrain",
    "Cortex occipital"       :  "Forebrain",
    "Cortex entorhinal"      :  "Forebrain",
    "Cortex hemisphere B"    :  "Forebrain",
    "Cortex hemisphere A"    :  "Forebrain",
    "Frontal Cortex"         :  "Forebrain",
    "Ganglionic Eminence"    :  "Ganglionic Eminence",
    "BA9"                    :  "Forebrain",
    "BA8"                    :  "Forebrain",
    "BA46"                   :  "Forebrain",
    "BA10"                   :  "Forebrain",
    "prefrontal cortex"      :  "Forebrain",
    "Cerebral cortex"        :  "Forebrain"
}

adata2.obs["Brain_Region_Rough"] = adata2.obs["Brain_Region"].replace(region_mapping)
adata2.obs["Brain_Region_Rough"].value_counts()

Forebrain              349838
Hindbrain               19800
Uncategorized            8978
Ganglionic Eminence      7637
Midbrain                 6811
Name: Brain_Region_Rough, dtype: int64

In [12]:
region_mapping = {
    "M1"                     : 	"Cerebral cortex",
    "Midbrain"               : 	"Midbrain",
    "Forebrain"             : 	"Uncategorized",
    "Pons"                   : 	"Pons",
    "Medulla"                : 	"Medulla",
    "Subcortex"              : 	"Uncategorized",
    "Head"                   : 	"Uncategorized",
    "Hindbrain"              : 	"Uncategorized",
    "Cerebellum"             : 	"Cerebelum",
    "Striatum"               : 	"Cerebral nuclei",
    "Midbrain dorsal"        : 	"Midbrain",
    "Cortex"                 : 	"Cerebral cortex",
    "Diencephalon"           : 	"Uncategorized",
    "Brain"                  : 	"Uncategorized",
    "Hippocampus"            : 	"Hippocampus",
    "Thalamus"               : 	"Thalamus",
    "Cortex temporal"        : 	"Cerebral cortex",
    "Cortex parietal"        : 	"Cerebral cortex",
    "Caudate+Putamen"        : 	"Cerebral nuclei",
    "Hypothalamus"           : 	"Hypothalamus",
    "Cortex frontal"         : 	"Cerebral cortex",
    "Cortical hem"           : 	"Uncategorized",
    "Midbrain ventral"       : 	"Midbrain",
    "Telencephalon"          : 	"Uncategorized",
    "Cortex occipital"       : 	"Cerebral cortex",
    "Cortex entorhinal"      : 	"Cerebral cortex",
    "Cortex hemisphere B"    : 	"Cerebral cortex",
    "Cortex hemisphere A"    : 	"Cerebral cortex",
    "Frontal Cortex"         : 	"Cerebral cortex",
    "Ganglionic Eminence"    : 	"Ganglionic Eminence",
    "BA9"                    : 	"Cerebral cortex",
    "BA8"                    : 	"Cerebral cortex",
    "BA46"                   : 	"Cerebral cortex",
    "BA10"                   : 	"Cerebral cortex",
    "prefrontal cortex"      : 	"Cerebral cortex",
    "Cerebral cortex"        : 	"Cerebral cortex",
}

adata2.obs["Brain_Region_Unit"] = adata2.obs["Brain_Region"].replace(region_mapping)
adata2.obs["Brain_Region_Unit"].value_counts()

Cerebral cortex        328529
Uncategorized           17950
Cerebelum               10288
Thalamus                 9509
Ganglionic Eminence      7637
Midbrain                 6811
Medulla                  5161
Pons                     2382
Hypothalamus             2303
Hippocampus              1596
Cerebral nuclei           898
Name: Brain_Region_Unit, dtype: int64

In [13]:
adata2.obs['Brain_Region2'] = adata2.obs['Brain_Region'].astype(str)

adata2.obs.loc[(adata2.obs['Brain_Region'] == 'A1C'), 'Brain_Region2'] = 'Primary auditory cortex (BA41/42)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'V1C'), 'Brain_Region2'] = 'Primary visual cortex (BA17)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'CgG'), 'Brain_Region2'] = 'Anterior cingulate gyrus (BA24/32/33)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'MTG'), 'Brain_Region2'] = 'Middle temporal gyrus (BA21)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'S1lm'), 'Brain_Region2'] = 'Primary somatosensory cortex, lower limb (BA3)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'S1ul'), 'Brain_Region2'] = 'Primary somatosensory cortex, upper limb (BA3)'

adata2.obs.loc[(adata2.obs['Brain_Region'] == 'M1'), 'Brain_Region2'] = 'Primary motor cortex (BA4)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'M1lm'), 'Brain_Region2'] = 'Primary motor cortex, lower limb (BA4)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'M1ul'), 'Brain_Region2'] = 'Primary motor cortex, upper limb (BA4)'


# DLPFC is attributed anatomically to BA 9 and 46 and BA 8, 9 and 10.
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'BA9'), 'Brain_Region2'] = 'Dorsolateral prefrontal cortex (BA9)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'BA8'), 'Brain_Region2'] = 'Frontal cortex (BA8)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'Cortex frontal'), 'Brain_Region2'] = 'Frontal cortex (BA8)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'Frontal Cortex'), 'Brain_Region2'] = 'Frontal cortex (BA8)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'prefrontal cortex'), 'Brain_Region2'] = 'Dorsolateral prefrontal cortex (BA9)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'BA10'), 'Brain_Region2'] = 'Anterior prefrontal cortex (BA10)'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'BA46'), 'Brain_Region2'] = 'Dorsolateral prefrontal cortex (BA46)'


adata2.obs.loc[(adata2.obs['Brain_Region'] == 'Brain'), 'Brain_Region2'] = 'Fetal Brain'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'striatum'), 'Brain_Region2'] = 'Striatum'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'telencephalon'), 'Brain_Region2'] = 'Telencephalon'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'ventral part of telencephalon'), 'Brain_Region2'] = 'Telencephalon, ventral part'
adata2.obs.loc[(adata2.obs['Brain_Region'] == 'ganglionic eminence'), 'Brain_Region2'] = 'Ganglionic Eminence'

adata2.obs.Brain_Region2.value_counts()

Dorsolateral prefrontal cortex (BA9)     176463
Primary motor cortex (BA4)                74482
Frontal cortex (BA8)                      47284
Anterior prefrontal cortex (BA10)         11211
Cerebellum                                10288
Thalamus                                   9509
Cortex                                     8026
Dorsolateral prefrontal cortex (BA46)      7879
Ganglionic Eminence                        7637
Midbrain                                   6399
Fetal Brain                                5888
Medulla                                    5161
Forebrain                                  4889
Subcortex                                  3090
Pons                                       2382
Hypothalamus                               2303
Cortex temporal                            2228
Diencephalon                               2114
Hindbrain                                  1969
Hippocampus                                1596
Cortex entorhinal                       

In [14]:
#adata2.write_h5ad("Input_230907_adata_minFilt_random_layers.h5ad") # 10m - 20m