# Metadata integration across all datasets

This notebook assembles the sample metadata for Supplementary Table 4 (sample information) from the four MERFISH datasets generated in this project: coronal sections for aging, sagittal sections for aging, coronal sections for exercise, and coronal sections for partial reprogramming.

**Associated manuscript figures/tables:**
- Supplementary Table 4

**Inputs required:**
- `data/integrated_aging_coronal_celltyped_regioned_raw.h5ad` - AnnData object for coronal sections dataset
- `data/integrated_aging_sagittal_clustered_registered_raw.h5ad` - AnnData object for sagittal sections dataset
- `data/integrated_exercise_coronal_celltyped_regioned_raw.h5ad` - AnnData object for exercise dataset
- `data/integrated_reprogramming_coronal_celltyped_regioned_raw.h5ad` - AnnData object for reprogramming dataset

**Conda environment used**: `requirements/merfish.txt`

In [1]:
import scanpy as sc
import pandas as pd
import os
import numpy as np

In [2]:
# Per-mouse metadata fields, read from `adata.obs` for every dataset below.
# They also become the leading column names of the exported table.
cols = [
    "mouse_id",
    "age",
    "slide_id",
    "cohort",
]

# Accumulator shared by all dataset cells: one list per mouse is appended here.
rows = []

In [3]:
# Coronal aging dataset. `adata` is rebound by each dataset-loading cell in turn,
# so the per-mouse loop that follows always summarizes the most recently loaded object.
coronal_aging_h5ad = "data/integrated_aging_coronal_celltyped_regioned_raw.h5ad"
adata = sc.read_h5ad(coronal_aging_h5ad)

In [4]:
# Append one summary row per mouse in the currently loaded dataset.
for mouse in np.unique(adata.obs.mouse_id):
    # Restrict the object to the cells belonging to this mouse.
    mouse_adata = adata[adata.obs.mouse_id == mouse]
    mouse_obs = mouse_adata.obs
    # `shape` is unpacked so the counts can be stored as (genes, cells), matching
    # the "num_genes", "num_cells" column order used when the table is built.
    n_cells, n_genes = mouse_adata.shape
    # Only the first cell's value is kept for each field.
    # NOTE(review): presumably every field in `cols` is constant within a mouse -- confirm.
    rows.append([mouse_obs[field].values[0] for field in cols] + [n_genes, n_cells])

In [5]:
# Sagittal aging
# NOTE(review): the "Inputs required" list at the top of this notebook names
# `data/integrated_aging_sagittal_clustered_registered_raw.h5ad` (with a `_raw`
# suffix), but this cell reads the path below, which has no such suffix.
# TODO: confirm which file is intended and make the header and this path agree.
adata = sc.read_h5ad("data/integrated_aging_sagittal_clustered_registered.h5ad")

In [6]:
# Append one summary row per mouse in the currently loaded dataset.
for mouse in np.unique(adata.obs.mouse_id):
    # Restrict the object to the cells belonging to this mouse.
    mouse_adata = adata[adata.obs.mouse_id == mouse]
    mouse_obs = mouse_adata.obs
    # `shape` is unpacked so the counts can be stored as (genes, cells), matching
    # the "num_genes", "num_cells" column order used when the table is built.
    n_cells, n_genes = mouse_adata.shape
    # Only the first cell's value is kept for each field.
    # NOTE(review): presumably every field in `cols` is constant within a mouse -- confirm.
    rows.append([mouse_obs[field].values[0] for field in cols] + [n_genes, n_cells])

In [7]:
# Exercise dataset. Rebinding `adata` here means the loop in the next cell
# summarizes this dataset rather than the one loaded before it.
exercise_h5ad = "data/integrated_exercise_coronal_celltyped_regioned_raw.h5ad"
adata = sc.read_h5ad(exercise_h5ad)

In [8]:
# Append one summary row per mouse in the currently loaded dataset.
for mouse in np.unique(adata.obs.mouse_id):
    # Restrict the object to the cells belonging to this mouse.
    mouse_adata = adata[adata.obs.mouse_id == mouse]
    mouse_obs = mouse_adata.obs
    # `shape` is unpacked so the counts can be stored as (genes, cells), matching
    # the "num_genes", "num_cells" column order used when the table is built.
    n_cells, n_genes = mouse_adata.shape
    # Only the first cell's value is kept for each field.
    # NOTE(review): presumably every field in `cols` is constant within a mouse -- confirm.
    rows.append([mouse_obs[field].values[0] for field in cols] + [n_genes, n_cells])

In [9]:
# Reprogramming
# NOTE(review): the "Inputs required" list at the top of this notebook names
# `data/integrated_reprogramming_coronal_celltyped_regioned_raw.h5ad`, but this
# cell reads a differently named file (`..._coronal_clustered.h5ad`).
# TODO: confirm which file is intended and make the header and this path agree.
adata = sc.read_h5ad("data/integrated_reprogramming_coronal_clustered.h5ad")

In [10]:
# Append one summary row per mouse in the currently loaded dataset.
for mouse in np.unique(adata.obs.mouse_id):
    # Restrict the object to the cells belonging to this mouse.
    mouse_adata = adata[adata.obs.mouse_id == mouse]
    mouse_obs = mouse_adata.obs
    # `shape` is unpacked so the counts can be stored as (genes, cells), matching
    # the "num_genes", "num_cells" column order used when the table is built.
    n_cells, n_genes = mouse_adata.shape
    # Only the first cell's value is kept for each field.
    # NOTE(review): presumably every field in `cols` is constant within a mouse -- confirm.
    rows.append([mouse_obs[field].values[0] for field in cols] + [n_genes, n_cells])

In [13]:
# Assemble the accumulated per-mouse rows into a single table. The two trailing
# columns hold the gene and cell counts appended after the `cols` fields in each row.
df = pd.DataFrame(rows, columns=cols + ["num_genes", "num_cells"])

# Create the output folder first so the export also succeeds on a fresh checkout
# where `supp_tables/` does not exist yet.
os.makedirs("supp_tables", exist_ok=True)

# index=False states explicitly that the row index is not written to the file.
df.to_csv("supp_tables/TableS4_SampleInformation.csv", index=False)