In [1]:
from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
import warnings

# Silence both numba deprecation categories (current and pending).
for numba_warning in (NumbaDeprecationWarning, NumbaPendingDeprecationWarning):
    warnings.simplefilter('ignore', category=numba_warning)

In [3]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
from anndata import AnnData
import copy
import matplotlib.pyplot as plt

# Scanpy logging level: 0 = errors, 1 = warnings, 2 = info, 3 = hints.
sc.settings.verbosity = 3
# Print the versions of scanpy and its main dependencies for the record.
sc.logging.print_header()
# Default figure styling applied to plots drawn after this point.
sc.settings.set_figure_params(
    dpi=80,
    facecolor='white',
)

scanpy==1.9.3 anndata==0.9.1 umap==0.5.3 numpy==1.24.3 scipy==1.10.1 pandas==2.0.2 scikit-learn==1.2.2 statsmodels==0.14.0 python-igraph==0.10.4 louvain==0.8.0 pynndescent==0.5.10


# Read WT P7 Data

In [4]:
# Read the WT P7 sample S1 matrix and swap its axes in one step; barcodes are
# assigned to obs_names and gene names to var_names further down.
adata_p7_1 = sc.read_mtx('/path/to/P7/S1/matrix.mtx.gz').transpose()

In [6]:
# Read the WT P7 sample S2 matrix and swap its axes in one step; barcodes are
# assigned to obs_names and gene names to var_names further down.
adata_p7_2 = sc.read_mtx('/path/to/P7/S2/matrix.mtx.gz').transpose()

In [8]:
# Read the WT P7 sample S3 matrix and swap its axes in one step; barcodes are
# assigned to obs_names and gene names to var_names further down.
adata_p7_3 = sc.read_mtx('/path/to/P7/S3/matrix.mtx.gz').transpose()

In [10]:
# Barcodes (single unnamed column, labelled 'Barcode') become the cell names.
p7_s1_barcodes = pd.read_csv('/path/to/P7/S1/barcodes.tsv.gz', sep='\t', names=['Barcode'])
adata_p7_1.obs_names = p7_s1_barcodes['Barcode'].values
# The second features column (labelled 'Name') supplies the variable names.
p7_s1_features = pd.read_csv('/path/to/P7/S1/features.tsv.gz', sep='\t', names=['Ensembl','Name','Gene Expression'])
adata_p7_1.var_names = p7_s1_features['Name'].values

In [11]:
# Barcodes (single unnamed column, labelled 'Barcode') become the cell names.
p7_s2_barcodes = pd.read_csv('/path/to/P7/S2/barcodes.tsv.gz', sep='\t', names=['Barcode'])
adata_p7_2.obs_names = p7_s2_barcodes['Barcode'].values
# The second features column (labelled 'Name') supplies the variable names.
p7_s2_features = pd.read_csv('/path/to/P7/S2/features.tsv.gz', sep='\t', names=['Ensembl','Name','Gene Expression'])
adata_p7_2.var_names = p7_s2_features['Name'].values

In [12]:
# Barcodes (single unnamed column, labelled 'Barcode') become the cell names.
p7_s3_barcodes = pd.read_csv('/path/to/P7/S3/barcodes.tsv.gz', sep='\t', names=['Barcode'])
adata_p7_3.obs_names = p7_s3_barcodes['Barcode'].values
# The second features column (labelled 'Name') supplies the variable names.
p7_s3_features = pd.read_csv('/path/to/P7/S3/features.tsv.gz', sep='\t', names=['Ensembl','Name','Gene Expression'])
adata_p7_3.var_names = p7_s3_features['Name'].values

In [14]:
# Make the variable names unique in each WT P7 replicate before the objects
# are concatenated.
for p7_replicate in (adata_p7_1, adata_p7_2, adata_p7_3):
    p7_replicate.var_names_make_unique()

In [45]:
# Append the sample number to every barcode — presumably so that cell names
# stay distinct once the three replicates are concatenated. The new names are
# assigned through `obs_names`, the same AnnData accessor used when the
# barcodes were first loaded, rather than by writing to `obs.index` directly.
# NOTE: re-running this cell appends the suffix a second time.
new_cells_1 = [barcode + '_1' for barcode in adata_p7_1.obs_names]
adata_p7_1.obs_names = new_cells_1

new_cells_2 = [barcode + '_2' for barcode in adata_p7_2.obs_names]
adata_p7_2.obs_names = new_cells_2

new_cells_3 = [barcode + '_3' for barcode in adata_p7_3.obs_names]
adata_p7_3.obs_names = new_cells_3

In [46]:
# Merge the three WT P7 replicates. Passing a mapping makes its keys the
# per-object labels, which are written to a new obs column named 'Batch'.
adata_p7 = ad.concat(
    {
        'Batch1': adata_p7_1,
        'Batch2': adata_p7_2,
        'Batch3': adata_p7_3,
    },
    label='Batch',
)

In [47]:
# Save the concatenated WT P7 object to an .h5ad file.
p7_raw_h5ad = 'path/to/adatas/P7_adata_raw.h5ad'
adata_p7.write(p7_raw_h5ad)

# Read β2KO P7 Data

In [48]:
# Read the β2KO P7 sample S1 matrix and swap its axes in one step; barcodes
# are assigned to obs_names and gene names to var_names further down.
adata_p7b2_1 = sc.read_mtx('/path/to/P7b2/S1/matrix.mtx.gz').transpose()

In [52]:
# Read the β2KO P7 sample S2 matrix and swap its axes in one step; barcodes
# are assigned to obs_names and gene names to var_names further down.
adata_p7b2_2 = sc.read_mtx('/path/to/P7b2/S2/matrix.mtx.gz').transpose()

In [50]:
# Read the β2KO P7 sample S1_2 matrix and swap its axes in one step; barcodes
# are assigned to obs_names and gene names to var_names further down.
adata_p7b2_1_2 = sc.read_mtx('/path/to/P7b2/S1_2/matrix.mtx.gz').transpose()

In [54]:
# Read the β2KO P7 sample S2_2 matrix and swap its axes in one step; barcodes
# are assigned to obs_names and gene names to var_names further down.
adata_p7b2_2_2 = sc.read_mtx('/path/to/P7b2/S2_2/matrix.mtx.gz').transpose()

In [56]:
# Barcodes (single unnamed column, labelled 'Barcode') become the cell names.
p7b2_s1_barcodes = pd.read_csv('/path/to/P7b2/S1/barcodes.tsv.gz', sep='\t', names=['Barcode'])
adata_p7b2_1.obs_names = p7b2_s1_barcodes['Barcode'].values
# The second features column (labelled 'Name') supplies the variable names.
p7b2_s1_features = pd.read_csv('/path/to/P7b2/S1/features.tsv.gz', sep='\t', names=['Ensembl','Name','Gene Expression'])
adata_p7b2_1.var_names = p7b2_s1_features['Name'].values

In [58]:
# Barcodes (single unnamed column, labelled 'Barcode') become the cell names.
p7b2_s2_barcodes = pd.read_csv('/path/to/P7b2/S2/barcodes.tsv.gz', sep='\t', names=['Barcode'])
adata_p7b2_2.obs_names = p7b2_s2_barcodes['Barcode'].values
# The second features column (labelled 'Name') supplies the variable names.
p7b2_s2_features = pd.read_csv('/path/to/P7b2/S2/features.tsv.gz', sep='\t', names=['Ensembl','Name','Gene Expression'])
adata_p7b2_2.var_names = p7b2_s2_features['Name'].values

In [57]:
# Barcodes (single unnamed column, labelled 'Barcode') become the cell names.
p7b2_s1_2_barcodes = pd.read_csv('/path/to/P7b2/S1_2/barcodes.tsv.gz', sep='\t', names=['Barcode'])
adata_p7b2_1_2.obs_names = p7b2_s1_2_barcodes['Barcode'].values
# The second features column (labelled 'Name') supplies the variable names.
p7b2_s1_2_features = pd.read_csv('/path/to/P7b2/S1_2/features.tsv.gz', sep='\t', names=['Ensembl','Name','Gene Expression'])
adata_p7b2_1_2.var_names = p7b2_s1_2_features['Name'].values

In [59]:
# Barcodes (single unnamed column, labelled 'Barcode') become the cell names.
p7b2_s2_2_barcodes = pd.read_csv('/path/to/P7b2/S2_2/barcodes.tsv.gz', sep='\t', names=['Barcode'])
adata_p7b2_2_2.obs_names = p7b2_s2_2_barcodes['Barcode'].values
# The second features column (labelled 'Name') supplies the variable names.
p7b2_s2_2_features = pd.read_csv('/path/to/P7b2/S2_2/features.tsv.gz', sep='\t', names=['Ensembl','Name','Gene Expression'])
adata_p7b2_2_2.var_names = p7b2_s2_2_features['Name'].values

In [60]:
# Make the variable names unique in each β2KO P7 sample before the objects
# are concatenated.
for p7b2_sample in (adata_p7b2_1, adata_p7b2_2, adata_p7b2_1_2, adata_p7b2_2_2):
    p7b2_sample.var_names_make_unique()

In [61]:
# Append a per-sample tag to every barcode — presumably so that cell names
# stay distinct once the four β2KO samples are concatenated. The new names are
# assigned through `obs_names`, the same AnnData accessor used when the
# barcodes were first loaded, rather than by writing to `obs.index` directly.
# NOTE: re-running this cell appends the suffix a second time.
new_cells_1 = [barcode + '_1' for barcode in adata_p7b2_1.obs_names]
adata_p7b2_1.obs_names = new_cells_1

new_cells_2 = [barcode + '_2' for barcode in adata_p7b2_2.obs_names]
adata_p7b2_2.obs_names = new_cells_2

new_cells_1_2 = [barcode + '_1_2' for barcode in adata_p7b2_1_2.obs_names]
adata_p7b2_1_2.obs_names = new_cells_1_2

new_cells_2_2 = [barcode + '_2_2' for barcode in adata_p7b2_2_2.obs_names]
adata_p7b2_2_2.obs_names = new_cells_2_2

In [62]:
# Merge the four β2KO P7 samples. Passing a mapping makes its keys the
# per-object labels, which are written to a new obs column named 'Batch'.
adata_p7b2 = ad.concat(
    {
        'Batch1': adata_p7b2_1,
        'Batch2': adata_p7b2_2,
        'Batch1_2': adata_p7b2_1_2,
        'Batch2_2': adata_p7b2_2_2,
    },
    label='Batch',
)

In [63]:
# Save the concatenated β2KO P7 object to an .h5ad file.
p7b2_raw_h5ad = 'path/to/adatas/P7b2_adata_raw.h5ad'
adata_p7b2.write(p7b2_raw_h5ad)

# Read P5 Data

In [32]:
# Read the P5 matrix as stored on disk.
# NOTE(review): unlike the P7 and P56 matrices, this one is not transposed
# after reading — presumably the file is already laid out cells x genes;
# confirm against the shapes of P5cells.csv and P5genes.csv.
p5_counts_path = 'path/to/P5/P5counts.mtx'
adata_p5 = sc.read_mtx(p5_counts_path)

In [34]:
# Gene and cell labels are each taken from column 'x' of their CSV file.
p5_genes = pd.read_csv('path/to/P5/P5genes.csv')
adata_p5.var_names = p5_genes['x'].values
p5_cells = pd.read_csv('path/to/P5/P5cells.csv')
adata_p5.obs_names = p5_cells['x'].values

In [35]:
# Per-cell batch labels (column 'x'), stored in obs as a categorical column.
p5_batch = pd.read_csv('path/to/P5/P5batch.csv')
adata_p5.obs['Batch'] = pd.Categorical(p5_batch['x'].values)

In [42]:
# Save the annotated P5 object to an .h5ad file.
p5_raw_h5ad = 'path/to/adatas/P5_adata_raw.h5ad'
adata_p5.write(p5_raw_h5ad)

# Read P56 Data

In [37]:
# Read the P56 atlas matrix as stored on disk (its axes are swapped in the
# following cell).
p56_atlas_path = 'path/to/P56/atlas.mtx'
adata_p56 = sc.read_mtx(p56_atlas_path)

In [38]:
# Swap the axes of the freshly read matrix. This cell rebinds `adata_p56` to
# its own transpose, so it is not idempotent: run it exactly once after the
# read cell (a second run would flip the axes back).
adata_p56 = adata_p56.transpose()

In [39]:
# Variable and observation labels are each taken from column 'x' of their CSV
# file.
p56_vars = pd.read_csv('path/to/P56/atlas_vars.csv')
adata_p56.var_names = p56_vars['x'].values
p56_obs = pd.read_csv('path/to/P56/atlas_obs.csv')
adata_p56.obs_names = p56_obs['x'].values

In [40]:
# Per-cell batch labels (column 'x'), stored in obs as a categorical column.
p56_batch = pd.read_csv('path/to/P56/atlas_batch.csv')
adata_p56.obs['Batch'] = pd.Categorical(p56_batch['x'].values)

In [43]:
# Save the annotated P56 object to an .h5ad file.
p56_raw_h5ad = 'path/to/adatas/P56_adata_raw.h5ad'
adata_p56.write(p56_raw_h5ad)