In [None]:
import os, warnings
from pathlib import Path

import pandas as pd
import scanpy as sc
# Silence every Python warning for the rest of the notebook session.
warnings.filterwarnings('ignore')

In [None]:
# Make sure the top-level download directory exists before any data is fetched.
path = Path('data')
if not path.exists():
    path.mkdir(parents=True)

## 10x Genomics Data

### Lymph node

In [None]:
# Fetch the 10x Genomics Visium lymph-node sample through scanpy's dataset helper.
lymph_node_sample_id = "V1_Human_Lymph_Node"
adata = sc.datasets.visium_sge(sample_id=lymph_node_sample_id)

### Glioblastoma

In [None]:
# Fetch the 10x Genomics Visium glioblastoma sample through scanpy's dataset helper.
# Note: this rebinds `adata`, replacing whatever an earlier cell stored there.
glioblastoma_sample_id = "Parent_Visium_Human_Glioblastoma"
adata = sc.datasets.visium_sge(sample_id=glioblastoma_sample_id)

### CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer

In [None]:
path = Path('data/CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer')
if not os.path.exists(path):
    os.makedirs(path)

!curl -o data/CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer/filtered_feature_bc_matrix.h5 https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer/CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer_filtered_feature_bc_matrix.h5
!curl -o data/CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer/spatial.tar.gz https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer/CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer_spatial.tar.gz
!tar -xzvf data/CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer/spatial.tar.gz -C data/CytAssist_FFPE_Protein_Expression_Human_Breast_Cancer/

### CytAssist_FFPE_Protein_Expression_Human_Tonsil

In [None]:
path = Path('data/CytAssist_FFPE_Protein_Expression_Human_Tonsil')
if not os.path.exists(path):
    os.makedirs(path)

!curl -o data/CytAssist_FFPE_Protein_Expression_Human_Tonsil/filtered_feature_bc_matrix.h5 https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FFPE_Protein_Expression_Human_Tonsil/CytAssist_FFPE_Protein_Expression_Human_Tonsil_filtered_feature_bc_matrix.h5
!curl -o data/CytAssist_FFPE_Protein_Expression_Human_Tonsil/spatial.tar.gz https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FFPE_Protein_Expression_Human_Tonsil/CytAssist_FFPE_Protein_Expression_Human_Tonsil_spatial.tar.gz
!tar -xzvf data/CytAssist_FFPE_Protein_Expression_Human_Tonsil/spatial.tar.gz -C data/CytAssist_FFPE_Protein_Expression_Human_Tonsil/

In [None]:
path = Path('data/CytAssist_FFPE_Protein_Expression_Human_Tonsil_AddOns')
if not os.path.exists(path):
    os.makedirs(path)

!curl -o data/CytAssist_FFPE_Protein_Expression_Human_Tonsil_AddOns/filtered_feature_bc_matrix.h5 https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FFPE_Protein_Expression_Human_Tonsil_AddOns/CytAssist_FFPE_Protein_Expression_Human_Tonsil_AddOns_filtered_feature_bc_matrix.h5
!curl -o data/CytAssist_FFPE_Protein_Expression_Human_Tonsil_AddOns/spatial.tar.gz https://cf.10xgenomics.com/samples/spatial-exp/2.1.0/CytAssist_FFPE_Protein_Expression_Human_Tonsil_AddOns/CytAssist_FFPE_Protein_Expression_Human_Tonsil_AddOns_spatial.tar.gz
!tar -xzvf data/CytAssist_FFPE_Protein_Expression_Human_Tonsil_AddOns/spatial.tar.gz -C data/CytAssist_FFPE_Protein_Expression_Human_Tonsil_AddOns/

## Breast Cancer Data (Wu et al.)

In [3]:
path = Path('data/Breast_Wu')

# This cell changes the working directory, so it must be safe to re-run:
# record the starting directory only the first time, and always resolve `path`
# from it. Otherwise a second run would create data/Breast_Wu/data/Breast_Wu.
# NOTE(review): assumes no other cell binds the name `cwd` to something else - confirm.
if 'cwd' not in globals():
    cwd = os.getcwd() # record the current working directory
os.chdir(cwd) # go back to the recorded starting directory before resolving `path`
os.makedirs(path, exist_ok=True)
os.chdir(path) # change the working directory for more concise commands below

### scRNA-seq

In [None]:
!curl -O -J https://ftp.ncbi.nlm.nih.gov/geo/series/GSE176nnn/GSE176078/suppl/GSE176078_Wu_etal_2021_BRCA_scRNASeq.tar.gz
!tar -xzvf data/Breast_Wu/scRNASeq.tar.gz

In [None]:
# Assemble one AnnData object from the count matrix, the per-cell metadata table
# and the gene list, then save it as scRNA.h5ad.
gene_names = pd.read_csv("count_matrix_genes.tsv", index_col=0, header=None).index.to_list()
cell_metadata = pd.read_csv("metadata.csv", index_col=0)

# The matrix is transposed after reading, before obs/var are attached.
adata_scRNA = sc.read_mtx("count_matrix_sparse.mtx").transpose()
adata_scRNA.obs = cell_metadata
adata_scRNA.var_names = gene_names
adata_scRNA.write_h5ad("scRNA.h5ad")

### ST

In [None]:
!curl -L -O "https://zenodo.org/record/4739739/files/spatial.tar.gz?download=1"
!curl -L -O "https://zenodo.org/record/4739739/files/metadata.tar.gz?download=1"
!curl -L -O "https://zenodo.org/record/4739739/files/filtered_count_matrices.tar.gz?download=1"

In [4]:
!tar -xzvf metadata.tar.gz
!tar -xzvf filtered_count_matrices.tar.gz
!tar -xzvf spatial.tar.gz

tar: Error opening archive: Failed to open 'metadata.tar.gz'
tar: Error opening archive: Failed to open 'filtered_count_matrices.tar.gz'
tar: Error opening archive: Failed to open 'spatial.tar.gz'


### Process ST

In [6]:
import gzip
import json
import shutil
from glob import glob

from matplotlib.image import imread


def _drop_gz_suffix(gz_file):
    """Replace ``<name>.gz`` with ``<name>``.

    The file is decompressed when it starts with the gzip magic bytes; otherwise it
    is only renamed (the previous behaviour, which merely stripped the suffix).
    An existing target file is overwritten.
    """
    gz_file = Path(gz_file)
    target = gz_file.with_suffix("")  # strips only the trailing ".gz"
    with open(gz_file, "rb") as fh:
        is_gzip = fh.read(2) == b"\x1f\x8b"
    if is_gzip:
        with gzip.open(gz_file, "rb") as src, open(target, "wb") as dst:
            shutil.copyfileobj(src, dst)
        gz_file.unlink()
    else:
        gz_file.replace(target)


def _read_lines(file_path):
    """Return the lines of a text file without line terminators.

    ``splitlines`` keeps the final entry even when the file lacks a trailing
    newline (slicing off the last element of ``split("\\n")`` would lose it).
    """
    with open(file_path, "r") as fh:
        return fh.read().splitlines()


def _load_st_sample(sample_id):
    """Build an AnnData object for one sample from the unpacked archives.

    Reads ``filtered_count_matrices/<sample_id>_filtered_count_matrix/`` (matrix,
    barcodes, features) and ``spatial/<sample_id>_spatial/`` (images, scale
    factors, spot positions), relative to the current working directory.

    Parameters
    ----------
    sample_id : str
        Sample identifier used in the directory names.

    Returns
    -------
    The AnnData object with ``uns['spatial'][sample_id]``, ``obsm['spatial']``
    and the position columns ``in_tissue``, ``array_row``, ``array_col`` in ``obs``.
    """
    matrix_dir = f"filtered_count_matrices/{sample_id}_filtered_count_matrix"
    for gz_file in glob(f"{matrix_dir}/*.gz"):
        _drop_gz_suffix(gz_file)

    # The matrix is transposed after reading; barcodes become obs_names and
    # features become var_names.
    adata = sc.read_mtx(f"{matrix_dir}/matrix.mtx").T
    adata.obs_names = _read_lines(f"{matrix_dir}/barcodes.tsv")
    adata.var_names = _read_lines(f"{matrix_dir}/features.tsv")

    spatial_path = 'spatial/' + sample_id + '_spatial/'
    files = dict(
        tissue_positions_file=spatial_path + 'tissue_positions_list.csv',
        scalefactors_json_file=spatial_path + 'scalefactors_json.json',
        hires_image=spatial_path + 'tissue_hires_image.png',
        lowres_image=spatial_path + 'tissue_lowres_image.png',
    )

    # Images and scale factors are stored under uns['spatial'][sample_id].
    adata.uns["spatial"] = {
        sample_id: {
            'images': {
                res: imread(str(files[f'{res}_image'])) for res in ['hires', 'lowres']
            },
            'scalefactors': json.loads(
                Path(files['scalefactors_json_file']).read_bytes()
            ),
        }
    }

    # Read spot coordinates (the CSV has no header row, so names are assigned here).
    # NOTE(review): the pxl_col/pxl_row label order is kept exactly as before - confirm
    # it matches the intended x/y convention for obsm['spatial'].
    positions = pd.read_csv(files['tissue_positions_file'], header=None)
    positions.columns = [
        'barcode',
        'in_tissue',
        'array_row',
        'array_col',
        'pxl_col_in_fullres',
        'pxl_row_in_fullres',
    ]
    positions.index = positions['barcode']
    adata.obs = adata.obs.join(positions, how="left")
    adata.obsm['spatial'] = adata.obs[
        ['pxl_row_in_fullres', 'pxl_col_in_fullres']
    ].to_numpy()
    # Pixel coordinates now live in obsm; drop them and the duplicate barcode column.
    adata.obs = adata.obs.drop(
        columns=['barcode', 'pxl_row_in_fullres', 'pxl_col_in_fullres']
    )
    return adata


# Convert every sample to <sample_id>.h5ad in the working directory.
# `adata` is bound at notebook level so the last sample stays inspectable afterwards.
for sample_id in ["1142243F", "1160920F", "CID4290", "CID4535", "CID4465", "CID44971"]:
    adata = _load_st_sample(sample_id)
    adata.write_h5ad(f"{sample_id}.h5ad")

NameError: name 'imread' is not defined