# Gouin et al.

空間トランスクリプトーム

Gouin, K.H., Ing, N., Plummer, J.T. et al. An N-Cadherin 2 expressing epithelial cell subpopulation predicts response to surgery, chemotherapy and immunotherapy in bladder cancer. Nat Commun 12, 4906 (2021). https://doi.org/10.1038/s41467-021-25103-7

In [1]:
import os
import sys
import gzip
import numpy as np
import pandas as pd
pd.options.display.max_columns = None

import scanpy as sc
import anndata
import squidpy as sq

## Data download

In [2]:
!mkdir -p Gouin-2021-Nat_Commun-Bladder_cancer/single_cell
!mkdir -p Gouin-2021-Nat_Commun-Bladder_cancer/spatial

### snRNA-seq

Single-nuclei RNA-seq data is reposited in Gene Expression Omnibus with accession number [GSE169379](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE169379)

#### Metadata

In [3]:
!wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE169nnn/GSE169379/matrix/GSE169379_series_matrix.txt.gz -O Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_series_matrix.txt.gz

--2021-12-26 18:51:06--  ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE169nnn/GSE169379/matrix/GSE169379_series_matrix.txt.gz
           => 'Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_series_matrix.txt.gz'
Resolving ftp.ncbi.nlm.nih.gov... 130.14.250.7, 165.112.9.229
Connecting to ftp.ncbi.nlm.nih.gov|130.14.250.7|:21... connected.
Logging in as anonymous ... Logged in!
==> SYST ... done.    ==> PWD ... done.
==> TYPE I ... done.  ==> CWD (1) /geo/series/GSE169nnn/GSE169379/matrix ... done.
==> SIZE GSE169379_series_matrix.txt.gz ... 6404
==> PASV ... done.    ==> RETR GSE169379_series_matrix.txt.gz ... done.
Length: 6404 (6.3K) (unauthoritative)


2021-12-26 18:51:08 (179 KB/s) - 'Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_series_matrix.txt.gz' saved [6404]



In [12]:
n_sample = 30
with gzip.open("Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_series_matrix.txt.gz", "rt") as fi:
    meta = []
    for i,j in enumerate(fi):
        dat = j.strip('\n!').split("\t")
        if len(dat) != n_sample + 1: continue
        dat = [d.strip('"') for d in dat]
        meta.append(dat)
meta = pd.DataFrame(meta).set_index(0).T.set_index("ID_REF").iloc[:,[0, 8, 9]]
meta.columns = ["sample_title", "organism", "tissue"]
meta = meta.assign(sample_id = lambda x: [y.split(": ")[0] for y in x.sample_title],
                   donor_id = lambda x: [y.split(": ")[1] for y in x.sample_title],
                   tissue = lambda x: [y.split(": ")[1] for y in x.tissue])
meta


Unnamed: 0_level_0,sample_title,organism,tissue,sample_id,donor_id
ID_REF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
GSM5199001,B1246-GEX: MIBC_rxn1246,Homo sapiens,muscle-invasive bladder cancer (MIBC),B1246-GEX,MIBC_rxn1246
GSM5199003,B1246-HTO: MIBC_rxn1246,Homo sapiens,muscle-invasive bladder cancer (MIBC),B1246-HTO,MIBC_rxn1246
GSM5199004,B1-GEX: MIBC_rxn1,Homo sapiens,muscle-invasive bladder cancer (MIBC),B1-GEX,MIBC_rxn1
GSM5199005,B1-HTO: MIBC_rxn1,Homo sapiens,muscle-invasive bladder cancer (MIBC),B1-HTO,MIBC_rxn1
GSM5199006,B2-GEX: MIBC_rxn2,Homo sapiens,muscle-invasive bladder cancer (MIBC),B2-GEX,MIBC_rxn2
GSM5199007,B2-HTO: MIBC_rxn2,Homo sapiens,muscle-invasive bladder cancer (MIBC),B2-HTO,MIBC_rxn2
GSM5199008,B3-GEX: MIBC_rxn3,Homo sapiens,muscle-invasive bladder cancer (MIBC),B3-GEX,MIBC_rxn3
GSM5199009,B3-HTO: MIBC_rxn3,Homo sapiens,muscle-invasive bladder cancer (MIBC),B3-HTO,MIBC_rxn3
GSM5199011,B4-GEX: MIBC_rxn4,Homo sapiens,muscle-invasive bladder cancer (MIBC),B4-GEX,MIBC_rxn4
GSM5199012,B4-HTO: MIBC_rxn4,Homo sapiens,muscle-invasive bladder cancer (MIBC),B4-HTO,MIBC_rxn4


#### Expression

In [13]:
!wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE169nnn/GSE169379/suppl/GSE169379_MIBC_snSeq.h5ad.gz -O Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_MIBC_snSeq.h5ad.gz
!gunzip Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_MIBC_snSeq.h5ad.gz

--2021-12-26 18:59:04--  ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE169nnn/GSE169379/suppl/GSE169379_MIBC_snSeq.h5ad.gz
           => 'GSE169379_MIBC_snSeq.h5ad.gz'
Resolving ftp.ncbi.nlm.nih.gov... 130.14.250.13, 165.112.9.229
Connecting to ftp.ncbi.nlm.nih.gov|130.14.250.13|:21... connected.
Logging in as anonymous ... Logged in!
==> SYST ... done.    ==> PWD ... done.
==> TYPE I ... done.  ==> CWD (1) /geo/series/GSE169nnn/GSE169379/suppl ... done.
==> SIZE GSE169379_MIBC_snSeq.h5ad.gz ... 473158739
==> PASV ... done.    ==> RETR GSE169379_MIBC_snSeq.h5ad.gz ... done.
Length: 473158739 (451M) (unauthoritative)


2021-12-26 19:00:31 (5.33 MB/s) - 'GSE169379_MIBC_snSeq.h5ad.gz' saved [473158739]



In [23]:
!wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE169nnn/GSE169379/suppl/GSE169379_non_tumor_snSeq.h5ad.gz -O Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_non_tumor_snSeq.h5ad.gz
!gunzip Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_non_tumor_snSeq.h5ad.gz

--2021-12-26 19:16:36--  ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE169nnn/GSE169379/suppl/GSE169379_non_tumor_snSeq.h5ad.gz
           => 'Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_non_tumor_snSeq.h5ad.gz'
Resolving ftp.ncbi.nlm.nih.gov... 130.14.250.13, 165.112.9.229
Connecting to ftp.ncbi.nlm.nih.gov|130.14.250.13|:21... connected.
Logging in as anonymous ... Logged in!
==> SYST ... done.    ==> PWD ... done.
==> TYPE I ... done.  ==> CWD (1) /geo/series/GSE169nnn/GSE169379/suppl ... done.
==> SIZE GSE169379_non_tumor_snSeq.h5ad.gz ... 87784694
==> PASV ... done.    ==> RETR GSE169379_non_tumor_snSeq.h5ad.gz ... done.
Length: 87784694 (84M) (unauthoritative)


2021-12-26 19:16:56 (4.66 MB/s) - 'Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_non_tumor_snSeq.h5ad.gz' saved [87784694]



In [None]:
adata = sc.read_h5ad("Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_MIBC_snSeq.h5ad")
adata

In [None]:
adata = sc.read_h5ad("Gouin-2021-Nat_Commun-Bladder_cancer/single_cell/GSE169379_non_tumor_snSeq.h5ad")
adata

### Spatial transcriptomics

Spatial transcriptome data is reposited in Gene Expression Omnibus with accession number [GSE171351](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE171351)

In [27]:
!wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE171nnn/GSE171351/suppl/GSE171351_combined_visium.h5ad.gz -O Gouin-2021-Nat_Commun-Bladder_cancer/spatial/GSE171351_combined_visium.h5ad.gz
!gunzip Gouin-2021-Nat_Commun-Bladder_cancer/spatial/GSE171351_combined_visium.h5ad.gz

--2021-12-26 19:23:01--  ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE171nnn/GSE171351/suppl/GSE171351_combined_visium.h5ad.gz
           => 'Gouin-2021-Nat_Commun-Bladder_cancer/spatial/GSE171351_combined_visium.h5ad.gz'
Resolving ftp.ncbi.nlm.nih.gov... 165.112.9.229, 130.14.250.13
Connecting to ftp.ncbi.nlm.nih.gov|165.112.9.229|:21... connected.
Logging in as anonymous ... Logged in!
==> SYST ... done.    ==> PWD ... done.
==> TYPE I ... done.  ==> CWD (1) /geo/series/GSE171nnn/GSE171351/suppl ... done.
==> SIZE GSE171351_combined_visium.h5ad.gz ... 48963566
==> PASV ... done.    ==> RETR GSE171351_combined_visium.h5ad.gz ... done.
Length: 48963566 (47M) (unauthoritative)


2021-12-26 19:23:13 (4.92 MB/s) - 'Gouin-2021-Nat_Commun-Bladder_cancer/spatial/GSE171351_combined_visium.h5ad.gz' saved [48963566]



## Analysis

In [2]:
adata = sc.read_h5ad("Gouin-2021-Nat_Commun-Bladder_cancer/spatial/GSE171351_combined_visium.h5ad")
adata

AnnData object with n_obs × n_vars = 4086 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'sampleID', 'Patient', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'n_counts'
    var: 'gene_ids', 'feature_types', 'genome', 'mt', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
    uns: 'spatial'
    obsm: 'spatial'

In [34]:
adata.obs

Unnamed: 0,in_tissue,array_row,array_col,sampleID,Patient,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,total_counts_mt,log1p_total_counts_mt,pct_counts_mt,n_counts
AAACCGTTCGTCCAGG-1-0-0-0,1,52,42,A1,Bladder1204,1221,7.108244,1696.0,7.436617,18.396226,26.356132,38.148585,57.488208,48.0,3.891820,2.830189,1696.0
AAACGAGACGGTTGAT-1-0-0-0,1,35,79,A1,Bladder1204,3257,8.088869,6578.0,8.791638,16.950441,23.532989,32.335056,47.978109,113.0,4.736198,1.717847,6578.0
AAACTGCTGGCTCCAA-1-0-0-0,1,45,67,A1,Bladder1204,3385,8.127405,6858.0,8.833317,19.072616,25.590551,33.712453,48.235637,159.0,5.075174,2.318460,6858.0
AAAGGCTACGGACCAT-1-0-0-0,1,62,54,A1,Bladder1204,2392,7.780303,4399.0,8.389359,19.686292,27.301659,36.758354,52.716526,125.0,4.836282,2.841555,4399.0
AAAGGCTCTCGCGCCG-1-0-0-0,1,55,55,A1,Bladder1204,1963,7.582738,3184.0,8.066208,20.069095,27.386935,36.777638,54.051508,59.0,4.094345,1.853015,3184.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTGCAGCCACGTCA-1-1,1,60,74,D1,Bladder371,1031,6.939254,1596.0,7.375882,26.190476,34.774436,47.305764,66.729323,129.0,4.867535,8.082706,1596.0
TTGTGGCCCTGACAGT-1-1,1,18,60,D1,Bladder371,3102,8.040125,7971.0,8.983690,22.682223,32.003513,43.206624,58.650107,305.0,5.723585,3.826370,7971.0
TTGTGTTTCCCGAAAG-1-1,1,51,59,D1,Bladder371,1288,7.161622,2108.0,7.653969,23.292220,32.874763,44.165085,62.618596,120.0,4.795791,5.692600,2108.0
TTGTTAGCAAATTCGA-1-1,1,22,42,D1,Bladder371,2112,7.655864,4357.0,8.379768,22.125316,31.237090,42.299748,59.742942,245.0,5.505332,5.623136,4357.0


In [3]:
adata.var["mt"] = adata.var_names.str.startswith("MT-")
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)
#sc.pp.filter_cells(adata, min_counts=5000)
adata = adata[adata.obs.in_tissue != 0, :]
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)

  view_to_actual(adata)


In [1]:
genes = ["in_tissue"]
for id in ["A1", "B1", "C1", "D1"]:
    sc.pl.spatial(adata[adata.obs.sampleID == id, :], 
                  img_key="lowres", 
                  color=genes,
                  alpha=1, 
                  library_id=id)

NameError: name 'sc' is not defined