# Download Cross-tissue Atlas 2022 (MED)

In [1]:
install.packages("httr")

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done



In [8]:
library(httr)

# Define the list of URLs
urls <- c(
  "https://singlecell.broadinstitute.org/single_cell/data/public/SCP738/cross-tissue-single-cell-stromal-atlas-identifies-shared-pathological-fibroblast-phenotypes-in-four-chronic-inflammatory-diseases?filename=exprs.mtx",
  "https://singlecell.broadinstitute.org/single_cell/data/public/SCP738/cross-tissue-single-cell-stromal-atlas-identifies-shared-pathological-fibroblast-phenotypes-in-four-chronic-inflammatory-diseases?filename=exprs_barcodes.tsv",
  "https://singlecell.broadinstitute.org/single_cell/data/public/SCP738/cross-tissue-single-cell-stromal-atlas-identifies-shared-pathological-fibroblast-phenotypes-in-four-chronic-inflammatory-diseases?filename=exprs_genes.tsv",
  "https://singlecell.broadinstitute.org/single_cell/data/public/SCP738/cross-tissue-single-cell-stromal-atlas-identifies-shared-pathological-fibroblast-phenotypes-in-four-chronic-inflammatory-diseases?filename=metaData.txt",
  "https://singlecell.broadinstitute.org/single_cell/studies/5e442d10771a5b0edfae4a75/manifest"
)
# Define your target directory
target_dir <- "/data/norman/southark/external_datasets/fibroblast_atlas_med_2022"  # replace with your desired directory

# Ensure the target directory exists
if (!dir.exists(target_dir)) {
  dir.create(target_dir, recursive = TRUE)
}

# Define the path for the downloaded file
file_path <- file.path(target_dir, "exprs.mtx")

# Download the file
response <- GET(url, write_disk(file_path, overwrite = TRUE), config = config(ssl_verifypeer = FALSE))

# Check the status of the download
if (status_code(response) == 200) {
  message("File successfully downloaded to ", file_path)
} else {
  message("Failed to download file. Status: ", status_code(response))
}

File successfully downloaded: cross-tissue-single-cell-stromal-atlas-identifies-shared-pathological-fibroblast-phenotypes-in-four-chronic-inflammatory-diseases?filename=exprs.mtx

File successfully downloaded: cross-tissue-single-cell-stromal-atlas-identifies-shared-pathological-fibroblast-phenotypes-in-four-chronic-inflammatory-diseases?filename=exprs_barcodes.tsv

File successfully downloaded: cross-tissue-single-cell-stromal-atlas-identifies-shared-pathological-fibroblast-phenotypes-in-four-chronic-inflammatory-diseases?filename=exprs_genes.tsv

File successfully downloaded: cross-tissue-single-cell-stromal-atlas-identifies-shared-pathological-fibroblast-phenotypes-in-four-chronic-inflammatory-diseases?filename=metaData.txt

File successfully downloaded: manifest



### Switch to python environment

In [10]:
import scanpy as sc
import pandas as pd
import scipy.io
import numpy as np


data_dir = "/data/norman/southark/external_datasets/fibroblast_atlas_med_2022/"

# Load the expression matrix
expression_matrix = scipy.io.mmread(data_dir+"exprs.mtx").T.tocsr()

# Load the barcodes
barcodes = pd.read_csv(data_dir+"exprs_barcodes.tsv", header=None, sep="\t")
barcodes = barcodes[0].values

# Load the genes
genes = pd.read_csv(data_dir+"exprs_genes.tsv", header=None, sep="\t")
genes = genes[0].values

# Load the metadata
metadata = pd.read_csv(data_dir+"metaData.txt", sep="\t", index_col=0)

# Verify that the barcodes in the metadata match the barcodes in the expression matrix
metadata = metadata.loc[barcodes]

# Create an AnnData object
adata = sc.AnnData(X=expression_matrix, obs=metadata, var=pd.DataFrame(index=genes))

# Verify the AnnData object
adata

  metadata = pd.read_csv(data_dir+"metaData.txt", sep="\t", index_col=0)


AnnData object with n_obs × n_vars = 102441 × 19952
    obs: 'species', 'species__ontology_label', 'disease', 'disease__ontology_label', 'organ', 'organ__ontology_label', 'library_preparation_protocol', 'library_preparation_protocol__ontology_label', 'sex', 'biosample_id', 'donor_id', 'gut_inflam_pathology', 'inflam_score', 'nUMI', 'nGene', 'percent_mito', 'sample_type', 'cell_type_within_tissue', 'cell_type_integrated', 'experimental_condition'

In [None]:
# Identify columns with mixed types
def find_mixed_type_columns(df):
    mixed_columns = []
    for col in df.columns:
        if df[col].apply(type).nunique() > 1:
            mixed_columns.append(col)
    return mixed_columns

mixed_columns = find_mixed_type_columns(metadata)
print("Columns with mixed types:", mixed_columns)

Columns with mixed types: ['gut_inflam_pathology', 'inflam_score', 'nUMI', 'nGene', 'percent_mito', 'cell_type_within_tissue', 'cell_type_integrated']

In [39]:
adata.obs.igut_inflam_pathology = adata.obs.gut_inflam_pathology.astype('category')
adata.obs.inflam_score = adata.obs.inflam_score.astype('float')
adata.obs.nUMI = adata.obs.nUMI.astype('int')
adata.obs.nGene = adata.obs.nGene.astype('int')
adata.obs.percent_mito = adata.obs.percent_mito.astype('float')
adata.obs.cell_type_within_tissue = adata.obs.cell_type_within_tissue.astype('category')
adata.obs.cell_type_integrated = adata.obs.cell_type_integrated.astype('category')

In [40]:
adata.obs

Unnamed: 0_level_0,species,species__ontology_label,disease,disease__ontology_label,organ,organ__ontology_label,library_preparation_protocol,library_preparation_protocol__ontology_label,sex,biosample_id,donor_id,gut_inflam_pathology,inflam_score,nUMI,nGene,percent_mito,sample_type,cell_type_within_tissue,cell_type_integrated,experimental_condition
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AAACGCTCAGTTGTCA_SalivaryGland3,NCBITaxon:9606,Homo sapiens,MONDO:0006733,dry eye syndrome,UBERON:0001830,minor salivary gland,EFO:0009922,10x 3' v3,female,SalivaryGland3,SalivaryGland3,,0.381334,6313,2335,0.089656,primary,CD34,FBLN1+ C5,primary
AAACGCTTCCTTATAC_SalivaryGland3,NCBITaxon:9606,Homo sapiens,MONDO:0006733,dry eye syndrome,UBERON:0001830,minor salivary gland,EFO:0009922,10x 3' v3,female,SalivaryGland3,SalivaryGland3,,0.381334,11020,3113,0.093013,primary,CCL19,C2,primary
AAAGGATTCTGCATGA_SalivaryGland3,NCBITaxon:9606,Homo sapiens,MONDO:0006733,dry eye syndrome,UBERON:0001830,minor salivary gland,EFO:0009922,10x 3' v3,female,SalivaryGland3,SalivaryGland3,,0.381334,5787,2367,0.140142,primary,CD34,SPARC+COL3A1+ C4,primary
AAAGGATTCTTTCCAA_SalivaryGland3,NCBITaxon:9606,Homo sapiens,MONDO:0006733,dry eye syndrome,UBERON:0001830,minor salivary gland,EFO:0009922,10x 3' v3,female,SalivaryGland3,SalivaryGland3,,0.381334,5306,1992,0.174331,primary,CD34,C1,primary
AAAGGGCAGAGTGTTA_SalivaryGland3,NCBITaxon:9606,Homo sapiens,MONDO:0006733,dry eye syndrome,UBERON:0001830,minor salivary gland,EFO:0009922,10x 3' v3,female,SalivaryGland3,SalivaryGland3,,0.381334,7568,2417,0.074789,primary,CD34,FBLN1+ C5,primary
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTTGCAGGCACTC_Lung_C_Control,NCBITaxon:9606,Homo sapiens,MONDO:0015925,interstitial lung disease,UBERON:0002048,lung,EFO:0009922,10x 3' v3,unknown,Lung_C_Control,Lung_C,,,8630,3025,0.066280,cultured,,,Control
TTTGTTGCAGTGACCC_Lung_A_Tcells,NCBITaxon:9606,Homo sapiens,MONDO:0015925,interstitial lung disease,UBERON:0002048,lung,EFO:0009922,10x 3' v3,unknown,Lung_A_Tcells,Lung_A,,,10177,3289,0.084504,cultured,,,Tcells
TTTGTTGCATGGCTAT_Lung_B_ECs,NCBITaxon:9606,Homo sapiens,MONDO:0015925,interstitial lung disease,UBERON:0002048,lung,EFO:0009922,10x 3' v3,unknown,Lung_B_ECs,Lung_B,,,8504,1541,0.107008,cultured,,,ECs
TTTGTTGGTCAGGTAG_Lung_A_ECs,NCBITaxon:9606,Homo sapiens,MONDO:0015925,interstitial lung disease,UBERON:0002048,lung,EFO:0009922,10x 3' v3,unknown,Lung_A_ECs,Lung_A,,,24209,5026,0.081581,cultured,,,ECs


In [41]:
# Save the AnnData object to disk
adata.write("/data/norman/southark/external_datasets/fibroblast_atlas_med_2022/med_2022_normalized_expr.h5ad")

In [42]:
# Load the AnnData object from disk
loaded_adata = sc.read("/data/norman/southark/external_datasets/fibroblast_atlas_med_2022/med_2022_normalized_expr.h5ad")

# Verify the loaded object
loaded_adata

AnnData object with n_obs × n_vars = 102441 × 19952
    obs: 'species', 'species__ontology_label', 'disease', 'disease__ontology_label', 'organ', 'organ__ontology_label', 'library_preparation_protocol', 'library_preparation_protocol__ontology_label', 'sex', 'biosample_id', 'donor_id', 'gut_inflam_pathology', 'inflam_score', 'nUMI', 'nGene', 'percent_mito', 'sample_type', 'cell_type_within_tissue', 'cell_type_integrated', 'experimental_condition'