In [None]:
import warnings
import os
import scanpy as sc
import anndata as an
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import scanorama
import tarfile

from urllib import request
from tqdm import tqdm

In [None]:
# sc.logging.print_versions()  # disabled: this call raises an error
# Plot defaults: white figure background, 8x8 figure size
sc.set_figure_params(figsize=(8, 8), facecolor="white")
# Scanpy verbosity levels: 0 = errors, 1 = warnings, 2 = info, 3 = hints
sc.settings.verbosity = 3
# Hide FutureWarning messages so they do not clutter the cell outputs
warnings.simplefilter("ignore", category=FutureWarning)

In [None]:

# Build the ./results/<technology> and ./data/<technology> folder layout
technologies = ['single_cell','spatial_transcriptomics','in_situ_sequencing']
for top_level in ['results','data']:
    for technology in technologies:
        # exist_ok=True makes the cell safe to re-run
        os.makedirs(f'./{top_level}/{technology}', exist_ok=True)

### Download Cell Cycle genes

In [None]:

# Download the Regev lab cell cycle gene list into ./results.
# `request` (urllib) is already imported in the import cell at the top of the
# notebook, so it is not imported again here.
cell_cycle_url = 'https://raw.githubusercontent.com/scverse/scanpy_usage/master/180209_cell_cycle/data/regev_lab_cell_cycle_genes.txt'
cell_cycle_file = './results/regev_lab_cell_cycle_genes.txt'
request.urlretrieve(cell_cycle_url, cell_cycle_file)


### Single cell


In [None]:
# Remote folder that hosts the single-cell count matrices
base_path = 'https://export.uppmax.uu.se/snic2022-23-113/courses/spatial_omics_2022/single_cell/'

# Fetch one count matrix (.h5) per sample into ./data/single_cell
# NOTE(review): the IDs mix '10X' and '10x' prefixes; presumably this matches
# the remote file names exactly - confirm before normalising the case.
for sample in ['10X151w1','10X180w1','10x289w3','10x303w3']:
    file_name = f'Sountoulidis2022_{sample}_counts.h5'
    request.urlretrieve(base_path + file_name, './data/single_cell/' + file_name)

In [None]:
%%bash
# Show the contents of the single-cell data folder as a tree (needs the `tree` command)
tree ./data/single_cell

In [None]:
# Collect the downloaded count matrices and derive one sample id per file.
# os.listdir() returns entries in arbitrary order and lists every entry in the
# folder, so keep only the expected '*_counts.h5' files and sort them: the
# sample order is then reproducible and unrelated files cannot break the read.
obj_list = list()
file_list = sorted(x for x in os.listdir('./data/single_cell') if x.endswith('_counts.h5'))
sample_ids = [x.replace('Sountoulidis2022_','').replace('_counts.h5','') for x in file_list]

# Read each count matrix, tag its cells with the sample id and collect it
for i, (file_name, sample_id) in enumerate(zip(file_list, sample_ids)):
    print(i)
    tmp = sc.read_10x_h5('./data/single_cell/'+file_name)
    # Gene symbols can repeat in 10x files; make the variable names unique
    tmp.var_names_make_unique()
    tmp.obs['sample_id'] = sample_id
    obj_list.append(tmp)

In [None]:
# Concatenate the per-sample objects into one AnnData object;
# merge='same' is the strategy passed to anndata.concat
adata = an.concat(obj_list, merge="same")
# Display a summary of the merged object
adata

In [None]:
# Make sure the output folder exists, then save the merged object as .h5ad
os.makedirs('./results/single_cell', exist_ok=True)
adata.write_h5ad('./results/single_cell/scRNAseq.h5ad')

In [None]:
# Drop the single-cell variables so they are no longer referenced by the kernel
del obj_list, file_list, sample_ids, adata

### Spatial Transcriptomics

In [None]:
# Remote folder that hosts the Visium (spatial transcriptomics) datasets
base_path = 'https://export.uppmax.uu.se/snic2022-23-113/courses/spatial_omics_2022/spatial_transcriptomics/'

for dataset in ['154441','154442']:

    # Local folder for this dataset, including its 'spatial' sub-folder
    os.makedirs('./data/spatial_transcriptomics/' + dataset + '/spatial', exist_ok=True)
    spatial_dir = '/spatial/'

    # Count matrix plus the image / coordinate files kept under 'spatial'
    remote_files = [
        dataset + '/filtered_feature_bc_matrix.h5',
        dataset + spatial_dir + 'tissue_lowres_image.png',
        dataset + spatial_dir + 'tissue_hires_image.png',
        dataset + spatial_dir + 'tissue_positions_list.txt',
        dataset + spatial_dir + 'scalefactors_json.json',
    ]

    for remote_file in remote_files:
        # The positions list is stored locally with a .csv extension instead of
        # .txt (presumably the name the Visium reader looks for - confirm)
        local_file = './data/spatial_transcriptomics/' + remote_file.replace('.txt','.csv')
        request.urlretrieve(base_path + remote_file, local_file)

In [None]:
%%bash
# Show the downloaded Visium files as a tree (needs the `tree` command).
# The data lives under ./data (same folder the download cell writes to), not ../data.
tree ./data/spatial_transcriptomics

In [15]:
# List the Visium sample folders; each folder name doubles as the sample id.
# os.listdir() is unordered and also returns plain files, so keep only
# directories and sort them: a stray file would otherwise be handed to
# sc.read_visium() in the next cell and the sample order would not be stable.
obj_list = list()
visium_root = './data/spatial_transcriptomics'
file_list = sorted(x for x in os.listdir(visium_root) if os.path.isdir(os.path.join(visium_root, x)))
file_list

['154441', '154442']

In [16]:
# Load every Visium sample folder and collect the resulting objects
for idx, sample_dir in enumerate(file_list):
    print(idx)
    visium_sample = sc.read_visium('./data/spatial_transcriptomics/' + sample_dir)
    # Make the variable (gene) names unique within the sample
    visium_sample.var_names_make_unique()
    # The folder name is used as the sample identifier
    visium_sample.obs['sample_id'] = sample_dir
    obj_list.append(visium_sample)

0
reading data\spatial_transcriptomics\154441\filtered_feature_bc_matrix.h5
 (0:00:00)


  utils.warn_names_duplicates("var")


1
reading data\spatial_transcriptomics\154442\filtered_feature_bc_matrix.h5
 (0:00:00)


  utils.warn_names_duplicates("var")


In [17]:
# Merge all samples into a single AnnData object.
# uns_merge="unique" is the strategy passed to anndata.concat for combining .uns.
adata = an.concat( obj_list , merge='same',uns_merge="unique" )
adata.var_names_make_unique()
# The concatenation warns about duplicated *observation* names (the same spot
# barcode can occur in more than one sample), which var_names_make_unique()
# does not address - make the obs names unique as well.
adata.obs_names_make_unique()

  utils.warn_names_duplicates("obs")


In [19]:
# Make sure the output folder exists, then save the merged Visium object as .h5ad
os.makedirs('./results/spatial_transcriptomics', exist_ok=True)
adata.write_h5ad('./results/spatial_transcriptomics/visium.h5ad')

In [20]:
# Drop the Visium variables so they are no longer referenced by the kernel
del obj_list, file_list, adata

### In Situ Sequencing


In [21]:
# Remote folder that hosts the in situ sequencing archives
base_path = "https://export.uppmax.uu.se/snic2022-23-113/courses/spatial_omics_2022/in_situ_sequencing/"

# Local download folder (no error if it already exists)
os.makedirs("./data/in_situ_sequencing", exist_ok=True)

In [22]:
# Fetch both tar.gz archives into ./data/in_situ_sequencing
for archive_name in ["raw_4_tiles_with_z.tar.gz","SpaceTX.tar.gz"]:
    source_url = base_path + archive_name
    print("Downloading " + source_url)
    request.urlretrieve(source_url, "./data/in_situ_sequencing/" + archive_name)

Downloading https://export.uppmax.uu.se/snic2022-23-113/courses/spatial_omics_2022/in_situ_sequencing/raw_4_tiles_with_z.tar.gz
Downloading https://export.uppmax.uu.se/snic2022-23-113/courses/spatial_omics_2022/in_situ_sequencing/SpaceTX.tar.gz


In [None]:
# Extract the downloaded tar.gz archives into ./data/in_situ_sequencing
for tar_file in ["raw_4_tiles_with_z.tar.gz","SpaceTX.tar.gz"]:
    print ("Unzipping " + "./data/in_situ_sequencing/" + tar_file)

    # The context manager closes the archive even if extraction fails part-way
    with tarfile.open("./data/in_situ_sequencing/" + tar_file, "r:gz") as tar:
        progress = tqdm(tar.getmembers())
        for member in progress:
            # Update the bar first so it names the member being extracted
            progress.set_description(f"Extracting {member.name}")
            # NOTE(review): members are extracted without any path checks; the
            # archives are trusted course material here, but consider tarfile's
            # extraction filters if the Python version in use supports them.
            tar.extract(member, path="./data/in_situ_sequencing/")

Unzipping ./data/in_situ_sequencing/raw_4_tiles_with_z.tar.gz
