In [1]:
from pathlib import Path
import requests

import scanpy as sc
import pandas as pd
import numpy as np

# Local cache directory for the downloaded files; created on first run.
data_dir = Path("..") / "data"
data_dir.mkdir(parents=True, exist_ok=True)

# Maps the local file stem (saved as "<stem>.txt" in data_dir) to its download URL.
url_dict = {
    "hepatocyte_meta": "https://zenodo.org/records/6035873/files/Single_cell_Meta_data.txt?download=1",
    "hepatocyte_counts": "https://zenodo.org/records/6035873/files/Single_cell_UMI_COUNT.txt?download=1",
}

# Download every file that is not cached yet. Existing files are reused as-is.
for dataset_name, url in url_dict.items():
    file_path = data_dir / f"{dataset_name}.txt"
    if file_path.exists():
        print(f"Loading existing data from {file_path}")
        continue

    print(f"File not found. Downloading from {url}...")
    # Stream into a sibling ".part" file and rename only after the download
    # completed, so an interrupted transfer never leaves a truncated file that
    # the exists() check above would accept on the next run.
    partial_path = file_path.with_name(file_path.name + ".part")
    try:
        # The context manager releases the connection even if an error occurs;
        # the timeout keeps a stalled server from hanging the cell forever.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()  # Raise an error for bad HTTP status codes
            with open(partial_path, "wb") as out_handle:
                for chunk in response.iter_content(chunk_size=8192):
                    out_handle.write(chunk)
        partial_path.replace(file_path)
    finally:
        # No-op after a successful rename; removes leftovers after a failure.
        partial_path.unlink(missing_ok=True)

# Count table: after indexing by "Gene_Name", rows are genes and the remaining
# columns are cell barcodes.
counts_df = pd.read_csv(data_dir / "hepatocyte_counts.txt")
counts_df = counts_df.set_index("Gene_Name")

# Per-cell metadata, restricted to barcodes that have a column in the counts.
cell_meta = pd.read_csv(data_dir / "hepatocyte_meta.txt")
in_counts = cell_meta["Cell_barcode"].isin(counts_df.columns.to_list())
cell_meta = cell_meta.loc[in_counts].set_index("Cell_barcode")

# AnnData expects observations (cells) as rows, so the matrix is transposed;
# the metadata rows are reordered to follow the count-table column order.
expression = counts_df.values.copy().T.astype(np.float32)
gene_table = pd.DataFrame(index=counts_df.index.copy())
cell_table = cell_meta.loc[counts_df.columns.to_numpy(), :].copy()
adata = sc.AnnData(X=expression, var=gene_table, obs=cell_table)

# Drop the temporaries so only `adata` remains in the namespace.
del counts_df, cell_meta, in_counts, expression, gene_table, cell_table

# Keep only cells with time_point 0 and cell_type "Hep".
adata = adata[
    (adata.obs["time_point"] == 0) & (adata.obs["cell_type"] == "Hep"), :
].copy()

# Remove genes whose total count over the remaining cells is zero.
adata = adata[:, adata.X.sum(axis=0) > 0].copy()
adata

File not found. Downloading from https://zenodo.org/records/6035873/files/Single_cell_Meta_data.txt?download=1...
File not found. Downloading from https://zenodo.org/records/6035873/files/Single_cell_UMI_COUNT.txt?download=1...


AnnData object with n_obs × n_vars = 1999 × 11706
    obs: 'cell_type', 'zone', 'run_id', 'time_point', 'UMAP_X', 'UMAP_Y'