# Generate bulk RNA metadata table

In [1]:
import lamindb as ln
import pandas as pd
from query import create_hubmap_metadata_df, BulkseqDataset

# Guard: this notebook must only run against the HuBMAP instance. On failure,
# report which instance is actually connected instead of a bare AssertionError.
assert ln.setup.settings.instance.slug == "laminlabs/hubmap", (
    f"connected to {ln.setup.settings.instance.slug!r}, "
    "expected 'laminlabs/hubmap'"
)

# Start tracking this notebook run; it is closed by ln.finish() in the last cell.
ln.track()

[92m→[0m connected lamindb: laminlabs/hubmap
[92m→[0m updated transform description, loaded Transform('0R8HzkUaLFvQ0000'), re-started Run('qbbcfaoX...') at 2025-02-25 17:01:34 UTC
[92m→[0m notebook imports: bionty==1.1.0 lamindb==1.1.0 pandas==2.2.3 query wetlab==1.0.1


In [2]:
# Resolve the stored metadata artifact to a cached file and read it as a
# tab-separated table.
hubmap_metadata_path = ln.Artifact.get("9G4UaeVKSY0zy7SX0000").cache()
hubmap_metadata_df = pd.read_csv(hubmap_metadata_path, sep="\t")

# Keep only the rows whose assay type is bulk RNA.
is_bulk_rna = hubmap_metadata_df["assay_type"].isin(["bulk RNA"])
bulk_metadata = hubmap_metadata_df[is_bulk_rna]

In [3]:
# File types requested per dataset.
# NOTE(review): presumably this is what yields the `expression_matrices_url`
# column shown in the output below; confirm in query.py.
bulk_file_types = ["expression_matrices.h5"]

# Build the per-dataset metadata table for the bulk RNA subset.
bulk_metadata_df = create_hubmap_metadata_df(
    bulk_metadata, file_types=bulk_file_types, dataset_class=BulkseqDataset
)
bulk_metadata_df

Output()

Unnamed: 0,assay,rnaseq_assay_method,title,group_name,consortium,doi,publication_date,status,dataset_type,processing,organ,sample_category,analyte_class,bmi,age,ethnicity,sex,diseases,donor_id,sample_id,ancestor_id,expression_matrices_url
0,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the large intestine of a 67.0...,Stanford TMC,HuBMAP,10.35079/HBM454.ZWSD.895,2020-08-22,protected,RNAseq,raw,LI,block,RNA,30.2,67.0,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM875.RVTT.868,3d742332ef8ca26f34f7ee5b9da3381c,https://assets.hubmapconsortium.org/1c141b1558...
1,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the large intestine of a 67.0...,Stanford TMC,HuBMAP,10.35079/HBM756.GJDX.884,2020-08-22,protected,RNAseq,raw,LI,block,RNA,30.2,67.0,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM488.XJKW.383,d0cb764459fd1d1c6f04fdce2e982831,https://assets.hubmapconsortium.org/9e7b040f23...
2,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the small intestine of a 67.0...,Stanford TMC,HuBMAP,10.35079/HBM656.ZCCL.743,2020-08-22,protected,RNAseq,raw,SI,block,RNA,30.2,67.0,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM229.NKWB.488,35e16f13caab262f446836f63cf4ad42,https://assets.hubmapconsortium.org/a8a603e704...
3,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the small intestine of a 67.0...,Stanford TMC,HuBMAP,10.35079/HBM672.XGNL.299,2020-08-22,protected,RNAseq,raw,SI,block,RNA,30.2,67.0,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM969.HPBJ.369,e1e1c07203b6c09477e8bbaef8dd48bb,https://assets.hubmapconsortium.org/a4f08a41b0...
4,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the large intestine of a 67.0...,Stanford TMC,HuBMAP,10.35079/HBM946.HHKL.578,2020-08-22,protected,RNAseq,raw,LI,block,RNA,30.2,67.0,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM924.ZPQN.563,96d320ccb24a968bc32571895cf1b05e,https://assets.hubmapconsortium.org/52ef7727c3...
5,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the large intestine of a 67.0...,Stanford TMC,HuBMAP,10.35079/HBM954.PCBD.364,2020-08-22,protected,RNAseq,raw,LI,block,RNA,30.2,67.0,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM588.GSHN.453,e4ee92c09a755f8889cb8c37a669e160,https://assets.hubmapconsortium.org/926a7c6557...
6,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the small intestine of a 67.0...,Stanford TMC,HuBMAP,10.35079/HBM855.VBLX.337,2020-08-22,protected,RNAseq,raw,SI,block,RNA,30.2,67.0,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM555.LQJW.397,e80cd8fab25ec8e9cb41e3872e2129c7,https://assets.hubmapconsortium.org/9bb461ae58...
7,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the small intestine of a 67.0...,Stanford TMC,HuBMAP,10.35079/HBM634.HGLT.739,2020-08-22,protected,RNAseq,raw,SI,block,RNA,30.2,67.0,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM649.SRZW.542,8332815c55f72e2aec5610dcb4eab9d7,https://assets.hubmapconsortium.org/acf816bed5...


In [4]:
# Wrap the bulk RNA metadata table as an artifact, then persist it.
# NOTE(review): `sc_af` looks like a name carried over from a single-cell
# notebook; this artifact holds bulk RNA metadata. Consider renaming.
unsaved_artifact = ln.Artifact.from_df(
    bulk_metadata_df,
    key="2025-02-25/meta_bulkrna_original.parquet",
    description="Bulk RNA metadata information to use for ingestion.",
)
sc_af = unsaved_artifact.save()

... uploading j5RQEPmtKuTeo0lO0000.parquet: 100.0%


In [5]:
# Mark the tracked run as finished (counterpart to ln.track() in the first cell).
ln.finish()

[94m•[0m please hit CTRL + s to save the notebook in your editor . [92m✓[0m
[92m→[0m finished Run('qbbcfaoX') after 28s at 2025-02-25 17:02:03 UTC
[92m→[0m go to: https://lamin.ai/laminlabs/hubmap/transform/0R8HzkUaLFvQ0000
[92m→[0m to update your notebook from the CLI, run: lamin save /home/lukas/code/hubmap_registration/generate-bulk-metadata.ipynb
