# Generate bulk RNA metadata table

In [1]:
import os
import sys

# Put the parent directory on the module search path before the local import
# below; presumably that is where the `query` module lives — verify against
# the repository layout.
sys.path.append(os.path.abspath("../"))

import pandas as pd
import lamindb as ln

from query import create_hubmap_metadata_df, BulkseqDataset

# Guard: stop here unless the configured lamindb instance is laminlabs/hubmap,
# so the artifact written later in this notebook cannot land in another instance.
assert ln.setup.settings.instance.slug == "laminlabs/hubmap"

[92m→[0m connected lamindb: laminlabs/hubmap


In [2]:
# Begin lamindb tracking for this notebook (the logged output reports the
# Transform and Run that were loaded).
ln.track()

[92m→[0m loaded Transform('0R8HzkUaLFvQ0002'), re-started Run('gZNU3EkS...') at 2025-05-13 13:51:26 UTC
[92m→[0m notebook imports: lamindb==1.4.0 pandas==2.2.3 query


In [3]:
# Resolve the tab-separated source table, stored as an artifact in
# laminlabs/hubmap, to its cached copy and load it into a DataFrame.
source_table_path = (
    ln.Artifact.using("laminlabs/hubmap").get("9G4UaeVKSY0zy7SX0001").cache()
)
df = pd.read_csv(source_table_path, sep="\t")

# Assay labels to collect metadata for; add further labels here when more
# assays should be included.
bulk_assays = ["bulk RNA", "bulk-RNA"]

In [4]:
# Build the bulk RNA metadata table from the source table, restricted to the
# assay labels in `bulk_assays` and to the expression-matrix file type.
# NOTE(review): the exact filtering semantics live in the local `query` module.
bulk_metadata_df = create_hubmap_metadata_df(
    df,
    file_types=["expression_matrices.h5"],
    dataset_class=BulkseqDataset,
    assay_filter=bulk_assays,
)

# Display the resulting table for inspection.
bulk_metadata_df

Output()

Unnamed: 0,uuid,assay,rnaseq_assay_method,title,group_name,consortium,doi,publication_date,status,dataset_type,processing,organ,sample_category,analyte_class,bmi,age,ethnicity,sex,diseases,donor_id,sample_id,ancestor_id,expression_matrices_url
0,2c77b1cdf33dbed3dbfb74e4b578300e,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the large intestine of a 67-y...,Stanford TMC,HuBMAP,10.35079/HBM756.GJDX.884,2020-08-22,protected,RNAseq,raw,LI,block,RNA,30.2,67,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM488.XJKW.383,d0cb764459fd1d1c6f04fdce2e982831,https://assets.hubmapconsortium.org/9e7b040f23...
1,f84c8edc36a65f248c2649ebbe52ad35,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the large intestine of a 67-y...,Stanford TMC,HuBMAP,10.35079/HBM454.ZWSD.895,2020-08-22,protected,RNAseq,raw,LI,block,RNA,30.2,67,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM875.RVTT.868,3d742332ef8ca26f34f7ee5b9da3381c,https://assets.hubmapconsortium.org/1c141b1558...
2,311837bf483627cc967e40092a251096,bulk RNA,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the small intestine of a 67-y...,Stanford TMC,HuBMAP,10.35079/HBM656.ZCCL.743,2020-08-22,protected,RNAseq,raw,SI,block,RNA,30.2,67,White,Female,"[Hypertension, Coronary Artery Disease, Cardia...",HBM279.WPZP.978,HBM229.NKWB.488,35e16f13caab262f446836f63cf4ad42,https://assets.hubmapconsortium.org/a8a603e704...
3,c67e024e8fcc25166aac2e2e883208df,bulk-RNA,KAPA RNA HyperPrep Kit with RiboErase,RNAseq data from the placenta of a 34-year-old...,TMC - University of California San Diego focus...,HuBMAP,10.35079/HBM669.BFZR.956,2025-02-26,protected,RNAseq,raw,PL,block,RNA,,,,,[normal],HBM683.TPQL.293,HBM265.XLDG.695,0236e72a6ce04abc0008ac2a40cf1b35,https://assets.hubmapconsortium.org/7d7ba54632...
4,73507265c43d750e6ec5e48d1e2e0b92,bulk-RNA,KAPA RNA HyperPrep Kit with RiboErase,RNAseq data from the placenta of a 34-year-old...,TMC - University of California San Diego focus...,HuBMAP,10.35079/HBM765.RPVQ.584,2025-02-26,protected,RNAseq,raw,PL,block,RNA,,,,,[normal],HBM864.XRGJ.897,HBM464.GLHP.566,c97c86db1d6c7f62445c06d70289c87a,https://assets.hubmapconsortium.org/fc80dbe141...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,c0793a20fabf88133ada53963ba55350,bulk-RNA,KAPA RNA HyperPrep Kit with RiboErase,RNAseq data from the placenta of a 31-year-old...,TMC - University of California San Diego focus...,HuBMAP,10.35079/HBM699.ZVNG.629,2025-02-26,protected,RNAseq,raw,PL,block,RNA,,,,,[normal],HBM775.JDDD.365,HBM442.RZLD.848,88aaa8a785e4ca9adcf1e91e0f4f6e2c,https://assets.hubmapconsortium.org/9c7ee9633d...
76,34a2e913183f1a7487d47380032aa961,bulk-RNA,KAPA RNA HyperPrep Kit with RiboErase,RNAseq data from the placenta of a 31-year-old...,TMC - University of California San Diego focus...,HuBMAP,10.35079/HBM652.WSKR.343,2025-02-26,protected,RNAseq,raw,PL,block,RNA,,,,,[normal],HBM775.JDDD.365,HBM755.BGWJ.457,5877befa123aa9820159ade8a1bd556f,https://assets.hubmapconsortium.org/00b7898d06...
77,cad68e5e910ca90bda79cc0d6aa586d1,bulk-RNA,KAPA RNA HyperPrep Kit with RiboErase,RNAseq data from the placenta of a 30-year-old...,TMC - University of California San Diego focus...,HuBMAP,10.35079/HBM549.BBBQ.445,2025-02-26,protected,RNAseq,raw,PL,block,RNA,,,,,[normal],HBM635.WSMC.463,HBM582.MSJK.752,68c57420822f7b10d2502fba61ccc02f,https://assets.hubmapconsortium.org/12c79d9141...
78,16fa3bd20ca6920aa32259022994c5b0,bulk-RNA,KAPA RNA HyperPrep Kit with RiboErase,RNAseq data from the placenta of a 35-year-old...,TMC - University of California San Diego focus...,HuBMAP,10.35079/HBM554.ZSNK.294,2025-02-26,protected,RNAseq,raw,PL,block,RNA,,,,,[normal],HBM989.SWJP.679,HBM446.RBTR.472,2aaf73824d7dacb4047c3b82684240f3,https://assets.hubmapconsortium.org/f66fc48f8d...


In [5]:
# Wrap the metadata table in an artifact under a fixed key, then persist it.
bulk_af = ln.Artifact.from_df(
    bulk_metadata_df,
    key="hubmap_metadata/meta_bulkrna_original.parquet",
    description="Bulk RNA metadata information to use for ingestion.",
)
# Keep whatever `save()` returns bound to `bulk_af`, exactly as the chained
# call did.
bulk_af = bulk_af.save()

[92m→[0m creating new artifact version for key='hubmap_metadata/meta_bulkrna_original.parquet' (storage: 's3://lamin-us-west-2/sznqFqn7xUoI')
... uploading j5RQEPmtKuTeo0lO0002.parquet: 100.0%
[93m![0m replacing the existing cache path /Users/altananamsaraeva/Library/Caches/lamindb/lamin-us-west-2/sznqFqn7xUoI/hubmap_metadata/meta_bulkrna_original.parquet


In [6]:
# Close the tracked run; per the logged output, lamindb waits for the notebook
# to be saved in the editor before finishing.
ln.finish()

[94m•[0m please hit CMD + s to save the notebook in your editor .... still waiting .....
......... [92m✓[0m
[93m![0m returning transform with same hash: Transform(uid='0R8HzkUaLFvQ0001', is_latest=False, key='generate-bulk-metadata.ipynb', description='Generate bulk RNA metadata table', type='notebook', hash='0NX4TAjfoTAuQoaLA1uTnw', space_id=1, created_by_id=5, created_at=2025-05-12 12:23:32 UTC)
[92m→[0m finished Run('gZNU3EkS') after 2m at 2025-05-13 13:54:07 UTC
[92m→[0m go to: https://lamin.ai/laminlabs/hubmap/transform/0R8HzkUaLFvQ0001
[92m→[0m to update your notebook from the CLI, run: lamin save /Users/altananamsaraeva/Desktop/Lamin/hubmap-registration/bulk/generate-bulk-metadata.ipynb
