# Register Bulk RNA

In [1]:
import lamindb as ln
import bionty as bt
import wetlab as wl

[92m→[0m connected lamindb: laminlabs/hubmap


In [2]:
# Pass the transform uid so lamindb keeps identifying this notebook even if the
# file is renamed (this is the call the previous run's log recommended).
ln.track("uZfw94bwLkfO")

[92m→[0m created Transform('uZfw94bwLkfO0000'), started new Run('PGXTm5QW...') at 2025-05-21 13:47:29 UTC
[92m→[0m notebook imports: lamindb==1.5.3 pandas==2.2.3
[94m•[0m recommendation: to identify the notebook across renames, pass the uid: ln.track("uZfw94bwLkfO")


In [3]:
# Load the curated metadata table (this is not the raw API output).
metadata_artifact = ln.Artifact.get("ajd285FK90Z0ZBN10003")
metadata = metadata_artifact.load()

In [4]:
# Preview the first rows to sanity-check the columns used during registration.
metadata.head(n=5)

Unnamed: 0,uuid,assay,rnaseq_assay_method,title,group_name,consortium,doi,publication_date,status,dataset_type,processing,organ,sample_category,analyte_class,bmi,age,ethnicity,sex,diseases,donor_id,sample_id,ancestor_id,expression_matrices_url,tissue,suspension_type,tissue_type
0,2c77b1cdf33dbed3dbfb74e4b578300e,NEBNext Ultra II,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the large intestine of a 67-y...,Stanford TMC,HuBMAP,10.35079/HBM756.GJDX.884,2020-08-22,protected,RNAseq,raw,LI,block,RNA,30.2,67.0,European,female,"[hypertensive disorder, coronary artery disord...",HBM279.WPZP.978,HBM488.XJKW.383,d0cb764459fd1d1c6f04fdce2e982831,https://assets.hubmapconsortium.org/9e7b040f23...,large intestine,bulk,tissue
1,f84c8edc36a65f248c2649ebbe52ad35,NEBNext Ultra II,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the large intestine of a 67-y...,Stanford TMC,HuBMAP,10.35079/HBM454.ZWSD.895,2020-08-22,protected,RNAseq,raw,LI,block,RNA,30.2,67.0,European,female,"[hypertensive disorder, coronary artery disord...",HBM279.WPZP.978,HBM875.RVTT.868,3d742332ef8ca26f34f7ee5b9da3381c,https://assets.hubmapconsortium.org/1c141b1558...,large intestine,bulk,tissue
2,311837bf483627cc967e40092a251096,NEBNext Ultra II,NEBNext Ultra II RNA Library Prep Kit,RNAseq data from the small intestine of a 67-y...,Stanford TMC,HuBMAP,10.35079/HBM656.ZCCL.743,2020-08-22,protected,RNAseq,raw,SI,block,RNA,30.2,67.0,European,female,"[hypertensive disorder, coronary artery disord...",HBM279.WPZP.978,HBM229.NKWB.488,35e16f13caab262f446836f63cf4ad42,https://assets.hubmapconsortium.org/a8a603e704...,small intestine,bulk,tissue
3,c67e024e8fcc25166aac2e2e883208df,KAPA RNA HyperPrep + RiboErase,KAPA RNA HyperPrep Kit with RiboErase,RNAseq data from the placenta of a 34-year-old...,TMC - University of California San Diego focus...,HuBMAP,10.35079/HBM669.BFZR.956,2025-02-26,protected,RNAseq,raw,PL,block,RNA,,,na,unknown,[normal],HBM683.TPQL.293,HBM265.XLDG.695,0236e72a6ce04abc0008ac2a40cf1b35,https://assets.hubmapconsortium.org/7d7ba54632...,placenta,bulk,tissue
4,73507265c43d750e6ec5e48d1e2e0b92,KAPA RNA HyperPrep + RiboErase,KAPA RNA HyperPrep Kit with RiboErase,RNAseq data from the placenta of a 34-year-old...,TMC - University of California San Diego focus...,HuBMAP,10.35079/HBM765.RPVQ.584,2025-02-26,protected,RNAseq,raw,PL,block,RNA,,,na,unknown,[normal],HBM864.XRGJ.897,HBM464.GLHP.566,c97c86db1d6c7f62445c06d70289c87a,https://assets.hubmapconsortium.org/fc80dbe141...,placenta,bulk,tissue


In [None]:
from fsspec.asyn import FSTimeoutError


def annotate_expression_artifact(expr_af, ds, expr_bulk_label, tissue_label):
    """Attach the registry records named in one metadata row to its artifact.

    Args:
        expr_af: The saved artifact created from the row's expression matrix URL.
        ds: The metadata row as a dict keyed by column name.
        expr_bulk_label: The "expr_bulk" ULabel, looked up once by the caller.
        tissue_label: The "tissue" ULabel, looked up once by the caller.

    Every lookup goes through `.one()`, so a metadata value without exactly one
    matching record is expected to raise rather than be skipped silently.
    """
    expr_af.ulabels.add(expr_bulk_label)

    expr_af.references.add(ln.Reference.filter(doi=ds["doi"]).one())
    expr_af.tissues.add(bt.Tissue.filter(name=ds["tissue"]).one())
    expr_af.ethnicities.add(bt.Ethnicity.filter(name=ds["ethnicity"]).one())
    # Sex is stored as a phenotype record.
    expr_af.phenotypes.add(bt.Phenotype.filter(name=ds["sex"]).one())

    for disease_name in ds["diseases"]:
        expr_af.diseases.add(bt.Disease.filter(name=disease_name).one())

    expr_af.ulabels.add(ln.ULabel.filter(name=ds["suspension_type"]).one())
    expr_af.experimental_factors.add(
        bt.ExperimentalFactor.filter(name=ds["assay"]).one()
    )
    expr_af.donors.add(wl.Donor.filter(name=ds["donor_id"]).one())
    expr_af.biosamples.add(wl.Biosample.filter(name=ds["sample_id"]).one())
    # NOTE(review): the label is hard-coded to "tissue" although the metadata has
    # a `tissue_type` column - confirm whether that column should drive it.
    expr_af.ulabels.add(tissue_label)


# These two labels are identical for every row, so query them once instead of
# once per dataset.
expr_bulk_label = ln.ULabel.filter(name="expr_bulk").one()
tissue_label = ln.ULabel.filter(name="tissue").one()

for _, row in metadata.iterrows():
    ds = row.to_dict()
    artifacts = []

    try:
        # Some rows do not have URLs. These are high level dataset views that may have metadata.
        # A missing cell is usually NaN in pandas, which is a truthy float, so
        # require a non-empty string rather than relying on truthiness.
        expr_url = ds["expression_matrices_url"]
        if isinstance(expr_url, str) and expr_url:
            expr_af = ln.Artifact(expr_url, description=f"{ds['title']}").save()
            annotate_expression_artifact(expr_af, ds, expr_bulk_label, tissue_label)
            artifacts.append(expr_af)

        # Reuse the dataset's collection when it exists; only create a new one
        # when there is at least one artifact to put in it.
        ds_cl = ln.Collection.filter(key=ds["uuid"]).one_or_none()

        if ds_cl:
            ds_cl.artifacts.add(*artifacts)
        elif artifacts:
            ds_cl = ln.Collection(
                artifacts, key=ds["uuid"], description=ds["title"]
            ).save()
    except FSTimeoutError:
        # Best effort: report the dataset that timed out and move on to the next row.
        print(f"Timeout error for: {ds['title']} of collection UUID {ds['uuid']}")

In [6]:
# Close the run that ln.track() started at the top of the notebook.
ln.finish()

[94m•[0m please hit CMD + s to save the notebook in your editor  [92m✓[0m
[92m→[0m finished Run('PGXTm5QW') after 1h at 2025-05-21 14:58:17 UTC
[92m→[0m go to: https://lamin.ai/laminlabs/hubmap/transform/uZfw94bwLkfO0000
[92m→[0m to update your notebook from the CLI, run: lamin save /Users/altananamsaraeva/Desktop/Lamin/hubmap-registration/bulk/register-bulk-rna.ipynb
