# Compbio analysis

In [None]:
import lamindb as ln
import scanpy as sc

# Call lamindb's notebook header helper before any other lamindb usage.
# NOTE(review): presumably this registers/prints the notebook's tracking
# metadata — confirm against the lamindb version in use.
ln.nb.header()

In [None]:
# Shell escape: run the `lndb` CLI to log in as the hard-coded user "test-user2".
!lndb login test-user2

## Link experimental metadata to the datasets

Query for datasets ingested from a bioinformatics run:

In [None]:
# Fetch every pipeline-run record; the bare name on the last line makes the
# notebook display the resulting list.
bfx_runs = ln.db.query.pipeline_run().all()
bfx_runs

In [None]:
# Resolve the data transform attached to the first pipeline run in `bfx_runs`.
first_bfx_run = bfx_runs[0]
bfx_dtransform = ln.db.query.dtransform(pipeline_run_id=first_bfx_run.id).first()
dtransform_id = bfx_dtransform.id

# Collect every dobject registered under that transform and show how many there are.
dobjects = ln.db.query.dobject(dtransform_id=dtransform_id).all()
len(dobjects)

Now let's link biometa to the bfx run output datasets:

In [None]:
# Find the readout record first, then the single biometa row that points at it.
scrna_readout = ln.db.query.readout(name="single-cell RNA sequencing").one()
biometa = ln.db.query.biometa(readout_id=scrna_readout.id).one()

# Link that biometa to each dataset produced by the bfx run.
for dobject in dobjects:
    ln.db.link.biometa(dobject_id=dobject.id, biometa_id=biometa.id)

Let's also link the screen results dataset to its biometa.

In [None]:
# Look up the screen-results dobject by its exact name and display it.
# NOTE(review): `.one()` presumably fails unless exactly one record matches — confirm.
dobject = ln.db.query.dobject(name="schmidt22-crispra-gws-IFNG").one()
dobject

In [None]:
# Find the "interferon gamma" readout, then the single biometa row that points at it.
ifng_readout = ln.db.query.readout(name="interferon gamma").one()
biometa = ln.db.query.biometa(readout_id=ifng_readout.id).one()

# Attach that biometa to the dobject currently bound to `dobject`.
ln.db.link.biometa(dobject_id=dobject.id, biometa_id=biometa.id)

Fill out additional biometa fields for dobjects ingested with feature models.

In [None]:
# Display every jupynb record with this name, to inspect the matches before
# one of them is picked.
ln.db.query.jupynb(name="Track and analyze experimental data").all()

In [None]:
# Several notebook records can share this name; use the last one returned.
matching_notebooks = ln.db.query.jupynb(
    name="Track and analyze experimental data"
).all()
jupynb_id = matching_notebooks[-1].id

# Likewise, use the last data transform returned for that notebook.
notebook_dtransforms = ln.db.query.dtransform(jupynb_id=jupynb_id).all()
dtransform_id = notebook_dtransforms[-1].id

In [None]:
# Rebind `dobjects` to the dobjects registered under the current
# `dtransform_id`, then display them.
dobjects = ln.db.query.dobject(dtransform_id=dtransform_id).all()
dobjects

In [None]:
# Template record: the biometa tied to the "interferon gamma" readout. Its
# populated fields are copied onto the biometa of each dobject in `dobjects`.
biometa1 = ln.db.query.biometa(
    readout_id=ln.db.query.readout(name="interferon gamma").one().id
).one()

# Fields that are never copied from the template onto a target record.
protected_keys = {"id", "featureset_id"}

for dobject in dobjects:
    # NOTE: the spelling `doject_biometa` is kept on purpose — a later cell
    # reads this name.
    doject_biometa = ln.db.query.dobject_biometa(dobject_id=dobject.id).one()
    biometa = ln.db.query.biometa(id=doject_biometa.biometa_id).one()

    # Collect the template's non-None values for every field of the target.
    insert_dict = {}
    for key in biometa.dict():
        if key in protected_keys:
            continue
        # Read the attribute once with getattr() rather than calling the
        # __getattribute__ dunder directly (and twice).
        value = getattr(biometa1, key)
        if value is not None:
            insert_dict[key] = value

    ln.db.update.biometa(key=biometa.id, **insert_dict)

In [None]:
# Display the biometa referenced by `doject_biometa`, i.e. the value left over
# from the final iteration of the preceding loop — only that one record is shown.
ln.db.query.biometa(id=doject_biometa.biometa_id).all()

## Load in the raw data generated from the BFX pipeline

In [None]:
# Take the first dobject whose suffix is ".h5ad" and display it.
# NOTE(review): if several .h5ad dobjects exist, which one `.first()` returns
# depends on the query's ordering — confirm this is the intended dataset.
h5ad = ln.db.query.dobject(suffix=".h5ad").first()
h5ad

In [None]:
# Look up the screen-hits dobject by its exact name and display it.
hits = ln.db.query.dobject(name="schmidt22_crispra_gws_IFNG_hits").one()
hits

In [None]:
# Load the selected .h5ad dobject into memory.
# NOTE(review): the scanpy calls later in this notebook treat `adata` as an
# AnnData object — presumably that is what `ln.db.load` returns here.
adata = ln.db.load(h5ad)

In [None]:
# Load the screen-hits dobject into memory.
# NOTE(review): later code indexes it with ["id"] and reads `.values`, so it is
# presumably a pandas DataFrame with an "id" column — confirm.
screen_hits = ln.db.load(hits)

## Perform single-cell analysis, integrating the CRISPR screen data

In [None]:
# Keep only the screen-hit ids that also occur among the variable names of `adata`.
shared_genes = adata.var_names.intersection(screen_hits["id"].values)

# Score every cell on that shared gene list.
sc.tl.score_genes(adata, shared_genes.tolist())

In [None]:
# Plot the UMAP embedding colored by "cluster_name".
# NOTE(review): no cell in this notebook computes a UMAP or sets "cluster_name",
# so both are assumed to be stored in the loaded dataset — confirm.
sc.pl.umap(adata, color="cluster_name")

In [None]:
# Plot the UMAP embedding colored by "score".
# NOTE(review): presumably the column written by the earlier
# `sc.tl.score_genes` call under its default output name — confirm.
sc.pl.umap(adata, color="score")

In [None]:
# Matrix plot of "score" with cells grouped by "cluster_name".
sc.pl.matrixplot(adata, groupby="cluster_name", var_names=["score"])

## Ingest the processed dataset into LaminDB

In [None]:
# Register the analyzed `adata` for ingestion under the given name.
# NOTE(review): presumably this only stages the object until
# `ln.db.ingest.commit()` runs — confirm.
ln.db.ingest.add(adata, name="schmidt22_perturbseq_analyzed")

In [None]:
# Commit the pending ingestion.
# NOTE(review): presumably this writes everything added via `ln.db.ingest.add` — confirm.
ln.db.ingest.commit()