# Compbio analysis

In [None]:
import lamindb as ln
import scanpy as sc

ln.nb.header()

In [None]:
!lndb login test-user2

## Link experimental metadata to the datasets

Query for datasets ingested from a bioinformatics run:

In [None]:
# Query pipeline-run records through `ln.db.query` (no filter arguments are
# passed) and display the result.
bfx_runs = ln.db.query.pipeline_run()
bfx_runs

In [None]:
# Start from the first pipeline run returned above, take the first dtransform
# whose pipeline_run_id matches it, then select the dobjects that carry that
# dtransform_id.
# NOTE(review): both `[0]` lookups assume the queries return at least one row.
first_bfx_run = bfx_runs[0]
bfx_dtransforms = ln.db.query.dtransform(pipeline_run_id=first_bfx_run.id)
dtransform_id = bfx_dtransforms[0].id
dobjects = ln.db.query.dobject(dtransform_id=dtransform_id)
# Show how many dobjects were selected.
len(dobjects)

Now let's link biometa to the output datasets of the BFX run:

In [None]:
# Link every dobject in `dobjects` to the biometa record with id 2.
# NOTE(review): biometa_id=2 is hard-coded — confirm it is the intended
# record in the instance this notebook runs against.
for dobject in dobjects:
    ln.db.link.biometa(dobject_id=dobject.id, biometa_id=2)

Let's also link the screen results to their biometa.

In [None]:
# Look up dobjects by name and display the matches so they can be checked
# before linking.
dobjects = ln.db.query.dobject(name="schmidt22-crispra-gws-IFNG")
dobjects

In [None]:
# Link the first dobject in `dobjects` to the biometa record with id 1.
# NOTE(review): biometa_id=1 is hard-coded, and `[0]` assumes the name query
# returned at least one dobject.
ln.db.link.biometa(dobject_id=dobjects[0].id, biometa_id=1)

Fill out additional biometa fields for dobjects ingested with feature models.

In [None]:
# Display the jupynb records stored under this notebook name.
ln.db.query.jupynb(name="Track and analyze experimental data")

In [None]:
# Take the last jupynb record with this name, then the last dtransform whose
# jupynb_id matches it.
# NOTE(review): `[-1]` presumably picks the most recent record — confirm the
# ordering of query results.
tracking_notebooks = ln.db.query.jupynb(name="Track and analyze experimental data")
jupynb_id = tracking_notebooks[-1].id
notebook_dtransforms = ln.db.query.dtransform(jupynb_id=jupynb_id)
dtransform_id = notebook_dtransforms[-1].id

In [None]:
# Select the dobjects whose dtransform_id equals the current `dtransform_id`.
dobjects = ln.db.query.dobject(dtransform_id=dtransform_id)

In [None]:
# Display the selected dobjects for a visual check.
dobjects

In [None]:
# Template record: each of its non-None fields is copied onto the biometa
# record linked to every dobject in `dobjects`.
biometa1 = ln.db.query.biometa(id=1)[0]

for dobject in dobjects:
    dobject_biometa = ln.db.query.dobject_biometa(dobject_id=dobject.id)
    # The update below targets a single biometa, so each dobject must be
    # linked to exactly one.
    assert len(dobject_biometa) == 1, (
        f"expected exactly one biometa link for dobject {dobject.id}, "
        f"found {len(dobject_biometa)}"
    )
    biometa_id = dobject_biometa[0].biometa_id
    biometa = ln.db.query.biometa(id=biometa_id)[0]
    insert_dict = {}
    for key in biometa.dict():
        # Never overwrite the target record's own id or featureset link.
        if key in ("id", "featureset_id"):
            continue
        value = getattr(biometa1, key)
        # Only fields that are set on the template are carried over.
        if value is not None:
            insert_dict[key] = value

    ln.db.update.biometa(key=biometa.id, **insert_dict)

## Load in the raw data generated from the BFX pipeline

In [None]:
# Select dobjects whose file suffix is ".h5ad" and display the matches.
h5ads = ln.db.query.dobject(file_suffix=".h5ad")
h5ads

In [None]:
# Look up the dobject(s) named "schmidt22_crispra_gws_IFNG_hits" and display
# the matches.
hits = ln.db.query.dobject(name="schmidt22_crispra_gws_IFNG_hits")
hits

In [None]:
# Load the first ".h5ad" dobject into memory.
# NOTE(review): the cells below pass `adata` to scanpy and read
# `adata.var_names`, so this is presumably an AnnData object — confirm.
adata = ln.db.load(h5ads[0])

In [None]:
# Load the first screen-hits dobject into memory.
# NOTE(review): it is later indexed as `screen_hits["id"].values`, so this is
# presumably a pandas DataFrame with an "id" column — confirm.
screen_hits = ln.db.load(hits[0])

## Perform single cell analysis, integrating the CRISPR screen data

In [None]:
# Score `adata` on the genes that appear both in `adata.var_names` and in the
# "id" column of the screen hits. No `score_name` is passed, so scanpy's
# default output column name ("score") applies.
hit_ids = screen_hits["id"].values
shared_genes = adata.var_names.intersection(hit_ids).tolist()
sc.tl.score_genes(adata, shared_genes)

In [None]:
# UMAP plot colored by the "cluster_name" annotation.
# NOTE(review): no UMAP is computed in this notebook, so the embedding is
# assumed to be stored in the loaded object already — confirm.
sc.pl.umap(adata, color="cluster_name")

In [None]:
# UMAP plot colored by "score", the default output name of
# `sc.tl.score_genes` when no `score_name` is given.
sc.pl.umap(adata, color="score")

In [None]:
# Matrix plot of "score" grouped by "cluster_name".
# NOTE(review): "score" is passed through `var_names`; whether a per-cell
# annotation (rather than a gene name) is accepted there depends on the
# scanpy version — confirm against the pinned version.
sc.pl.matrixplot(adata, groupby="cluster_name", var_names=["score"])

## Ingest the processed dataset into LaminDB

In [None]:
# Add `adata` to the ingest queue under the name
# "schmidt22_perturbseq_analyzed".
# NOTE(review): presumably this only stages the object and the following
# `ln.db.ingest.commit()` call performs the write — confirm.
ln.db.ingest.add(adata, name="schmidt22_perturbseq_analyzed")

In [None]:
# Commit the pending ingest.
# NOTE(review): presumably writes everything previously passed to
# `ln.db.ingest.add` — confirm.
ln.db.ingest.commit()