# Modular workflows

In [1]:
%load_ext autoreload
%autoreload 2
import latenta as la
import lacell as lac
import laflow as laf
import numpy as np

We'll use the same dataset as [before](./1-variables).

In [2]:
# load the example dataset shipped with latenta (`la.data.load_myod1`)
adata = la.data.load_myod1()

In [3]:
import scanpy as sc

# store the current (not yet normalized) state under `adata.raw`
# before the steps below modify `adata`
adata.raw = adata

# per-cell normalization followed by a log(1 + x) transform
# NOTE(review): `normalize_per_cell` is scanpy's legacy normalization API; newer
# scanpy versions recommend `normalize_total` -- confirm the defaults match before switching
sc.pp.normalize_per_cell(adata)
sc.pp.log1p(adata)

# ComBat batch correction (presumably on the `batch` column of `adata.obs`,
# scanpy's default key -- TODO confirm), then PCA
sc.pp.combat(adata)
sc.pp.pca(adata)

# neighborhood graph and UMAP embedding
sc.pp.neighbors(adata)
sc.tl.umap(adata)

# log(1 + x) of the overexpression covariate; read later by the models
# as obs["log_overexpression"]
adata.obs["log_overexpression"] = np.log1p(adata.obs["overexpression"])

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'dissociation' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'gene_overexpressed' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'batch' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'biotype' as categorical
  (abs(g_new - g_old) / g_old).max(), (abs(d_new - d_old) / d_old).max()


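lacell not only contains classes that help with model creation, but also with workflow creation. For example, if we're working with transcriptomics data, we will often inherit from {class}`~lac.transcriptome.TranscriptomeDataset` and {class}`~lac.transcriptome.TranscriptomeModel`. These already contain a number of predefined steps, such as loading the transcriptome from an AnnData object and creating, inferring and interpreting a model, which we can reuse or override in our own workflows.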

In [9]:
import tempfile
import pathlib

# Create a fresh, uniquely named temporary directory to act as the project root.
# `tempfile.mkdtemp()` creates the directory and leaves it in place. Going through
# `tempfile.TemporaryDirectory().name` instead discards the temporary-directory
# object right away, which removes the directory again (with a ResourceWarning)
# and forces a manual `mkdir()` afterwards.
project_root = pathlib.Path(tempfile.mkdtemp())
laf.set_project_root(project_root)  # sets the default project root

In [10]:
# create a transcriptome dataset flow named "dataset"; the name shows up as the
# prefix of its steps in the log (e.g. `dataset/transcriptome/from_adata`)
dataset = lac.transcriptome.TranscriptomeDataset("dataset")

It contains information from one modality, namely the transcriptome:

In [11]:
# display the transcriptome modality before any data has been loaded into it
dataset.transcriptome

In [12]:
# run the `from_adata` step to fill the transcriptome modality from the AnnData object
dataset.transcriptome.from_adata(adata = adata)

▶️ | dataset/transcriptome/from_adata                                                                    
✅ | dataset/transcriptome/from_adata                                                                    


In [13]:
# display the transcriptome modality again, now that `from_adata` has run
dataset.transcriptome

In [14]:
# display the dataset flow as a whole
dataset

In [114]:
class ConstantModel(lac.transcriptome.TranscriptomeModel):
    """Transcriptome workflow whose fold change gets a constant overexpression term.

    Extends the inherited `create_model` step by attaching an `overexpression`
    component (a `la.links.scalar.Constant` link) to the model's fold change, and
    adds an `interpret_overexpression` step that stores two posteriors.
    """

    default_name = "constant"
    dataset = laf.FlowObj()

    # create_model overrides the step inherited from the parent class,
    # so it does not need to be decorated with @laf.Step again
    def create_model(self, output, X, obs, var):
        """Build the parent model and add a constant overexpression effect.

        The parent implementation is reached through `create_model_`. The
        overexpression values are read from `obs["log_overexpression"]`.
        Returns the `output` object produced by the parent step.
        """
        output = super().create_model_(output, X, obs, var)

        base_model = output.model_initial

        # same model definition as before: a fixed covariate linked to the fold change
        log_overexpression = la.Fixed(
            obs["log_overexpression"],
            label="overexpression",
        )
        fold_change = base_model.find("foldchange")

        fold_change.overexpression = la.links.scalar.Constant(
            log_overexpression,
            definition=fold_change.value_definition,
        )
        return output

    # reuse the `model` object declared on the parent class as input of the step below
    model = lac.transcriptome.TranscriptomeModel.model

    # outputs of `interpret_overexpression`, both declared with `db={model}`
    overexpression_observed = laf.LatentaObj(db={model})
    overexpression_causal = laf.LatentaObj(db={model})

    @laf.Step(
        laf.Inputs(model),
        laf.Outputs(overexpression_observed, overexpression_causal),
    )
    def interpret_overexpression(self, output, model):
        """Sample the observed and causal posteriors of the overexpression variable.

        Looks up `overexpression` in `model`, builds a `ScalarObserved` and a
        `ScalarVectorCausal` posterior for it, samples both and stores them on
        `output` as `overexpression_observed` and `overexpression_causal`.
        """
        target = model.find("overexpression")

        observed_posterior = la.posterior.scalar.ScalarObserved(target)
        observed_posterior.sample(5)
        output.overexpression_observed = observed_posterior

        causal_posterior = la.posterior.scalar.ScalarVectorCausal(
            target,
            model,
            interpretable=model.p.mu.expression,
            observed=observed_posterior,
        )
        causal_posterior.sample(10)
        causal_posterior.sample_random(10)
        # NOTE(review): bare attribute access kept on purpose; whether `observed`
        # does any work when accessed cannot be told from here -- confirm before removing
        causal_posterior.observed
        causal_posterior.sample_empirical()
        output.overexpression_causal = causal_posterior

        return output
    
class LinearModel(ConstantModel):
    """Variant of `ConstantModel` that links overexpression to the fold change linearly."""

    # a different default name, so that this model ends up in a different folder
    default_name = "linear"

    def create_model(self, output, X, obs, var):
        """Build the `ConstantModel` model, then swap in a `la.links.scalar.Linear` link."""
        # the inherited step function is available under the same name with a trailing "_"
        output = super().create_model_(output, X, obs, var)

        # the model built by the parent step lives on the output
        base_model = output.model_initial

        # from here on we can adapt the model as we see fit
        fold_change = base_model.find("foldchange")
        log_overexpression = base_model.find("overexpression")

        fold_change.overexpression = la.links.scalar.Linear(
            log_overexpression,
            a=True,
            definition=fold_change.value_definition,
        )

        # the model was adapted in place, so the output itself needs no update
        # before it is returned
        return output


class SplineModel(ConstantModel):
    """Variant of `ConstantModel` that links overexpression to the fold change with a spline."""

    default_name = "spline"

    def create_model(self, output, X, obs, var):
        """Build the `ConstantModel` model, then swap in a `la.links.scalar.Spline` link."""
        output = super().create_model_(output, X, obs, var)

        # the model built by the parent step, adapted in place below
        base_model = output.model_initial

        fold_change = base_model.find("foldchange")
        log_overexpression = base_model.find("overexpression")

        fold_change.overexpression = la.links.scalar.Spline(
            log_overexpression,
            definition=fold_change.value_definition,
        )

        return output

In [115]:
# instantiate the linear workflow on our dataset and run its steps in order;
# the log below also shows a `create_scheme` step, presumably triggered as a
# dependency of `infer_model` -- TODO confirm
model = LinearModel(dataset=dataset)
model.create_model()
model.infer_model()
model.interpret_transcriptome()

▶️ | linear/create_model                                                                                 
✅ | linear/create_model                                                                                 
▶️ | linear/create_scheme                                                                                
✅ | linear/create_scheme                                                                                
▶️ | linear/infer_model                                                                                  


  0%|                                                                                                         …

  0%|                                                                                                         …

✅ | linear/infer_model                                                                                  
▶️ | linear/interpret_transcriptome                                                                      


  0%|          | 0/5 [00:00<?, ?it/s]

✅ | linear/interpret_transcriptome                                                                      


In [116]:
# run the custom step defined on ConstantModel, which LinearModel inherits
model.interpret_overexpression()

▶️ | linear/interpret_overexpression                                                                     


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

✅ | linear/interpret_overexpression                                                                     


In [117]:
# display the model flow after all the steps above have run
model