This notebook helps you set up a simple experiment that runs CoRegTor on your own data. To get started, fork this notebook and edit the configuration cell below.

In [1]:
# Install coregtor if not already installed, then import it
try:
    import coregtor
except ImportError:
    %pip install coregtor
    import coregtor

# Additional imports
import json
import os
import warnings
from pathlib import Path

import pandas as pd

## Experiment configuration

In [2]:
# Experiment configuration. The stage keys (create_model, tree_paths,
# create_context, transform_context, compare_context) are the ones the
# step-by-step cell further down reads and forwards to the matching
# coregtor function as keyword arguments.
config = {
    # Change this to run for just one experiment.
    "target_gene": [""],

    # Forwarded to coregtor.create_model(X, Y, **...).
    "create_model": {
        "model": "rf",
        "model_options": {
            "max_depth": 5,
            "n_estimators": 1000,
            "n_jobs": 5,
        },
    },

    # Forwarded to coregtor.tree_paths(...); empty means no extra keyword arguments.
    "tree_paths": {},

    # Forwarded to coregtor.create_context(...).
    "create_context": {
        "method": "tree_paths",
    },

    # One entry per transformation; "id" is how compare_context entries refer to it.
    "transform_context": [
        {
            "id": "default",
            "method": "gene_frequency",
            "normalize": False,
            "min_frequency": 1,
        },
    ],

    # One entry per comparison; "transformation_id" must match a transform_context "id".
    "compare_context": [
        {
            "id": "default",
            "method": "cosine",
            "transformation_id": "default",
            "convert_to_distance": False,
        },
    ],

    # Output location and run flags. "output_dir" and "checkpointing" are used
    # by the pipeline cell below; "force_fresh" is not read by any visible cell
    # (presumably consumed by the pipeline itself -- TODO confirm).
    "output_dir": "$HOME/projects/temp-results",
    "checkpointing": False,
    "force_fresh": False,

    # Input files; environment variables in these paths are expanded on load.
    "input": {
        "expression": "$HOME/...",
        "tflist": "$HOME/...",
    },
}

## The full pipeline

In [None]:
def get_a_path(pth):
    """Convert a path string from the config into a ``Path``.

    Environment variables (e.g. ``$HOME``) are substituted first, then a
    leading ``~`` is expanded to the user's home directory.
    """
    with_env_vars = os.path.expandvars(pth)
    with_home_dir = os.path.expanduser(with_env_vars)
    return Path(with_home_dir)
    
# Load the two inputs named in the config. The TF list is read as a
# header-less CSV into a single column called "gene_name".
expression_data = coregtor.read_GE_data(get_a_path(config["input"]["expression"]))
tflist = pd.read_csv(
    get_a_path(config["input"]["tflist"]),
    names=["gene_name"],
    header=None,
)

# NOTE(review): `Pipeline` is not imported in any visible cell -- confirm where
# the coregtor package exposes it and add that import to the imports cell.
pipeline = Pipeline(expression_data, tflist, config)
pipeline.run()
details = pipeline.run_details()

if config["checkpointing"]:
    # Create the output directory first so a fresh machine does not fail
    # with FileNotFoundError when the JSON file is opened.
    output_dir = get_a_path(config["output_dir"])
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{pipeline.title}.json"
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(details, f, indent=1)
else:
    print(details)

## To see individual steps

In [None]:
def run_single_target(target: str):
    """Run each CoRegTor stage for one target gene and return every stage's output.

    Reads the notebook-level ``expression_data``, ``tflist`` and ``config``.
    Each stage's config entry is forwarded to the matching ``coregtor``
    function as keyword arguments.

    Args:
        target: Column name in ``expression_data`` to use as the target.

    Returns:
        dict with the keys ``"model"``, ``"paths"``, ``"contexts"``,
        ``"transforms"`` and ``"comparisons"``. The last two are lists of
        ``{"id": ..., "result": ...}`` entries, one per config entry that ran.

    Raises:
        ValueError: If ``target`` is not a column of ``expression_data``.
    """
    if target not in expression_data.columns:
        raise ValueError(f"Target '{target}' not in expression data")

    # 1. Model input
    X, Y = coregtor.create_model_input(expression_data, target, tflist)

    # 2. Model training
    model_config = config.get("create_model")
    model = coregtor.create_model(X, Y, **model_config)
    print(model)

    # 3. Tree paths
    tree_paths_config = config.get("tree_paths")
    paths = coregtor.tree_paths(model, X, Y, **tree_paths_config)
    print(paths)

    # 4. Create contexts
    create_context_config = config.get("create_context")
    contexts = coregtor.create_context(paths, **create_context_config)

    # 5. Transform contexts (one result per configured transformation)
    transform_results = [
        {"id": t_config["id"], "result": coregtor.transform_context(contexts, **t_config)}
        for t_config in config.get("transform_context", [])
    ]

    # 6. Compare contexts; each comparison names the transformation it consumes
    comparison_results = []
    for c_config in config.get("compare_context", []):
        wanted_id = c_config.get("transformation_id")
        transform_found = next(
            (entry for entry in transform_results if entry.get("id") == wanted_id),
            None,
        )
        if transform_found is None:
            # Still skipped (best effort), but no longer silently: a typo in
            # transformation_id would otherwise just produce fewer results.
            warnings.warn(
                f"Skipping compare_context '{c_config.get('id')}': "
                f"no transform_context entry with id '{wanted_id}'",
                stacklevel=2,
            )
            continue
        matrix = coregtor.compare_context(transform_found["result"], **c_config)
        comparison_results.append({"id": c_config["id"], "result": matrix})

    # Return every stage so the caller can inspect the steps individually;
    # the original computed them and then discarded everything.
    return {
        "model": model,
        "paths": paths,
        "contexts": contexts,
        "transforms": transform_results,
        "comparisons": comparison_results,
    }
        