In [1]:
import os
import torch
import argparse
import matplotlib.pyplot as plt

# 0: Downloading the Schiebinger Dataset

In [2]:
%%bash
# Run the repository's Schiebinger dataset download script (path is relative to the
# notebook's working directory). Per its recorded output, the script reports the
# dataset location as Data/Schiebinger and skips the download when that directory
# already exists and is not empty.
./SchiebingerDownload.sh

--- Schiebinger Dataset Downloader ---
Directory Data/Schiebinger already exists and is not empty.
Continuing: will attempt to extract/unpack any archives found in this directory.
Skipping cleanup as no new download was performed.
---
Dataset is located in: Data/Schiebinger


# 1: Extracting Highly Variable Genes (HVGs) for the Chosen Trunk


In [3]:
# --- Settings for the HVG extraction stage ---

# Which trunk of the dataset to process, and how many highly variable genes to keep.
trunk = "2i"  # Options: "serum", "2i", "both"
num_highly_variable_genes = 2000

# Where the raw dataset lives and where the extracted tensor is written.
data_directory = "Data/Schiebinger"
output_directory = "Pipeline/HVGs"

# Set to True to redo the extraction even when the tensor file is already on disk.
reextract = False

# The tensor file name encodes both the trunk and the HVG count, so different
# settings never share a cached file.
tensor_filename = f"schiebinger_hvg_tensor_trunk-{trunk}_{num_highly_variable_genes}hvg.pt"
tensor_path = os.path.join(output_directory, tensor_filename)


In [4]:
from Pipeline.firstSelectHVGs import run_hvg_extraction

# Extraction is needed when the cached tensor is missing or a re-extraction was
# explicitly requested; otherwise the existing file is reused.
if not os.path.exists(tensor_path) or reextract:
    print(f"Starting HVG extraction...")

    # Gather the extraction arguments in one place, then call the pipeline
    # function directly (no command-line parsing involved).
    hvg_settings = dict(
        data_dir=data_directory,
        output_dir=output_directory,
        output_file=tensor_filename,
        trunk=trunk,
        n_hvg=num_highly_variable_genes,
        min_counts=2000,
        max_counts=50000,
        min_cells=50,
        debug=True,
    )
    hvg_fig = run_hvg_extraction(**hvg_settings)

    # Show the diagnostic figure only when the call handed one back.
    if hvg_fig:
        print("\nDisplaying diagnostic plot:")
        plt.show()
else:
    print(f"File {tensor_filename} already exists. Skipping HVG extraction.")



File schiebinger_hvg_tensor_trunk-2i_2000hvg.pt already exists. Skipping HVG extraction.


# 2: Training the Autoencoder

In [5]:
# --- Settings for the autoencoder stage ---

# Training hyperparameters.
num_epochs = 30
batch_size = 64
overdispersion = 0.3

# Sizes handed to the trainer as `latent_dims`; the final entry is `bottleneck`,
# which later cells also reuse as the density-model dimension.
bottleneck = 24
latent_dims = [660, 220, 66, bottleneck]

# Output files, named after the selected trunk.
model_save_path = f"Models/Autoencoder/trunk-{trunk}.pt"
latent_save_path = f"LatentSpace/trunk-{trunk}_latent.pt"

# Set to True to train even when a saved model already exists.
Training = False



In [6]:
from Pipeline.secondTrainAutoencoder import run_autoencoder_training

# Reuse an existing model unless training was explicitly requested.
if os.path.exists(model_save_path) and not Training:
    print(f"Model already exists at {model_save_path}. Skipping training.")
    # The visualization section loads `latent_save_path`. A saved model without
    # its latent file would otherwise only surface as a load error further down,
    # so flag it here without retraining (which could overwrite the saved model).
    # NOTE(review): assumes run_autoencoder_training is what writes the latent
    # file, since it receives `latent_save_path` — confirm.
    if not os.path.exists(latent_save_path):
        print(
            f"WARNING: latent file {latent_save_path} is missing. "
            "Later cells need it; set Training = True and re-run this cell."
        )
else:
    # Call the training function directly with explicit keyword arguments;
    # the values come from the autoencoder configuration cell.
    run_autoencoder_training(
        tensor_file=tensor_path,
        model_save_path=model_save_path,
        latent_save_path=latent_save_path,
        latent_dims=latent_dims,
        num_epochs=num_epochs,
        batch_size=batch_size,
        overdispersion=overdispersion,
        lr=5e-4,
        val_split=0.2,
        debug=False
    )

Model already exists at Models/Autoencoder/trunk-2i.pt. Skipping training.


## 2.1: Visualizing the Latent Space

In [7]:
from Genodesic.Visualizers import UMAP3D

# Map every tensor to the CPU while loading: `.numpy()` below only works on CPU
# tensors, and this also lets the file load on a machine without the device it
# was saved from.
latent_data_bundle = torch.load(latent_save_path, map_location="cpu")

# Extract the numpy arrays for plotting. `.detach()` drops any autograd history,
# which would otherwise make `.numpy()` raise.
latent_reps = latent_data_bundle['latent_reps'].detach().numpy()
timepoints = latent_data_bundle['timepoints'].detach().numpy().flatten()

print(f"Loaded {latent_reps.shape[0]} latent vectors.")

# Render the 3D UMAP of the latent vectors, colored by the flattened timepoints.
UMAP3D(
    latent_reps=latent_reps,
    color_by_timepoints=timepoints,
    title=f"Latent Space UMAP (Trunk: {trunk})"
)

cuML found. Using GPU for UMAP acceleration.
Loaded 172756 latent vectors.
--- Starting 3D Visualization ---
Fitting UMAP model...
Creating k3d plot...
Coloring by continuous timepoints and adding a color bar.
--- Visualization Complete ---


Plot(antialias=3, axes=['x', 'y', 'z'], axes_helper=1.0, axes_helper_colors=[16711680, 65280, 255], background…

# 3: Setting up Density Models

In [9]:
from Scripts.train import run_training

# Density model to train. Options: "vpsde", "otcfm", "rqnsf"
density_model_type = "rqnsf"

# Values merged over the project's default configuration for this run.
notebook_overrides = {
    "model_type": density_model_type,
    "data_file": latent_save_path,
    # Derived from the model type so that switching models never overwrites
    # another model's checkpoint.
    "model_save_path": f"Models/DensityModels/{density_model_type}.pt",
    # NOTE(review): presumably must equal the autoencoder bottleneck size, since
    # the model is trained on the latent file — confirm.
    "dim": bottleneck,
    "num_epochs": 50,
    "batch_size": 64
}

# NOTE(review): the recorded run of this cell stopped with
# "AttributeError: 'SequenceINN' object has no attribute 'dim'" after the training
# loop started. The traceback is not shown here; it appears to come from the
# rqnsf model/training code rather than from these overrides — confirm and fix there.
trained_model = run_training(config_overrides=notebook_overrides)

INFO: Loading default configuration from Config/models.yaml
INFO: Merging notebook overrides into config.
--- Running Training for RQNSF ---
INFO: Setting up dataloaders...
INFO: Initializing model...
INFO: Starting training loop...


AttributeError: 'SequenceINN' object has no attribute 'dim'