#GenerateExperiment.ipynb

#DESCRIPTION

## This notebook is the Experiment Setup entry point

---

This notebook is the entry point for initializing a modular experiment pipeline. It emphasizes a clean, reproducible setup: minimal manual edits, clear separation of configuration from logic, and version-pinned file downloads. The goal is to streamline the workflow—select modules, configure paths, and prepare the environment with minimal friction.

All implementation details are encapsulated in external modules. This notebook guides you through a standard sequence: configure once, select components, inject paths, and stage dependencies. Edit only the cells under "Experiment Variables" and "SELECT THE RUN FILES YOU WANT TO USE"; everything else should run as-is.


####Run Order

1.   Set up the experiment environment from the selected root path and the chosen modules, and record the commit hash of the downloaded code.

2.   Stage files and folders, creating only the folder structure required by the chosen modules.

3.   Download only the necessary files from GitHub based on the selected modules, and inject paths into the configuration files.

4.   Finalize the setup: validate file existence, resolve dependencies, and prepare for pipeline execution.





#Experiment Variables

Define the configuration variables you want to use in the experiment.




In [1]:
## IMPORT AND SCHEMA TYPES
"""
Typed schemas for registry entries to catch typos at development time.
Placed before CELL 02 so STIMULI/EXPERIMENTAL_PERIODS can be typed.
"""
from typing import TypedDict
from types import MappingProxyType as _RO

class StimSpec(TypedDict, total=False):
    name: str                     # exact CSV column title (e.g., "Stim0")
    trials: int | None            # expected onsets per run, or None if variable
    duration_sec: float | None    # stimulus length in seconds, or None if variable
    detection: tuple[int, int]    # (off_value, on_value) marking onsets
    ignore: bool                  # whether this channel is ignored for alignment/QA

class PeriodSpec(TypedDict, total=False):
    """Schema of one experimental-period entry; every key is optional (total=False)."""

    # Period length in seconds.
    duration_sec: float

In [2]:
## IDENTITY & GROUPING
# User-editable configuration: dataset identity, filename layout, and group definitions.

# Whether pose tracking was recorded/used for this dataset.
# NOTE(review): the later "LEGACY" cell reassigns POSE_SCORING to True, so on a
# top-to-bottom run that later value wins — confirm which value is intended.
POSE_SCORING: bool = False

# Ordered tokens parsed from filenames; list order follows the field order in the name:
#   "<Experimenter>-<Genotype>-<Protocol>-<Sex>-<Age>-<Setup>-<Camera>-<Timestamp>-<FlyID>.<Extension>"
# NOTE(review): the later "LEGACY" cell redefines this list with "Date" in place
# of "Timestamp" — confirm which token name downstream code expects.
FILENAME_STRUCTURE: list[str] = [
    "Experimenter","Genotype","Protocol","Sex","Age",
    "Setup","Camera","Timestamp","FlyID","Extension",
]

# Token that defines experimental groups (one of the FILENAME_STRUCTURE entries).
GROUP_IDENTIFIER: str = "Protocol"

# Experimental groups (keys serve as display labels)
#   - id_value: token value in filename that defines this group
#   - color:    hex code for plotting, or None → auto palette
GROUPS: dict[str, dict] = {
    "Control": {"id_value": "20Control_3BlackOut", "color": "#645769"},
    "Loom":    {"id_value": "20Loom_3BlackOut",    "color": "#E35B29"},
}

In [3]:
## STIMULUS REGISTRY & ALIGNMENT

"""
Describe all stimulus channels in tracked CSVs and pick the canonical aligner.

Rules:
- Dict keys are human-facing labels (e.g., "RedLED", "GreenLED").
- Field `name` is the exact CSV column name (e.g., "Stim0", "Stim1").
- `trials`: expected onsets per run, or None if variable.
- `duration_sec`: stimulus length in seconds, or None if variable.
- `detection`: (off_value, on_value) pair used to mark onsets.
- `ignore`: whether this channel is ignored for alignment/QA.
"""

# Name in STIMULI chosen as the canonical alignment stimulus.
# It must match one of the STIMULI keys below ("VisualStim" does).
ALIGNMENT_STIM: str = "VisualStim"

# Stimulus registry: one StimSpec per channel, keyed by its human-facing label.
STIMULI: dict[str, StimSpec] = {
    # 23 expected onsets equals the legacy cell's STIMULUS_NUMBER + 3 (20 + 3).
    "VisualStim": {
        "name": "VisualStim",
        "trials": 23,
        "duration_sec": 0.5,
        "detection": (0, 1),
        "ignore": False,
    },
    # NOTE(review): trials=0 with ignore=False presumably means "no onsets
    # expected, but still checked" — confirm against the consumer of STIMULI.
    "RedLED": {
        "name": "Stim0",
        "trials": 0,
        "duration_sec": 0.5,
        "detection": (0, 1),
        "ignore": False,
    },
    # NOTE(review): same trials=0 question as for "RedLED" above — confirm.
    "GreenLED": {
        "name": "Stim1",
        "trials": 0,
        "duration_sec": 0.5,
        "detection": (0, 1),
        "ignore": False,
    },
}

In [4]:
## PERIOD SCHEDULE
"""
Declare per-period durations in seconds. Frames and ranges are derived later.
"""

# Periods
# - EXPERIMENTAL_PERIODS must contain at least one non-aggregate entry ('Baseline').
# - "Experiment" always exists as the aggregate, even if only one period is defined.
# - Keys are the period labels; they are spelled the same way as in the legacy
#   configuration cell ("Baseline", "Stimulation", "Recovery").
EXPERIMENTAL_PERIODS: dict[str, PeriodSpec] = {
    "Baseline":    {"duration_sec": 300.0},
    "Stimulation": {"duration_sec": 300.0},
    "Recovery":    {"duration_sec": 300.0},  # was misspelled "Rescovery"
}


In [5]:
## TIMEBASE & ARENA
"""
Define the sampling rate and arena geometry.
"""
# NOTE(review): FRAME_RATE, ARENA_WIDTH_MM and ARENA_HEIGHT_MM are assigned again
# (as plain ints, same magnitudes) in the later "LEGACY" cell; on a top-to-bottom
# run those later assignments are the ones left in the namespace.

# Master tolerance [frames].
# NOTE(review): what the tolerance is applied to is not visible in this notebook —
# confirm against the module that consumes NOISE_TOLERANCE.
NOISE_TOLERANCE: int = 2

# Video frame rate [fps]
FRAME_RATE: int = 60

# Arena width [mm]
ARENA_WIDTH_MM: float = 30.0

# Arena height [mm]
ARENA_HEIGHT_MM: float = 30.0

# LEGACY COMPATIBILITY

In [6]:
# LEGACY!!! Experiment variables
#
# Legacy-format configuration kept for compatibility with older modules.
# NOTE(review): this cell re-assigns several names already defined by the typed
# cells above (POSE_SCORING, FRAME_RATE, ARENA_WIDTH_MM, ARENA_HEIGHT_MM,
# EXPERIMENTAL_PERIODS, FILENAME_STRUCTURE, GROUP_IDENTIFIER). When the notebook
# is run top to bottom, the values below are the ones that remain in the kernel.
# Two of them differ from the earlier cells: POSE_SCORING (False above, True here)
# and FILENAME_STRUCTURE ("Timestamp" above, "Date" here) — confirm which is intended.

# Enable pose-derived metrics processing
POSE_SCORING = True  # include pose‑derived metrics from SLEAP

# Stimulus alignment configuration
ALIGNMENT_COL         = "VisualStim"  # column holding stimulus pulses (0→1)
STIMULUS_NUMBER       = 20            # expected onsets per run
STIMULUS_DURATION_SEC = 0.5           # stimulus length (sec)
EXPECTED_STIMULUS     = STIMULUS_NUMBER + 3  # extra events (e.g. lights‑off); evaluates to 23


# Timing & arena dimensions
FRAME_RATE      = 60   # frames per second
ARENA_WIDTH_MM  = 30   # arena width (millimetres)
ARENA_HEIGHT_MM = 30   # arena height (millimetres)

# Experimental periods durations (sec); integer seconds here, floats in the typed cell
EXPERIMENTAL_PERIODS = {
    "Baseline":    {"duration_sec": 300},
    "Stimulation": {"duration_sec": 300},
    "Recovery":    {"duration_sec": 300},
}

# Filename and grouping metadata
FILENAME_STRUCTURE = [  # order of fields in scored filenames
    "Experimenter", "Genotype", "Protocol", "Sex", "Age",
    "Setup", "Camera", "Date", "FlyID", "Extension",
]

GROUP_IDENTIFIER = "Protocol"  # metadata field used for grouping runs

# Experimental groups (keep 'Loom' as protocol label)
# Legacy schema: keys are "label" / "idValue" / "color", whereas the typed GROUPS
# dict above uses "id_value" / "color" with the dict key as the label.
EXPERIMENTAL_GROUPS = {
    "Control": {
        "label": "Control",                # group name
        "idValue": "20Control_3BlackOut",  # identifier in filename metadata
        "color": "#645769",                # plot color
    },
    "Loom": {
        "label": "Loom",
        "idValue": "20Loom_3BlackOut",
        "color": "#E35B29",
    },
}


# SELECT THE RUN FILES YOU WANT TO USE


In [7]:
## Module selection: flags set to True ask the script to download that module's
## files and their dependencies.
# Folder_Name is the sub-folder created inside the folder picked in the chooser;
# the main handler builds the experiment root as <selected folder>/<Folder_Name>.
Folder_Name = "Moita" # @param {"type":"string","placeholder":"insert text here"}
# NOTE(review): the four flags below are not read by any code visible in this
# notebook — confirm whether they are still needed.
BONFLY_bonsai = True # @param {"type":"boolean"}
BONFLY_protocols = True # @param {"type":"boolean"}
BONFLY_tracker = True # @param {"type":"boolean"}
BEHAVIOR_CLASSIFIER = True # @param {"type":"boolean"}

# BEHAVIOR_SCORING_RUN and CREATE_DATAFRAMES_RUN become the 'flag' values of the
# DEPENDENCIES map in the script cell and decide which files/folders are staged.
BEHAVIOR_SCORING_RUN = True
# NOTE(review): BONSAI is not read in the visible code; the main handler calls
# clone_and_copy_bonsai() unconditionally — confirm whether it should be gated.
BONSAI = True
CREATE_DATAFRAMES_RUN = False

#CHOOSE EXPERIMENT FOLDER


##Script

In [8]:
import importlib
import importlib.util
import os
import subprocess
import sys

# ipyfilechooser is imported in this cell, but the notebook's pip-install step
# lives in a later cell. Install it on demand so a fresh kernel can run the
# notebook top to bottom.
if importlib.util.find_spec("ipyfilechooser") is None:
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "ipyfilechooser"],
        check=True,
    )
    # Make the freshly installed package visible to the import system.
    importlib.invalidate_caches()

from google.colab import drive
from IPython.display import display, clear_output
from ipyfilechooser import FileChooser
import ipywidgets as widgets

# Mount Drive. This stays best-effort (the cell keeps running on failure), but
# the reason is reported instead of being silently discarded.
try:
    drive.mount('/content/drive')
except Exception as mount_error:
    print(f"⚠️ Could not mount Google Drive: {mount_error}")

# Directory-only chooser rooted at the user's Drive.
chooser = FileChooser('/content/drive/MyDrive/')
chooser.title = 'Select the experiment folder'
chooser.show_only_dirs = True
# The chooser is rendered by the separate display cell, not here.

continue_button = widgets.Button(description="Continue")
output = widgets.Output()


# Global variable to save folder path
selected_experiment_folder = None

def on_continue_clicked(b):
    """Validate the chooser selection and store it in `selected_experiment_folder`.

    Parameters:
        b: the clicked button instance passed by ipywidgets (unused).

    Messages are printed into the `output` widget; an invalid or missing
    selection resets `selected_experiment_folder` to None.
    """
    global selected_experiment_folder
    with output:
        clear_output()
        if not chooser.selected or not os.path.isdir(chooser.selected):
            print("⚠️ Please select a valid experiment folder before continuing.")
            selected_experiment_folder = None
        else:
            selected_experiment_folder = chooser.selected
            print(f"✅ Folder selected: {selected_experiment_folder}")



continue_button.on_click(on_continue_clicked)
# The button and output widgets are rendered by the separate display cell.


Mounted at /content/drive


##Display

In [11]:
# Render the folder chooser, then the Continue button and its output area.
display(chooser, continue_button, output)

FileChooser(path='/content/drive/MyDrive', filename='', title='Select the experiment folder', show_hidden=Fals…

Button(description='Continue', style=ButtonStyle())

Output()

# SCRIPT (DO NOT CHANGE)

In [10]:
# --- installs (quiet) ---
import sys, subprocess

def _pip_quiet(*args):
    """Run `pip install -q <args...>` with the kernel's interpreter; raise on failure."""
    command = [sys.executable, "-m", "pip", "install", "-q"]
    command.extend(args)
    subprocess.run(command, check=True)

# Packages used by the script cell and the folder-chooser widgets.
_REQUIRED_PACKAGES = ("jinja2", "ipyfilechooser", "ipywidgets", "ipynbname")
_pip_quiet(*_REQUIRED_PACKAGES)

# --- imports ---
import os, json, shutil, subprocess as sp, datetime
from jinja2 import Environment, FileSystemLoader

# Try to detect the current notebook path automatically (Colab-safe).
# NOTE(review): the fallback is a hard-coded personal Drive location; the
# notebook must actually live there for the config-cell injection to work.
_NOTEBOOK_FALLBACK_PATH = "/content/drive/MyDrive/Matheus_e_Rodrigo/TestRun/_GenerateExperiment.ipynb"

# Start from the manual Drive path and replace it only with a usable detection.
DEV_NOTEBOOK_PATH = _NOTEBOOK_FALLBACK_PATH
try:
    import ipynbname
    nb_path = str(ipynbname.path())
except Exception:
    # ipynbname is missing or could not resolve the path: keep the fallback.
    pass
else:
    # Typical Colab case: ipynbname returns something like "/fileId=<id>", which
    # is not a real file path, so the Drive fallback is kept in that case too.
    if not nb_path.startswith("/fileId="):
        DEV_NOTEBOOK_PATH = nb_path

print("📒 Notebook path usado =", DEV_NOTEBOOK_PATH)

# --- globals ---
# Scratch location for the fresh clone of the code repository.
TMP_CLONE_PATH = "/tmp/experiencias_repo"
GIT_REPO_URL = 'https://github.com/rodrigprogram9/experiencias.git'
# Index into the notebook JSON "cells" list (all cell types count) of the cell
# whose source is injected into ExperimentConfig.py.
EXPERIMENTCONFIG_CELL2_INDEX = 5

# =========================================================
# Dependencies map
# =========================================================
def _dependency_entry(flag, run_folder, run_files, config_files):
    """Build one DEPENDENCIES record: the enable flag plus files grouped by folder name."""
    return {
        'flag': flag,
        'files_to_copy': {
            run_folder: run_files,
            'Config': config_files,
        },
    }

# One record per optional pipeline stage, keyed by the name of its selection flag.
DEPENDENCIES = {
    'CREATE_DATAFRAMES_RUN': _dependency_entry(
        CREATE_DATAFRAMES_RUN,
        'CreateDataFrames',
        ['CreateDataFramesFunctions.py', 'CreateDataFramesMain.py', '__init__.py'],
        [
            'ExperimentConfig.py',
            'CreateDataFramesConfig.py',
            'PathConfig.py',
            'BehaviorScoringColabConfig.py',
            '__init__.py',
        ],
    ),
    'BEHAVIOR_SCORING_RUN': _dependency_entry(
        BEHAVIOR_SCORING_RUN,
        'BehaviorScoring',
        ['BehaviorScoringFunctions.py', 'BehaviorScoringMain.py', '__init__.py'],
        [
            'ExperimentConfig.py',
            'BehaviorScoringConfig.py',
            'PathConfig.py',
            'BehaviorScoringColabConfig.py',
            '__init__.py',
        ],
    ),
}

# =========================================================
# Load code from notebook cell
# =========================================================
def load_cell_code_from_ipynb(ipynb_path, cell_index):
    """Return the source text of one cell of a notebook file.

    Parameters:
        ipynb_path: path to the .ipynb file (parsed as JSON).
        cell_index: position of the wanted cell in the notebook's "cells" list.

    Returns:
        The cell's "source" entries concatenated into a single string.

    Raises:
        FileNotFoundError: the path is empty/None or does not exist.
        ValueError: the parsed JSON has no "cells" entry.
        IndexError: cell_index is at or beyond the number of cells.
    """
    path_is_usable = bool(ipynb_path) and os.path.exists(ipynb_path)
    if not path_is_usable:
        raise FileNotFoundError(f"Notebook file not found: {ipynb_path}")

    with open(ipynb_path, "r", encoding="utf-8") as handle:
        document = json.load(handle)

    if "cells" not in document:
        raise ValueError("Invalid notebook structure")

    all_cells = document["cells"]
    if cell_index >= len(all_cells):
        raise IndexError(f"Notebook does not have cell {cell_index}")

    source_chunks = all_cells[cell_index]["source"]
    return "".join(source_chunks)

# =========================================================
# Clone repo (fresh) + log commit hash
# =========================================================
def clone_repo_once(dest_folder):
    """Clone the code repository fresh into TMP_CLONE_PATH and log what was fetched.

    Any previous clone at TMP_CLONE_PATH is deleted first. After cloning, the
    HEAD commit hash and the list of tracked files are appended to
    `<dest_folder>/Codes/CommitHash.txt`, together with instructions for
    downloading that exact commit from GitHub.

    Parameters:
        dest_folder: experiment root folder; the "Codes" sub-folder is created
            if it does not exist yet.

    Raises:
        subprocess.CalledProcessError: if any git command fails.
    """
    # Remove any previous clone in /tmp to ensure a fresh copy
    if os.path.exists(TMP_CLONE_PATH):
        print(f"🧹 Removing existing repo at {TMP_CLONE_PATH} ...")
        shutil.rmtree(TMP_CLONE_PATH)

    # Clone the repository
    print(f"Cloning repository from {GIT_REPO_URL} into {TMP_CLONE_PATH} ...")
    sp.run(['git', 'clone', GIT_REPO_URL, TMP_CLONE_PATH], check=True)
    print("✅ Repository cloning finished.")

    # Retrieve current commit hash
    commit_hash = sp.run(
        ['git', '-C', TMP_CLONE_PATH, 'rev-parse', 'HEAD'],
        stdout=sp.PIPE, text=True, check=True
    ).stdout.strip()

    # Get the list of tracked files in the repository (one path per line)
    file_list = sp.run(
        ['git', '-C', TMP_CLONE_PATH, 'ls-files'],
        stdout=sp.PIPE, text=True, check=True
    ).stdout.strip().splitlines()

    # Get current datetime
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Ensure the Codes folder exists and define the log file path
    log_dir = os.path.join(dest_folder, "Codes")
    os.makedirs(log_dir, exist_ok=True)
    log_file = os.path.join(log_dir, "CommitHash.txt")

    # Strip only a trailing ".git"; a plain replace() would also remove ".git"
    # occurring elsewhere in the URL (e.g. a "<user>.github.io.git" repository).
    browse_url = GIT_REPO_URL.removesuffix('.git')

    # Append commit info + GitHub download instructions
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"Access date: {now}\n")
        f.write(f"Commit Hash: {commit_hash}\n")
        f.write("List of downloaded files: \n" + "\n".join(file_list) + "\n")
        f.write(
            "\n--- How to download this exact version from GitHub ---\n"
            "1. Open this URL in your browser:\n"
            f"   {browse_url}/tree/{commit_hash}\n"
            "2. Click the green 'Code' button.\n"
            "3. Select 'Download ZIP'.\n"
            "4. Unzip the downloaded file to access the files for this commit.\n"
            "-------------------------------------------------------\n\n"
        )

    # Display information in the console
    print(f"📌 Commit hash: {commit_hash}")
    print(f"💾 Saved clone info to {log_file}")


# =========================================================
# Create folder structure
# =========================================================
def create_experiment_structure(main_folder):
    """Create `Codes/Config` plus one `Codes/<stage>` folder per enabled stage.

    Parameters:
        main_folder: experiment root under which the "Codes" tree is created.
    """
    codes_root = os.path.join(main_folder, "Codes")
    # (folder name, predicate) pairs; predicates are evaluated lazily, in order.
    folder_plan = (
        ("Config", lambda: True),
        ("BehaviorScoring", lambda: DEPENDENCIES['BEHAVIOR_SCORING_RUN']['flag']),
        ("CreateDataFrames", lambda: DEPENDENCIES['CREATE_DATAFRAMES_RUN']['flag']),
    )
    for folder_name, is_wanted in folder_plan:
        if is_wanted():
            os.makedirs(os.path.join(codes_root, folder_name), exist_ok=True)

# =========================================================
# Copy static files from repo
# =========================================================
def copy_files_for_run(main_folder, run_info):
    """Copy one run's files from the repository clone into `<main_folder>/Codes`.

    Parameters:
        main_folder: experiment root folder.
        run_info: one DEPENDENCIES record; its 'files_to_copy' mapping goes from
            destination sub-folder name (e.g. "Config", "BehaviorScoring",
            "CreateDataFrames") to the list of filenames for that folder.

    Each file is read from the top level of TMP_CLONE_PATH. A file that already
    exists at the destination is left untouched; a file missing from the clone
    is reported and skipped.
    """
    base_codes_path = os.path.join(main_folder, 'Codes')
    for subfolder, file_list in run_info['files_to_copy'].items():
        if not file_list:
            continue
        # The mapping key already names the destination folder. Choosing it by
        # comparing boolean flags sent CreateDataFrames files to BehaviorScoring
        # whenever both runs were enabled, because True == True.
        dest_folder = os.path.join(base_codes_path, subfolder)
        os.makedirs(dest_folder, exist_ok=True)
        for filename in file_list:
            src_path = os.path.join(TMP_CLONE_PATH, filename)
            dest_path = os.path.join(dest_folder, filename)
            if os.path.exists(dest_path):
                # Never overwrite: the file may already carry injected settings.
                continue
            if not os.path.exists(src_path):
                print(f"⚠️ File not found in repository clone, skipped: {src_path}")
                continue
            shutil.copy2(src_path, dest_path)

# =========================================================
# Ensure __init__.py exists
# =========================================================
def ensure_init_py_for_run(main_folder, run_key):
    """Give a run's package folder an `__init__.py` by copying the Config one.

    Parameters:
        main_folder: experiment root folder containing "Codes".
        run_key: 'BEHAVIOR_SCORING_RUN' selects "BehaviorScoring"; any other
            value selects "CreateDataFrames".

    Nothing happens when the target already has an `__init__.py` or when
    `Codes/Config/__init__.py` does not exist.
    """
    codes_dir = os.path.join(main_folder, 'Codes')
    if run_key == 'BEHAVIOR_SCORING_RUN':
        package_dir_name = "BehaviorScoring"
    else:
        package_dir_name = "CreateDataFrames"

    wanted_init = os.path.join(codes_dir, package_dir_name, "__init__.py")
    if os.path.exists(wanted_init):
        return

    donor_init = os.path.join(codes_dir, "Config", "__init__.py")
    if os.path.exists(donor_init):
        shutil.copy2(donor_init, wanted_init)

# =========================================================
# Template helpers
# =========================================================
def inject_template_replace(template_name, target_file, placeholder, context):
    """Render a template and substitute it for every `placeholder` in a file.

    Parameters:
        template_name: template file name inside `<TMP_CLONE_PATH>/templates`.
        target_file: path of the text file rewritten in place.
        placeholder: literal text to be replaced by the rendered template.
        context: mapping passed to the template's render call.
    """
    templates_dir = os.path.join(TMP_CLONE_PATH, "templates")
    jinja_env = Environment(loader=FileSystemLoader(templates_dir))
    replacement = jinja_env.get_template(template_name).render(context)

    with open(target_file, "r", encoding="utf-8") as reader:
        original_text = reader.read()

    with open(target_file, "w", encoding="utf-8") as writer:
        writer.write(original_text.replace(placeholder, replacement))

def inject_template_after_marker(template_name, target_file, marker, context):
    """Render a template and insert it right after the first line containing `marker`.

    Parameters:
        template_name: template file name inside `<TMP_CLONE_PATH>/templates`.
        target_file: path of the text file rewritten in place.
        marker: text searched for in each line; only the first hit is used.
        context: mapping passed to the template's render call.

    The file is rewritten unchanged when no line contains the marker.
    """
    templates_dir = os.path.join(TMP_CLONE_PATH, "templates")
    jinja_env = Environment(loader=FileSystemLoader(templates_dir))
    snippet = jinja_env.get_template(template_name).render(context)

    with open(target_file, "r", encoding="utf-8") as reader:
        original_lines = reader.readlines()

    # Index of the first line that carries the marker, or None when absent.
    hit_index = next(
        (idx for idx, text in enumerate(original_lines) if marker in text),
        None,
    )
    if hit_index is None:
        patched_lines = original_lines
    else:
        cut = hit_index + 1
        patched_lines = original_lines[:cut] + [snippet + "\n"] + original_lines[cut:]

    with open(target_file, "w", encoding="utf-8") as writer:
        writer.writelines(patched_lines)

def inject_into_notebook_placeholder(notebook_path, placeholder, text_to_insert):
    """Replace `placeholder` with `text_to_insert` in every code cell of a notebook.

    Parameters:
        notebook_path: path to the .ipynb file, rewritten in place.
        placeholder: literal text to look for in code-cell sources.
        text_to_insert: replacement text.

    Only cells whose "cell_type" is "code" are touched. A cell's "source" may be
    either a list of lines or one single string; both are handled and the
    original form is kept.
    """
    with open(notebook_path, "r", encoding="utf-8") as f:
        nb = json.load(f)
    for cell in nb.get("cells", []):
        if cell.get("cell_type") != "code":
            continue
        source = cell.get("source", [])
        if isinstance(source, str):
            # Iterating a string yields single characters, so a per-"line"
            # replace would never match the placeholder; replace it as a whole.
            cell["source"] = source.replace(placeholder, text_to_insert)
        else:
            cell["source"] = [line.replace(placeholder, text_to_insert) for line in source]
    with open(notebook_path, "w", encoding="utf-8") as f:
        json.dump(nb, f, indent=1)

# =========================================================
# Clone and copy Bonsai repo
# =========================================================
BONSAI_REPO_URL = "https://github.com/mth-farias/Personal-Bonfly"
TMP_BONSAI_PATH = "/tmp/bonsai_repo"

def clone_and_copy_bonsai(main_folder):
    """Clone the Bonsai repository fresh and copy its files into `<main_folder>/Bonsai`.

    Any previous clone at TMP_BONSAI_PATH is deleted first. The directory layout
    of the clone is reproduced under the "Bonsai" folder and existing files are
    overwritten. Git's own `.git` metadata directory is not copied.

    Parameters:
        main_folder: experiment root folder.

    Raises:
        subprocess.CalledProcessError: if `git clone` fails.
    """
    base_codes_path = os.path.join(main_folder, 'Bonsai')
    os.makedirs(base_codes_path, exist_ok=True)

    # Remove any previous clone
    if os.path.exists(TMP_BONSAI_PATH):
        print(f"🧹 Removing existing repo at {TMP_BONSAI_PATH} ...")
        shutil.rmtree(TMP_BONSAI_PATH)

    # Clone the Bonsai repository
    print(f"Cloning Bonsai repository from {BONSAI_REPO_URL} into {TMP_BONSAI_PATH} ...")
    sp.run(['git', 'clone', BONSAI_REPO_URL, TMP_BONSAI_PATH], check=True)
    print("✅ Bonsai repository cloning finished.")

    # Copy the working-tree files into the Bonsai folder
    for root, dirs, files in os.walk(TMP_BONSAI_PATH):
        # Prune in place so os.walk never descends into the repository's internal
        # .git directory; it is not part of the files being delivered.
        dirs[:] = [name for name in dirs if name != ".git"]
        rel_path = os.path.relpath(root, TMP_BONSAI_PATH)
        target_dir = os.path.join(base_codes_path, rel_path)
        os.makedirs(target_dir, exist_ok=True)
        for file in files:
            src_file = os.path.join(root, file)
            dest_file = os.path.join(target_dir, file)
            shutil.copy2(src_file, dest_file)

    print(f"💾 All Bonsai files copied into {base_codes_path}")

# =========================================================
# MAIN HANDLER
# =========================================================
# Unregister whichever handler currently owns this name (the folder-selection
# handler from the chooser cell, or a copy left by a previous run of this cell).
# Without this, a single click runs every registered handler, so the pipeline
# can execute several times.
_stale_handler = globals().get("on_continue_clicked")
if callable(_stale_handler):
    continue_button.on_click(_stale_handler, remove=True)

def on_continue_clicked(b):
    """Build the experiment folder for the selection made in the chooser.

    Parameters:
        b: the clicked button instance passed by ipywidgets (unused).

    Steps: validate the selected folder, create the folder structure, clone the
    code and Bonsai repositories, copy the files of every enabled run, then
    inject the experiment root and the configuration cell into the copied
    config files and the run notebook. All messages go to the `output` widget.
    The validated selection is also stored in `selected_experiment_folder`.
    """
    global selected_experiment_folder
    with output:
        clear_output()

        # os.path.join(None, ...) raises TypeError, so stop early when nothing
        # valid has been selected yet.
        if not chooser.selected or not os.path.isdir(chooser.selected):
            print("⚠️ Please select a valid experiment folder before continuing.")
            selected_experiment_folder = None
            return
        selected_experiment_folder = chooser.selected
        print(f"✅ Folder selected: {selected_experiment_folder}")

        main_folder = os.path.join(chooser.selected, Folder_Name)
        os.makedirs(main_folder, exist_ok=True)

        create_experiment_structure(main_folder)
        clone_repo_once(main_folder)

        # Also clone the Bonsai repo
        clone_and_copy_bonsai(main_folder)

        for run_key, run_info in DEPENDENCIES.items():
            if run_info['flag']:
                copy_files_for_run(main_folder, run_info)
                ensure_init_py_for_run(main_folder, run_key)

        # Copy BehaviorScoringRun.ipynb
        src_nb = os.path.join(TMP_CLONE_PATH, 'BehaviorScoringRun.ipynb')
        dest_nb = os.path.join(main_folder, 'Codes', 'BehaviorScoringRun.ipynb')
        if os.path.exists(src_nb):
            shutil.copy2(src_nb, dest_nb)

        # Context for templates
        context = {
            "experiment_root": main_folder,
            "experiment_config_cell2": load_cell_code_from_ipynb(DEV_NOTEBOOK_PATH, EXPERIMENTCONFIG_CELL2_INDEX)
        }

        # Inject PathConfig & ExperimentConfig
        inject_template_replace("PathConfig_root.j2", os.path.join(main_folder, "Codes", "Config", "PathConfig.py"), "__EXP_ROOT__", context)
        inject_template_after_marker("ExperimentConfig_cell2.j2", os.path.join(main_folder, "Codes", "Config", "ExperimentConfig.py"), "#%% CELL 01 – EXPERIMENT CONFIG", context)

        # Inject into the BehaviorScoringRun.ipynb placeholder, but only when the
        # notebook exists; the copy above is skipped if the clone lacks it.
        if os.path.exists(dest_nb):
            inject_into_notebook_placeholder(dest_nb, "__PLACEHOLDER_PATHCONFIG_ROOT__", os.path.join(main_folder, "Codes/Config/PathConfig.py").replace("\\", "/"))
        else:
            print("⚠️ BehaviorScoringRun.ipynb not found in the repository clone; path injection skipped.")

        print("\n✅ All done!")

continue_button.on_click(on_continue_clicked)


📒 Notebook path usado = /content/drive/MyDrive/Matheus_e_Rodrigo/TestRun/_GenerateExperiment.ipynb
