In [None]:
import os
import subprocess

In [None]:
# ==============================================================================================================================
#                Configuration area: Please modify the parameters here.
# ==============================================================================================================================

# 1. Define the five name components of the dataset.
#    The execution cell joins them into the dataset name as
#    "{GRAPH_TYPE}-{GRAPH_PARAMS}-{COV_PARAMS}-G{GRAPH_SAMPLE_ID}-{SIZE_PARAM}",
#    which is used both as the dataset folder name and as the CSV file prefix.
GRAPH_TYPE      = "ScaleFree"
GRAPH_PARAMS    = "Gamma2.3_Dmin4"
COV_PARAMS      = "Rho0.5_D5_W0.5_Pos"
GRAPH_SAMPLE_ID = "1"
SIZE_PARAM      = "16kNodes"

# 2. Define the range of sample numbers you want to process.
#    Both bounds are inclusive: files "..._Sample{START}.csv" through
#    "..._Sample{END}.csv" are looked up.
SAMPLE_START_INDEX = 1
SAMPLE_END_INDEX = 20

# 3. Define your output folder name here!
#    It is joined under "results/" (relative to the project root).
OUTPUT_FOLDER_NAME = "synthetic/ScaleFree/ScaleFree_Dmin_Pos"

# 4. Please make sure you have a data/synthetic/<GRAPH_TYPE> folder under the
#    project root, e.g. 'Edge-Flow-Hypothesis-Tests/data/synthetic/SmallWorld'.
#    NOTE: the path-setup cell reassigns DATA_DIR to
#    os.path.join("data", "synthetic", GRAPH_TYPE), so changing the value
#    below has no effect on where datasets are read from.
DATA_DIR = "data"

In [None]:
# The following code is not required to be modified.

# Locate the project root: starting from the current working directory, walk
# upward until a directory containing 'requirements.txt' is found.
project_root = os.path.abspath('.')
while not os.path.exists(os.path.join(project_root, 'requirements.txt')):
    parent_dir = os.path.dirname(project_root)
    # os.path.dirname() of a filesystem root returns the root itself, so an
    # unchanged path means every ancestor -- including the root directory,
    # which the loop condition has just tested -- has been checked.
    if parent_dir == project_root:
        raise FileNotFoundError("can't find root path")
    project_root = parent_dir

# All relative paths below (and the `-m` module invocation in the execution
# cell) are resolved against the project root.
os.chdir(project_root)
print(f"Working directory set to project root: {os.getcwd()}")

# Define key paths relative to the project root.
# NOTE: this deliberately replaces the DATA_DIR value from the configuration
# cell with the per-graph-type dataset directory.
DATA_DIR = os.path.join("data", "synthetic", GRAPH_TYPE)
MAIN_SCRIPT_PATH = os.path.join("utilities", "hypothesis_testing", "main.py")

# Destination folder for results of this run.
OUTPUTS_DIR = os.path.join("results", OUTPUT_FOLDER_NAME)

In [None]:
# ==============================================================================================================================
#                  Execution area: The following code is not required to be modified.
# ==============================================================================================================================

# Imported here so this cell is self-contained; sys.executable is used to
# launch the analysis with the same interpreter that runs this notebook.
import sys

# --- 1. Setting of routes and batch IDs ---
# The dataset name doubles as the folder name, the CSV file prefix and the
# batch ID passed to the analysis script.
dataset_name = f"{GRAPH_TYPE}-{GRAPH_PARAMS}-{COV_PARAMS}-G{GRAPH_SAMPLE_ID}-{SIZE_PARAM}"
dataset_folder = os.path.join(DATA_DIR, dataset_name)
batch_id = dataset_name

print(f"Current Batch Run ID: {batch_id}")
print(f"Data source: {dataset_folder}")
print(f"Results will be saved in: {OUTPUTS_DIR}")
print(f"Processing samples from {SAMPLE_START_INDEX} to {SAMPLE_END_INDEX}")


# --- 2. Generate a list of files based on the configuration ---
# Both SAMPLE_START_INDEX and SAMPLE_END_INDEX are inclusive.
dataset_files = [
    os.path.join(dataset_folder, f"{dataset_name}_Sample{i}.csv")
    for i in range(SAMPLE_START_INDEX, SAMPLE_END_INDEX + 1)
]


# --- 3. Loop through the main analysis script ---

for dataset_path in dataset_files:
    # Missing samples are reported and skipped rather than aborting the batch.
    if not os.path.exists(dataset_path):
        print(f"Warning: File not found, skipping: {dataset_path}")
        continue

    base_name = os.path.basename(dataset_path)
    dataset_id = os.path.splitext(base_name)[0]  # file name without ".csv"

    print(f"\n--- Processing: {base_name} ---")

    # Pass the arguments as a list (no shell) so that paths containing
    # spaces, quotes or shell metacharacters reach the script unchanged.
    command = [
        sys.executable, "-m", "utilities.hypothesis_testing.main",
        "--input", dataset_path,
        "--id", dataset_id,
        "--batch-id", batch_id,
        "--output", OUTPUTS_DIR,
        "--graph-type", GRAPH_TYPE,
        "--no-independence",
    ]

    # check=True raises subprocess.CalledProcessError on a non-zero exit
    # status, which stops the remaining samples from being processed.
    subprocess.run(command, check=True)

    print(f"--- Finished: {base_name} ---")

print("\nAll specified datasets processed!")