In [None]:
import random
from pathlib import Path
import subprocess
import pandas as pd


def filter_sn_lightcurve_random(input_file):
    """
    Thin a light-curve text file down to 7 randomly chosen ``OBS:`` rows, in place.

    The ``PEAKMJD:`` line supplies the reference value t0, and the second
    whitespace-separated token of each ``OBS:`` line is parsed as t1.
    Seven rows are kept:

    * one random row with t1 - t0 < 0,
    * one random row with t1 - t0 > 10,
    * five further distinct random rows with -15 <= t1 - t0 <= 60.

    The kept rows are written back sorted by t1, the ``NOBS:`` line is
    rewritten with the new row count, and ``TRIGGER:`` lines are dropped.
    Lines that precede the first ``OBS:`` row stay above the observations;
    every other non-``OBS:`` line (``END_PHOTOMETRY:``, ``END:``, trailing
    blanks, ...) is written below them.

    Args:
        input_file: Path of the file to rewrite. It is overwritten in place,
            but only after all validation has passed.

    Raises:
        ValueError: If no ``PEAKMJD:`` line is found, or if any of the three
            pools does not contain enough rows.
    """
    with open(input_file, 'r') as f:
        lines = f.readlines()

    t0 = None
    header = []
    obs_lines = []
    footer = []

    # 1. Parse the file
    for line in lines:
        if line.startswith('PEAKMJD:'):
            t0 = float(line.split()[1])

        if line.startswith('OBS:'):
            obs_lines.append(line)
        elif line.startswith('TRIGGER:'):
            continue
        elif obs_lines or line.startswith('END_PHOTOMETRY:'):
            # Anything after the first observation belongs below the
            # photometry block; filing it under the header would move it
            # ahead of the OBS rows on output.
            footer.append(line)
        else:
            header.append(line)

    if t0 is None:
        raise ValueError("Could not find PEAKMJD in the header.")

    # 2. Categorize observations into pools (by row index, so two rows with
    #    identical text are still treated as separate observations)
    mjds = [float(line.split()[1]) for line in obs_lines]
    deltas = [t1 - t0 for t1 in mjds]

    pool_less_0 = [i for i, delta in enumerate(deltas) if delta < 0]
    pool_greater_10 = [i for i, delta in enumerate(deltas) if delta > 10]

    # Make sure we have enough data to satisfy the bounds
    if not pool_less_0:
        raise ValueError("No observations found where t1 - t0 < 0.")
    if not pool_greater_10:
        raise ValueError("No observations found where t1 - t0 > 10.")

    # 3. Randomly select the first two required rows
    selected_less_0 = random.choice(pool_less_0)
    selected_greater_10 = random.choice(pool_greater_10)
    already_selected = {selected_less_0, selected_greater_10}

    # 4. Pool for the remaining 5 rows, excluding the two just picked
    pool_in_range = [
        i for i, delta in enumerate(deltas)
        if i not in already_selected and -15 <= delta <= 60
    ]

    if len(pool_in_range) < 5:
        raise ValueError(f"Not enough unique observations in the [-15, 60] range. Found {len(pool_in_range)}, need 5.")

    # Randomly sample exactly 5 unique rows from this range
    selected_in_range = random.sample(pool_in_range, 5)

    # 5. Combine and sort chronologically
    final_selection = [selected_less_0, selected_greater_10] + selected_in_range
    final_selection.sort(key=lambda i: mjds[i])

    # 6. Write the result back over the input file
    output_file = input_file
    with open(output_file, 'w') as f:
        for line in header:
            if line.startswith('NOBS:'):
                f.write(f"NOBS: {len(final_selection)}\n")
            else:
                f.write(line)

        for i in final_selection:
            row = obs_lines[i]
            # The last line of the input may lack a newline; after sorting it
            # may no longer be last, so terminate it explicitly.
            f.write(row if row.endswith('\n') else row + '\n')

        f.writelines(footer)



def process_dat_files(directory_path, action_func):
    """
    Apply ``action_func`` to every ``*.dat`` file directly inside a directory.

    Args:
        directory_path: Directory to scan (not recursive). A leading ``~`` is
            expanded to the user's home directory.
        action_func: Callable invoked once per file with its ``Path``.

    Returns:
        None. If the directory does not exist, an error message is printed
        and no file is processed.
    """
    # Path() alone does not expand "~", so "~/..." would never be found.
    directory = Path(directory_path).expanduser()

    # Check that the directory actually exists to avoid errors
    if not directory.is_dir():
        print(f"Error: The directory '{directory_path}' does not exist.")
        return

    # Snapshot and sort the matches up front: glob order depends on the
    # filesystem, and action_func may rewrite files while we iterate.
    for file_path in sorted(directory.glob('*.dat')):
        action_func(file_path)

def extract_columns(filename, columns_to_extract):
    """
    Read a whitespace-delimited table from a text file and return selected columns.

    The ``VARNAMES:`` line provides the column names and each ``SN:`` line
    provides one row; the leading keyword is dropped in both cases and all
    other lines are ignored. Columns whose values all parse as numbers are
    converted to a numeric dtype; the rest are left as read.

    Args:
        filename: Path of the file to read.
        columns_to_extract: List of column names to return.

    Returns:
        pandas.DataFrame restricted to ``columns_to_extract``.

    Raises:
        KeyError: If a requested column is not present in the table.
    """
    def _numeric_or_unchanged(column):
        # Replaces pd.to_numeric(errors='ignore'), which is deprecated since
        # pandas 2.2: convert when possible, otherwise keep the column as is.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    headers = []
    data = []

    # Read through the file line by line
    with open(filename, 'r') as f:
        for line in f:
            if line.startswith('VARNAMES:'):
                # Column names, excluding the 'VARNAMES:' prefix
                headers = line.strip().split()[1:]
            elif line.startswith('SN:'):
                # Data values, excluding the 'SN:' prefix
                data.append(line.strip().split()[1:])

    df = pd.DataFrame(data, columns=headers)

    # Convert string columns to numeric where possible
    df = df.apply(_numeric_or_unchanged)

    # Filter and return only the requested columns
    return df[columns_to_extract]

def run_exe_and_do_thing(omega_w):
    """
    Run the simulation, thin its light curves, run the fit, and read the results.

    Steps, each waiting for the previous one to finish:

    1. Run ``snlc_sim.exe`` with the simulation input file plus ``omega_w``.
    2. Rewrite every ``.dat`` file in the simulation output directory with
       ``filter_sn_lightcurve_random``.
    3. Run ``snlc_fit.exe`` with the fit namelist file.
    4. Read the selected columns from the ``.FITRES.TEXT`` file.

    Args:
        omega_w: List of strings with extra command-line arguments for the
            simulation, e.g. ``["OMEGA_MATTER 0.3", "w 0.7"]``. Each string is
            split on whitespace so key and value reach the executable as
            separate arguments; empty strings contribute nothing.

    Returns:
        pandas.DataFrame with the extracted fit-result columns.

    Raises:
        subprocess.CalledProcessError: If either executable exits non-zero.
    """
    # File names
    run_name = "TEST1"  # must match the name used inside the two input files
    sim_input_name = "sim_SDSS_custom.input"
    fit_nml_name = "snfit_SDSS_custom.nml"

    # Paths. Neither subprocess.run (without a shell) nor open() expands "~",
    # so resolve the home directory here.
    snana_dir = str(Path("~/SNANA").expanduser())
    bin_dir = f"{snana_dir}/SNDIR/bin"
    snlc_sim_path = f"{bin_dir}/snlc_sim.exe"
    snlc_fit_path = f"{bin_dir}/snlc_fit.exe"
    # Directory where snlc_sim.exe saves its output .dat files
    sim_dir_path = f"{snana_dir}/SNROOT/SIM/{run_name}"
    # Input files for the two executables
    snlc_sim_input = f"{snana_dir}/custom_input_files/{sim_input_name}"
    snlc_fit_input = f"{snana_dir}/custom_input_files/{fit_nml_name}"

    # "KEY VALUE" must become two argv entries, and "" must not become an
    # empty argument.
    extra_args = [token for item in omega_w for token in item.split()]

    sim_command = [snlc_sim_path, snlc_sim_input] + extra_args
    fit_command = [snlc_fit_path, snlc_fit_input]

    result = subprocess.run(sim_command, capture_output=True, text=True, check=True)

    print("The .exe finished running successfully!")
    print(f"Here is what the .exe output: {result.stdout}")
    print("cleaning")
    process_dat_files(sim_dir_path, filter_sn_lightcurve_random)
    print("extracted 7 points from dat files")
    print(f"Launching: {snlc_fit_path}")
    result = subprocess.run(fit_command, capture_output=True, text=True, check=True)
    print(result.stdout)
    print("fit done, output should be in where u launched this script")
    columns = ['zHEL', 'zHELERR', 'zCMB', 'zCMBERR', 'zHD', 'zHDERR', 'mB', 'x1', 'c']
    # NOTE(review): the message above says the fit output lands in the launch
    # directory, while this path points at <SNANA>/scripts — confirm they match.
    fit_output = f"{snana_dir}/scripts/{run_name}.FITRES.TEXT"
    return extract_columns(fit_output, columns)





In [None]:
# Run the pipeline once, passing a list that contains only an empty string as omega_w.
run_exe_and_do_thing([""])