In [2]:
import re
from scipy.stats import chi2

In [7]:
def extract_all_fit_stats(ncomp, sigma, jittercorr, low, high, base_path=None):
    """
    Read one multi-component fit results file and pull out, for every
    observation block, the observation id, the final fit statistic and the
    degrees of freedom.

    The file name is built from the arguments:
    ``multi-comp-fit-results-{ncomp}comp-{sigma_str}{jitter_str}{low}-{high}.txt``
    where ``sigma_str`` is ``'{sigma}sigma'`` (or ``'noconf'`` when
    ``sigma <= 0``) and ``jitter_str`` is ``'-jittercorr-'`` or ``'-'``.

    Parameters
    ----------
    ncomp : int
        Number of components; used in the file name and echoed back.
    sigma : int or float
        Confidence level used in the file name; values <= 0 select 'noconf'.
    jittercorr : bool
        Whether the file name carries the 'jittercorr' tag.
    low, high : str
        Range tags placed at the end of the file name.
    base_path : str, optional
        Directory holding the results file. Defaults to the original
        hard-coded results directory.

    Returns
    -------
    (ncomp, results) : tuple
        ``results`` is a list of ``(obsid, final_stat, dof)`` tuples with
        ``obsid`` a str, ``final_stat`` a float and ``dof`` an int, in file
        order. Blocks without a parsable statistic/dof pair are skipped.

    Raises
    ------
    FileNotFoundError
        If the results file does not exist.
    """
    if base_path is None:
        base_path = '/Users/leodrake/Documents/MIT/ss433/HRC_2024/2Dfits/multi comp fit results'

    sigma_str = f'{sigma}sigma' if sigma > 0 else 'noconf'
    jitter_str = '-jittercorr-' if jittercorr else '-'

    filename = f'{base_path}/multi-comp-fit-results-{ncomp}comp-{sigma_str}{jitter_str}{low}-{high}.txt'
    with open(filename, 'r') as f:
        text = f.read()

    # Accept an optional sign and exponent so that values such as "-12.5" or
    # "1.5e+03" are read in full instead of being skipped or truncated.
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    header_re = re.compile(r"Observation:\s*(\S+)")
    stats_re = re.compile(
        r"Final fit statistic\s*=\s*(" + number + r").*?"
        r"Degrees of freedom\s*=\s*(\d+)",
        re.DOTALL
    )

    headers = list(header_re.finditer(text))
    results = []
    for idx, header in enumerate(headers):
        # Confine the search to this observation's own block, so a block with
        # no statistic can never borrow the numbers of the block after it.
        block_end = headers[idx + 1].start() if idx + 1 < len(headers) else len(text)
        stats = stats_re.search(text, header.end(), block_end)
        if stats is None:
            continue
        results.append((header.group(1), float(stats.group(1)), int(stats.group(2))))
    return ncomp, results

# Extract the per-observation fit statistics for the two models being compared.
ncomp1, results1 = extract_all_fit_stats(4, 1, True, '0000', '4040')
ncomp2, results2 = extract_all_fit_stats(5, 1, True, '0000', '4040')

# Compare the first list against the second one (not against itself).
if len(results1) != len(results2):
    print("Warning: The two files have a different number of observation blocks.")

# Match observations by id rather than by position, so a block missing from
# one file cannot shift every later pairing onto the wrong observation.
results2_by_obsid = {obsid2: (final2, dof2) for obsid2, final2, dof2 in results2}

# Write comparison to an output file
output_file = "/Users/leodrake/Documents/MIT/ss433/HRC_2024/2Dfits/fit comparisons/fit-comparison-results-4v5comp-1sigma-jittercorr-0000-4040.txt"
with open(output_file, 'w') as out:
    for obsid, final1, dof1 in results1:
        if obsid not in results2_by_obsid:
            print(f"Warning: Observation {obsid} has no {ncomp2}-comp result; skipping.")
            continue
        final2, dof2 = results2_by_obsid[obsid]

        delta_C  = final1 - final2
        extra_dof = dof1 - dof2  # difference in degrees of freedom between the two fits
        # Upper-tail chi-square probability; sf() avoids the precision loss of
        # computing 1 - cdf() when the probability is very small.
        p_value  = chi2.sf(delta_C, extra_dof)

        out.write(f"Observation {obsid}:\n")
        out.write(f"   {ncomp1}-comp final statistic: {final1:.2f}\n")
        out.write(f"   {ncomp2}-comp final statistic: {final2:.2f}\n")
        out.write(f"   Delta C = {delta_C:.2f}\n")
        out.write(f"   Additional degrees of freedom: {extra_dof}\n")
        out.write(f"   Computed p-value: {p_value:.10f}\n")
        out.write("-" * 60 + "\n")

print(f"Comparison results saved to {output_file}")

Comparison results saved to /Users/leodrake/Documents/MIT/ss433/HRC_2024/2Dfits/fit comparisons/fit-comparison-results-4v5comp-1sigma-jittercorr-0000-4040.txt


In [6]:
import re
from scipy.stats import chi2
import os

# This version of `extract_all_fit_stats` takes the input directory as `base_path`
# and returns an empty result list when the file is missing. It replaces the
# definition from the earlier cell.
def extract_all_fit_stats(ncomp, sigma, jittercorr, low, high, base_path):
    """
    Read one multi-component fit results file from ``base_path`` and pull
    out, for every observation block, the observation id, the final fit
    statistic and the degrees of freedom.

    The file name is
    ``{base_path}/multi-comp-fit-results-{ncomp}comp-{sigma_str}{jitter_str}{low}-{high}.txt``
    where ``sigma_str`` is ``'{sigma}sigma'`` (or ``'noconf'`` when
    ``sigma <= 0``) and ``jitter_str`` is ``'-jittercorr-'`` or ``'-'``.

    Returns
    -------
    (ncomp, results) : tuple
        ``results`` is a list of ``(obsid, final_stat, dof)`` tuples
        (str, float, int) in file order. Blocks without a parsable
        statistic/dof pair are skipped. If the file does not exist, an error
        is printed and ``results`` is an empty list.
    """
    sigma_str = f'{sigma}sigma' if sigma > 0 else 'noconf'
    jitter_str = '-jittercorr-' if jittercorr else '-'
    filename = f'{base_path}/multi-comp-fit-results-{ncomp}comp-{sigma_str}{jitter_str}{low}-{high}.txt'

    try:
        with open(filename, 'r') as f:
            text = f.read()
    except FileNotFoundError:
        print(f"Error: The file {filename} was not found.")
        return ncomp, []

    # Accept an optional sign and exponent so that values such as "-12.5" or
    # "1.5e+03" are read in full instead of being skipped or truncated.
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    header_re = re.compile(r"Observation:\s*(\S+)")
    stats_re = re.compile(
        r"Final fit statistic\s*=\s*(" + number + r").*?"
        r"Degrees of freedom\s*=\s*(\d+)",
        re.DOTALL
    )

    headers = list(header_re.finditer(text))
    results = []
    for idx, header in enumerate(headers):
        # Confine the search to this observation's own block, so a block with
        # no statistic can never borrow the numbers of the block after it.
        block_end = headers[idx + 1].start() if idx + 1 < len(headers) else len(text)
        stats = stats_re.search(text, header.end(), block_end)
        if stats is None:
            continue
        results.append((header.group(1), float(stats.group(1)), int(stats.group(2))))

    return ncomp, results

# ==============================================================================
# UPDATED FUNCTION
# ==============================================================================
def compare_and_write_results(comp_data1, comp_data2, params, output_path):
    """
    Compare two sets of fit results and write a per-observation report.

    The model with fewer components is treated as the simpler one, whichever
    argument it arrives in. Observations are matched by id. For each match
    the report lists both statistics, their difference (simple - complex),
    the difference in degrees of freedom, and a p-value.

    The p-value is the upper-tail chi-square probability of the statistic
    difference with ``extra_dof`` degrees of freedom. It is labelled
    "F-test" in the report (label kept unchanged for output compatibility),
    but no F distribution is involved. When the dof difference or the
    improvement is not positive, the p-value is reported as 1.0.

    Parameters
    ----------
    comp_data1, comp_data2 : tuple
        ``(ncomp, results)`` pairs, with ``results`` a list of
        ``(obsid, final_stat, dof)`` tuples.
    params : dict
        Must contain 'sigma', 'jittercorr', 'low' and 'high'; they are used
        only to build the output file name.
    output_path : str
        Existing directory in which the report file is written.

    Returns
    -------
    None. Prints a message and writes nothing if either result list is empty.
    """
    ncomp1, results1 = comp_data1
    ncomp2, results2 = comp_data2

    if not results1 or not results2:
        print("Cannot perform comparison due to missing data from one or both files.")
        return

    # --- Determine which model is simpler vs. more complex ---
    if ncomp1 > ncomp2:
        complex_ncomp, complex_results = ncomp1, results1
        simple_ncomp, simple_results = ncomp2, results2
    elif ncomp2 > ncomp1:
        complex_ncomp, complex_results = ncomp2, results2
        simple_ncomp, simple_results = ncomp1, results1
    else:
        print("Warning: Comparing two models with the same number of components. The F-test is not applicable.")
        # We can still write a side-by-side comparison.
        simple_ncomp, simple_results = ncomp1, results1
        complex_ncomp, complex_results = ncomp2, results2

    # --- Construct a consistent output filename ---
    sigma_str = f"{params['sigma']}sigma" if params['sigma'] > 0 else 'noconf'
    jitter_str = 'jittercorr' if params['jittercorr'] else 'no-jitter'
    output_file = (
        f"{output_path}/fit-comparison-results-"
        f"{simple_ncomp}v{complex_ncomp}comp-"  # Sorted order
        f"{sigma_str}-{jitter_str}-{params['low']}-{params['high']}.txt"
    )

    # --- Index both result sets by observation id for matching ---
    complex_results_dict = {obsid: (stat, dof) for obsid, stat, dof in complex_results}
    simple_obsids = {obsid for obsid, _, _ in simple_results}

    with open(output_file, 'w') as out:
        out.write(f"Comparison of {simple_ncomp}-component vs. {complex_ncomp}-component fits\n")
        out.write("=" * 70 + "\n\n")

        # Iterate through the simple model and find matches in the complex one
        for obsid_s, final_s, dof_s in simple_results:
            out.write(f"Observation {obsid_s}:\n")

            if obsid_s in complex_results_dict:
                final_c, dof_c = complex_results_dict[obsid_s]

                # Always calculate improvement as C_simple - C_complex
                delta_C_improvement = final_s - final_c
                extra_dof = dof_s - dof_c

                # Calculate p-value
                p_value = 1.0  # Default if test is not applicable
                if extra_dof > 0 and delta_C_improvement > 0:
                    # sf() is the upper-tail probability; it avoids the
                    # precision loss of 1 - cdf() for very small p-values.
                    p_value = chi2.sf(delta_C_improvement, extra_dof)

                out.write(f"  {simple_ncomp}-comp final statistic (Simpler): {final_s:.2f} (dof={dof_s})\n")
                out.write(f"  {complex_ncomp}-comp final statistic (Complex): {final_c:.2f} (dof={dof_c})\n")
                out.write(f"  Delta C (Improvement): {delta_C_improvement:.2f}\n")
                out.write(f"  Additional degrees of freedom: {extra_dof}\n")
                out.write(f"  P-value (F-test): {p_value:.10f}\n")
            else:
                out.write(f"  - No matching result found for the {complex_ncomp}-component model.\n")

            out.write("-" * 60 + "\n\n")

        # Report observations that exist only in the complex model's results,
        # so they are not silently missing from the comparison.
        for obsid_c in complex_results_dict:
            if obsid_c in simple_obsids:
                continue
            out.write(f"Observation {obsid_c}:\n")
            out.write(f"  - No matching result found for the {simple_ncomp}-component model.\n")
            out.write("-" * 60 + "\n\n")

    print(f"Comparison results successfully saved to {output_file}")


# ============================
# Main Script Execution
# ============================
if __name__ == "__main__":

    # --- Where the fit results are read from and where the report goes ---
    INPUT_DIR = '/Users/leodrake/Documents/MIT/ss433/HRC_2024/2Dfits/multi comp fit results'
    OUTPUT_DIR = '/Users/leodrake/Documents/MIT/ss433/HRC_2024/2Dfits/fit comparisons'
    # Make sure the report directory exists before anything is written to it.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # --- Settings shared by the input file names and the report name ---
    PARAMS = dict(sigma=1, jittercorr=True, low='0000', high='4040')

    # --- The two component counts to compare ---
    # Either order works: compare_and_write_results decides which model is
    # the simpler one from the component counts.
    NCOMP_A, NCOMP_B = 4, 5

    # --- Load both result sets (model A first, then model B) ---
    model_A_data, model_B_data = [
        extract_all_fit_stats(n_components, base_path=INPUT_DIR, **PARAMS)
        for n_components in (NCOMP_A, NCOMP_B)
    ]

    # --- Write the comparison report ---
    compare_and_write_results(model_A_data, model_B_data, PARAMS, OUTPUT_DIR)

Comparison results successfully saved to /Users/leodrake/Documents/MIT/ss433/HRC_2024/2Dfits/fit comparisons/fit-comparison-results-4v5comp-1sigma-jittercorr-0000-4040.txt
