In [11]:
import os
import re
from collections import defaultdict
from astropy.io import fits
import subprocess

In [12]:
# # --- CONFIGURATION ---
# base_dir = "/home/supremekai/thesis/J0243_6p6124/hxmt/hxmt_me_l3_products_pipeline"  # Set your base directory
# xcm_output_dir = "/home/supremekai/thesis/J0243_6p6124/hxmt/hxmt_spec_results"  # Where all .xcm files and log go
# os.makedirs(xcm_output_dir, exist_ok=True)

# log_path = os.path.join(xcm_output_dir, "xcm_generation.log")
# log_entries = []

# # --- DATA COLLECTION ---
# exp_dict = defaultdict(lambda: {'LE': {}, 'ME': {}, 'HE': {}})

# for root, dirs, files in os.walk(base_dir):
#     if "pipeline_output" in root:
#         for fname in files:
#             match = re.match(r"(.*?)_(LE|ME|HE)_spec(?:_g0)?\.(pha|rsp)", fname)
#             if match:
#                 expid, inst, ftype = match.groups()
#                 full_path = os.path.join(root, fname)
#                 exp_dict[expid][inst][ftype] = full_path
#             elif re.match(r"(.*?)_(LE|ME|HE)_specbkg\.(pha|bak)", fname):
#                 match = re.match(r"(.*?)_(LE|ME|HE)_specbkg\.(pha|bak)", fname)
#                 expid, inst, ftype = match.groups()
#                 full_path = os.path.join(root, fname)
#                 exp_dict[expid][inst]['bak'] = full_path

# # --- Generate .xcm ---
# for expid, inst_data in exp_dict.items():
#     spectrum_num = 1
#     xspec_lines = [
#         "statistic chi",
#         "cd ../"
#     ]
#     missing_items = []

#     for inst in ['LE', 'ME', 'HE']:
#         files = inst_data[inst]
#         if 'pha' not in files:
#             missing_items.append(f"{inst}.pha missing")
#             continue

#         xspec_lines.append(f"data {spectrum_num}:{spectrum_num} {files['pha']}")

#         if 'rsp' in files:
#             xspec_lines.append(f"response  1:{spectrum_num} {files['rsp']}")
#         else:
#             missing_items.append(f"{inst}.rsp missing")

#         if 'bak' in files:
#             xspec_lines.append(f"backgrnd {spectrum_num} {files['bak']}")
#         else:
#             missing_items.append(f"{inst}.bak missing")

#         spectrum_num += 1

#     if missing_items:
#         log_entries.append(f"Skipped {expid}: " + "; ".join(missing_items))
#         continue

#     # Append the rest of the xspec options
#     xspec_lines += [
#         "ignore 1:1-119,1053-1536 2:1-86,461-1024 3:1-5,25-256",
#         "method leven 10 0.01",
#         "abund wilm",
#         "xsect vern",
#         "cosmo 70 0 0.73",
#         "xset delta 0.01",
#         "systematic 0",
#         "bayes off"
#     ]

#     xcm_path = os.path.join(xcm_output_dir, f"{expid}.xcm")
#     with open(xcm_path, 'w') as f:
#         f.write("\n".join(xspec_lines))

# # --- Write log ---
# with open(log_path, 'w') as log_file:
#     if log_entries:
#         log_file.write("Skipped EXPOSURE_IDs with issues:\n")
#         for entry in log_entries:
#             log_file.write(entry + "\n")
#     else:
#         log_file.write("All EXPOSURE_IDs processed successfully.\n")


In [13]:
def collect_exposure_files(base_dir):
    """Index the spectral products found below ``base_dir``.

    Only directories whose path contains ``"pipeline_output"`` are inspected.
    File names are recognised by two patterns:

    * ``<expid>_<LE|ME|HE>_spec[_g0].pha`` / ``.rsp`` -> stored under the
      ``'pha'`` / ``'rsp'`` key of that instrument;
    * ``<expid>_<LE|ME|HE>_specbkg.pha`` / ``.bak`` -> stored under ``'bak'``.

    The whole file name must match, so stray copies such as
    ``X_LE_spec.pha.orig`` or ``X_LE_spec.rsp~`` are ignored instead of
    silently replacing the real product.

    Parameters
    ----------
    base_dir : str
        Root directory that is walked recursively.

    Returns
    -------
    collections.defaultdict
        ``{expid: {'LE': {...}, 'ME': {...}, 'HE': {...}}}`` where each inner
        dict maps ``'pha'``, ``'rsp'`` and/or ``'bak'`` to a full path.
        Looking up an unknown ``expid`` creates an empty entry.
    """
    source_pattern = re.compile(r"(.*?)_(LE|ME|HE)_spec(?:_g0)?\.(pha|rsp)")
    background_pattern = re.compile(r"(.*?)_(LE|ME|HE)_specbkg\.(pha|bak)")

    exp_dict = defaultdict(lambda: {'LE': {}, 'ME': {}, 'HE': {}})

    for root, dirs, files in os.walk(base_dir):
        # Sorting in place makes the traversal (and therefore which file wins
        # when two names map to the same slot) reproducible between runs.
        dirs.sort()
        if "pipeline_output" not in root:
            continue

        for fname in sorted(files):
            match = source_pattern.fullmatch(fname)
            if match:
                expid, inst, ftype = match.groups()
                exp_dict[expid][inst][ftype] = os.path.join(root, fname)
                continue

            match = background_pattern.fullmatch(fname)
            if match:
                # The extension is irrelevant here: both .pha and .bak
                # background files land in the 'bak' slot.
                expid, inst, _ = match.groups()
                exp_dict[expid][inst]['bak'] = os.path.join(root, fname)

    return exp_dict


In [14]:
def _edited_header_path(pha_path):
    """Return the path of the header-corrected sibling of ``pha_path``."""
    name = os.path.basename(pha_path)
    prefix = pha_path[:len(pha_path) - len(name)]  # directory part, verbatim

    # Only the file name is rewritten; a ".pha" inside a directory name must
    # not be touched or the output would point into a non-existent folder.
    stem, found, tail = name.rpartition(".pha")
    if found:
        return f"{prefix}{stem}_editedheader.pha{tail}"

    # No ".pha" in the name: still produce a distinct file rather than
    # writing over the input.
    stem, ext = os.path.splitext(name)
    return f"{prefix}{stem}_editedheader{ext}"


def fix_header_if_needed(pha_path, logger):
    """Write a copy of a PHA file with ``HDUCLAS2`` set to ``"TOTAL"`` if needed.

    The header of extension 1 is inspected. When its ``HDUCLAS2`` keyword is
    ``"ALL"`` the value is changed to ``"TOTAL"`` and the file is written to
    ``<name>_editedheader.pha`` next to the original (overwriting any previous
    copy). Otherwise nothing is written.
    NOTE(review): presumably downstream tools reject ``"ALL"`` - confirm.

    Parameters
    ----------
    pha_path : str
        Path of the spectrum file to inspect.
    logger : list
        Receives one message on success of a rewrite or on failure.

    Returns
    -------
    str or None
        Path of the edited copy, the unchanged ``pha_path`` when no edit was
        required, or ``None`` when the file could not be read or written.
    """
    try:
        with fits.open(pha_path) as hdul:
            header = hdul[1].header
            if header.get("HDUCLAS2", "") != "ALL":
                return pha_path  # No need to change

            header["HDUCLAS2"] = "TOTAL"
            edited_path = _edited_header_path(pha_path)
            hdul.writeto(edited_path, overwrite=True)
    except Exception as e:
        # Deliberately broad: any failure on one file is logged and reported
        # to the caller as None so that the batch keeps going.
        logger.append(f"Header fix failed for {pha_path}: {e}")
        return None

    logger.append(f"Header fixed: {edited_path}")
    return edited_path

def _grouped_output_path(infile):
    """Return the ``*_grp_min.pha`` output path that belongs to ``infile``."""
    name = os.path.basename(infile)
    prefix = infile[:len(infile) - len(name)]  # directory part, verbatim

    # Header-edited copies drop their marker; untouched spectra simply get the
    # suffix inserted. Either way the result differs from the input path, so
    # clobber=yes can never overwrite the spectrum being grouped.
    for suffix in ("_editedheader.pha", ".pha"):
        stem, found, tail = name.rpartition(suffix)
        if found:
            return f"{prefix}{stem}_grp_min.pha{tail}"

    stem, ext = os.path.splitext(name)
    return f"{prefix}{stem}_grp_min{ext}"


def run_ftgrouppha(infile, grouptype, groupscale, respfile, logger):
    """Group a spectrum by running the external ``ftgrouppha`` command.

    Parameters
    ----------
    infile : str
        Spectrum to group. ``X_editedheader.pha`` and ``X.pha`` both produce
        ``X_grp_min.pha`` in the same directory.
    grouptype, groupscale
        Passed through as ``grouptype=`` and ``groupscale=``.
    respfile : str
        Passed through as ``respfile=``.
    logger : list
        Receives one success or failure message.

    Returns
    -------
    str or None
        Path of the grouped file, or ``None`` when the command exited with a
        non-zero status or could not be started at all.
    """
    outfile = _grouped_output_path(infile)
    cmd = [
        "ftgrouppha",
        f"infile={infile}",
        f"outfile={outfile}",
        f"respfile={respfile}",
        "clobber=yes",
        f"grouptype={grouptype}",
        f"groupscale={groupscale}",
    ]

    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable ftgrouppha binary, which
        # would otherwise abort the whole batch instead of being logged.
        logger.append(f"ftgrouppha failed: {infile} - {e}")
        return None

    logger.append(f"ftgrouppha success: {outfile}")
    return outfile



In [15]:
def generate_xcm(exp_dict, output_dir):
    """Write one ``<expid>_min.xcm`` command file per complete exposure.

    For every exposure the instruments are visited in the order LE, ME, HE.
    Each instrument whose ``'pha'`` entry exists on disk contributes a
    ``data``, ``response`` and ``backgrnd`` line, numbered consecutively from
    1. If any instrument lacks a PHA on disk, a response entry or a background
    entry, the exposure is skipped and no file is written for it.

    Parameters
    ----------
    exp_dict : mapping
        ``{expid: {'LE': {...}, 'ME': {...}, 'HE': {...}}}`` with optional
        ``'pha'``, ``'rsp'`` and ``'bak'`` paths per instrument.
    output_dir : str
        Directory for the ``.xcm`` files; created when absent.

    Returns
    -------
    list of str
        One message per exposure, stating either the written file or the
        reasons the exposure was skipped.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Fixed settings appended verbatim after the per-instrument load commands.
    closing_commands = [
        "ignore 1:**-2.0 9.0-** 2:**-8.0 28.0-** 3:**-28.0  50.0-**",
        "method leven 10 0.01",
        "abund wilm",
        "xsect vern",
        "cosmo 70 0 0.73",
        "xset delta 0.01",
        "systematic 0",
        "bayes off",
    ]

    report = []
    for exposure, per_instrument in exp_dict.items():
        commands = ["statistic chi"]
        problems = []
        slot = 0  # spectra loaded so far; the next one gets slot + 1

        for instrument in ('LE', 'ME', 'HE'):
            products = per_instrument[instrument]
            spectrum = products.get('pha')
            response = products.get('rsp')
            background = products.get('bak')

            # Only the spectrum is checked on disk; the other two entries are
            # merely required to be present in the mapping.
            if not (spectrum and os.path.exists(spectrum)):
                problems.append(f"{instrument}: no grouped PHA")
                continue

            slot += 1
            commands.append(f"data {slot}:{slot} {spectrum}")

            if not response:
                problems.append(f"{instrument}: missing RSP")
            else:
                commands.append(f"response 1:{slot} {response}")

            if not background:
                problems.append(f"{instrument}: missing BAK")
            else:
                commands.append(f"backgrnd {slot} {background}")

        if problems:
            details = "; ".join(problems)
            report.append(f"Skipped {exposure} due to missing files: {details}")
            continue

        commands.extend(closing_commands)

        target = os.path.join(output_dir, f"{exposure}_min.xcm")
        with open(target, 'w') as handle:
            handle.write("\n".join(commands))

        report.append(f"Wrote XCM for {exposure}: {target}")

    return report


In [16]:
if __name__ == "__main__":
    # Driver: index the pipeline products, fix headers and group each
    # spectrum, write one XCM file per complete exposure, then dump the log.
    base_dir = "/home/supremekai/thesis/J0243_6p6124/hxmt/hxmt_me_l3_products_pipeline"
    xcm_output_dir = "/home/supremekai/thesis/J0243_6p6124/hxmt/hxmt_spec_results"
    log_path = os.path.join(xcm_output_dir, "xcm_generation.log")

    groupscale=30
    grouptype="min" #other options are opt, optmin, smin
    # Per-instrument overrides of ``groupscale``; instruments not listed here
    # use the default above.
    groupscale_by_inst = {"LE": 100}

    os.makedirs(xcm_output_dir, exist_ok=True)
    log_entries = []

    # Step 1: Collect all relevant files
    exp_dict = collect_exposure_files(base_dir)

    # Step 2: Fix headers and run ftgrouppha
    for expid, inst_data in exp_dict.items():
        for inst in ['LE', 'ME', 'HE']:
            files = inst_data[inst]
            if 'pha' not in files:
                continue

            # A spectrum without a response used to raise KeyError here and
            # abort the whole batch. Such an exposure is rejected in Step 3
            # anyway, so just record it and move on.
            respfile = files.get('rsp')
            if not respfile:
                log_entries.append(f"Grouping skipped for {expid} {inst}: missing RSP")
                continue

            original_pha = files['pha']
            edited_pha = fix_header_if_needed(original_pha, log_entries)
            if not edited_pha:
                continue

            inst_groupscale = groupscale_by_inst.get(inst, groupscale)
            grouped_pha = run_ftgrouppha(edited_pha, grouptype, inst_groupscale, respfile, log_entries)
            if grouped_pha:
                # Update exp_dict to point to grouped PHA
                files['pha'] = grouped_pha

    # Step 3: Generate XCM using updated paths
    log_entries += generate_xcm(exp_dict, xcm_output_dir)

    # Step 4: Write log
    with open(log_path, 'w') as log_file:
        if log_entries:
            log_file.write("\n".join(log_entries))
        else:
            log_file.write("All EXPOSURE_IDs processed successfully.\n")