In [None]:
import re
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.backends.backend_pdf import PdfPages
from collections import defaultdict

# --- Plot appearance -------------------------------------------------------
# Applied in one call; each key is validated exactly as with item assignment.
plt.rcParams.update({
    'figure.dpi': 400,
    'figure.figsize': [12, 10],
    'font.size': 14,
})

# --- Analysis settings -----------------------------------------------------
base_dir = '/Users/leodrake/Documents/MIT/ss433/HRC_2024/2Dfits'
center_pixel = 80.5     # pixel coordinate every observation is re-centred onto
g1_component = 'core'   # label of the reference component (matches the parser's labels)

# --- Pieces of the input/output file names ---------------------------------
num_comps = 4
sigma_val = 1
psf_type = 'empPSF'     # either 'empPSF' or '' (empty string)
bin_size = 0.25
range_str = '000000-160160'

# File names carry "-<psf_type>" only when a PSF type is set; an empty
# psf_type contributes nothing (no stray dash).
if psf_type:
    psf_insert = f"-{psf_type}"
else:
    psf_insert = ""

# Angular size of one binned image pixel.
# NOTE(review): 0.13175 is presumably the native detector pixel size in
# arcsec -- confirm against the instrument documentation.
pixscale_arcsec = 0.13175 * bin_size

In [None]:
def _rate_errors(first, second):
    """Return ``(minus_err, plus_err)`` magnitudes from a logged ``(a/b)`` pair."""
    # The log normally writes "(-minus/+plus)".  The pair is treated as
    # reversed only when the second entry carries the negative sign and the
    # first does not.  np.signbit is used instead of "< 0" so that a lower
    # bound logged as "-0.0000" (or a plain 0) is not mistaken for the upper one.
    if np.signbit(second) and not np.signbit(first):
        first, second = second, first
    return abs(first), abs(second)


def _label_components(comps):
    """Map raw component ids (g1, g2, c1, ...) to core/east/west/extra_N/bkg labels."""
    g_ids = [c_id for c_id in comps if c_id.startswith('g')]
    if not g_ids:
        return {}

    def nominal(c_id, pname):
        # a parameter missing from the log counts as 0 for ranking purposes
        return comps[c_id].get(pname, (0, 0, 0))[0]

    # core = the 'g' component with the largest fitted amplitude
    core_id = max(g_ids, key=lambda c_id: nominal(c_id, 'ampl'))

    # the rest are ordered by xpos: lowest -> 'east', highest -> 'west',
    # anything in between -> 'extra_1', 'extra_2', ...
    others = sorted((c_id for c_id in g_ids if c_id != core_id),
                    key=lambda c_id: nominal(c_id, 'xpos'))

    mapping = {core_id: 'core'}
    if others:
        mapping[others[0]] = 'east'
    if len(others) > 1:
        mapping[others[-1]] = 'west'
    for i, extra_id in enumerate(others[1:-1], start=1):
        mapping[extra_id] = f'extra_{i}'

    # every 'c' component is labelled as background
    for c_id in comps:
        if c_id.startswith('c'):
            mapping[c_id] = 'bkg'
    return mapping


def load_sherpa_log_to_dataframe(filename):
    """
    Parse a raw sherpa fit log into a tidy dataframe, one row per
    (observation, component).

    The text is split at every "Observation:" marker.  Within each block the
    observation id, the "Date: ... Exptime: ..." pair, the parameter lines
    ("g1.xpos  <val>  <low>  <up>") and the count-rate lines
    ("g1 : <val> (-<low>/+<up>)") are extracted.  Components are then
    relabelled: the 'g' component with the largest ``ampl`` becomes 'core',
    the remaining 'g' components are ordered by ``xpos`` (lowest 'east',
    highest 'west', others 'extra_N'), and 'c' components become 'bkg'.

    Parameters
    ----------
    filename : str
        Path of the log file to read.

    Returns
    -------
    pandas.DataFrame
        Columns: obs_id, mjd, exptime, component, one ``<param>`` /
        ``<param>_minus`` / ``<param>_plus`` triple per parsed parameter
        (error columns as absolute numeric values, NaN where the log shows
        '-------'), and nominal / minus_err / plus_err for the count rate
        (NaN when no rate line exists for the component).  If no block has a
        date, ``mjd`` is replaced by an integer code per obs_id.  An empty
        frame is returned when the file is missing (a message is printed) or
        nothing could be parsed.
    """
    obs_id_re = re.compile(r"Observation:\s*(\d+)")
    date_re = re.compile(r"Date:\s*([\d\.]+).*?Exptime:\s*([\d\.]+)")

    # parameter lines (captures g1.xpos, g2.ampl, etc.).  The numeric class
    # includes '+' so values written with a positive exponent (e.g. 2.5e+02)
    # are parsed instead of silently dropping the whole line.
    param_line_re = re.compile(
        r"^\s*(g\d+|c\d+)\.(?P<param>[a-z0-9]+)\s+(?P<val>[-+\d.eE]+)\s+"
        r"(?P<low>[-+\d.eE]+|-------)\s+(?P<up>[-+\d.eE]+|-------)",
        re.M,
    )

    # count-rate lines such as: "  g1    :  1.5056  (-0.2768/+0.3018)"
    rate_line_re = re.compile(
        r"^\s*(g\d+|c\d+)\s*:\s*(?P<val>[-\d.eE]+)\s*"
        r"\((?P<low>[-\d.eE+]+)\s*/\s*(?P<up>[-\d.eE+]+)\)",
        re.M,
    )

    try:
        with open(filename, 'r') as f:
            raw_text = f.read()
    except FileNotFoundError:
        print(f"error: file not found at {filename}")
        return pd.DataFrame()

    rows = []

    # zero-width split keeps the "Observation:" marker at the start of each block
    for block in re.split(r'(?=Observation:)', raw_text):
        if not block.strip():
            continue

        # 1. metadata
        obs_match = obs_id_re.search(block)
        if not obs_match:
            continue
        obs_id = int(obs_match.group(1))

        date_match = date_re.search(block)
        if date_match:
            mjd, exptime = float(date_match.group(1)), float(date_match.group(2))
        else:
            mjd, exptime = np.nan, np.nan

        # 2. parameters: comps[comp_id][param] = (val, low_str, up_str)
        # (bounds stay as strings here; they are made numeric in step 6)
        comps = defaultdict(dict)
        for match in param_line_re.finditer(block):
            comps[match.group(1)][match.group('param')] = (
                float(match.group('val')),
                match.group('low'),
                match.group('up'),
            )

        # 3. count rates: rates[comp_id] = (val, minus_err, plus_err)
        rates = {}
        for match in rate_line_re.finditer(block):
            minus_err, plus_err = _rate_errors(
                float(match.group('low')), float(match.group('up'))
            )
            rates[match.group(1)] = (float(match.group('val')), minus_err, plus_err)

        # 4. relabel components; blocks without any 'g' component are skipped
        mapping = _label_components(comps)
        if not mapping:
            continue

        # 5. one row per labelled component
        for old_id, new_name in mapping.items():
            row = {
                'obs_id': obs_id,
                'mjd': mjd,
                'exptime': exptime,
                'component': new_name,
            }

            for param, (val, low_s, up_s) in comps[old_id].items():
                row[param] = val
                row[f'{param}_minus'] = low_s
                row[f'{param}_plus'] = up_s

            # NaN placeholders when the block has no rate line for this component
            r_val, r_min, r_plus = rates.get(old_id, (np.nan, np.nan, np.nan))
            row['nominal'] = r_val
            row['minus_err'] = r_min
            row['plus_err'] = r_plus

            rows.append(row)

    df = pd.DataFrame(rows)

    if df.empty:
        return df

    # 6. parameter bounds -> absolute numeric values ('-------' becomes NaN)
    err_cols = [c for c in df.columns if c.endswith('_minus') or c.endswith('_plus')]
    for col in err_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce').abs()

    # no usable dates at all: fall back to an integer code per observation id
    if df['mjd'].isna().all():
        df['mjd'] = df['obs_id'].astype('category').cat.codes

    return df

In [None]:
# Load data directly from the raw log.
# The raw file name is assembled from the configuration cell's settings.
raw_filename = f'multi-comp-fit-results-{num_comps}comp-{sigma_val}sigma-jittercorr{psf_insert}-{range_str}.txt'
raw_file_path = os.path.join(base_dir, 'multi comp fit results', raw_filename)

print(f"Loading and processing: {raw_filename}")
df = load_sherpa_log_to_dataframe(raw_file_path)

# The loader returns an empty frame for a missing or unparseable file; stop
# here rather than failing later with a less obvious error.
if df.empty:
    raise ValueError("DataFrame is empty. Check if the file exists and has correct format.")

# Save a clean, readable copy of the parsed table (CSV).
clean_filename = f'tracker-{num_comps}comp-{sigma_val}sigma-jittercorr{psf_insert}-{range_str}.csv'
clean_dir = os.path.join(base_dir, 'component tracking results')
# to_csv does not create missing parent directories, so make sure the output
# folder exists (no-op when it is already there).
os.makedirs(clean_dir, exist_ok=True)
clean_file_path = os.path.join(clean_dir, clean_filename)
df.to_csv(clean_file_path, index=False)
print(f"Clean readable file saved to: {clean_file_path}")

# Recenter every observation on its reference (core) component.
# Pull out the reference component's fitted position, one row per observation.
is_reference = df['component'] == g1_component
ref_df = df.loc[is_reference, ['obs_id', 'xpos', 'ypos']].rename(
    columns={'xpos': 'ref_x', 'ypos': 'ref_y'}
)

# Attach that reference position to every component of the same observation.
df = df.merge(ref_df, on='obs_id', how='left')

# Shift needed to move the reference onto center_pixel.  Observations without
# a reference row fall back to center_pixel itself, i.e. a zero shift.
shift_x = df['ref_x'].fillna(center_pixel) - center_pixel
shift_y = df['ref_y'].fillna(center_pixel) - center_pixel

df['xpos'] = df['xpos'] - shift_x
df['ypos'] = df['ypos'] - shift_y

# The helper columns were only needed for the shift.
df = df.drop(columns=['ref_x', 'ref_y'])

# Offsets from the center pixel, position angle, radius, and their errors.
df['xoff'] = df['xpos'] - center_pixel
df['yoff'] = df['ypos'] - center_pixel

# PA = atan2(-xoff, yoff), stored both in degrees and in radians
pa_rad = np.arctan2(-df['xoff'], df['yoff'])
df['PA'] = np.degrees(pa_rad)
df['pa_rad'] = pa_rad

# Partial derivatives of PA with respect to x and y; left as NaN where the
# component sits exactly on the center (squared distance of zero).
d2 = df['xoff']**2 + df['yoff']**2
off_center = d2 != 0
dpa_dx = np.where(off_center, -df['yoff'] / d2, np.nan)
dpa_dy = np.where(off_center, df['xoff'] / d2, np.nan)


def _pa_err_deg(sig_x, sig_y):
    """Quadrature sum of the x/y position errors propagated into PA, in degrees."""
    return np.degrees(np.sqrt((dpa_dx * sig_x)**2 + (dpa_dy * sig_y)**2))


df['PA_err_plus'] = _pa_err_deg(df['xpos_plus'], df['ypos_plus'])
df['PA_err_minus'] = _pa_err_deg(df['xpos_minus'], df['ypos_minus'])

df['radius'] = np.hypot(df['xoff'], df['yoff']) * pixscale_arcsec

# radius back in pixel units, used as the denominator of the propagated error
r_pix = df['radius'] / pixscale_arcsec
is_zero = np.isclose(r_pix, 0)


def _radius_err_arcsec(sig_x, sig_y):
    """Propagated radius error; at zero radius the x/y errors are combined directly."""
    on_center = np.hypot(sig_x, sig_y)
    elsewhere = np.sqrt((df['xoff']*sig_x)**2 + (df['yoff']*sig_y)**2)/r_pix
    return np.where(is_zero, on_center, elsewhere) * pixscale_arcsec


df['radius_plus_err'] = _radius_err_arcsec(df['xpos_plus'], df['ypos_plus'])
df['radius_minus_err'] = _radius_err_arcsec(df['xpos_minus'], df['ypos_minus'])

# Order rows by time and add a flag column for suspect data.
# Note: an earlier version flagged 'g2' here. Components are now relabelled,
# so every row is marked 'clean'; add new logic here if specific components
# need to be flagged.
df = df.sort_values('mjd').assign(flag='clean')

# Count the distinct component labels *before* pivoting.
n_comps_found = df['component'].nunique()

# The output name reuses the configuration values so it mirrors the input name.
output_csv_name = f'gaussian-component-data-{n_comps_found}-comps-{sigma_val}sigma-jittercorr{psf_insert}-{range_str}.csv'
output_csv_filename = os.path.join(base_dir, output_csv_name)

# Wide tables (rows: mjd, columns: component) for the rate and its two errors.
rate_columns = ['nominal', 'plus_err', 'minus_err']
pivoted = df.pivot_table(index='mjd', columns='component', values=rate_columns)
df_nom = pivoted['nominal'].sort_index()
df_plus = pivoted['plus_err'].sort_index()
df_minus = pivoted['minus_err'].sort_index()

# Save the long-format DataFrame.
df.to_csv(output_csv_filename, index=False)
print(f"DataFrame with {n_comps_found} components saved to {output_csv_filename}")

In [None]:
grouped_by_comp = df.groupby('component')

# Components to draw: every column of the rate table except the reference
# component (core), so the reference never takes a slot in the colour cycle.
comps = [label for label in df_nom.columns if label != g1_component]
colors = ['dodgerblue', 'mediumseagreen',  'mediumslateblue','lightcoral']

n = len(comps)
delta = 0.02

# Per-component colour (cycling through the palette) and a small x shift,
# spaced by `delta` and symmetric about zero across the components.
comp_colors = {}
offsets = {}
for position, comp_label in enumerate(comps):
    comp_colors[comp_label] = colors[position % len(colors)]
    offsets[comp_label] = (position - (n - 1) / 2) * delta

# PDF name built from the same configuration pieces as the other outputs
pdf_name = f'position-rates-plots-{n+1}comp-{sigma_val}sigma-jittercorr{psf_insert}-{range_str}.pdf'
pdf_filename = os.path.join(base_dir, pdf_name)

# Write both figures into one PDF file; each pdf.savefig(...) call below adds
# a page, and the file is finalised when the `with` block exits.
with PdfPages(pdf_filename) as pdf:
    # First figure: PA vs time + stacked count rates
    fig = plt.figure(figsize=(12, 6))
    # n rows x 2 columns: the left column is merged into one PA panel, the
    # right column holds one count-rate panel per entry of `comps`.
    # hspace=0 stacks the right-hand panels with no vertical gap.
    # NOTE(review): the grid is built with n rows, so this presumably needs at
    # least one non-reference component -- confirm behaviour for n == 0.
    gs  = GridSpec(n, 2, figure=fig, width_ratios=[1,1], hspace=0, wspace=0.3)

    # Left: PA vs time
    ax_pa = fig.add_subplot(gs[:,0])
    for comp, color in comp_colors.items():
        # skip labels that have no rows in the long-format table
        if comp in grouped_by_comp.groups:
            grp = grouped_by_comp.get_group(comp)
            
            # Plot all components
            # (Flagging logic removed as it depended on 'g2' specifically)
            # yerr is given as [lower, upper] so the bars can be asymmetric
            ax_pa.errorbar(
                grp['mjd'], grp['PA'],
                yerr=[grp['PA_err_minus'], grp['PA_err_plus']],
                marker='.', linestyle='-', capsize=3, color=color, label=comp
            )
            
    ax_pa.set_ylabel('Position Angle (°)')
    ax_pa.set_xlabel('MJD')
    ax_pa.set_title('Position Angle vs Time')
    ax_pa.set_ylim(-180,180)
    ax_pa.grid(True)
    ax_pa.legend()

    # Right: stacked count-rate panels
    # Rows are visited bottom-up so that the bottom panel (row n-1) exists
    # first; every other panel then shares its x-axis and hides its own
    # x tick labels.
    ax_bottom = None
    for i_comp, focus in reversed(list(enumerate(comps))):
        if i_comp == n - 1:
            ax = fig.add_subplot(gs[i_comp, 1])
            ax.set_xlabel('MJD')
            ax_bottom = ax
        else:
            ax = fig.add_subplot(gs[i_comp, 1], sharex=ax_bottom)
            ax.tick_params(labelbottom=False)
        ax.grid(True, zorder=0)
        
        # Every panel shows all components; only `focus` is highlighted.
        for comp_name, current_color in comp_colors.items():
            if comp_name not in df_nom.columns: continue
            
            # shift this component's MJD values by its entry in `offsets`
            x_val = df_nom.index + offsets[comp_name]
            y_val = df_nom[comp_name]
            y_err_val = [df_minus[comp_name], df_plus[comp_name]]
            
            # focus component: opaque, connected by a line, labelled, drawn on
            # top; all others: faded markers only, no legend entry
            alpha_val, line_style, label_text, z_order = (1.0, '-', comp_name, 10) if comp_name == focus else (0.3, '', None, 1)
            
            ax.errorbar(
                x_val, y_val, yerr=y_err_val, color=current_color,
                marker='.', linestyle=line_style, capsize=3,
                alpha=alpha_val, label=label_text, zorder=z_order
            )

        # NOTE(review): tick positions are hard-coded -- confirm they suit the
        # count-rate range of the data being plotted.
        ax.set_yticks([0.1,0.3])
        if ax.has_data():
             ax.legend(loc='upper left')

    # Shared title and y label for the right-hand column, placed by hand with
    # fig.text (x, y given as fractions of the figure).
    fig.text(0.73, 0.885, 'Component Count Rates', ha='center', va='bottom', fontsize=17)
    fig.text(0.495, 0.5, 'Count rate (counts/s)', va='center', rotation='vertical')
    
    pdf.savefig(fig)
    plt.close(fig) # Prevent inline display

    # Second figure: polar plot of PA on sky
    fig_polar = plt.figure(figsize=(8,6))
    ax_polar = fig_polar.add_subplot(111, projection='polar')
    # zero angle at the top ('N'), direction +1, full -180..180 degree range,
    # radial tick labels drawn along the 135 degree direction
    ax_polar.set_theta_zero_location('N')
    ax_polar.set_theta_direction(1)
    ax_polar.set_thetamin(-180)
    ax_polar.set_thetamax(180)
    ax_polar.set_rlabel_position(135)

    for comp, color in comp_colors.items():
        if comp in grouped_by_comp.groups:
            grp = grouped_by_comp.get_group(comp)
            
            # angle in radians, radius as stored in df['radius']; missing
            # errors are replaced by 0 so they draw as zero-length bars
            ax_polar.errorbar(
                grp['pa_rad'], grp['radius'],
                xerr=[np.deg2rad(grp['PA_err_minus'].fillna(0)), np.deg2rad(grp['PA_err_plus'].fillna(0))],
                yerr=[grp['radius_minus_err'].fillna(0), grp['radius_plus_err'].fillna(0)],
                marker='.', linestyle='', color=color, capsize=3, label=comp
            )

    # angular grid every 30 degrees from -150 to 150, plus 180
    angles = np.arange(-150, 180, 30)
    angles = np.append(angles, 180)
    # set after plotting so the radial axis starts at 0
    ax_polar.set_rmin(0)
    ax_polar.set_thetagrids(angles, [f"{int(a)}°" for a in angles])
    ax_polar.set_title('On Sky Component Positions (arcsec)')
    # legend anchored outside the axes, beyond the upper-right corner
    ax_polar.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))

    pdf.savefig(fig_polar)
    plt.close(fig_polar) # Prevent inline display
    
print(f"Plots saved to: {pdf_filename}")