In [18]:
import os
import pandas as pd
import re

In [19]:
import os
import pandas as pd
import re

def extract_period_info(file_path):
    """Extracts mean period and standard deviation from a given file."""
    with open(file_path, 'r') as f:
        content = f.read()
    
    # Debugging output
    print(f"\nReading file: {file_path}")
    print(content[:500])  # Print first 500 characters to verify format

    mean_match = re.search(r"Mean\s+period:\s*([\d\.]+)", content)
    std_match = re.search(r"Standard\s+deviation\s+\(error\s+on\s+period\):\s*([\d\.eE+-]+)", content)

    if mean_match and std_match:
        mean_period = float(mean_match.group(1))
        std_dev = float(std_match.group(1))
        print(f"Extracted -> Mean Period: {mean_period}, Std Dev: {std_dev}")  # Debugging
        return mean_period, std_dev
    else:
        print("Pattern not found in file.")  # Debugging
        return None, None

def process_observations(base_directory):
    """Traverses the bootstrap_500 directory and extracts period data from all ProposalIDs."""
    categories = ['HE', 'ME', 'LE']
    data = {cat: [] for cat in categories}

    for proposal_id in os.listdir(base_directory):
        proposal_path = os.path.join(base_directory, proposal_id)
        if not os.path.isdir(proposal_path) or not proposal_id.startswith("P"):  # Ensure it's a ProposalID folder
            continue

        for obs_id in os.listdir(proposal_path):
            obs_path = os.path.join(proposal_path, obs_id, 'bootstrap_output')
            if not os.path.isdir(obs_path):
                continue

            for category in categories:
                category_dir = os.path.join(obs_path, category)
                if not os.path.isdir(category_dir):
                    continue
                
                for file in os.listdir(category_dir):
                    if file.endswith(".qdp"):
                        file_path = os.path.join(category_dir, file)
                        mean_period, std_dev = extract_period_info(file_path)
                        if mean_period is not None and std_dev is not None:
                            lc_identifier = file.split('_lcnet_')[0]  # Extract LC file identifier
                            data[category].append([lc_identifier, mean_period, std_dev])

    # Save the results to CSV files
    for category in categories:
        if data[category]:  # Only save if there is data
            df = pd.DataFrame(data[category], columns=['LC Identifier', 'Mean Period', 'Std Dev'])
            output_file = f"{base_directory}/{category}_period_data.csv"
            df.to_csv(output_file, index=False)
            print(f"Saved {category} data to {output_file}")
        else:
            print(f"No data found for {category}, CSV not created.")


In [20]:
# Example usage
base_directory = "bootstrap_500"  # Path where ProposalIDs are stored
process_observations(base_directory)


Reading file: bootstrap_500/P0504279/P0504279001/bootstrap_output/HE/P050427900103_HE_lcnet_27-250keV_0.05s_all_t-chi1.qdp
Mean period: 9.79503
Standard deviation (error on period): 2.14336e-05

Extracted -> Mean Period: 9.79503, Std Dev: 2.14336e-05

Reading file: bootstrap_500/P0504279/P0504279001/bootstrap_output/HE/P050427900101_HE_lcnet_27-250keV_0.05s_all_t-chi1.qdp
Mean period: 9.7951
Standard deviation (error on period): 9.15423e-06

Extracted -> Mean Period: 9.7951, Std Dev: 9.15423e-06

Reading file: bootstrap_500/P0504279/P0504279001/bootstrap_output/HE/P050427900102_HE_lcnet_27-250keV_0.05s_all_t-chi1.qdp
Mean period: 9.79466
Standard deviation (error on period): 0.000102116

Extracted -> Mean Period: 9.79466, Std Dev: 0.000102116

Reading file: bootstrap_500/P0504279/P0504279001/bootstrap_output/HE/P050427900104_HE_lcnet_27-250keV_0.05s_all_t-chi1.qdp
Mean period: 9.79501
Standard deviation (error on period): 9.29516e-06

Extracted -> Mean Period: 9.79501, Std Dev: 9.2951