In [None]:
import os
import glob
import pandas as pd
from utils import get_unsafe_channels  # Make sure this function is defined in utils.py

def _column_base_name(file_path):
    """
    Return the column-name prefix derived from a CSV file path.

    The directory and the file extension are dropped first, then the name is
    cut at the first occurrence of '_14'. Stripping the extension first keeps
    '.csv' out of the column names when the file name contains no '_14'.
    NOTE(review): '_14' presumably marks the start of a numeric suffix in the
    file names -- confirm against the files' naming scheme.
    """
    stem = os.path.splitext(os.path.basename(file_path))[0]
    return stem.split('_14')[0]


def _read_two_columns(file_path):
    """
    Read a header-less CSV file and return a DataFrame with exactly two columns.

    A single-column file gets a second column filled with pd.NA (a warning is
    printed); a file with more than two columns is truncated to the first two.
    """
    df = pd.read_csv(file_path, header=None)
    # If the file has 1 column, add a second column with NaN values.
    if df.shape[1] == 1:
        print(f"Warning: {file_path} has {df.shape[1]} column; expected 2. Adding a column of NaNs.")
        df[1] = pd.NA
    # If there are more than 2 columns, keep only the first 2.
    elif df.shape[1] > 2:
        df = df.iloc[:, :2]
    return df


def combine_csv(dir_path, ifo):
    """
    Combine CSV files in the given directory after filtering out unsafe channels.

    Files are processed in sorted path order so the column order of the result
    is the same on every run. Each file contributes two columns named
    '<base>_freq' and '<base>_corr', where <base> is the file name without its
    extension, cut at the first '_14'.

    Parameters:
      - dir_path (str): Directory containing CSV files.
      - ifo (str): Interferometer identifier (used to filter unsafe channels).

    Returns:
      DataFrame: A combined DataFrame whose columns are renamed based on each file.

    Raises:
      FileNotFoundError: If the directory contains no '*.csv' files.
      ValueError: If no file survives the unsafe-channel filter, if no file
        could be read, or if the column count does not match the generated names.
    """
    # Use absolute path for consistency.
    dir_path = os.path.abspath(dir_path)
    # glob returns paths in filesystem order; sort them for a reproducible result.
    all_files = sorted(glob.glob(os.path.join(dir_path, "*.csv")))
    if not all_files:
        raise FileNotFoundError(f"No CSV files found in directory: {dir_path}")

    # Get the list of unsafe channels and sanitize their names the same way
    # (':' and '-' become '_') before comparing them with file names.
    # Assumes get_unsafe_channels returns something indexable by 'channel'
    # that yields channel-name strings -- TODO confirm against utils.py.
    chan_removes = get_unsafe_channels(ifo=ifo)['channel']
    unsafe_prefixes = tuple(
        chan.replace(':', '_').replace('-', '_') for chan in chan_removes
    )

    # Filter out files whose basenames start with any unsafe channel.
    # str.startswith accepts a tuple; an empty tuple matches nothing.
    filtered_files = [
        file for file in all_files
        if not os.path.basename(file).startswith(unsafe_prefixes)
    ]
    if not filtered_files:
        raise ValueError("No valid CSV files found after filtering unsafe channels.")

    combined_data = []
    column_names = []
    for file_path in filtered_files:
        # Best effort: a file that cannot be read is reported and skipped.
        try:
            df = _read_two_columns(file_path)
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")
            continue
        base_name = _column_base_name(file_path)
        combined_data.append(df)
        column_names.extend([f"{base_name}_freq", f"{base_name}_corr"])

    if not combined_data:
        raise ValueError("No CSV files could be processed.")

    # Concatenate dataframes side by side.
    combined_df = pd.concat(combined_data, axis=1, ignore_index=True)
    if combined_df.shape[1] != len(column_names):
        raise ValueError("Mismatch between the number of columns and column names.")
    combined_df.columns = column_names
    return combined_df

# Example: combine the CSV files under the K1 channel directory, skipping
# files that belong to unsafe K1 channels.
combined_df = combine_csv(
    dir_path="/home/shu-wei.yeh/coherence-monitor/channel_files/K1",
    ifo="K1",
)
# Uncomment to preview the first rows:
# display(combined_df.head())


In [None]:
# Witness channels to export; a stand-in for a list produced by earlier processing.
channels = ["K1-PEM-VOLT_AS_TABLE_GND_OUT_DQ"]

# Target INI file: one channel name per line.
ini_filename = "chanlist_O4.ini"
with open(ini_filename, "w") as ini_file:
    # The strain channel always goes on the first line.
    ini_file.write("K1:CAL-CS_PROC_DARM_STRAIN_DBL_DQ\n")
    # Each witness channel follows, with only its first underscore turned into a dash.
    # NOTE(review): for the sample value above this yields
    # "K1-PEM-VOLT-AS_TABLE_GND_OUT_DQ" -- confirm this is the intended format.
    ini_file.writelines(c.replace("_", "-", 1) + "\n" for c in channels)

print(f"Channels written to {ini_filename}")


In [None]:
# Load the INI file written earlier and echo it so the channel list can be checked by eye.
with open(ini_filename, "r") as ini_file:
    ini_contents = ini_file.read()

# One call prints the heading and the contents on separate lines.
print("Contents of the INI file:", ini_contents, sep="\n")

In [6]:
# Directory to inspect; change to your test directory path.
test_dir = '/home/shu-wei.yeh/coherence-monitor/channel_files/K1'
# Collect every CSV in that directory. No unsafe-channel filtering is applied
# here, and the order is whatever glob returns.
filtered_files = glob.glob(os.path.join(test_dir, "*.csv"))
print(f"Test files: {filtered_files}")

Test files: ['/home/shu-wei.yeh/coherence-monitor/channel_files/K1/imc_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/volt_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/psl_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/related_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/vis_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/lsc_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/asc_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/mic_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/omc_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/tms_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/cal_channels.csv', '/home/shu-wei.yeh/coherence-monitor/channel_files/K1/pem_channels.csv']


In [None]:
# Step-by-step version of the per-file loading: one two-column DataFrame per
# CSV plus a pair of generated column names for each of them.
combined_data, column_names = [], []
for file_path in filtered_files:
    try:
        # Read the CSV file without a header.
        df = pd.read_csv(file_path, header=None)
        if df.shape[1] == 1:
            # Single-column file: pad with a second, all-missing column.
            print(f"Warning: {file_path} has {df.shape[1]} column; expected 2. Adding a column of NaNs.")
            df[1] = pd.NA
        elif df.shape[1] > 2:
            # Wider file: only the first two columns are kept.
            df = df.iloc[:, :2]

        # Column names use the part of the file name before "_14". When the
        # name contains no "_14", the whole file name (extension included) is used.
        base_name = os.path.basename(file_path).split('_14')[0]
        column_freq, column_corr = f"{base_name}_freq", f"{base_name}_corr"

        # Record the frame and its two names together so both lists stay aligned.
        combined_data.append(df)
        column_names += [column_freq, column_corr]
    except Exception as e:
        # Best effort: report the failing file and carry on with the rest.
        print(f"Error processing file {file_path}: {e}")



In [8]:
# Preview the first rows of every loaded DataFrame, then list the generated column names.
for idx, frame in enumerate(combined_data):
    print(f"DataFrame {idx}:")
    display(frame.head())
print("Column Names:", column_names)

DataFrame 0:


Unnamed: 0,0,1
0,K1:IMC-MCL_SERVO_OUT_DQ,
1,K1:IMC-CAV_REFL_OUT_DQ,
2,K1:IMC-CAV_TRANS_OUT_DQ,
3,K1:IMC-IMMT1_TRANS_QPDA1_DC_PIT_OUT_DQ,
4,K1:IMC-IMMT1_TRANS_QPDA1_DC_SUM_OUT_DQ,


DataFrame 1:


Unnamed: 0,0,1
0,K1:PEM-VOLT_AS_TABLE_GND_OUT_DQ,
1,K1:PEM-VOLT_IMCREFL_TABLE_GND_OUT_DQ,
2,K1:PEM-VOLT_ISS_TABLE_GND_OUT_DQ,
3,K1:PEM-VOLT_OMC_CHAMBER_GND_OUT_DQ,
4,K1:PEM-VOLT_PSL_TABLE_GND_OUT_DQ,


DataFrame 2:


Unnamed: 0,0,1
0,K1:PSL-IP_QPD1_DC_PIT_OUT_DQ,
1,K1:PSL-IP_QPD1_DC_YAW_OUT_DQ,
2,K1:PSL-IP_QPD2_DC_PIT_OUT_DQ,
3,K1:PSL-IP_QPD2_DC_YAW_OUT_DQ,
4,K1:PSL-PMC_MIXER_MON_OUT_DQ,


DataFrame 3:


Unnamed: 0,0,1
0,K1:PEM-MIC_OMC_BOOTH_OMC_Z_OUT_DQ,
1,K1:PEM-MIC_OMC_TABLE_AS_Z_OUT_DQ,
2,K1:PEM-SEIS_BS_GND_X_OUT_DQ,
3,K1:PEM-SEIS_BS_GND_Y_OUT_DQ,
4,K1:PEM-SEIS_BS_GND_Z_OUT_DQ,


DataFrame 4:


Unnamed: 0,0,1
0,K1:VIS-PR3_BF_DAMP_GAS_IN1_DQ,
1,K1:VIS-ITMX_TM_WIT_L_DQ,
2,K1:VIS-ITMX_TM_WIT_P_DQ,
3,K1:VIS-ITMX_TM_WIT_Y_DQ,
4,K1:VIS-ITMX_MN_WIT_L_DQ,


DataFrame 5:


Unnamed: 0,0,1
0,K1:LSC-CARM_SERVO_MIXER_DAQ_OUT_DQ,
1,K1:LSC-ALS_CARM_OUT_DQ,
2,K1:LSC-ALS_DARM_OUT_DQ,
3,K1:LSC-AS_PDA1_DC_OUT_DQ,
4,K1:LSC-AS_PDA1_RF17_I_ERR_DQ,


DataFrame 6:


Unnamed: 0,0,1
0,K1:ASC-POP_FORWARD_QPDA1_DC_PIT_OUT_DQ,
1,K1:ASC-POP_FORWARD_QPDA1_DC_YAW_OUT_DQ,
2,K1:ASC-POP_FORWARD_QPDA1_DC_SUM_OUT_DQ,
3,K1:ASC-POP_FORWARD_QPDA2_DC_PIT_OUT_DQ,
4,K1:ASC-POP_FORWARD_QPDA2_DC_YAW_OUT_DQ,


DataFrame 7:


Unnamed: 0,0,1
0,K1:PEM-MIC_PSL_TABLE_PSL1_Z_OUT_DQ,
1,K1:PEM-MIC_PSL_TABLE_PSL2_Z_OUT_DQ,
2,K1:PEM-MIC_PSL_TABLE_PSL3_Z_OUT_DQ,


DataFrame 8:


Unnamed: 0,0,1
0,K1:OMC-LSC_ERR_IN1_DQ,
1,K1:OMC-TRANS_DC_A_OUT_DQ,
2,K1:OMC-TRANS_DC_B_OUT_DQ,
3,K1:OMC-TRANS_DC_SUM_OUT_DQ,


DataFrame 9:


Unnamed: 0,0,1
0,K1:TMS-X_GR_QPDA1_PIT_OUT_DQ,
1,K1:TMS-X_GR_QPDA1_YAW_OUT_DQ,
2,K1:TMS-X_GR_QPDA1_SUM_OUT_DQ,
3,K1:TMS-X_GR_QPDA2_PIT_OUT_DQ,
4,K1:TMS-X_GR_QPDA2_YAW_OUT_DQ,


DataFrame 10:


Unnamed: 0,0,1
0,K1:CAL-PCAL_EX_1_PD_RX_V_DQ,
1,K1:CAL-PCAL_EX_1_PD_TX_V_DQ,
2,K1:CAL-PCAL_EX_2_PD_TX_V_DQ,
3,K1:CAL-PCAL_EY_1_PD_RX_V_DQ,
4,K1:CAL-PCAL_EY_1_PD_TX_V_DQ,


DataFrame 11:


Unnamed: 0,0,1
0,K1:PEM-ACC_PSL_TABLE_PSL1_Y_OUT_DQ,
1,K1:PEM-ACC_PSL_TABLE_PSL2_X_OUT_DQ,
2,K1:PEM-ACC_PSL_TABLE_PSL3_Z_OUT_DQ,
3,K1:PEM-ACC_PSL_PERI_PSL2_X_OUT_DQ,
4,K1:PEM-ACC_MCF_TABLE_IMCREFL_Z_OUT_DQ,


Column Names: ['imc_channels.csv_freq', 'imc_channels.csv_corr', 'volt_channels.csv_freq', 'volt_channels.csv_corr', 'psl_channels.csv_freq', 'psl_channels.csv_corr', 'related_channels.csv_freq', 'related_channels.csv_corr', 'vis_channels.csv_freq', 'vis_channels.csv_corr', 'lsc_channels.csv_freq', 'lsc_channels.csv_corr', 'asc_channels.csv_freq', 'asc_channels.csv_corr', 'mic_channels.csv_freq', 'mic_channels.csv_corr', 'omc_channels.csv_freq', 'omc_channels.csv_corr', 'tms_channels.csv_freq', 'tms_channels.csv_corr', 'cal_channels.csv_freq', 'cal_channels.csv_corr', 'pem_channels.csv_freq', 'pem_channels.csv_corr']
