# 1. Housekeeping

In [55]:
import ciao_contrib.runtool
from ciao_contrib.runtool import *
import pandas as pd
import numpy as np
from numpy import load
import astropy 
from astropy.table import Table
from astropy.io import fits
import os
import glob
import matplotlib.pyplot as plt
import ipywidgets as widgets

# List Folders Function
def list_folders(path):
    return [f for f in os.listdir(path) if os.path.isdir(os.path.join(path, f))]
    
# Specify path
path = '/Users/steven/Library/CloudStorage/OneDrive-ImperialCollegeLondon/01-27-2023-FYP-CfA/4-Code-Data/Chandra-Data/'
folders = list_folders(path)
folder_list = []
for folder in folders:
    folder_list.append(folder)

# Select Set
set_widget = widgets.Dropdown(options=folder_list[:],value=folder_list[1],description='Set :',disabled=False); set_widget

Dropdown(description='Set :', index=1, options=('Set-2', 'Set-Y', 'Set-5', 'Set-4', 'Set-3', 'Set-6', 'Set-1')…

In [56]:
# Set ID
set_id = set_widget.value.split("-")[1]
# Folder List
data_folder_path = f'/Users/steven/Library/CloudStorage/OneDrive-ImperialCollegeLondon/01-27-2023-FYP-CfA/4-Code-Data/Chandra-Data/Set-{set_id}/Data'
data_folders = sorted(list_folders(data_folder_path))
# Select Folder
folder_widget = widgets.Dropdown(options=data_folders[:],value=data_folders[1],description='Folder :',disabled=False); folder_widget

Dropdown(description='Folder :', index=1, options=('ra-5-10', 'ra-0-5'), value='ra-0-5')

# 2. Source Region Filter Files

Incremental (preferred)

In [54]:
# Set ID
set_id = set_widget.value.split("-")[1]
# Folder ID
folder_id = folder_widget.value
# File Loop
for event_filename in glob.iglob(f'{data_folder_path}/{folder_id}/acisf*regevt3*gz'):
    obsid = int(event_filename.split('_')[0][-5:])
    try: 
        regionid = int(event_filename.split('_')[2][-4:])
    except: 
        regionid = int(event_filename.split('_')[3][-4:]) 
    region_filename = [region for region in glob.iglob(f'{data_folder_path}/{folder_id}/acisf*reg3.fits.gz') if str(obsid) in region and str(regionid) in region][0]
    filtered_filename = event_filename.replace(".fits", "_filtered.fits")
    ciao_contrib.runtool.dmcopy(f'{event_filename}[sky=region({region_filename})]', filtered_filename)

OSError: An error occurred while running 'dmcopy':
  Clobber set to no, and output file /Users/steven/Library/CloudStorage/OneDrive-ImperialCollegeLondon/01-27-2023-FYP-CfA/4-Code-Data/Chandra-Data/Set-Y/Data/ra-0-5/acisf08557_000N021_r0039_regevt3_filtered.fits.gz exists.

Full Loop (alternative to incremental approach, not preferred)

In [None]:
# Set ID
set_id = set_widget.value.split("-")[1]
# Folder List
data_folder_path = f'/Users/steven/Library/CloudStorage/OneDrive-ImperialCollegeLondon/01-27-2023-FYP-CfA/4-Code-Data/Chandra-Data/Set-{set_id}/Data'
data_folders = list_folders(data_folder_path)
# Folder Loop
for data_folder_name in data_folders:
    # File Loop
    for event_filename in glob.iglob(f'{data_folder_path}/{data_folder_name}/acisf*regevt3*gz'):
        obsid = int(event_filename.split('_')[0][-5:])
        try: 
            regionid = int(event_filename.split('_')[2][-4:])
        except: 
            regionid = int(event_filename.split('_')[3][-4:]) 
        region_filename = [region for region in glob.iglob(f'{data_folder_path}/{data_folder_name}/acisf*reg3.fits.gz') if str(obsid) in region and str(regionid) in region][0]
        filtered_filename = event_filename.replace(".fits", "_filtered.fits")
        ciao_contrib.runtool.dmcopy(f'{event_filename}[sky=region({region_filename})]', filtered_filename)

        print(filtered_filename)

In [None]:
# test1 = '/Users/steven/Library/CloudStorage/OneDrive-ImperialCollegeLondon/01-27-2023-FYP-CfA/4-Code-Data/Chandra-Data/Set-Y/Data/ra-0-5/acisf11081_000N020_r0100_regevt3_filtered.fits.gz'
# test2 = '/Users/steven/Library/CloudStorage/OneDrive-ImperialCollegeLondon/01-27-2023-FYP-CfA/4-Code-Data/Chandra-Data/Set-Y/Data/ra-0-5/acisf11081_000N020_r0100_regevt3.fits.gz'

# df1 = fits.open(test1)
# df2 = fits.open(test2)

# len(df1)

# with fits.open(test1) as hdul:
#     events1 = hdul["Events"].data
#     df_events1 = pd.DataFrame.from_records(Table(events1), columns=events1.columns.names)

# with fits.open(test2) as hdul:
#     events2 = hdul["Events"].data
#     df_events2 = pd.DataFrame.from_records(Table(events2), columns=events2.columns.names)

# print(len(df_events1))
# print(len(df_events2))

# 3. Load and Save Event File Table

Incremental

In [58]:
# Set ID
set_id = set_widget.value.split("-")[1]
# Folder ID
folder_id = folder_widget.value
# Initialise dataframe list of all eventfiles
list_df_events = []
# File Loop
for filename in glob.iglob(f'{data_folder_path}/{folder_id}/acisf*regevt*filtered*gz'):
  with fits.open(filename) as hdul:
      # Events
      events = hdul["Events"].data
      events_table = Table(events)
      events_cols = events.columns.names
      df_events = pd.DataFrame.from_records(events_table, columns=events_cols)
      df_events = df_events.sort_values(by=["time"])
      # GTI (Good Time Interval)
      gti = hdul["GTI"].data
      gti_table = Table(gti)
      gti_cols = gti.columns.names
      df_gti = pd.DataFrame.from_records(gti_table, columns=gti_cols)
      # Apply GTI Filter to Events
      gti_mask = np.zeros(len(df_events), dtype=bool)
      for i in range(len(df_gti)):
          start = df_gti.iloc[i]['START']
          stop = df_gti.iloc[i]['STOP']
          gti_mask |= (df_events["time"] >= start) & (df_events["time"] < stop)
      df_events = df_events[gti_mask]
      # Apply energy, pha, grade Filter to Events
      df_events = df_events[(df_events['pha']>40) & (df_events['grade']>=0) & (df_events['energy']>500) & (df_events['energy']<7000)]
      # Add obsid and region_id column (from filename)
      df_events["obsid"] = int(filename.split('_')[0][-5:])
      try: 
        df_events["region_id"] = int(filename.split('_')[2][-4:]) #need to add try except while looping 
      except: 
        df_events["region_id"] = int(filename.split('_')[3][-4:]) 
      # Append to dataframe list
      list_df_events.append(df_events)

# Combine dfs in dataframe list into one df and save in folder
df_eventfiles = pd.concat(list_df_events)
df_eventfiles.to_csv(f'{path}Set-{set_id}/eventfiles-{set_id}-{folder_id}.csv',index=False)

Full Loop (alternative to incremental approach, not preferred)

In [44]:
# Set ID
set_id = set_widget.value.split("-")[1]
# Initialise dataframe list of all eventfiles
list_df_events = []
# Folder List
data_folder_path = f'/Users/steven/Library/CloudStorage/OneDrive-ImperialCollegeLondon/01-27-2023-FYP-CfA/4-Code-Data/Chandra-Data/Set-{set_id}/Data'
data_folders = list_folders(data_folder_path)
# Folder Loop
for data_folder_name in data_folders:
  # File Loop
  for filename in glob.iglob(f'{data_folder_path}/{data_folder_name}/acisf*regevt*filtered*gz'):
    with fits.open(filename) as hdul:
        # Events
        events = hdul["Events"].data
        events_table = Table(events)
        events_cols = events.columns.names
        df_events = pd.DataFrame.from_records(events_table, columns=events_cols)
        df_events = df_events.sort_values(by=["time"])
        # GTI (Good Time Interval)
        gti = hdul["GTI"].data
        gti_table = Table(gti)
        gti_cols = gti.columns.names
        df_gti = pd.DataFrame.from_records(gti_table, columns=gti_cols)
        # Apply GTI Filter to Events
        gti_mask = np.zeros(len(df_events), dtype=bool)
        for i in range(len(df_gti)):
            start = df_gti.iloc[i]['START']
            stop = df_gti.iloc[i]['STOP']
            gti_mask |= (df_events["time"] >= start) & (df_events["time"] < stop)
        df_events = df_events[gti_mask]
        # Apply energy, pha, grade Filter to Events
        df_events = df_events[(df_events['pha']>40) & (df_events['grade']>=0) & (df_events['energy']>500) & (df_events['energy']<7000)]
        # Add obsid and region_id column (from filename)
        df_events["obsid"] = int(filename.split('_')[0][-5:])
        try: 
          df_events["region_id"] = int(filename.split('_')[2][-4:]) #need to add try except while looping 
        except: 
          df_events["region_id"] = int(filename.split('_')[3][-4:]) 
        # Append to dataframe list
        list_df_events.append(df_events)

# Combine dfs in dataframe list into one df and save in folder
df_eventfiles = pd.concat(list_df_events)
df_eventfiles.to_csv(f'{path}Set-{set_id}/eventfiles-{set_id}.csv',index=False)

# 3. Concatenate Eventfiles

In [62]:
# Set ID
set_id = set_widget.value.split("-")[1]
# Folder ID
folder_id = folder_widget.value
# Load Eventfiles Table
df_events_list=[]
for evtfile_name in os.listdir(f'{path}Set-{set_id}/'):
    if evtfile_name.startswith(f'eventfiles-{set_id}-ra'):
        df_evt = pd.read_csv(f'{path}Set-{set_id}/{evtfile_name}')
        df_events_list.append(df_evt)
df_evt = pd.concat(df_events_list)
df_evt.to_csv(f'{path}Set-{set_id}/eventfiles-{set_id}.csv',index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_labels['obsreg_id'] = df_labels['obsid'].astype(str) + '_' + df_labels['region_id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_input['obsreg_id'] = df_input['obsid'].astype(str) + '_' + df_input['region_id'].astype(str)


# 4. Generate Input

In [47]:
# Set ID
set_id = set_widget.value.split("-")[1]
# Folder ID
folder_id = folder_widget.value
# Load Properties Table
df_prop = pd.read_csv(f'{path}Set-{set_id}/properties-{set_id}.csv')
# Load Properties Table
df_events = pd.read_csv(f'{path}Set-{set_id}/eventfiles-{set_id}.csv')
# Extract important labels and input columns
df_labels = df_prop[['obsid','region_id','cnts_aper_b','cnts_aperbkg_b','src_cnts_aper_b','flux_aper_b','hard_hm','hard_hs','hard_ms','var_prob_b','var_prob_h','var_prob_m','var_prob_s']]
df_input = df_events[['obsid','region_id','time','energy','chipx','chipy']]
# Make Sure Eventfile Table only includes information contained in Properties Table and vice versa
df_labels['obsreg_id'] = df_labels['obsid'].astype(str) + '_' + df_labels['region_id'].astype(str)
df_input['obsreg_id'] = df_input['obsid'].astype(str) + '_' + df_input['region_id'].astype(str)
df_labels = df_labels[df_labels['obsreg_id'].isin(df_input['obsreg_id'].unique())]
df_input = df_input[df_input['obsreg_id'].isin(df_labels['obsreg_id'].unique())]
# Save ungrouped tables
df_labels.to_csv(f'{path}Set-{set_id}/labels-{set_id}.csv',index=False)
df_input.to_csv(f'{path}Set-{set_id}/input-{set_id}.csv',index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_labels['obsreg_id'] = df_labels['obsid'].astype(str) + '_' + df_labels['region_id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_input['obsreg_id'] = df_input['obsid'].astype(str) + '_' + df_input['region_id'].astype(str)
