# Throughputs_LSSTComCam_c26202

Based on Lynne Jones's code here: https://rubin-obs.slack.com/archives/C0824CTA335/p1732311332938929

Created:  2024.11.27

## 1. Initial Setup...

### 1.1 Import useful python packages

In [None]:
# Generic python packages
import pylab as plt
import numpy as np
import pandas as pd
import glob
import math
import os
import gc
import warnings
from IPython.display import display


# LSST Science Pipelines (Stack) packages
import lsst.daf.butler as dafButler
import lsst.afw.display as afwDisplay

# rubin_sim-related packages
import rubin_sim.phot_utils as pt
import syseng_throughputs as st
from rubin_sim.data import get_data_dir

# Astropy-related packages
from astropy import units as u
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy.table import Table

# Set a standard figure size to use (width, height as passed to matplotlib's rcParams)
plt.rcParams['figure.figsize'] = (8.0, 8.0)
# Use matplotlib as the default backend for lsst.afw.display
afwDisplay.setDefaultBackend('matplotlib')

# Set filter warnings to "ignore" to avoid a lot of "logorrhea" to the screen.
# NOTE: this is a global filter with no category/module restriction, so it
# silences *every* Python warning for the rest of the session.
warnings.filterwarnings("ignore")

### 1.2 Include user input

In [None]:
# Which repo, collection, instrument, and skymap to use.
# See https://rubinobs.atlassian.net/wiki/spaces/DM/pages/48834013/Campaigns#1.1.-ComCam
# and https://rubinobs.atlassian.net/wiki/spaces/DM/pages/226656354/LSSTComCam+Intermittent+Cumulative+DRP+Runs
#repo = 'embargo'
repo = '/repo/main'

# Exactly one `collections` line should be active; `post_w_2025_04` (below)
# must be set consistently with the weekly of the chosen collection.
#collections = 'LSSTComCam/runs/DRP/20241101_20241113/w_2024_46/DM-47566'
#collections = 'LSSTComCam/runs/DRP/20241101_20241120/w_2024_47/DM-47746'
#collections = 'LSSTComCam/runs/DRP/20241101_20241204/w_2024_49/DM-47988'
#collections = 'LSSTComCam/runs/DRP/DP1/w_2025_03/DM-48478'
#collections = 'LSSTComCam/runs/DRP/DP1/w_2025_04/DM-48556'
collections = 'LSSTComCam/runs/DRP/DP1/w_2025_05/DM-48666'

instrument = 'LSSTComCam'
# NOTE(review): skymap_name does not appear to be referenced elsewhere in this notebook.
skymap_name = 'lsst_cells_v1'
# Range of day_obs values (YYYYMMDD integers) used in the exposure query; both ends are inclusive.
day_obs_start = 20241101
day_obs_end = 20241212

# Set environment variable to point to location of the rubin_sim_data
#  (per Lynne Jones' Slack message on the #sciunit-photo-calib channel from 26 Nov 2024):
os.environ["RUBIN_SIM_DATA_DIR"] = "/sdf/data/rubin/shared/rubin_sim_data"

# Which CalSpec C26202 spectrum FITS file to use?
# (Alternative versions are kept commented out below for quick switching.)
sedfile = '~/Downloads/c26202_stiswfcnic_007.fits'
#sedfile = '~/Downloads/c26202_mod_007.fits'
#sedfile = '~/Downloads/c26202_stiswfcnic_006.fits'
#sedfile = '~/Downloads/c26202_mod_008.fits'
#sedfile = '~/Downloads/c26202_stiswfcnic_007.fits'

# RA, DEC of C26202 (in degrees; they are given degree units when used below)
raDeg = 53.136845833333325
decDeg = -27.86349444444444

# Plot symbol colors to use for ugrizy
plot_filter_colors_white_background = {'u': '#0c71ff', 'g': '#49be61', 'r': '#c61c00', 'i': '#ffc200', 'z': '#f341a2', 'y': '#5d0000'}

# Variables controlling output...
verbose = 3         # verbose = 0, 1, 2, 3, ...  Higher numbers mean more output.
outputCSV = False    # output CSV files
# There was a major change in the DRP pipeline starting with w_2025_05.
# See:  https://rubin-obs.slack.com/archives/C07TXQUAXUZ/p1738795935921129
# Set to True for collections processed with w_2025_05 or later; it selects
# which dataset type and aperture-flux columns are used further down.
post_w_2025_04 = True

### 1.3 Define useful classes and functions

In [None]:
# Cartesian x,y match with error (per Claude-3.5-Sonnet)

def cartesianXYMatchWithError(df1, xcol1, ycol1, df2, xcol2, ycol2, sep_limit=1.0, allMatches=True):
    """Match rows of ``df1`` to their nearest neighbour in ``df2`` in a Cartesian x,y plane.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Catalogs to match.  Neither input is modified.
    xcol1, ycol1 : str
        Names of the x and y columns in ``df1``.
    xcol2, ycol2 : str
        Names of the x and y columns in ``df2``.
    sep_limit : float, optional
        Maximum separation (same units as the x,y columns) for a valid match.
        Only separations strictly smaller than this value are kept.
    allMatches : bool, optional
        If True, return every ``df1`` row that has a ``df2`` neighbour within
        ``sep_limit`` (a ``df2`` row may then appear several times).  If False,
        keep only the smallest-separation match for each ``df2`` row and
        return the result sorted by increasing separation.

    Returns
    -------
    pandas.DataFrame
        The columns of ``df1`` followed by the columns of the matched ``df2``
        row, plus a ``separation`` column.  If the two inputs share column
        names, those names appear twice in the output.
    """
    import numpy as np
    import pandas as pd
    from scipy.spatial import cKDTree

    # Create KD-tree for efficient spatial searching
    tree = cKDTree(df2[[xcol2, ycol2]].to_numpy())

    # Find the nearest neighbour within sep_limit.  Rows without a neighbour
    # come back with an infinite separation and an out-of-range index.
    separations, indices = tree.query(df1[[xcol1, ycol1]].to_numpy(),
                                      distance_upper_bound=sep_limit)

    # Create mask for valid matches (separations less than sep_limit)
    valid_matches = separations < sep_limit
    matched_positions = indices[valid_matches]
    matched_separations = separations[valid_matches]

    # Create merged dataframe using only valid matches
    merged_df = pd.concat([
        df1[valid_matches].reset_index(drop=True),
        df2.iloc[matched_positions].reset_index(drop=True)
        ], axis=1)

    # Keep track of the match separations
    merged_df['separation'] = matched_separations

    if not allMatches:
        # Keep just the best match for each df2 row.  De-duplicate on the
        # *position* of the matched df2 row rather than on its column values:
        # keying on values breaks when df1 and df2 share column names (the df1
        # columns then take part in the key) and conflates df2 rows that
        # happen to hold identical values.
        order = np.argsort(matched_separations, kind='stable')
        _, first_seen = np.unique(matched_positions[order], return_index=True)
        merged_df = merged_df.iloc[order[np.sort(first_seen)]]

    return merged_df



In [None]:
# Useful class to stop "Run All" at a cell 
#  containing the command "raise StopExecution"
class StopExecution(Exception):
    """Exception used to halt a notebook "Run All" at the raising cell.

    IPython calls ``_render_traceback_`` (when defined) to obtain the
    traceback text; returning nothing keeps the stop quiet.
    """

    def _render_traceback_(self):
        return None

### 1.4 Instantiate the butler and registry

In [None]:
# Create the butler for the chosen repo, using `collections` as the default
# collection(s) for all later dataset look-ups.
butler = dafButler.Butler(repo, collections=collections)
# The registry is used below for the exposure dimension-record query.
registry = butler.registry

## 2. Estimate expected counts for airmasses X=1.0 to 2.5

### 2.1 Build the hardware and system for ugrizy for Cerro Pachon for airmasses X=1.0-2.5 in steps of 0.1 airmass

In [None]:
# From https://github.com/lsst-pst/syseng_throughputs/blob/main/notebooks/InterpolateZeropoint.ipynb

# Start from the default throughput directories, but point the detector
# component at the '/itl' directory instead of '/joint_minimum'.
defaultDirs = st.setDefaultDirs()
defaultDirs['detector'] = defaultDirs['detector'].replace('/joint_minimum', '/itl')
print(defaultDirs)

# Airmass grid, rounded so the values are clean dictionary keys.
airmasses = np.arange(1.0, 2.6, 0.1).round(2)

# One total-system throughput set per airmass, keyed by airmass.  The hardware
# is rebound on every pass, so after the loop it holds the value from the last
# airmass, as before.
system = {}
for x in airmasses:
    atmos_dir = os.path.join(get_data_dir(), 'throughputs', 'atmos')
    atmos = st.readAtmosphere(atmos_dir, atmosFile=f'atmos_{x*10 :.0f}_aerosol.dat')
    hardware, system[x] = st.buildHardwareAndSystem(
        defaultDirs, addLosses=True, atmosphereOverride=atmos
    )


### 2.2 Plot filter passbands (without the atmospheric component) and the atmospheric transmission for airmasses 1.0, 1.2, and 2.0

In [None]:
# From https://github.com/lsst-pst/syseng_throughputs/blob/main/notebooks/InterpolateZeropoint.ipynb

# Plot only if verbosity level is higher than 2...
if verbose > 2:

    colors = plot_filter_colors_white_background

    # Hardware-only passbands: dotted lines in the per-band colors.
    for f in 'ugrizy':
        bandpass = hardware[f]
        plt.plot(bandpass.wavelen, bandpass.sb, color=colors[f], linestyle=':')

    # Atmospheric transmission at three sample airmasses: solid lines.
    for x in [1.0, 1.2, 2.0]:
        atmos_dir = os.path.join(get_data_dir(), 'throughputs', 'atmos')
        atmos = st.readAtmosphere(atmos_dir, atmosFile=f'atmos_{x*10 :.0f}_aerosol.dat')
        plt.plot(atmos.wavelen, atmos.sb, linestyle='-')

    plt.ylim(0, 1)
    plt.xlim(300, 1100)
    plt.xlabel("Wavelength (nm)")


### 2.3 Read in the CalSpec SED file and translate it into `rubin_sim` format 

In [None]:
# Load the CalSpec table from the FITS file
seddata = fits.getdata(sedfile)

# Numerical factor converting a length in angstroms to nanometers
angstrom_to_nm = u.angstrom.to(u.nanometer)

# Wavelengths: angstroms -> nanometers
wavelen = seddata['WAVELENGTH'] * angstrom_to_nm
# Flux density: erg/sec/cm^2/angstrom -> erg/sec/cm^2/nm
flambda = seddata['FLUX'] / (angstrom_to_nm)

# Wrap in a rubin_sim Sed object
sed = pt.Sed(wavelen=wavelen, flambda=flambda)

### 2.4 Define the photometric parameters to use.

In [None]:
# Photometric parameters used for the expected-count calculation:
# a single 30-unit exposure with gain=1.0 (with gain=1 the "ADU" counts are
# electron counts, per the note in the sandbox cell near the end of this notebook).
# NOTE(review): the expected counts are computed for exptime=30 only; if any
# matched visit has a different exposure time (`expos` in df_exp), the
# observed/expected ratio would presumably need rescaling -- confirm.
phot_params = pt.PhotometricParameters(exptime=30, nexp=1, gain=1.0)


### 2.5 Calculate the expected counts for C26202 for the given photometric parameters over the airmass range of X=1.0-2.5

In [None]:
# Expected counts of the SED through each band's total system throughput:
# one numpy array per band, ordered like `airmasses`.
counts = {
    f: np.array([sed.calc_adu(system[x][f], phot_params) for x in airmasses])
    for f in 'ugrizy'
}


In [None]:
# Tabulate the expected counts: one column per band, one row per airmass
# (the airmass is the DataFrame index).
df_counts = pd.DataFrame(counts, index=airmasses)

# Show the table, but only if verbosity level is greater than 0:
if verbose > 0:
    display(df_counts)

### 2.6 Output results to CSV file

In [None]:
# Write the expected-counts table to the current working directory (only if requested).
if outputCSV:
    outputFile = 'LSSTComCam_C26202_expected_counts.csv'
    df_counts.to_csv(outputFile)  #  Here, we want to keep the index for the DataFrame, which, in this case, is the airmass

## 3. Query USDF Butler for ComCam exposures

### 3.1 Create query

In [None]:
# Taken from vv-team-notebooks/reports/TargetReport.ipynb

# Build the `where` clause restricting the `exposure` dimension records to
# this instrument and to the inclusive day_obs range, then run the query.
query = f"instrument='{instrument}' AND day_obs>={day_obs_start:d} AND day_obs<={day_obs_end:d}"
results = registry.queryDimensionRecords('exposure', where=query)


### 3.2 Check that there are results; stop execution if there are none

In [None]:
# Taken from vv-team-notebooks/reports/TargetReport.ipynb

# Report how many exposure records came back; halt the notebook if there are none.
n_results = results.count()

if n_results > 0:
    print("""There are %d results returned from querying the butler for instrument %s between dates %d and %d (inclusive).""" % 
          (n_results, instrument, day_obs_start, day_obs_end))
else:
    raise StopExecution

### 3.3 Instantiate a DataFrame to contain the exposure information

In [None]:
# Taken from vv-team-notebooks/reports/TargetReport.ipynb

# Column names for the exposure table, in the order in which the values are
# filled in from each `exposure` dimension record.
exposure_columns = [
    'id', 'obs_id', 'day_obs', 'seq_num',
    'time_start', 'time_end', 'type', 'reason',
    'target', 'filter', 'zenith_angle',
    'expos', 'ra', 'dec', 'skyangle',
    'azimuth', 'zenith', 'science_program',
    'jd', 'mjd',
]

# Start from an empty pandas `DataFrame` carrying just those columns.
df_exp = pd.DataFrame(columns=exposure_columns)

### 3.4 Read the query results into the new Data Frame

In [None]:
# Taken from vv-team-notebooks/reports/TargetReport.ipynb

# Read the query results into the pandas `DataFrame`, one row per exposure
# record.  Rows are collected in a list and converted in a single step, which
# avoids the cost of enlarging the DataFrame one row at a time.

# Placeholder values used when an exposure record has no usable timespan.
fallback_time_start = pd.to_datetime("2021-01-01 00:00:00.00")
fallback_time_end = pd.to_datetime("2051-01-01 00:00:00.00")

exposure_rows = []
for info in results:

    # Only the timespan-derived values have a fallback, so only they are
    # guarded; a problem with any other attribute is a real error and is
    # allowed to propagate.
    try:
        time_start = info.timespan.begin.utc.iso
        time_end = info.timespan.end.utc.iso
        jd = info.timespan.begin.jd
        mjd = info.timespan.begin.mjd
    except Exception as exc:
        print(">>>   Unexpected error:", type(exc))
        time_start = fallback_time_start
        time_end = fallback_time_end
        jd = 0
        mjd = 0

    # Value order must match the column order of df_exp.  Note that both the
    # 'zenith_angle' and 'zenith' columns are filled from info.zenith_angle.
    exposure_rows.append([info.id, info.obs_id, info.day_obs, info.seq_num,
                          time_start,
                          time_end,
                          info.observation_type, info.observation_reason, info.target_name,
                          info.physical_filter, info.zenith_angle,
                          info.exposure_time, info.tracking_ra, info.tracking_dec,
                          info.sky_angle, info.azimuth, info.zenith_angle,
                          info.science_program, jd, mjd])

df_exp = pd.DataFrame(exposure_rows, columns=df_exp.columns)
 
    

### 3.5 Clean up DataFrame

In [None]:
# Taken from vv-team-notebooks/reports/TargetReport.ipynb

# Re-cast the `id`, `day_obs`, and `seq_num` columns as `int`'s:
df_exp = df_exp.astype({"id": int,'day_obs': int,'seq_num':int})

In [None]:
# Taken from vv-team-notebooks/reports/TargetReport.ipynb

# Replace `NaN`'s in the `ra` and `dec` columns with zero.
# (In the notebook this was taken from, `NaN`'s in `ra`, `dec` wreaked havoc
#  for its healpix tools; no healpix tools are used in this notebook.)
# ***(Maybe no longer necessary?)***

df_exp['ra'] = df_exp['ra'].fillna(0)
df_exp['dec'] = df_exp['dec'].fillna(0)

In [None]:
# Show the full exposure table (displayed as the cell's output).
df_exp

### 3.6 Add airmass to DataFrame

In [None]:
# Add an airmass to df_exp...
# The airmass is computed as sec(z) = 1/cos(zenith_angle), with the zenith
# angle converted from degrees to radians, and rounded to 3 decimals.

df_exp['airmass'] = np.round(1./np.cos(np.deg2rad(df_exp['zenith_angle'])), decimals=3)

In [None]:
# Print out the zenith angle and airmass of every exposure in df_exp
# (all observation types), but only if verbosity level is greater than 1...
if verbose > 1:
    display(df_exp[['zenith_angle','airmass']])

### 3.7 Extract just "science" exposures

In [None]:
# Keep only the rows whose observation type is 'science':
is_science = df_exp['type'] == 'science'
df_sci = df_exp.loc[is_science]

In [None]:
# Look at the (exposure/visit) id, zenith_angle, and airmass columns of the
#  science exposures, but only if verbosity level is greater than 1:
if verbose > 1:
    display(df_sci[['id', 'zenith_angle','airmass']])

### 3.8 Remove any exposures in the "bad visit" list

#### 3.8.1 Read in "bad visit" list

In [None]:
# Download the LSSTComCam "bad visit" list (an ECSV table in the
# lsst-dm/excluded_visits GitHub repository) and convert it to pandas.
# This requires network access.
df_bad_visits=Table.read("https://raw.githubusercontent.com/lsst-dm/excluded_visits/refs/heads/main/LSSTComCam/bad.ecsv").to_pandas()
#df_bad_visits.rename(columns={'exposure': 'visit'}, inplace=True)

# Look at bad visits table, but only if verbosity level is greater than 0:
if verbose > 0:
    display(df_bad_visits)

#### 3.8.2 Remove from df_sci any exposures found in df_bad_visits

In [None]:
# Drop every science exposure whose `id` appears in the `exposure` column of the bad-visit table.
df_sci = df_sci[~df_sci['id'].isin(df_bad_visits['exposure'])]


In [None]:
# Look at the (exposure/visit) id, zenith_angle, and airmass columns again,
#  now with the bad visits removed, but only if verbosity level is greater than 0:
if verbose > 0:
    display(df_sci[['id', 'zenith_angle','airmass']])

### 3.9 Save results as a CSV file

In [None]:
# Write the cleaned science-exposure table to a CSV file named after the
# day_obs range (only if requested); the DataFrame index is not written.
if outputCSV:
    outputFile = f"LSSTComCam_visits_{day_obs_start:d}-{day_obs_end:d}.csv"
    df_sci.to_csv(outputFile, index=False)

### 3.10 Create a Pandas DataFrame from df_sci that just contains the visit id, zenith angle, and airmass

In [None]:
# Independent, freshly indexed copy of just the visit id, zenith angle, and airmass columns.
airmass_columns = ['id', 'zenith_angle', 'airmass']
df_sci_airmass = df_sci.loc[:, airmass_columns].reset_index(drop=True)

# Look at pandas dataframe, but only if verbosity level is greater than 0:
if verbose > 0:
    display(df_sci_airmass)

## 4. Query USDF Butler for ComCam measurements of C26202

### 4.1 Find the `dataId`'s for all `calexp`'s in this repo/collection that overlap the sky position of C26202:

In [None]:
# Find all `calexp` datasets whose visit+detector region overlaps the sky
# position of C26202.  `ra` and `dec` in the `where` string are bound to
# raDeg and decDeg (degrees) through the `bind` mapping.
datasetRefs = butler.query_datasets("calexp", where="visit_detector_region.region OVERLAPS POINT(ra, dec)",
                                    bind={"ra": raDeg, "dec": decDeg})

# Look at the datasetRefs, but only if verbosity level is greater than 1:
if verbose > 1:
    for i, ref in enumerate(datasetRefs):    
        print(i, ref.dataId)

print(f"\nFound {len(datasetRefs)} calexps")

### 4.2 Create a pandas Dataframe containing the `sourceTable` info for all these `calexp`'s

#### 4.2.1 Loop over the `datasetRefs` again, grabbing the contents of the `sourceTable` table for each `ref` and combining them all into one big pandas DataFrame.

In [None]:
# Per-(visit, detector) source catalogs, gathered in datasetRefs order.
src_list = []

for i, ref in enumerate(datasetRefs):

    # Identify this calexp's visit and detector, and fetch the matching sourceTable...
    dataId = dict(visit=ref.dataId['visit'], detector=ref.dataId['detector'])
    src = butler.get('sourceTable', dataId=dataId)

    # Report visit/detector info, but only if verbosity level is greater than 1:
    if verbose > 1:
        print(f"{i} Visit {ref.dataId['visit']}, Detector {ref.dataId['detector']}:  Retrieved catalog of {len(src)} sources.")

    src_list.append(src)

# Stack the individual catalogs into one table with a fresh 0..N-1 index.
src_all = pd.concat(src_list, ignore_index=True)

print("")
print(f"Total combined catalog contains {len(src_all)} sources.")


In [None]:
# Show the combined sourceTable dataframe, but only if verbosity level is greater than 1:
if verbose > 1:
    display(src_all)

#### 4.2.2 Add zenith distance and airmass to src_all

In [None]:
# Attach the zenith angle and airmass of each visit to its sources (inner
# join of src_all.visit on df_sci_airmass.id), drop the now-redundant `id`
# column, and discard any rows whose airmass is a NaN.
src_all_tmp = (
    src_all.merge(df_sci_airmass, left_on='visit', right_on='id')
    .drop(columns='id')
    .dropna(subset=['airmass'])
)
src_all = src_all_tmp

# Show resulting pandas dataframe, but only if verbosity level is greater than 0:
if verbose > 0:
    display(src_all)

#### 4.2.3 Save `src_all` as a CSV file

In [None]:
# Write the combined sourceTable catalog to CSV (only if requested); the index is not written.
if outputCSV:
    src_all.to_csv('LSSTComCam_C26202_fields.sourceTable.csv', index=False)

### 4.3 Create a pandas Dataframe containing the `icSrc` table info for all these `calexp`'s

#### 4.3.1 Loop over the `datasetRefs` again, grabbing the contents of the `icSrc` table for each `ref` and combining them all into one big pandas DataFrame.

In [None]:
icSrc_list = []

# There was a major change in the DRP pipeline starting with w_2025_05.
# See:  https://rubin-obs.slack.com/archives/C07TXQUAXUZ/p1738795935921129
# From that weekly on, the catalog is read from 'initial_psf_stars_detector';
# before it, from 'icSrc'.
icSrc_dataset = 'initial_psf_stars_detector' if post_w_2025_04 else 'icSrc'

for i, ref in enumerate(datasetRefs):

    dataId = {'visit': ref.dataId['visit'], 'detector': ref.dataId['detector']}

    # Retrieve the catalog for this visit & detector.  Retrieval is
    # best-effort: a failure is reported (with the dataId, so it can be
    # traced) and that visit/detector is skipped.
    try:
        icSrc = butler.get(icSrc_dataset, dataId=dataId)
        # The older 'icSrc' catalog needs an explicit conversion to an astropy
        # table; the newer dataset is used as returned.
        icSrc_table = icSrc if post_w_2025_04 else icSrc.asAstropy()
        df_icSrc = icSrc_table.to_pandas()
    except Exception as e:
        print(f"Unexpected error for {dataId}: {e}")
        continue

    icSrc_list.append(df_icSrc)

    # Look at visit/detector info, but only if verbosity level is greater than 1:
    if verbose > 1:
        print(f"{i} Visit {ref.dataId['visit']}, Detector {ref.dataId['detector']}:  Retrieved catalog of {len(icSrc_table)} sources.")

# pd.concat raises an uninformative ValueError on an empty list; fail with a
# message that says what actually went wrong.
if not icSrc_list:
    raise ValueError(f"No '{icSrc_dataset}' catalogs could be retrieved for the selected calexps.")

icSrc_all = pd.concat(icSrc_list, ignore_index=True)

print("")
print(f"Total combined catalog contains {len(icSrc_all)} sources.")


In [None]:
# Show the combined icSrc dataframe, but only if verbosity level is greater than 0:
if verbose > 0:
    display(icSrc_all)

In [None]:
#for col in icSrc_all.columns:
#    print(col,)

#### 4.3.2 Save `icSrc_all` as a CSV file

In [None]:
# Write the combined icSrc catalog to CSV (only if requested); the index is not written.
if outputCSV:
    icSrc_all.to_csv('LSSTComCam_C26202_fields_icSrc.csv', index=False)

## 5. Calculate PSF-to-total-flux aperture corrections on a per-visit basis

### 5.1 Match src_all and icSrc_all catalogs

In [None]:
# We will perform the match using the cartesianXYMatchWithError function defined above.
#
# NOTE(review): the match uses x,y coordinates only, across the catalogs of
# *all* visits and detectors at once, so a source can in principle be paired
# with an icSrc entry from a different visit/detector that happens to lie at
# nearly the same x,y.  Consider matching per (visit, detector) if icSrc_all
# carries those identifiers -- to be confirmed.

# The matching function does not modify its inputs, so the (large) catalogs
# are passed as they are rather than as deep copies.
df1 = src_all
xcol1 = 'x'
ycol1 = 'y'

df2 = icSrc_all
xcol2 = 'base_SdssCentroid_x'
ycol2 = 'base_SdssCentroid_y'

# Maximum match separation, in the units of the x,y columns.
sep_limit = 1.0

# Keep only the best (smallest-separation) match for each icSrc entry.
df_match = cartesianXYMatchWithError(df1, xcol1, ycol1, df2, xcol2, ycol2,
                                     sep_limit=sep_limit, allMatches=False)

# Print number of matches
print(f"Number of matches found: {len(df_match)}")

# Print statistics of match distances
print("\nMatch distance statistics:")
print(df_match['separation'].describe())

In [None]:
# Show the matched (src_all + icSrc_all) dataframe, but only if verbosity level is greater than 0:
if verbose > 0:
    display(df_match)

### 5.2 Create dataframe containing the visit-by-visit median psf-to-total flux aperture corrections.

In [None]:
# Per-source psf-to-total flux aperture correction, followed by its per-visit median.

# 'base_PsfFlux_instFlux' is the primary instrumental flux measurement.
psf_flux = df_match['base_PsfFlux_instFlux']

# There was a major change in the DRP pipeline starting with w_2025_05.
# See:  https://rubin-obs.slack.com/archives/C07TXQUAXUZ/p1738795935921129
if post_w_2025_04:
    #  Total flux is taken as 1.04*'base_CircularApertureFlux_12_0_instFlux'.
    aperture = 'base_CircularApertureFlux_12_0'
    df_match['apCorrTot'] = 1.04*df_match[f'{aperture}_instFlux'] / psf_flux
else:
    #  Total flux is taken as 'base_CircularApertureFlux_70_0_instFlux'.
    aperture = 'base_CircularApertureFlux_70_0'
    df_match['apCorrTot'] = df_match[f'{aperture}_instFlux'] / psf_flux

# Mask culling sources with "bad" measurements: the source must be primary
# and must have none of the flags listed below set.
reject_flags = ('pixelFlags_bad', 'pixelFlags_saturated', 'extendedness_flag',
                f'{aperture}_flag', 'base_PsfFlux_flag')
mask1 = df_match['detect_isPrimary']
for flag in reject_flags:
    mask1 = mask1 & ~df_match[flag]

# Second mask culling sources that are too faint or (possibly) too bright:
# keep PSF fluxes from the 75th percentile up to (not including) the 95th
# percentile of the "good" sources.
psfFlux_min = psf_flux[mask1].quantile(0.75)
psfFlux_max = psf_flux[mask1].quantile(0.95)
mask = mask1 & (psf_flux >= psfFlux_min) & (psf_flux < psfFlux_max)

# Median correction per visit, ignoring NaNs
median_apCorrTots = df_match.loc[mask].groupby('visit')['apCorrTot'].agg(lambda x: np.nanmedian(x))

# Turn the Series into a DataFrame whose value column is named `apCorrTot_median`
df_median_apCorrTots = median_apCorrTots.reset_index().rename(columns={'apCorrTot': 'apCorrTot_median'})

## Remove the original apCorrTot column from df_match
#df_match.drop('apCorrTot', axis=1, inplace=True)

# Show the dataframe of median apCorrTots by visit id, 
#  but only if verbosity level is greater than 1:
if verbose > 1:
    display(df_median_apCorrTots)

### 5.3 Add the visit-by-visit median aperture corrections to the `df_match` (combined `src_all`+`icSrc_all`) pandas DataFrame

In [None]:
# Attach each visit's median aperture correction to every matched source of that visit.
# This is an inner join, so sources from visits without a median value are dropped.
df_match = df_match.merge(df_median_apCorrTots, on='visit')

In [None]:
# Display the result sorted in ascending order of visit (primarily) and RA (secondarily),
#  but only if verbosity level is greater than 0:
if verbose > 0:
    display(df_match.sort_values(by=['visit', 'ra']))

## 6. Extract the rows containing C26202 from the matched src_all and icSrc_all catalogs

In [None]:
# Based on code retrieved from Claude-3.5-Sonnet

# Maximum angular distance from the catalog position of C26202 for a source
# to be accepted as a measurement of the star.
match_radius_arcsec = 3.0

# Create a mask to cull sources with "bad" measurements.

# There was a major change in the DRP pipeline starting with w_2025_05.
# See:  https://rubin-obs.slack.com/archives/C07TXQUAXUZ/p1738795935921129
if post_w_2025_04:
    mask1 = (~df_match.pixelFlags_bad) & (~df_match.pixelFlags_saturated) & \
            (~df_match.extendedness_flag) & (df_match.detect_isPrimary) & \
            (~df_match.base_CircularApertureFlux_12_0_flag) & \
            (~df_match.base_PsfFlux_flag)
else:
    mask1 = (~df_match.pixelFlags_bad) & (~df_match.pixelFlags_saturated) & \
            (~df_match.extendedness_flag) & (df_match.detect_isPrimary) & \
            (~df_match.base_CircularApertureFlux_70_0_flag) & \
            (~df_match.base_PsfFlux_flag)

# Apply mask, keeping only the "good" measurements of `df_match`
df_match_cleaned = df_match[mask1]

# Create SkyCoord object for the coordinates of C26202
ref_coord = SkyCoord(ra=raDeg*u.degree, dec=decDeg*u.degree)

# Create SkyCoord object for all points in the dataframe
df_coords = SkyCoord(ra=df_match_cleaned['ra'].values*u.degree,
                     dec=df_match_cleaned['dec'].values*u.degree)

# Calculate separations
separations = ref_coord.separation(df_coords)

# Create mask for points within the match radius
mask_sep = separations < match_radius_arcsec*u.arcsec

# Get the filtered dataframe (as an independent copy) and record the
# separation from C26202, in arcseconds, for each remaining source
nearby_good_df = df_match_cleaned[mask_sep].copy()
nearby_good_df['separation_c26202'] = separations[mask_sep].arcsec

# Find (and keep) the closest match within the match radius for each visit.
# De-duplicating on *all* columns (as done previously) only removes rows that
# are identical in every column, so it never selected the closest source when
# a visit had more than one candidate.
best_df = nearby_good_df.sort_values('separation_c26202').drop_duplicates(subset='visit', keep='first')


In [None]:
# Display the full table of C26202 measurements, but only if verbosity level is greater than 1:
if verbose > 1:
    display(best_df)

In [None]:
# Display only the most relevant columns of the resulting table,
# but only if verbosity level is greater than 0:
if verbose > 0:
    display(best_df[['visit', 'band', 'airmass', 'base_PsfFlux_instFlux', 'apCorrTot_median']])

In [None]:
#for colname in best_df.columns:
#    print(colname)

## 7. Calculate the ratio of observed to expected throughputs for ComCam based on C26202


### 7.1 Add a column to `best_df` containing the expected counts for C26202 based on the contents of `df_counts` created earlier

We will use the `interp1d` interpolation function from the `scipy.interpolate` package to perform linear interpolations between the airmasses listed in `df_counts`.

In [None]:
# Based on code retrieved from Claude-3.5-Sonnet

from scipy.interpolate import interp1d

# Create a dictionary to store interpolation functions for each band:
# expected counts as a linear function of airmass.
interpolators = {}
for band in ['u', 'g', 'r', 'i', 'z', 'y']:
    interpolators[band] = interp1d(df_counts.index,
                                   df_counts[band],
                                   kind='linear',
                                   bounds_error=False,    # Return nan for out of bounds
                                   fill_value=np.nan)

# Create new column with interpolated values.  Each band is evaluated in one
# vectorized call, which yields a plain float column (a per-row `apply` stores
# 0-d arrays instead) and also works when best_df is empty.  Rows whose band
# has no interpolator, or whose airmass lies outside the tabulated range, are
# left as NaN.
airmass_values = best_df['airmass'].to_numpy(dtype=float)
band_values = best_df['band'].to_numpy()
expected_counts = np.full(len(best_df), np.nan)
for band, interpolate in interpolators.items():
    in_band = band_values == band
    if in_band.any():
        expected_counts[in_band] = interpolate(airmass_values[in_band])
best_df['total_counts_expected'] = expected_counts

# You can check the results (if verbosity level is greater than 0):
if verbose > 0:
    display(best_df[['visit', 'band', 'airmass', 'total_counts_expected']])

# Optional: Check for any NaN values (would indicate airmass outside interpolation range)
nan_matches = best_df[best_df['total_counts_expected'].isna()]
if len(nan_matches) > 0:
    print("\nRows with no matches (airmass out of range):")
    print(nan_matches[['visit', 'band', 'airmass']])

### 7.2 Add a column to `best_df` containing the total counts observed and the ratio of total counts observed to total counts expected


In [None]:
# Total observed counts: the PSF instrumental flux scaled by the visit's
# median psf-to-total aperture correction.
best_df['total_counts_observed'] = best_df['apCorrTot_median'] * best_df['base_PsfFlux_instFlux']
# Ratio of observed to expected total counts.
# NOTE(review): the expected counts assume the exposure time and gain set in
# phot_params; presumably the observed instFlux is in matching units -- confirm.
best_df['ratio_obs_exp'] = best_df['total_counts_observed'] / best_df['total_counts_expected']

Let's look at them...

In [None]:
# Set pandas to show all rows, with no truncation (but only if verbosity level is greater than 1)...
if verbose > 1:
    pd.set_option("display.max_rows", None)

In [None]:
# Output to screen the most relevant columns (observed and expected counts and their ratio)
#  for all rows, but only if verbosity level is greater than 0...
if verbose > 0:
    display(best_df[['visit', 'band', 'airmass', 'base_PsfFlux_instFlux', 'apCorrTot_median', 'total_counts_observed', 'total_counts_expected', 'ratio_obs_exp']])

In [None]:
# Reset pandas to its default maximum number of rows to print to screen
# (it was changed above only if the verbosity level is greater than 1)...
if verbose > 1:
    pd.reset_option("display.max_rows")

### 7.3 Plot a histogram of the ratio of total counts observed to total counts expected for each passband

In [None]:
# Based on code retrieved from Claude-3.5-Sonnet and Poe.com Assistant

# One step-style histogram of the observed/expected ratio per band, on a shared figure.
plt.figure(figsize=(10, 6))

# Per-band colors, transparency, and line width of the step histograms
colors = plot_filter_colors_white_background
alpha = 1.0   # transparency level
linewidth = 3 # linewidth for the step histogram lines 

# Bin edges: ratios from 0.00 up to 2.00 in steps of 0.01
# (a narrower alternative around 1.00+/-0.20 is kept for reference)
#bins = np.arange(0.80, 1.20, 0.01)
bins = np.arange(0.00, 2.00, 0.01)

for band in ['u', 'g', 'r', 'i', 'z', 'y']:
    band_data = best_df.loc[best_df['band'] == band, 'ratio_obs_exp']
    # Bands without any measurement are skipped (no empty legend entry)
    if len(band_data) == 0:
        continue
    plt.hist(band_data, bins=bins, histtype='step',
             alpha=alpha, linewidth=linewidth,
             color=colors[band], label=f'band {band}',
             density=False)  # raw counts; density=True would normalize the area

plt.xlabel('Ratio (Observed Counts/Expected Counts)')
plt.ylabel('Number')
#plt.xlim([0.80, 1.20])
plt.xlim([0.00, 2.00])

plt.title('Distribution of Observed/Expected Total Counts Ratio by Band for C26202')
plt.legend()
plt.grid(True, alpha=0.3)

# Adjust layout to prevent label clipping
plt.tight_layout()

plt.show()



### 7.4 Print summary statistics for each band

In [None]:
# Based on code retrieved from Claude-3.5-Sonnet

# Count, mean, median, and standard deviation of the observed/expected ratio,
# printed for every band that has at least one measurement.
for band in ['u', 'g', 'r', 'i', 'z', 'y']:
    band_data = best_df.loc[best_df['band'] == band, 'ratio_obs_exp']
    if len(band_data) == 0:
        continue
    print(f"\nBand {band} statistics:")
    print(f"N = {len(band_data)}")
    print(f"Mean = {band_data.mean():.3f}")
    print(f"Median = {band_data.median():.3f}")
    print(f"Std = {band_data.std():.3f}")

**Let's stop here for now:**

In [None]:
# Halt a "Run All" here, so the sandbox cells below only run when executed by hand.
print("Stopping here...")
raise StopExecution

## 8. Sandbox

In [None]:
# Check the header (metadata) of an `icExp` exposure image.
# NOTE: `dataId` is whatever value was left over from the last loop that set
# it earlier in the notebook, so this inspects that visit/detector.
icExp = butler.get('icExp', dataId=dataId)
icExp_info = icExp.getInfo()
print(icExp_info.getMetadata())
print(dataId)

In [None]:
# From Lynne Jones...
#
# WARNING: this cell rebinds several names used by the main analysis above
# (`hardware`, `system`, `sed`, `phot_params`, `counts`).  In particular,
# `system` and `counts` are keyed by band here rather than holding the
# per-airmass results, so re-run Section 2 before re-running later sections.

# Read the file 
seddata = fits.getdata(sedfile)
# Translate to rubin_sim format
wavelen = seddata['WAVELENGTH'] * u.angstrom.to(u.nanometer) # This is in angstroms - need in nanometers
flambda = seddata['FLUX'] / (u.angstrom.to(u.nanometer)) # this is in erg/sec/cm^2/ang but we want /nm

# Build the hardware and system throughputs with the default settings
# (no atmosphere override and no addLosses argument, unlike Section 2.1)
defaultDirs = st.setDefaultDirs()
defaultDirs['detector'] = defaultDirs['detector'].replace('/joint_minimum', '/itl')
hardware, system = st.buildHardwareAndSystem(defaultDirs)

sed = pt.Sed(wavelen=wavelen, flambda=flambda)


# exposure time and gain -- gain=1 -> e- counts
phot_params = pt.PhotometricParameters(exptime=30, gain=1, nexp=1)

# Per-band magnitude, total counts, and total counts in units of 100k
mags = {}
counts = {}
counts_100k = {}
for f in 'ugrizy':
    mags[f] = sed.calc_mag(system[f])
    counts[f] = sed.calc_adu(system[f], phot_params)
    counts_100k[f] = counts[f]/100000

#pd.DataFrame([mags, counts, counts_100k], index=['AB mag', 'total counts', 'counts(100k)'])
for f in 'ugrizy':
    print(f, mags[f], counts[f], counts_100k[f])

In [None]:
#fgcm_stars = butler.get("fgcm_Cycle5_StandardStars", collections=["u/erykoff/LSSTComCam/DM-47919/highlat/build4/run3"]).asAstropy()

In [None]:
#fgcm_stars

In [None]:
#fgcm_stars.write('fgcm_stars.ecsv')

In [None]:
#from astropy.table import Table
#df_bad_visits=Table.read("https://raw.githubusercontent.com/lsst-dm/excluded_visits/refs/heads/main/LSSTComCam/bad.ecsv").to_pandas()
#df_bad_visits.rename(columns={'exposure': 'visit'}, inplace=True)

In [None]:
#df_bad_visits

In [None]:
#best_df[best_df['band'] == band]

In [None]:
#for colname in best_df.columns:
#    print(colname)

In [None]:
#best_df['detector'].unique()

In [None]:
#best_df['id'].unique()

In [None]:
#best_df[best_df['band']=='u'].plot('visit','ratio_obs_exp', kind='scatter')

In [None]:
# Based on code retrieved from Claude-3.5-Sonnet and Poe.com Assistant

# Scatter plot of the observed/expected ratio against visit id, one color per band.
plt.figure(figsize=(10, 6))

colors = plot_filter_colors_white_background
alpha = 1.0   # transparency level (not used below; the points use a fixed 0.7)

for band in ['u', 'g', 'r', 'i', 'z', 'y']:
    band_data = best_df.loc[best_df['band'] == band]
    plt.scatter(band_data['visit'], band_data['ratio_obs_exp'],
                color=plot_filter_colors_white_background[band],
                alpha=0.7,
                label=f'band {band}')

plt.xlabel('Visit')
plt.ylabel('Ratio (Observed Counts/Expected Counts)')

plt.title('Observed/Expected Total Counts Ratio vs. Visit for C26202')
plt.legend()
plt.grid(True, alpha=0.3)

# Adjust layout to prevent label clipping
plt.tight_layout()

plt.show()



In [None]:
# Based on code retrieved from Claude-3.5-Sonnet and Poe.com Assistant

# Scatter plot of the observed/expected ratio against detector (u band only).
plt.figure(figsize=(10, 6))

# Per-band plot colors
colors = plot_filter_colors_white_background

# Plot for each band
for band in ['u']:
    band_data = best_df[best_df['band'] == band]
    plt.scatter(band_data['detector'], band_data['ratio_obs_exp'],
                label=f'band {band}',
                color=colors[band],
                alpha=0.7)

plt.xlabel('detector')
plt.ylabel('Ratio (Observed Counts/Expected Counts)')

# The title names the quantity actually on the x axis (it previously said "Visit").
plt.title('Observed/Expected Total Counts Ratio vs. Detector for C26202')
plt.legend()
plt.grid(True, alpha=0.3)

# Adjust layout to prevent label clipping
plt.tight_layout()

plt.show()



In [None]:
# Based on code retrieved from Claude-3.5-Sonnet and Poe.com Assistant

# Scatter plot of the observed/expected ratio against airmass (u band only).
plt.figure(figsize=(10, 6))

# Per-band plot colors
colors = plot_filter_colors_white_background

# Plot for each band
for band in ['u']:
    band_data = best_df[best_df['band'] == band]
    plt.scatter(band_data['airmass'], band_data['ratio_obs_exp'],
                label=f'band {band}',
                color=colors[band],
                alpha=0.7)

plt.xlabel('Airmass')
plt.ylabel('Ratio (Observed Counts/Expected Counts)')

# The title names the quantity actually on the x axis (it previously said "Visit").
plt.title('Observed/Expected Total Counts Ratio vs. Airmass for C26202')
plt.legend()
plt.grid(True, alpha=0.3)

# Adjust layout to prevent label clipping
plt.tight_layout()

plt.show()



In [None]:
# Show the visit ids of the C26202 measurements.
best_df['visit']

In [None]:
# Show the table of science exposures (bad visits removed).
df_sci

In [None]:
# Left-join the full exposure information onto the C26202 measurements
# (best_df.visit against df_sci.id).  Column names present in both tables get
# pandas' default '_x' (best_df) and '_y' (df_sci) suffixes; the airmass plot
# below reads 'airmass_y'.
merged_df = pd.merge(best_df, df_sci, left_on='visit', right_on='id', how='left')
merged_df

In [None]:
# Based on code retrieved from Claude-3.5-Sonnet and Poe.com Assistant

# Scatter plot of the observed/expected ratio against the airmass taken from
# the exposure table ('airmass_y' in merged_df), u band only.
plt.figure(figsize=(10, 6))

# Per-band plot colors
colors = plot_filter_colors_white_background

# Plot for each band
for band in ['u']:
    band_data = merged_df[merged_df['band'] == band]
    plt.scatter(band_data['airmass_y'], band_data['ratio_obs_exp'],
                label=f'band {band}',
                color=colors[band],
                alpha=0.7)

plt.xlabel('Airmass')
plt.ylabel('Ratio (Observed Counts/Expected Counts)')

# The title names the quantity actually on the x axis (it previously said "Visit").
plt.title('Observed/Expected Total Counts Ratio vs. Airmass for C26202')
plt.legend()
plt.grid(True, alpha=0.3)

# Adjust layout to prevent label clipping
plt.tight_layout()

plt.show()


In [None]:
# Based on code retrieved from Claude-3.5-Sonnet and Poe.com Assistant

# Scatter plot of the observed/expected ratio against the MJD of the start of
# the exposure, one color per band.
plt.figure(figsize=(10, 6))

# Per-band plot colors
colors = plot_filter_colors_white_background

# Plot for each band
for band in ['u', 'g', 'r', 'i', 'z', 'y']:
    band_data = merged_df[merged_df['band'] == band]
    plt.scatter(band_data['mjd'], band_data['ratio_obs_exp'],
                label=f'band {band}',
                color=colors[band],
                alpha=0.7)

plt.xlabel('MJD')
plt.ylabel('Ratio (Observed Counts/Expected Counts)')

# The title names the quantity actually on the x axis (it previously said "Visit").
plt.title('Observed/Expected Total Counts Ratio vs. MJD for C26202')
plt.legend()
plt.grid(True, alpha=0.3)

# Adjust layout to prevent label clipping
plt.tight_layout()

plt.show()


In [None]:
# Based on code retrieved from Claude-3.5-Sonnet and Poe.com Assistant

# Scatter plot of the observed/expected ratio against exposure time (u band only).
plt.figure(figsize=(10, 6))

# Per-band plot colors
colors = plot_filter_colors_white_background

# Plot for each band
#for band in ['u', 'g', 'r', 'i', 'z', 'y']:
for band in ['u']:
    band_data = merged_df[merged_df['band'] == band]
    plt.scatter(band_data['expos'], band_data['ratio_obs_exp'],
                label=f'band {band}',
                color=colors[band],
                alpha=0.7)

plt.xlabel('exposure time [sec]')
plt.ylabel('Ratio (Observed Counts/Expected Counts)')

# The title names the quantity actually on the x axis (it previously said "Visit").
plt.title('Observed/Expected Total Counts Ratio vs. Exposure Time for C26202')
plt.legend()
plt.grid(True, alpha=0.3)

# Adjust layout to prevent label clipping
plt.tight_layout()

plt.show()
