# ABoffsets_LSSTobs_CalSpec

Authors:  C. L. Adair, D. L. Tucker, with help from L. Jones, J. Carlin, E. Rykoff, and others

Created:  2024.11.15
Updated: 2025.10.15

## 1. Initial Setup...

### 1.1 Import useful python packages

In [1]:
# Generic python packages
import pylab as plt
import numpy as np
import pandas as pd
import glob
import math
import os
import gc
import warnings
import matplotlib.patches as patches

# LSST Science Pipelines (Stack) packages
import lsst.daf.butler as dafButler
import lsst.afw.display as afwDisplay
import lsst.geom as geom
from lsst.pipe.tasks.registerImage import RegisterConfig, RegisterTask

# rubin_sim-related packages
import rubin_sim.phot_utils as pt
from rubin_sim.phot_utils import Bandpass
import syseng_throughputs as st
from rubin_sim.data import get_data_dir

# Astropy-related packages
from astropy import units as u
#import astropy.units as u
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy.wcs import WCS
from astropy.visualization import LinearStretch, ImageNormalize
from astropy.stats import sigma_clipped_stats

# PIL package
from PIL import Image

# Default every matplotlib figure to a square 8.0 x 8.0 canvas
plt.rcParams['figure.figsize'] = (8.0, 8.0)
# Have lsst.afw.display draw through its matplotlib backend
afwDisplay.setDefaultBackend('matplotlib')

# Silence all Python warnings to avoid a lot of "logorrhea" on the screen.
#  NOTE(review): this is a blanket filter, so deprecation and numerical
#  warnings are hidden too -- consider narrowing it when debugging.
warnings.filterwarnings("ignore")

  import pkg_resources


### 1.2 Include user input

In [2]:
# User-configurable inputs for the whole notebook: which butler repo,
#  collection, instrument, and skymap to use.
# See https://rubinobs.atlassian.net/wiki/spaces/DM/pages/48834013/Campaigns#1.1.-ComCam
# and https://rubinobs.atlassian.net/wiki/spaces/DM/pages/226656354/LSSTComCam+Intermittent+Cumulative+DRP+Runs

# Alternative (LSSTComCam / DP1) settings, kept for reference:
#instrument = 'LSSTComCam'
#repo = '/repo/dp1'
#collections = 'LSSTComCam/DP1'
#skymap_name = 'lsst_cells_v1'
#day_obs_start = 20241101
#day_obs_end = 20241231

# Instrument, butler repo/collection, and observation start/end.
#  NOTE(review): skymap_name, day_obs_start and day_obs_end are not read by any
#  cell visible above the Sandbox section -- confirm whether later cells use them.
instrument = 'LSSTCam'
repo = '/repo/main'
collections = 'LSSTCam/runs/DRP/20250604_20250921/w_2025_39/DM-52645'
# Newer collection; processing may not be finished yet:
#collections='LSSTCam/runs/DRP/20250421_20250921/w_2025_41/DM-52836'
skymap_name = 'lsst_cells_v1'
day_obs_start = 20250401
day_obs_end = 20251230

# Plotting and verbosity switches.  plotImages gates the image/cutout display
#  cells in Section 3.1.2.
plotImages = False
plotCutouts = False
verbose = 1         # 0, 1, 2, ...  Larger means more output to the screen.

# True:  build the filter passbands from the butler's "standard_passband" datasets.
# False: build them with syseng_throughputs instead.
use_fgcm_passbands = True

# Which flux to use?  psfFlux or calibFlux?
#  NOTE(review): the magnitude cell further down reads 'calibFlux' and
#  'calibFluxErr' directly rather than these two names -- confirm which is intended.
fluxName = 'psfFlux'
fluxerrName = 'psfFluxErr'
#fluxName = 'calibFlux'
#fluxerrName = 'calibFluxErr'

# Set environment variable to point to the location of the rubin_sim_data
#  (per Lynne Jones' Slack message on the #sciunit-photo-calib channel from 26 Nov 2024):
os.environ["RUBIN_SIM_DATA_DIR"] = "/sdf/data/rubin/shared/rubin_sim_data"


# CSV table of CalSpec stars (read in the next cell; it must provide the
#  Star_Name, raDeg, decDeg, Name, STIS and Model columns used there)
calspec_filename = "./mag_CalSpec.csv"
# Star to analyze: leave exactly one Star_Name assignment uncommented
#Star_Name = Star_Name
#Star_Name = "WDFS1930-52"
#Star_Name = "NGC6681-1"
#Star_Name = "WDFS1514+00"
#Star_Name = "WDFS1206-27"
#Star_Name = "VB8"
#Star_Name = "WDFS1055-36"
#Star_Name = "WDFS1837-70"
Star_Name = "C26202"
#Star_Name = "WDFS2317-29"
#Star_Name = "WDFS1434-28"
#Star_Name = "WDFS1535-77"

# Directory holding the CalSpec SED FITS files ("~" is expanded to the user's home)
calspec_sed_path = "~/Downloads"
calspec_sed_path = os.path.expanduser(calspec_sed_path)
print(calspec_sed_path)

# List of filters to examine
flist = ['u','g','r','i','z','y']

# Plot symbol colors to use for ugrizy (hex RGB strings keyed by filter name)
plot_filter_colors_white_background = {'u': '#0c71ff', 'g': '#49be61', 'r': '#c61c00', 'i': '#ffc200', 'z': '#f341a2', 'y': '#5d0000'}

/home/d/dltucker/Downloads


In [3]:
# Read the CalSpec table, look up Star_Name, and build the dictionary of SED
#  file names for that star.

# Read CSV into a DataFrame
df = pd.read_csv(calspec_filename)

# List-of-dictionaries view of the table (one dictionary per row)
data = df.to_dict(orient="records")

# Dictionary of row dictionaries keyed by Star_Name
data_by_star = df.set_index("Star_Name").to_dict(orient="index")

# Fail early, with a readable message, if the requested star is not in the table
if Star_Name not in data_by_star:
    raise KeyError(f"Star_Name {Star_Name!r} not found in {calspec_filename}")

# Grab the row dictionary for this star
row = data_by_star[Star_Name]
print(row)

raDeg = row["raDeg"]
decDeg = row["decDeg"]

# Build dictionary of SED file names, keyed by the SED suffix
sedfile_dict = {}

# Loop over the two SED-suffix columns
for col in ["STIS", "Model"]:
    val = row[col]
    # Skip stars that have no entry (NaN or empty string) for this SED type
    if pd.notna(val) and val != "":
        # Strip only the leading underscore(s), e.g. '_mod_008' -> 'mod_008'
        key = val.lstrip("_")
        filename = f"{row['Name']}_{key}.fits"
        sedfile_dict[key] = os.path.join(calspec_sed_path, filename)

print(sedfile_dict)



{'RAHMS': '03 32 32.843', 'DECDMS': '-27 51 48.58', 'raDeg': 53.13684583333333, 'decDeg': -27.863494444444445, 'Vr': nan, 'PM_RA': nan, 'PM_DEC': nan, 'Simbad_Name': '2MASS J03323287-2751483', 'Alt_Simbad_Name': '[B2010] C26202', 'Sp_T': 'F8IV', 'G_Gaia': nan, 'V': 16.64, 'BV': 0.26, 'Name': 'c26202', 'Model': '_mod_008', 'STIS': '_stiswfcnic_007'}
{'stiswfcnic_007': '/home/d/dltucker/Downloads/c26202_stiswfcnic_007.fits', 'mod_008': '/home/d/dltucker/Downloads/c26202_mod_008.fits'}


### 1.3 Define useful classes and functions

In [4]:
# Exception for halting a "Run All" at a chosen cell:
#  put the command "raise StopExecution" in that cell.
class StopExecution(Exception):
    """Exception whose traceback renderer produces nothing."""

    def _render_traceback_(self):
        """Return None rather than a formatted traceback."""
        return None

In [5]:
def cutout_im(butler, ra, dec, datasetType, visit, detector, cutoutSideLength=51, **kwargs):
    """
    Return a square cutout of an image dataset around a given ra, dec position.

    Adapted from cutout_coadd which was adapted from a DC2 tutorial
    notebook by Michael Wood-Vasey.

    Parameters
    ----------
    butler
        Object whose ``get`` method is used for both reads.
    ra, dec
        Sky position, handed to ``geom.SpherePoint`` in degrees.
    datasetType
        Name of the image dataset type; its ``.wcs`` component is read first.
    visit, detector
        Values used to build the dataId for both reads.
    cutoutSideLength
        Side length of the square cutout box (default 51).
    **kwargs
        Accepted but not used.

    Returns
    -------
    Whatever ``butler.get`` returns for ``datasetType`` restricted to the
    cutout bounding box.
    """
    data_id = dict(visit=visit, detector=detector)
    sky_position = geom.SpherePoint(ra, dec, geom.degrees)
    box_extent = geom.ExtentI(cutoutSideLength, cutoutSideLength)

    # Only the WCS component is needed to locate the target on the image
    image_wcs = butler.get(f"{datasetType}.wcs", **data_id)
    target_pixel = geom.PointI(image_wcs.skyToPixel(sky_position))

    # Offset the box corner by half the extent so the target sits at the center
    cutout_bbox = geom.BoxI(target_pixel - box_extent // 2, box_extent)

    return butler.get(datasetType, parameters={'bbox': cutout_bbox}, **data_id)

In [6]:
def warp_img(ref_img, img_to_warp, ref_wcs, wcs_to_warp):
    """
    Warp ``img_to_warp`` with ``RegisterTask.warpExposure``.

    A ``RegisterTask`` with a default ``RegisterConfig`` is created on every
    call.  The image and its WCS (``wcs_to_warp``) are passed together with
    the reference WCS and the bounding box of ``ref_img``; the value returned
    by ``warpExposure`` is returned unchanged.
    """
    register_task = RegisterTask(name="register", config=RegisterConfig())
    return register_task.warpExposure(
        img_to_warp, wcs_to_warp, ref_wcs, ref_img.getBBox()
    )

In [7]:
def make_gif(frame_folder):
    """
    Assemble the PNG files in ``frame_folder`` into ``animation.gif``.

    The frames are taken in sorted file-name order and written to
    "animation.gif" in the current working directory, 500 ms per frame,
    looping forever.

    Parameters
    ----------
    frame_folder
        Directory that is globbed for ``*.png`` files.

    Raises
    ------
    IndexError
        If ``frame_folder`` contains no PNG files.
    """
    frame_paths = sorted(glob.glob(f"{frame_folder}/*.png"))
    if not frame_paths:
        raise IndexError(f"no PNG frames found in {frame_folder!r}")

    frames = []
    try:
        for frame_path in frame_paths:
            frames.append(Image.open(frame_path))
        first_frame, *later_frames = frames
        # Append only the frames AFTER the first one; passing the full list
        #  would put the first frame into the animation twice.
        first_frame.save("animation.gif", format="GIF", append_images=later_frames,
                         save_all=True, duration=500, loop=0)
    finally:
        # Release the file handles held by the opened images
        for frame in frames:
            frame.close()

### 1.4 Instantiate the Butler

In [8]:
# Butler for the repo and collection(s) chosen in the user-input cell (Section 1.2)
butler = dafButler.Butler(repo, collections=collections)

## 2. Calculate Synthetic AB magnitudes for CalSpec star, based on official filter bandpasses

### 2.1 Build the filter passbands (FGCM standard passbands, or syseng throughputs with ComCam ITL detectors)

In [9]:
# Build `system`, a dictionary of total-throughput passbands keyed by filter
#  name, either from the butler's "standard_passband" datasets or from
#  syseng_throughputs.
if use_fgcm_passbands:
    system = {}

    # NOTE(review): the band list is hard-coded here rather than taken from flist
    for band in ['u', 'g', 'r', 'i', 'z', 'y']:
        # Get the passband table for this filter from the butler
        passband = butler.get("standard_passband", band=band)

        # Create an (empty) rubin_sim Bandpass
        bp = Bandpass()

        # Convert wavelength to nanometers (Rubin convention)
        wavelen_nm = passband['wavelength'].to('nm').value

        # Convert throughput from percent → fraction
        #  (assumes the 'throughput' column is stored in percent -- TODO confirm)
        throughput = np.array(passband['throughput']) / 100.0

        # Load the wavelength/throughput arrays into the Bandpass
        bp.set_bandpass(wavelen_nm, throughput)

        # Store in dictionary with key = filter band
        system[band] = bp

    
else:
    # Start from the default syseng_throughputs component directories
    defaultDirs = st.setDefaultDirs()
    
    if instrument == "LSSTComCam":
        #Change detectors from (default) LSST to ComCam (ITL CCDs)
        defaultDirs['detector'] = defaultDirs['detector'].replace('/joint_minimum', '/itl')
    # Only `system` is used by the cells below; `hardware` is kept for reference
    hardware, system = st.buildHardwareAndSystem(defaultDirs)

### 2.2 Calculate synthetic mags

In [10]:
# Synthetic magnitudes: mags[sed_key] is a numpy array with one entry per
#  filter, in flist order.
mags = {}

# Loop through all SEDs in our sedfile dictionary
for sed_key in sedfile_dict:
    
    print(sed_key, sedfile_dict[sed_key])
    
    # Read the SED file associated with this SED
    sedfile = sedfile_dict[sed_key]
    seddata = fits.getdata(sedfile)

    # Transform the SED data into rubin_sim format.
    #  u.angstrom.to(u.nanometer) is the plain conversion factor from angstroms
    #  to nanometers, so wavelengths are multiplied by it and per-angstrom flux
    #  densities are divided by it.
    wavelen = seddata['WAVELENGTH'] * u.angstrom.to(u.nanometer) # angstroms -> nanometers
    flambda = seddata['FLUX'] / (u.angstrom.to(u.nanometer)) # erg/sec/cm^2/angstrom -> erg/sec/cm^2/nm
    sed = pt.Sed(wavelen=wavelen, flambda=flambda)
    
    # Loop over the filters, calculating the synthetic mags for each filter for this SED
    mags[sed_key] = []
    for f in flist:
        # Append the synthetic mag for this filter to this mags list for this SED
        mags[sed_key].append(sed.calc_mag(system[f]))
    # Convert list of synthetic mags for this SED into a numpy array
    mags[sed_key] = np.array(mags[sed_key])
    
    

stiswfcnic_007 /home/d/dltucker/Downloads/c26202_stiswfcnic_007.fits
mod_008 /home/d/dltucker/Downloads/c26202_mod_008.fits


### 2.3 Convert mags numpy arrays into a pandas dataframe

In [11]:
# One column per SED key and one row per filter in flist
df_mags = pd.DataFrame(mags, index=flist)
df_mags

Unnamed: 0,stiswfcnic_007,mod_008
u,17.578325,17.592414
g,16.68909,16.689975
r,16.361812,16.361413
i,16.260013,16.259363
z,16.243653,16.24366
y,16.238259,16.238294


## 3. Query USDF Butler for observations of CalSpec star

### 3.1 Find all the `visit_image`'s that overlap the sky position of CalSpec star

#### 3.1.1 Find the `dataId`'s for all `visit_image`'s in this repo/collection that overlap the RA, DEC of CalSpec star

In [12]:
# Ask the butler for every visit_image whose detector region contains the star
datasetRefs = butler.query_datasets(
    "visit_image",
    where="visit_detector_region.region OVERLAPS POINT(ra, dec)",
    bind={"ra": raDeg, "dec": decDeg},
)

# List the dataIds; unless verbose >= 2, stop after the entry with index 10
for i, ref in enumerate(datasetRefs):
    print(i, ref.dataId)
    if verbose >= 2 or i < 10:
        continue
    print("...")
    break

print(f"\nFound {len(datasetRefs)} visit_images")

0 {instrument: 'LSSTCam', detector: 44, visit: 2025090600260, band: 'z', day_obs: 20250906, physical_filter: 'z_20'}
1 {instrument: 'LSSTCam', detector: 49, visit: 2025090600252, band: 'z', day_obs: 20250906, physical_filter: 'z_20'}
2 {instrument: 'LSSTCam', detector: 51, visit: 2025090600256, band: 'z', day_obs: 20250906, physical_filter: 'z_20'}
3 {instrument: 'LSSTCam', detector: 51, visit: 2025090600266, band: 'z', day_obs: 20250906, physical_filter: 'z_20'}
4 {instrument: 'LSSTCam', detector: 51, visit: 2025090600280, band: 'r', day_obs: 20250906, physical_filter: 'r_57'}
5 {instrument: 'LSSTCam', detector: 52, visit: 2025082600442, band: 'i', day_obs: 20250826, physical_filter: 'i_39'}
6 {instrument: 'LSSTCam', detector: 52, visit: 2025082600445, band: 'i', day_obs: 20250826, physical_filter: 'i_39'}
7 {instrument: 'LSSTCam', detector: 52, visit: 2025090600254, band: 'z', day_obs: 20250906, physical_filter: 'z_20'}
8 {instrument: 'LSSTCam', detector: 52, visit: 2025090600259, ba

#### 3.1.2 Plot a cutout of one of the `visit_image`'s

Find first r-band image from `datasetRefs`.

In [13]:
if plotImages:

    # Find the first image in band_name that can be read AND whose WCS can be
    #  exported as FITS metadata (the plotting cells that follow need both).
    # Kudos to Claude-Sonnet-3.5 for finding a solution to identifying a
    #  visit_image or preliminary_visit_image that has the necessary WCS info
    #  for plotting the images that follow.  (Current problems may be due to
    #  recent issues on USDF.)

    band_name = 'r'
    visit_image_type = ''
    visit_image = None

    for i, ref in enumerate(datasetRefs):
        if ref.dataId['band'] != band_name:
            continue

        visit = ref.dataId['visit']
        detector = ref.dataId['detector']

        # Prefer the visit_image; fall back to the preliminary_visit_image
        for candidate_type in ('visit_image', 'preliminary_visit_image'):
            try:
                candidate = butler.get(candidate_type, dataId={'visit': visit, 'detector': detector})
                candidate.getWcs().getFitsMetadata()
            except Exception:
                # Unreadable image or unusable WCS: try the next option.
                #  (Not a bare "except", so Ctrl-C still interrupts the loop.)
                continue
            # Record the image and its type only once it has passed both checks
            visit_image = candidate
            visit_image_type = candidate_type
            break

        if visit_image is not None:
            break  # Success, stop scanning datasetRefs

    if visit_image is None:
        # Stop here with a clear message rather than letting the plotting
        #  cells fail later on a missing or mismatched image.
        raise RuntimeError(f"No usable {band_name}-band visit_image or "
                           f"preliminary_visit_image found for {Star_Name}")

    print(visit_image_type)
    print(ref.dataId)

In [14]:
# For very verbose runs, print the visit info and summary statistics that are
#  attached to the selected image
if plotImages and verbose > 1:
    visit_image_info = visit_image.getInfo()
    visit_info = visit_image_info.getVisitInfo()
    print(visit_info)
    summary_info = visit_image_info.getSummaryStats()
    print(summary_info)

In [15]:
# Display the full selected image with afwDisplay
if plotImages:
    fig = plt.figure()
    display = afwDisplay.Display(frame=fig)
    # asinh stretch with zscale limits
    display.scale('asinh', 'zscale')
    display.mtv(visit_image.image)
    plt.show()

In [16]:
# Display a cutout of the selected image centered on the star
if plotImages:
    try:
        # Create cutout image...
        cutoutsize = 501 #Defining the size of the cutout box in pixels
        #cutout_visit_image = cutout_im(butler, raDeg, decDeg, 'preliminary_visit_image', visit, detector, cutoutSideLength=cutoutsize)
        cutout_visit_image = cutout_im(butler, raDeg, decDeg, visit_image_type, visit, detector, cutoutSideLength=cutoutsize)

        # Plot cutout image...
        fig = plt.figure()
        display = afwDisplay.Display(frame=fig)
        display.scale('asinh', 'zscale')
        display.mtv(cutout_visit_image.image)
        plt.show()
    except Exception as exc:
        # Best effort: keep going, but show the actual error so that a problem
        #  other than an off-edge cutout is not hidden behind the guess below.
        #  (Not a bare "except", so Ctrl-C still interrupts.)
        print("Target might be too close to the edge of this visit_image")
        print(f"  ({type(exc).__name__}: {exc})")
        print("Continuing...")

In [17]:
# Plot the full selected image on WCS axes with a coordinate grid
if plotImages:
    fig = plt.figure()
    # Build the astropy WCS projection from the image's FITS WCS metadata
    plt.subplot(projection=WCS(visit_image.getWcs().getFitsMetadata()))
    # Pixel extent taken from the image bounding box: (x0, x1, y0, y1)
    calexp_extent = (visit_image.getBBox().beginX, visit_image.getBBox().endX,
                     visit_image.getBBox().beginY, visit_image.getBBox().endY)
    # Fixed display range: vmin=-200, vmax=400
    im = plt.imshow(visit_image.image.array, cmap='gray', vmin=-200.0, vmax=400,
                    extent=calexp_extent, origin='lower')
    plt.grid(color='white', ls='solid')
    # Axis labels are deliberately left blank
    #plt.xlabel('Right Ascension')
    #plt.ylabel('Declination')
    plt.xlabel('')
    plt.ylabel('')
    plt.show()

In [18]:
# Same WCS plot as the previous cell, but with an inverted gray scale, a title,
#  and a red circle marking the star
if plotImages:

    # Kudos to Co-Pilot for the following code...
    
    # WCS axes built from the image's FITS WCS metadata
    fig = plt.figure()
    ax = plt.subplot(projection=WCS(visit_image.getWcs().getFitsMetadata()))
    # Pixel extent taken from the image bounding box: (x0, x1, y0, y1)
    calexp_extent = (visit_image.getBBox().beginX, visit_image.getBBox().endX,
                     visit_image.getBBox().beginY, visit_image.getBBox().endY)
    im = ax.imshow(visit_image.image.array, cmap='gray_r', vmin=-200.0, vmax=400,
                   extent=calexp_extent, origin='lower')
    ax.grid(color='white', ls='solid')
    # Axis labels are deliberately left blank
    #ax.set_xlabel('Right Ascension')
    #ax.set_ylabel('Declination')
    plt.xlabel('')
    plt.ylabel('')

    # Add a title naming the star, filter, visit and detector
    plot_title = """%s, filter %s, visit = %d, detector = %d""" % \
                    (Star_Name, band_name, visit, detector)
    ax.set_title(plot_title, fontsize=14, color='black')

    # Add a circle around the star (in sky coordinates).
    #  NOTE(review): the radius is in the units of the 'world' transform,
    #  presumably degrees -- confirm that 0.01 is the intended size.
    circle = patches.Circle((raDeg, decDeg), radius=0.01,  # adjust radius as needed
                            transform=ax.get_transform('world'),
                            edgecolor='red', facecolor='none', linewidth=2)
    ax.add_patch(circle)

    plt.show()


#### 3.1.3 Create a pandas Dataframe containing the `source2` info for all these `visit_image`'s

Now, loop over the `datasetRefs` again, but this time grab the contents of the `source2` table for each `ref`, keep the source closest to the CalSpec star (within 3 arcsec), and combine the results into one pandas DataFrame.

In [None]:
# For every image that overlaps the star, read its 'source2' catalog, keep the
#  clean point-like sources, and save the one closest to the star (if any lies
#  within 3 arcsec).  The per-image best matches are combined into best_df.

# Reference CALSPEC star coordinates
ref_coord = SkyCoord(ra=raDeg*u.degree, dec=decDeg*u.degree)

src_list = []

for i, ref in enumerate(datasetRefs):
    dataId = {'visit': ref.dataId['visit'], 'detector': ref.dataId['detector']}
    src = butler.get('source2', dataId=dataId).to_pandas()
#    src = butler.get('recalibrated_star_detector', dataId=dataId).to_pandas()
# NOTE - source2 has more matches and gives a slightly different offset to recalibrated - which is going away soon (less than 2 mmag)

    # Apply "good measurement" mask immediately
    #  (element-wise & / ~ are required here: these are pandas Series)
    mask = (~src.pixelFlags_bad) & (~src.pixelFlags_saturated) & \
           (~src.extendedness_flag)
    src_cleaned = src[mask]

    # Compute separations to CALSPEC star
    df_coords = SkyCoord(ra=src_cleaned['ra'].values*u.degree,
                         dec=src_cleaned['dec'].values*u.degree)
    separations = ref_coord.separation(df_coords)

    # Keep only sources within 3 arcsec.
    #  The column is named 'separation_c26202' whichever star is selected,
    #  because later cells refer to it by that name.
    mask_sep = separations < 3.0*u.arcsec
    nearby = src_cleaned[mask_sep].copy()
    nearby['separation_c26202'] = separations[mask_sep].arcsec

    # Mark where the per-image listing is truncated, whether or not this
    #  particular image produced a match
    if verbose < 2 and i == 10:
        print("...")

    if nearby.empty:
        continue

    # Keep the single closest source for this image
    best = nearby.sort_values('separation_c26202').iloc[[0]]
    src_list.append(best)
    if verbose >= 2 or i < 10:
        # Report how many sources were within range; `best` itself always has one row
        print(f"{i} Visit {ref.dataId['visit']}, Detector {ref.dataId['detector']}: "
              f"Found {len(nearby)} candidate matches.")

# Without any match there is nothing to analyze: stop with a clear message
#  instead of failing below on an undefined src_all
if not src_list:
    raise RuntimeError("No matches found within 3 arcsec.")

# Concatenate only the small filtered tables
src_all = pd.concat(src_list, ignore_index=True)
print(f"\nTotal combined catalog contains {len(src_all)} candidate sources.")

best_df = src_all

0 Visit 2025090600260, Detector 44: Found 1 candidate matches.
1 Visit 2025090600252, Detector 49: Found 1 candidate matches.
2 Visit 2025090600256, Detector 51: Found 1 candidate matches.
3 Visit 2025090600266, Detector 51: Found 1 candidate matches.
5 Visit 2025082600442, Detector 52: Found 1 candidate matches.
6 Visit 2025082600445, Detector 52: Found 1 candidate matches.
7 Visit 2025090600254, Detector 52: Found 1 candidate matches.
9 Visit 2025090600253, Detector 53: Found 1 candidate matches.
...


Let's look at the result:

In [None]:
best_df

Add magCalib and magCalibErr columns:

In [None]:
# NOTE(review): 'calibFlux' / 'calibFluxErr' are hard-coded here, while the
#  user-input cell selects fluxName = 'psfFlux' -- confirm which flux is intended.

# Flux in nano-Janskys to AB magnitudes (31.4 is the zero point used for nJy):
best_df['magCalib'] = -2.5*np.log10(best_df['calibFlux']) + 31.4

# Flux error in nano-Janskys to AB magnitude error:
# Factor of 2.5/math.log(10) is explained here:  https://astronomy.stackexchange.com/questions/38371/how-can-i-calculate-the-uncertainties-in-magnitude-like-the-cds-does
best_df['magCalibErr'] = 2.5/math.log(10)*best_df['calibFluxErr']/best_df['calibFlux']

Display `visit`, `detector`, `band`, `calibFlux`, `calibFluxErr`, `magCalib`, `magCalibErr`, and `separation_c26202` from best_df, sorted by `visit` and `band`:

In [None]:
# Set pandas to show all rows...
if verbose > 2:
    pd.set_option("display.max_rows", None)

In [None]:
best_df[['visit', 'detector', 'band', 'calibFlux', 'calibFluxErr', 'magCalib', 'magCalibErr', 'separation_c26202']].sort_values(['visit', 'band'])

Print the number of rows in `best_df`:

In [None]:
print("""Number of rows:  %d""" % (len(best_df['visit'])))

In [None]:
# Reset pandas to its default maximum rows to print to screen
if verbose > 2:
    pd.reset_option("display.max_rows")

#### 3.1.4 Save `best_df` as a CSV file

Let's save `best_df` as a CSV file that we can download and examine with TOPCAT:

In [None]:
#best_df.to_csv('LSSTComCam_C26202_fields.csv', index=False)

## 4. Measure differences between the calibrated observed magnitudes and the LSST Synthetic Mags for CalSpec star

In [None]:
# Group by the 'band' column in best_df and count the 'magCalib' values in each group
count_df = best_df.groupby('band')['magCalib'].count().reset_index()

# Rename the columns for clarity
count_df = count_df.rename(columns={'magCalib': 'n_band'})

if verbose > 2:
    # A bare expression inside an `if` block is not rendered by Jupyter, so print it
    print(count_df)

In [None]:
# Group by the 'band' column in best_df and calculate the median of 'magCalib' for each group
median_df = best_df.groupby('band')['magCalib'].median().reset_index()

# Rename the columns for clarity
median_df = median_df.rename(columns={'magCalib': 'median_magCalib'})

if verbose > 2:
    # A bare expression inside an `if` block is not rendered by Jupyter, so print it
    print(median_df)

In [None]:
# Merge the count_df and median_df dataframes based on the filter band name
combined_df = pd.merge(count_df, median_df, left_on='band', right_on='band')

if verbose > 2:
    # A bare expression inside an `if` block is not rendered by Jupyter, so print it
    print(combined_df)

In [None]:
# Reset the df_mags index to turn the filter names into a column (named 'index')
df_mags_reset = df_mags.reset_index()

# Merge the dataframes based on the filter name
combined_df = pd.merge(combined_df, df_mags_reset, left_on='band', right_on='index')

if verbose > 2:
    # A bare expression inside an `if` block is not rendered by Jupyter, so print it
    print(combined_df)

In [None]:
# Calculate the differences (observed median minus synthetic) and add them as
#  new columns, one per SED: e.g. 'offset_stiswfcnic_007' and 'offset_mod_008'
for sed_key in sedfile_dict:
    offset_name = f"offset_{sed_key}"
    combined_df[offset_name] = combined_df['median_magCalib'] - combined_df[sed_key]


if verbose > 2:
    # A bare expression inside an `if` block is not rendered by Jupyter, so print it
    print(combined_df)

In [None]:
# Output final cleaned-up results...

# Define the desired order of 'band'
order = ['u', 'g', 'r', 'i', 'z', 'y']

# Remove the 'index' column left over from the merge with the synthetic mags.
#  errors='ignore' lets this cell be re-run after the column is already gone.
combined_df = combined_df.drop(columns=['index'], errors='ignore')

# Reorder the dataframe based on the 'band' column
combined_df['band'] = pd.Categorical(combined_df['band'], categories=order, ordered=True)
combined_df = combined_df.sort_values('band').reset_index(drop=True)

combined_df

In [None]:
raise StopExecution

## 5. Sandbox

The code below is meant to loop over each of the CalSpec stars - NOT TESTED YET