# ABoffsets_LSSTComCam_c26202

Authors:  C. L. Adair, D. L. Tucker, with help from L. Jones, J. Carlin, and others

Created:  2024.11.15
Updated: 2025.10.14

## 1. Initial Setup...

### 1.1 Import useful python packages

In [1]:
# Generic python packages
import pylab as plt
import numpy as np
import pandas as pd
import glob
import math
import os
import gc
import warnings

# LSST Science Pipelines (Stack) packages
import lsst.daf.butler as dafButler
import lsst.afw.display as afwDisplay

# rubin_sim-related packages
import rubin_sim.phot_utils as pt
import syseng_throughputs as st
from rubin_sim.data import get_data_dir

# Astropy-related packages
from astropy import units as u
from astropy.io import fits
from astropy.coordinates import SkyCoord
import lsst.geom as geom

# Set a standard figure size to use
plt.rcParams['figure.figsize'] = (8.0, 8.0)
afwDisplay.setDefaultBackend('matplotlib')

# Set filter warnings to "ignore" to avoid a lot of "logorrhea" to the screen:
warnings.filterwarnings("ignore")

  import pkg_resources


### 1.2 Include user input

In [2]:
# Which repo, collection, instrument, and skymap to use.
# See https://rubinobs.atlassian.net/wiki/spaces/DM/pages/48834013/Campaigns#1.1.-ComCam
# and https://rubinobs.atlassian.net/wiki/spaces/DM/pages/226656354/LSSTComCam+Intermittent+Cumulative+DRP+Runs
#repo = 'embargo'
#repo = '/repo/dp1'
repo = '/repo/main'
#collections = 'LSSTComCam/runs/DRP/DP1/v29_0_0/DM-50260'
collections = 'LSSTCam/runs/DRP/20250604_20250921/w_2025_39/DM-52645'

#instrument = 'LSSTComCam'
instrument = 'LSSTCam'
skymap_name = 'lsst_cells_v1'
#day_obs_start = 20241101
#day_obs_end = 20241231
day_obs_start = 20250401
day_obs_end = 20251230
plotImages = False
plotCutouts = False

# Set environment variable to point to location of the rubin_sim_data 
#  (per Lynne Jones' Slack message on the #sciunit-photo-calib channel from 26 Nov 2024):
os.environ["RUBIN_SIM_DATA_DIR"] = "/sdf/data/rubin/shared/rubin_sim_data"


# calspec filename
calspec_filename = "./mag_CalSpec.csv"
#Star_Name = Star_Name
#Star_Name = "WDFS1930-52"
#Star_Name = "NGC6681-1"
#Star_Name = "WDFS1514+00"
#Star_Name = "WDFS1206-27"
#Star_Name = "VB8"
#Star_Name = "WDFS1055-36"
#Star_Name = "WDFS1837-70"
Star_Name = "C26202"
#Star_Name = "WDFS2317-29"
#Star_Name = "WDFS1434-28"
#Star_Name = "WDFS1535-77"


# DA Stars--------------------------------------------------------------------------------
###########################################################################################

# WDFS1930-52

# RA, DEC in degrees:
# raDeg = 292.57899
# decDeg = -52.06278
# sedfile_dict = {'stis_001' : '~/Downloads/wdfs1930_52_stis_001.fits'
#                'mod_001'        : '~/Downloads/wdfs1930_52_mod_001.fits'
#               }




# List of filters to examine
flist = ['u','g','r','i','z','y']

# Plot symbol colors to use for ugrizy
plot_filter_colors_white_background = {'u': '#0c71ff', 'g': '#49be61', 'r': '#c61c00', 'i': '#ffc200', 'z': '#f341a2', 'y': '#5d0000'}

In [3]:

# DA Stars--------------------------------------------------------------------------------
###########################################################################################

# WDFS1930-52

# RA, DEC in degrees:
# raDeg = 292.57899
# decDeg = -52.06278
# sedfile_dict = {'stis_001' : '~/Downloads/wdfs1930_52_stis_001.fits'
#                'mod_001'        : '~/Downloads/wdfs1930_52_mod_001.fits'
#               }

# Read CSV into a DataFrame
df = pd.read_csv(calspec_filename)

# Convert to list of dictionaries
data = df.to_dict(orient="records")

# Or: dictionary of dictionaries keyed by Star_Name
data_by_star = df.set_index("Star_Name").to_dict(orient="index")

print(data_by_star[Star_Name])


raDeg = data_by_star[Star_Name]["raDeg"]
decDeg = data_by_star[Star_Name]["decDeg"]




downloads_path = os.path.expanduser("~/Downloads")

# Grab the row dictionary for this star
row = data_by_star[Star_Name]

# Build dictionary of file names
sedfile_dict = {}

# Loop over the last three columns
for col in ["STIS", "Model"]:
    val = row[col]
    if pd.notna(val) and val != "":
        # strip leading underscore if present
        key = val.strip("_")
        filename = f"{row['Name']}_{key}.fits"
        sedfile_dict[key] = os.path.join(downloads_path, filename)

print(sedfile_dict)



{'RAHMS': '03 32 32.843', 'DECDMS': '-27 51 48.58', 'raDeg': 53.13684583333333, 'decDeg': -27.863494444444445, 'Vr': nan, 'PM_RA': nan, 'PM_DEC': nan, 'Simbad_Name': '2MASS J03323287-2751483', 'Alt_Simbad_Name': '[B2010] C26202', 'Sp_T': 'F8IV', 'G_Gaia': nan, 'V': 16.64, 'BV': 0.26, 'Name': 'c26202', 'Model': '_mod_008', 'STIS': '_stiswfcnic_007'}
{'stiswfcnic_007': '/home/d/dltucker/Downloads/c26202_stiswfcnic_007.fits', 'mod_008': '/home/d/dltucker/Downloads/c26202_mod_008.fits'}


### 1.3 Define useful classes and functions

In [4]:
# Useful class to stop "Run All" at a cell 
#  containing the command "raise StopExecution"
class StopExecution(Exception):
    def _render_traceback_(self):
        pass

In [5]:
def cutout_im(butler, ra, dec, datasetType, visit, detector, cutoutSideLength=51, **kwargs):
    
    """
    Produce a cutout from a preliminary_visit_image at the given ra, dec position.

    Adapted from cutout_coadd which was adapted from a DC2 tutorial
    notebook by Michael Wood-Vasey.

    """
    
    dataId = {'visit': visit, 'detector': detector}    
    radec = geom.SpherePoint(ra, dec, geom.degrees)
    cutoutSize = geom.ExtentI(cutoutSideLength, cutoutSideLength)
    wcs = butler.get('%s.wcs' % datasetType,**dataId)
    xy = geom.PointI(wcs.skyToPixel(radec))
    bbox = geom.BoxI(xy - cutoutSize // 2, cutoutSize)
    parameters = {'bbox': bbox}
    cutout_image = butler.get(datasetType, parameters=parameters, **dataId)

    return cutout_image

In [6]:
def warp_img(ref_img, img_to_warp, ref_wcs, wcs_to_warp):

    config = RegisterConfig()
    task = RegisterTask(name="register", config=config)
    warpedExp = task.warpExposure(img_to_warp, wcs_to_warp, ref_wcs,
                                  ref_img.getBBox())

    return warpedExp

In [7]:
def make_gif(frame_folder):
    frames = [Image.open(image) for image in sorted(glob.glob(f"{frame_folder}/*.png"))]
    frame_one = frames[0]
    frame_one.save("animation.gif", format="GIF", append_images=frames,
               save_all=True, duration=500, loop = 0)

## 2. Calculate Synthetic AB magnitudes for C26202, based on official filter bandpasses

### 2.1 Change detectors from (default) LSST to ComCam

In [8]:
defaultDirs = st.setDefaultDirs()
defaultDirs['detector'] = defaultDirs['detector'].replace('/joint_minimum', '/itl')
hardware, system = st.buildHardwareAndSystem(defaultDirs)


### 2.2 Calculate synthetic mags

In [9]:
mags = {}

# Loop through all SEDs in our sedfile dictionary
for sed_key in sedfile_dict:
    
    print(sed_key, sedfile_dict[sed_key])
    
    # Read the SED file associated with this SED
    sedfile = sedfile_dict[sed_key]
    seddata = fits.getdata(sedfile)

    # Transform the SED data into rubin_sim format
    wavelen = seddata['WAVELENGTH'] * u.angstrom.to(u.nanometer) # This is in angstroms - need in nanometers
    flambda = seddata['FLUX'] / (u.angstrom.to(u.nanometer)) # this is in erg/sec/cm^^2/ang but we want /nm     
    sed = pt.Sed(wavelen=wavelen, flambda=flambda)
    
    # Loop over the filters, calculating the synthetic mags for each filter for this SED
    mags[sed_key] = []
    for f in flist:
        # Append the synthetic mag for this filter to this mags list for this SED
        mags[sed_key].append(sed.calc_mag(system[f]))
    # Convert list of synthetic mags for this SED into a numpy array
    mags[sed_key] = np.array(mags[sed_key])
    
    

stiswfcnic_007 /home/d/dltucker/Downloads/c26202_stiswfcnic_007.fits
mod_008 /home/d/dltucker/Downloads/c26202_mod_008.fits


### 2.3 Convert mags numpy arrays into a pandas dataframe

In [10]:
df_mags = pd.DataFrame(mags, index=flist)
df_mags

Unnamed: 0,stiswfcnic_007,mod_008
u,17.5728,17.586964
g,16.691931,16.692687
r,16.362017,16.361654
i,16.260196,16.259542
z,16.243679,16.24369
y,16.238847,16.238887


## 3. Query USDF Butler for ComCam measurements of C26202

### 3.1 Instantiate Butler

In [11]:
butler = dafButler.Butler(repo, collections=collections)

### 3.2 Find all the `preliminary_visit_image`'s that overlap the sky position of C26202

#### 3.2.1 Find the `dataId`'s for all `preliminary_visit_image`'s in this repo/collection that overlap the RA, DEC of C26202

In [12]:
datasetRefs = butler.query_datasets("visit_image", where="visit_detector_region.region OVERLAPS POINT(ra, dec)",
                                    bind={"ra": raDeg, "dec": decDeg})

for i, ref in enumerate(datasetRefs):    
    print(i, ref.dataId)

print(f"\nFound {len(datasetRefs)} preliminary_visit_images")

0 {instrument: 'LSSTCam', detector: 44, visit: 2025090600260, band: 'z', day_obs: 20250906, physical_filter: 'z_20'}
1 {instrument: 'LSSTCam', detector: 49, visit: 2025090600252, band: 'z', day_obs: 20250906, physical_filter: 'z_20'}
2 {instrument: 'LSSTCam', detector: 51, visit: 2025090600256, band: 'z', day_obs: 20250906, physical_filter: 'z_20'}
3 {instrument: 'LSSTCam', detector: 51, visit: 2025090600266, band: 'z', day_obs: 20250906, physical_filter: 'z_20'}
4 {instrument: 'LSSTCam', detector: 51, visit: 2025090600280, band: 'r', day_obs: 20250906, physical_filter: 'r_57'}
5 {instrument: 'LSSTCam', detector: 52, visit: 2025082600442, band: 'i', day_obs: 20250826, physical_filter: 'i_39'}
6 {instrument: 'LSSTCam', detector: 52, visit: 2025082600445, band: 'i', day_obs: 20250826, physical_filter: 'i_39'}
7 {instrument: 'LSSTCam', detector: 52, visit: 2025090600254, band: 'z', day_obs: 20250906, physical_filter: 'z_20'}
8 {instrument: 'LSSTCam', detector: 52, visit: 2025090600259, ba

#### 3.2.3 Create a pandas Dataframe containing the `source2` info for all these `visit_image`'s

Now, loop over the `datasetRefs` again, but this time grab the contents of the `sourceTable` table for each `ref` and combine into all into one big pandas DataFrame.  

In [13]:
from astropy.coordinates import SkyCoord
import astropy.units as u
import pandas as pd

# Reference CALSPEC star coordinates
ref_coord = SkyCoord(ra=raDeg*u.degree, dec=decDeg*u.degree)

src_list = []

for i, ref in enumerate(datasetRefs):
    dataId = {'visit': ref.dataId['visit'], 'detector': ref.dataId['detector']}
    src = butler.get('source2', dataId=dataId).to_pandas()
#    src = butler.get('recalibrated_star_detector', dataId=dataId).to_pandas()
# NOTE - source2 has more matches and gives a slightly different offset to recalibrated - which is going away soon (less than 2 mmag)

    # Apply "good measurement" mask immediately
    mask = (~src.pixelFlags_bad) & (~src.pixelFlags_saturated) & \
           (~src.extendedness_flag)
    src_cleaned = src[mask]

    # Compute separations to CALSPEC star
    df_coords = SkyCoord(ra=src_cleaned['ra'].values*u.degree,
                         dec=src_cleaned['dec'].values*u.degree)
    separations = ref_coord.separation(df_coords)

    # Keep only sources within 3 arcsec
    mask_sep = separations < 3.0*u.arcsec
    nearby = src_cleaned[mask_sep].copy()
    nearby['separation_c26202'] = separations[mask_sep].arcsec

    
    if not nearby.empty:
        best = nearby.sort_values('separation_c26202').iloc[[0]]
        src_list.append(best)
        print(f"{i} Visit {ref.dataId['visit']}, Detector {ref.dataId['detector']}: "
              f"Found {len(best)} candidate matches.")

# Concatenate only the small filtered tables
if src_list:
    src_all = pd.concat(src_list, ignore_index=True)
    print(f"\nTotal combined catalog contains {len(src_all)} candidate sources.")
else:
    print("No matches found within 3 arcsec.")

best_df = src_all

0 Visit 2025090600260, Detector 44: Found 1 candidate matches.
1 Visit 2025090600252, Detector 49: Found 1 candidate matches.
2 Visit 2025090600256, Detector 51: Found 1 candidate matches.
3 Visit 2025090600266, Detector 51: Found 1 candidate matches.
5 Visit 2025082600442, Detector 52: Found 1 candidate matches.
6 Visit 2025082600445, Detector 52: Found 1 candidate matches.
7 Visit 2025090600254, Detector 52: Found 1 candidate matches.
9 Visit 2025090600253, Detector 53: Found 1 candidate matches.
10 Visit 2025090600270, Detector 53: Found 1 candidate matches.
11 Visit 2025090600285, Detector 53: Found 1 candidate matches.
12 Visit 2025090600277, Detector 60: Found 1 candidate matches.
14 Visit 2025090600278, Detector 61: Found 1 candidate matches.
15 Visit 2025090600262, Detector 86: Found 1 candidate matches.
16 Visit 2025081000328, Detector 90: Found 1 candidate matches.
17 Visit 2025081000329, Detector 90: Found 1 candidate matches.
18 Visit 2025081000330, Detector 90: Found 1 can

Let's look at the result:

In [14]:
best_df

Unnamed: 0,coord_ra,coord_dec,parentSourceId,x,y,xErr,yErr,ra,dec,raErr,...,hsmShapeRegauss_flag_no_pixels,hsmShapeRegauss_flag_not_contained,hsmShapeRegauss_flag_parent_source,sky_source,visit,detector,band,physical_filter,sourceId,separation_c26202
0,53.137020,-27.863444,0,3120.952901,898.311685,0.009363,0.008345,53.137020,-27.863444,4.472117e-07,...,False,False,False,False,2025090600260,44,z,z_20,25187647288770763,0.584583
1,53.137037,-27.863427,0,3522.051588,3813.636201,0.010376,0.010276,53.137037,-27.863427,5.073094e-07,...,False,False,False,False,2025090600252,49,z,z_20,25187646217650971,0.653691
2,53.137036,-27.863449,25187646755569994,3339.724660,1490.556760,0.008722,0.009695,53.137036,-27.863449,4.483496e-07,...,False,False,False,False,2025090600256,51,z,z_20,25187646755570722,0.625556
3,53.137038,-27.863425,0,1189.991493,2534.287422,0.007879,0.006267,53.137038,-27.863425,3.794090e-07,...,False,False,False,False,2025090600266,51,z,z_20,25187648097747592,0.662223
4,53.137025,-27.863437,0,1057.200159,2894.721267,0.004048,0.004155,53.137025,-27.863437,1.992035e-07,...,False,False,False,False,2025082600442,52,i,i_39,25139293208970777,0.607883
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,53.137030,-27.863437,25082104534008611,3994.252396,1598.965476,0.002811,0.002851,53.137030,-27.863437,1.393186e-07,...,False,False,False,False,2025081300337,95,z,z_20,25082104534010052,0.620700
128,53.137014,-27.863434,25139291087700181,2406.591463,512.423327,0.005698,0.005295,53.137014,-27.863434,2.644275e-07,...,False,False,False,False,2025082600426,102,g,g_6,25139291087701777,0.577109
129,53.137020,-27.863438,0,2275.111407,2894.639701,0.005020,0.005116,53.137020,-27.863438,2.505856e-07,...,False,False,False,False,2025082600427,102,g,g_6,25139291221918973,0.590262
130,53.137020,-27.863446,25139291356136334,2111.403677,2057.107811,0.005123,0.006071,53.137020,-27.863446,2.906888e-07,...,False,False,False,False,2025082600428,102,g,g_6,25139291356137586,0.581639


#### 3.2.4 Save `best_df` as a CSV file

Let's save `best_df` as a CSV file that we can download and examine with TOPCAT:

***(Rename this file to something else???)***

In [15]:
#best_df.to_csv('LSSTComCam_C26202_fields.csv', index=False)

Add magCalib and magCalibErr columns:

In [16]:
# Flux in nano-Janskys to AB magnitudes:
best_df['magCalib'] = -2.5*np.log10(best_df['calibFlux']) + 31.4

# Flux error in nano-Janskys to AB magnitude error:
# Factor of 2.5/math.log(10) is explained here:  https://astronomy.stackexchange.com/questions/38371/how-can-i-calculate-the-uncertainties-in-magnitude-like-the-cds-does
best_df['magCalibErr'] = 2.5/math.log(10)*best_df['calibFluxErr']/best_df['calibFlux']

Display `visit`, `detector`, `band`, `calibFlux`, `calibFluxErr`, `magCalib`, `magCalibErr`, and `separation_c26202` from best_df, sorted by `visit` and `band`:

In [17]:
# Set pandas to show all rows...
pd.set_option("display.max_rows", None)

In [18]:
best_df[['visit', 'detector', 'band', 'calibFlux', 'calibFluxErr', 'magCalib', 'magCalibErr', 'separation_c26202']].sort_values(['visit', 'band'])

Unnamed: 0,visit,detector,band,calibFlux,calibFluxErr,magCalib,magCalibErr,separation_c26202
32,2025071100810,99,g,764447.4,1939.002634,16.691631,0.002754,0.637541
124,2025071100810,99,g,764447.4,1939.002634,16.691631,0.002754,0.637541
33,2025071200713,92,r,1032365.0,1539.219313,16.365417,0.001619,0.622717
34,2025071200714,92,r,1032831.0,1518.033854,16.364927,0.001596,0.613588
35,2025071200735,92,z,1156340.0,2264.23902,16.242287,0.002126,0.611381
36,2025071200736,92,z,1155112.0,2274.770236,16.24344,0.002138,0.617362
37,2025071200737,92,z,1155694.0,2271.487041,16.242893,0.002134,0.595738
38,2025071200738,92,z,1150198.0,2289.503588,16.248069,0.002161,0.632586
39,2025071200739,92,z,1151946.0,2231.485982,16.24642,0.002103,0.616421
52,2025071700593,95,g,756507.9,949.659169,16.702966,0.001363,0.572067


In [19]:
print("""Number of rows:  %d""" % (len(best_df['visit'])))

Number of rows:  132


In [20]:
# Reset pandas to its default maximum rows to print to screen
pd.reset_option("display.max_rows")

### 3.4 Match pandas dataframe with observed ComCam magnitudes (best_df) with pandas dataframe with the synthetic magnitudes (df_mags)

In [21]:
# Reset the index to turn the keys into a column
df_mags_reset = df_mags.reset_index()

# Merge the dataframes based on the filter name
combined_df = pd.merge(best_df, df_mags_reset, left_on='band', right_on='index')

combined_df

Unnamed: 0,coord_ra,coord_dec,parentSourceId,x,y,xErr,yErr,ra,dec,raErr,...,detector,band,physical_filter,sourceId,separation_c26202,magCalib,magCalibErr,index,stiswfcnic_007,mod_008
0,53.137020,-27.863444,0,3120.952901,898.311685,0.009363,0.008345,53.137020,-27.863444,4.472117e-07,...,44,z,z_20,25187647288770763,0.584583,16.244934,0.002305,z,16.243679,16.243690
1,53.137037,-27.863427,0,3522.051588,3813.636201,0.010376,0.010276,53.137037,-27.863427,5.073094e-07,...,49,z,z_20,25187646217650971,0.653691,16.235560,0.002224,z,16.243679,16.243690
2,53.137036,-27.863449,25187646755569994,3339.724660,1490.556760,0.008722,0.009695,53.137036,-27.863449,4.483496e-07,...,51,z,z_20,25187646755570722,0.625556,16.248106,0.002292,z,16.243679,16.243690
3,53.137038,-27.863425,0,1189.991493,2534.287422,0.007879,0.006267,53.137038,-27.863425,3.794090e-07,...,51,z,z_20,25187648097747592,0.662223,16.245429,0.002138,z,16.243679,16.243690
4,53.137025,-27.863437,0,1057.200159,2894.721267,0.004048,0.004155,53.137025,-27.863437,1.992035e-07,...,52,i,i_39,25139293208970777,0.607883,16.246347,0.000983,i,16.260196,16.259542
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,53.137030,-27.863437,25082104534008611,3994.252396,1598.965476,0.002811,0.002851,53.137030,-27.863437,1.393186e-07,...,95,z,z_20,25082104534010052,0.620700,16.239222,0.001657,z,16.243679,16.243690
128,53.137014,-27.863434,25139291087700181,2406.591463,512.423327,0.005698,0.005295,53.137014,-27.863434,2.644275e-07,...,102,g,g_6,25139291087701777,0.577109,16.699827,0.000971,g,16.691931,16.692687
129,53.137020,-27.863438,0,2275.111407,2894.639701,0.005020,0.005116,53.137020,-27.863438,2.505856e-07,...,102,g,g_6,25139291221918973,0.590262,16.695918,0.000971,g,16.691931,16.692687
130,53.137020,-27.863446,25139291356136334,2111.403677,2057.107811,0.005123,0.006071,53.137020,-27.863446,2.906888e-07,...,102,g,g_6,25139291356137586,0.581639,16.699686,0.000983,g,16.691931,16.692687


In [22]:
print(df_mags)

   stiswfcnic_007    mod_008
u       17.572800  17.586964
g       16.691931  16.692687
r       16.362017  16.361654
i       16.260196  16.259542
z       16.243679  16.243690
y       16.238847  16.238887


In [23]:
# Group by the 'band' column and calculate the median of 'magCalib' for each group
median_values = combined_df.groupby('band')['magCalib'].median().reset_index()
median_values = median_values.rename(columns={'magCalib': 'median_magCalib'})

# Merge the median values back into the combined_df dataframe
combined_df = pd.merge(combined_df, median_values, on='band', how='left')
combined_df

Unnamed: 0,coord_ra,coord_dec,parentSourceId,x,y,xErr,yErr,ra,dec,raErr,...,band,physical_filter,sourceId,separation_c26202,magCalib,magCalibErr,index,stiswfcnic_007,mod_008,median_magCalib
0,53.137020,-27.863444,0,3120.952901,898.311685,0.009363,0.008345,53.137020,-27.863444,4.472117e-07,...,z,z_20,25187647288770763,0.584583,16.244934,0.002305,z,16.243679,16.243690,16.243454
1,53.137037,-27.863427,0,3522.051588,3813.636201,0.010376,0.010276,53.137037,-27.863427,5.073094e-07,...,z,z_20,25187646217650971,0.653691,16.235560,0.002224,z,16.243679,16.243690,16.243454
2,53.137036,-27.863449,25187646755569994,3339.724660,1490.556760,0.008722,0.009695,53.137036,-27.863449,4.483496e-07,...,z,z_20,25187646755570722,0.625556,16.248106,0.002292,z,16.243679,16.243690,16.243454
3,53.137038,-27.863425,0,1189.991493,2534.287422,0.007879,0.006267,53.137038,-27.863425,3.794090e-07,...,z,z_20,25187648097747592,0.662223,16.245429,0.002138,z,16.243679,16.243690,16.243454
4,53.137025,-27.863437,0,1057.200159,2894.721267,0.004048,0.004155,53.137025,-27.863437,1.992035e-07,...,i,i_39,25139293208970777,0.607883,16.246347,0.000983,i,16.260196,16.259542,16.242019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,53.137030,-27.863437,25082104534008611,3994.252396,1598.965476,0.002811,0.002851,53.137030,-27.863437,1.393186e-07,...,z,z_20,25082104534010052,0.620700,16.239222,0.001657,z,16.243679,16.243690,16.243454
128,53.137014,-27.863434,25139291087700181,2406.591463,512.423327,0.005698,0.005295,53.137014,-27.863434,2.644275e-07,...,g,g_6,25139291087701777,0.577109,16.699827,0.000971,g,16.691931,16.692687,16.699827
129,53.137020,-27.863438,0,2275.111407,2894.639701,0.005020,0.005116,53.137020,-27.863438,2.505856e-07,...,g,g_6,25139291221918973,0.590262,16.695918,0.000971,g,16.691931,16.692687,16.699827
130,53.137020,-27.863446,25139291356136334,2111.403677,2057.107811,0.005123,0.006071,53.137020,-27.863446,2.906888e-07,...,g,g_6,25139291356137586,0.581639,16.699686,0.000983,g,16.691931,16.692687,16.699827


In [24]:
# Calculate the number of rows for each filter band
row_counts = combined_df.groupby('band').size().reset_index(name='n_total')

# Merge the row counts back into the combined_df dataframe
combined_df = pd.merge(combined_df, row_counts, on='band', how='left')

combined_df

Unnamed: 0,coord_ra,coord_dec,parentSourceId,x,y,xErr,yErr,ra,dec,raErr,...,physical_filter,sourceId,separation_c26202,magCalib,magCalibErr,index,stiswfcnic_007,mod_008,median_magCalib,n_total
0,53.137020,-27.863444,0,3120.952901,898.311685,0.009363,0.008345,53.137020,-27.863444,4.472117e-07,...,z_20,25187647288770763,0.584583,16.244934,0.002305,z,16.243679,16.243690,16.243454,40
1,53.137037,-27.863427,0,3522.051588,3813.636201,0.010376,0.010276,53.137037,-27.863427,5.073094e-07,...,z_20,25187646217650971,0.653691,16.235560,0.002224,z,16.243679,16.243690,16.243454,40
2,53.137036,-27.863449,25187646755569994,3339.724660,1490.556760,0.008722,0.009695,53.137036,-27.863449,4.483496e-07,...,z_20,25187646755570722,0.625556,16.248106,0.002292,z,16.243679,16.243690,16.243454,40
3,53.137038,-27.863425,0,1189.991493,2534.287422,0.007879,0.006267,53.137038,-27.863425,3.794090e-07,...,z_20,25187648097747592,0.662223,16.245429,0.002138,z,16.243679,16.243690,16.243454,40
4,53.137025,-27.863437,0,1057.200159,2894.721267,0.004048,0.004155,53.137025,-27.863437,1.992035e-07,...,i_39,25139293208970777,0.607883,16.246347,0.000983,i,16.260196,16.259542,16.242019,36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,53.137030,-27.863437,25082104534008611,3994.252396,1598.965476,0.002811,0.002851,53.137030,-27.863437,1.393186e-07,...,z_20,25082104534010052,0.620700,16.239222,0.001657,z,16.243679,16.243690,16.243454,40
128,53.137014,-27.863434,25139291087700181,2406.591463,512.423327,0.005698,0.005295,53.137014,-27.863434,2.644275e-07,...,g_6,25139291087701777,0.577109,16.699827,0.000971,g,16.691931,16.692687,16.699827,25
129,53.137020,-27.863438,0,2275.111407,2894.639701,0.005020,0.005116,53.137020,-27.863438,2.505856e-07,...,g_6,25139291221918973,0.590262,16.695918,0.000971,g,16.691931,16.692687,16.699827,25
130,53.137020,-27.863446,25139291356136334,2111.403677,2057.107811,0.005123,0.006071,53.137020,-27.863446,2.906888e-07,...,g_6,25139291356137586,0.581639,16.699686,0.000983,g,16.691931,16.692687,16.699827,25


**calculate the median then combine the tables for stis and mod**


In [25]:
# Calculate the number of rows for each filter band
row_counts = best_df.groupby('band').size().reset_index(name='n_band')

# Merge the row counts back into the combined_df dataframe
combined_df = pd.merge(best_df, row_counts, on='band', how='left')

combined_df

Unnamed: 0,coord_ra,coord_dec,parentSourceId,x,y,xErr,yErr,ra,dec,raErr,...,sky_source,visit,detector,band,physical_filter,sourceId,separation_c26202,magCalib,magCalibErr,n_band
0,53.137020,-27.863444,0,3120.952901,898.311685,0.009363,0.008345,53.137020,-27.863444,4.472117e-07,...,False,2025090600260,44,z,z_20,25187647288770763,0.584583,16.244934,0.002305,40
1,53.137037,-27.863427,0,3522.051588,3813.636201,0.010376,0.010276,53.137037,-27.863427,5.073094e-07,...,False,2025090600252,49,z,z_20,25187646217650971,0.653691,16.235560,0.002224,40
2,53.137036,-27.863449,25187646755569994,3339.724660,1490.556760,0.008722,0.009695,53.137036,-27.863449,4.483496e-07,...,False,2025090600256,51,z,z_20,25187646755570722,0.625556,16.248106,0.002292,40
3,53.137038,-27.863425,0,1189.991493,2534.287422,0.007879,0.006267,53.137038,-27.863425,3.794090e-07,...,False,2025090600266,51,z,z_20,25187648097747592,0.662223,16.245429,0.002138,40
4,53.137025,-27.863437,0,1057.200159,2894.721267,0.004048,0.004155,53.137025,-27.863437,1.992035e-07,...,False,2025082600442,52,i,i_39,25139293208970777,0.607883,16.246347,0.000983,36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,53.137030,-27.863437,25082104534008611,3994.252396,1598.965476,0.002811,0.002851,53.137030,-27.863437,1.393186e-07,...,False,2025081300337,95,z,z_20,25082104534010052,0.620700,16.239222,0.001657,40
128,53.137014,-27.863434,25139291087700181,2406.591463,512.423327,0.005698,0.005295,53.137014,-27.863434,2.644275e-07,...,False,2025082600426,102,g,g_6,25139291087701777,0.577109,16.699827,0.000971,25
129,53.137020,-27.863438,0,2275.111407,2894.639701,0.005020,0.005116,53.137020,-27.863438,2.505856e-07,...,False,2025082600427,102,g,g_6,25139291221918973,0.590262,16.695918,0.000971,25
130,53.137020,-27.863446,25139291356136334,2111.403677,2057.107811,0.005123,0.006071,53.137020,-27.863446,2.906888e-07,...,False,2025082600428,102,g,g_6,25139291356137586,0.581639,16.699686,0.000983,25


In [26]:
# Group by the 'n_band' column and calculate the counts of 'band' for each group
count_df = best_df.groupby('band')['magCalib'].count().reset_index()

# Rename the columns for clarity
count_df = count_df.rename(columns={'magCalib': 'n_band'})

count_df

Unnamed: 0,band,n_band
0,g,25
1,i,36
2,r,31
3,z,40


In [27]:
# Group by the 'band' column and calculate the median of 'magCalib' for each group
median_df = best_df.groupby('band')['magCalib'].median().reset_index()

# Rename the columns for clarity
median_df = median_df.rename(columns={'magCalib': 'median_magCalib'})

median_df

Unnamed: 0,band,median_magCalib
0,g,16.699827
1,i,16.242019
2,r,16.362526
3,z,16.243454


In [28]:
# Merge the dataframes based on the filter name
combined_df = pd.merge(count_df, median_df, left_on='band', right_on='band')

combined_df

Unnamed: 0,band,n_band,median_magCalib
0,g,25,16.699827
1,i,36,16.242019
2,r,31,16.362526
3,z,40,16.243454


In [29]:
# Reset the index to turn the keys into a column
df_mags_reset = df_mags.reset_index()

# Merge the dataframes based on the filter name
combined_df = pd.merge(combined_df, df_mags_reset, left_on='band', right_on='index')

combined_df

Unnamed: 0,band,n_band,median_magCalib,index,stiswfcnic_007,mod_008
0,g,25,16.699827,g,16.691931,16.692687
1,i,36,16.242019,i,16.260196,16.259542
2,r,31,16.362526,r,16.362017,16.361654
3,z,40,16.243454,z,16.243679,16.24369


In [30]:
# Calculate the differences and add the new columns
combined_df['offset_stis'] = combined_df['median_magCalib'] - combined_df['stiswfcnic_007']
combined_df['offset_mod'] = combined_df['median_magCalib'] - combined_df['mod_008']

combined_df

Unnamed: 0,band,n_band,median_magCalib,index,stiswfcnic_007,mod_008,offset_stis,offset_mod
0,g,25,16.699827,g,16.691931,16.692687,0.007896,0.00714
1,i,36,16.242019,i,16.260196,16.259542,-0.018176,-0.017523
2,r,31,16.362526,r,16.362017,16.361654,0.000509,0.000872
3,z,40,16.243454,z,16.243679,16.24369,-0.000224,-0.000235


In [31]:
print(combined_df)

  band  n_band  median_magCalib index  stiswfcnic_007    mod_008  offset_stis  \
0    g      25        16.699827     g       16.691931  16.692687     0.007896   
1    i      36        16.242019     i       16.260196  16.259542    -0.018176   
2    r      31        16.362526     r       16.362017  16.361654     0.000509   
3    z      40        16.243454     z       16.243679  16.243690    -0.000224   

   offset_mod  
0    0.007140  
1   -0.017523  
2    0.000872  
3   -0.000235  


In [32]:
# Define the desired order of 'band'
order = ['u', 'g', 'r', 'i', 'z', 'y']

# Remove the 'index' column
combined_df = combined_df.drop(columns=['index'])

# Reorder the dataframe based on the 'band' column
combined_df['band'] = pd.Categorical(combined_df['band'], categories=order, ordered=True)
combined_df = combined_df.sort_values('band').reset_index(drop=True)

combined_df

Unnamed: 0,band,n_band,median_magCalib,stiswfcnic_007,mod_008,offset_stis,offset_mod
0,g,25,16.699827,16.691931,16.692687,0.007896,0.00714
1,r,31,16.362526,16.362017,16.361654,0.000509,0.000872
2,i,36,16.242019,16.260196,16.259542,-0.018176,-0.017523
3,z,40,16.243454,16.243679,16.24369,-0.000224,-0.000235


In [33]:
raise StopExecution

## 5. Sandbox

the code below is to loop over each of the calspec stars - NOT TESTED YET