# Transformation Equations:  ComCam <--> PanSTARRS-1 DR2

_Meagan N. Porter/Douglas L. Tucker_

_2025.6.3_

## Initial Setup

In [None]:
import numpy as np
import pandas as pd

import os
import sys
import glob
import math
import datetime

from collections import OrderedDict as odict

from astropy.io import fits
from astropy.table import Table

import fitsio

from scipy import interpolate
from scipy.optimize import leastsq

import healpy as hp

import plotly
from plotly.offline import download_plotlyjs, plot, iplot
import plotly.graph_objs as go

import matplotlib.pyplot as plt

%matplotlib inline

## Input Parameters

In [None]:
# Bands for which the transformation fit is run...
bandList = ['g']
#bandList = ['g', 'r', 'i', 'z', 'y']

# Direction of the transformation:
#   toComCam = False :  ComCam-->PS1
#   toComCam = True  :  PS1-->ComCam
#toComCam = True
toComCam = False


# Color to fit against, and the label used for it in the QA plots.
# Both are given here without a photometric-system suffix; the suffix
# is appended just below...
color_name_1 = 'gi'
colorLabel_1 = '(g-i)'

# The color is taken from the system being transformed *from*:
# PS1 for PS1-->ComCam, ComCam for ComCam-->PS1...
color_name_1 = color_name_1 + ('_ps1' if toComCam else '_ComCam')
colorLabel_1 = '$' + colorLabel_1 + ('_{ps1}$' if toComCam else '_{ComCam}$')


# Order of polynomial fits...
norder = 1

# Sigma-clipping parameters (clipping threshold and number of fit passes)...
nsigma = 3.0
niter = 3

# Directory containing the match file, and the match file's name within it...
matchDir = '/home/d/dltucker/DATA'
matchFile = 'match_ComCam_PS1DR2_CDFS.fits'

# Base names of the fit results output file and of the QA plot output files;
# both encode the direction of the transformation...
resultsFileBaseName = 'transFit.ps1_to_ComCam' if toComCam else 'transFit.ComCam_to_ps1'
qaFileBaseName = 'qaPlot.ps1_to_ComCam.fit' if toComCam else 'qaPlot.ComCam_to_ps1.fit'

# Verbosity level (0, 1, 2, 3, ...)
verbose = 2

## Define Some Functions

In [None]:
# Kudos to Claude-3.5-Sonnet for improving on old outlier rejection code...

def poly_fit_with_sigma_clip(x, y, degree=1, sigma=3.0, maxiters=5):
    """
    Perform polynomial fit with iterative sigma clipping

    Each pass fits the currently accepted points, evaluates the residuals
    of *all* points against that fit, and rebuilds the acceptance mask by
    sigma-clipping those residuals.  Because the mask is rebuilt from
    scratch each pass, a point rejected earlier can be re-admitted later.
    Iteration stops as soon as the mask no longer changes, or after
    ``maxiters`` passes.

    The returned coefficients are always the fit to exactly the points
    flagged in the returned mask:  if the loop ends without converging
    (or ``maxiters`` is less than 1), one final fit is made on the final
    mask before the RMS is computed.

    Parameters:
    -----------
    x : array-like
        Independent variable
    y : array-like
        Dependent variable
    degree : int
        Degree of polynomial fit
    sigma : float
        Sigma clipping threshold
    maxiters : int
        Maximum number of sigma clipping iterations

    Returns:
    --------
    coeffs : array
        Polynomial coefficients as returned by ``np.polyfit``
        (highest power first)
    mask : array
        Boolean mask indicating non-clipped points
    rms : float
        RMS of residuals of the non-clipped points
    """

    # Import relevant modules
    import numpy as np
    from astropy.stats import sigma_clip

    # Start with all points accepted
    x = np.asarray(x)
    y = np.asarray(y)
    mask = np.ones_like(x, dtype=bool)

    for _ in range(maxiters):
        # Fit polynomial to non-masked points
        coeffs = np.polyfit(x[mask], y[mask], degree)

        # Calculate residuals of every point (clipped or not)
        residuals = y - np.polyval(coeffs, x)

        # Update mask with sigma clipping.  getmaskarray always yields a
        # full-size boolean array, even if the masked array carries the
        # scalar "nomask" because nothing was clipped.
        clipped = sigma_clip(residuals, sigma=sigma)
        new_mask = ~np.ma.getmaskarray(clipped)

        # Check for convergence:  coeffs already correspond to this mask
        if np.array_equal(mask, new_mask):
            break

        mask = new_mask
    else:
        # No convergence within maxiters (or maxiters < 1):  the mask was
        # updated after the last fit, so refit to keep coeffs, mask and
        # rms consistent with one another.
        coeffs = np.polyfit(x[mask], y[mask], degree)

    # Calculate final RMS
    final_residuals = y[mask] - np.polyval(coeffs, x[mask])
    rms = np.sqrt(np.mean(final_residuals**2))

    return coeffs, mask, rms

In [None]:
##################################
#
# Define some functions for fitting dmag vs. color...
#
# These functions are based on scripts found at
# http://linuxgazette.net/115/andreasen.html (by Anders Andreasen)
# and at
# http://www.phy.uct.ac.za/courses/python/examples/fitresonance.py (University of Cape Town)


##################################

# Parametric function:  
#  p is the parameter vector; 
#  For fp1, we assume a polynomial function in one color...
def fp1(p,color1_array):
    """
    Evaluate the one-color polynomial model.

    Returns p[0] + p[1]*color1_array + p[2]*color1_array**2 + ...,
    where the polynomial order is taken from the length of the parameter
    vector (p.size - 1).  p must provide a ``size`` attribute (e.g. a
    numpy array).
    """
    highest_power = p.size - 1
    model = p[0]
    for power in range(1, highest_power + 1):
        model = model + p[power] * color1_array**power
    return model


##################################

# Error function:
def residuals1(p,color1_array,dmag_array):
    """Return the fit residuals: observed dmag minus the fp1 model value."""
    model = fp1(p, color1_array)
    return dmag_array - model


##################################

# Fitting code:
def transformFit1(color1_array, dmag_array, norder=2, verbose=0):
    """
    Least-squares fit of dmag as a polynomial of order ``norder`` in one color.

    The model is fp1 (c_0 + c_1*color + c_2*color**2 + ...) and the fit
    minimizes the sum of squares of residuals1 with scipy's ``leastsq``.

    Parameters
    ----------
    color1_array : array-like
        Color values (independent variable).
    dmag_array : array-like
        Magnitude differences (dependent variable).
    norder : int
        Order of the polynomial; norder+1 coefficients are fitted.
    verbose : int
        If > 0, print the starting values, fit statistics, fitted
        parameters and the correlation matrix.

    Returns
    -------
    p : array
        Fitted coefficients c_0 ... c_norder (lowest order first).
    perr : list of float
        Uncertainty of each coefficient, i.e. sqrt of the diagonal of the
        covariance matrix scaled by sqrt(chisq/dof).  All NaN if leastsq
        could not estimate the covariance matrix.
    rms : float
        sqrt(chisq/dof) of the residuals.
    """

    # Calculate the median of dmag for use as an initial guess
    # for the overall zeropoint offset..
    mdn = np.median( dmag_array, None )

    # Parameter names:  c_0, c_1, ..., c_norder
    pname = ["""c_%d""" % i for i in range(norder+1)]

    # Initial parameter values:  zeropoint at the median, all slopes zero
    p0 = (1+norder)*[0.0]
    p0[0] = mdn

    if verbose > 0:
        print()
        print( 'Initial parameter values:  ', p0)

    # Perform fit

    p,cov,infodict,mesg,ier = leastsq(residuals1, p0,
                                      args=(color1_array, dmag_array),
                                      maxfev=10000, full_output=1)

    if ( ier>=1 and ier <=4):
        if verbose > 0:  print("Converged")
    else:
        # Add an exception error or a non-zero return value?
        print("Not converged")
        print(mesg)


    # Calculate some descriptors of the fit
    # (similar to the output from gnuplot 2d fits)

    chisq=sum(infodict['fvec']*infodict['fvec'])
    dof=len(dmag_array)-len(p)
    rms=math.sqrt(chisq/dof)

    if verbose > 0:
        print("Converged with chi squared ",chisq)
        print("degrees of freedom, dof ", dof)
        print("RMS of residuals (i.e. sqrt(chisq/dof)) ", rms)
        print("Reduced chisq (i.e. variance of residuals) ", chisq/dof)
        print()


    # uncertainties are calculated as per gnuplot, "fixing" the result
    # for non unit values of the reduced chisq.
    # values at min match gnuplot
    if cov is None:
        # leastsq returns None for the covariance when it encounters a
        # singular matrix; report that instead of failing on cov[i,i].
        print("WARNING:  covariance matrix could not be estimated; "
              "parameter uncertainties set to NaN")
        perr = len(p)*[float('nan')]
    else:
        # rms == sqrt(chisq/dof) is the gnuplot-style rescaling factor
        perr = [math.sqrt(cov[i,i])*rms for i in range(len(p))]

    if verbose > 0:
        print("Fitted parameters at minimum, with 68% C.I.:")
        for i,pmin in enumerate(p):
            print("%-10s %13g +/- %13g   (%5f percent)" % (pname[i],pmin,perr[i],
                                                           100.*perr[i]/abs(pmin)))
        print()

    if verbose > 0:
        print( "Correlation matrix:")
        if cov is None:
            print( "   (unavailable:  no covariance matrix)")
        else:
            # correlation matrix close to gnuplot
            # end=' ' keeps each matrix row on a single output line
            print( "               ", end=' ')
            for name in pname: print( "%-10s" % (name,), end=' ')
            print()
            for i in range(len(p)):
                print( "%-10s" % pname[i], end=' ')
                for j in range(i+1):
                    print( "%10f" % (cov[i,j]/math.sqrt(cov[i,i]*cov[j,j]),), end=' ')
                print()
        print()
        print()
        print()

    return p, perr, rms


In [None]:
##################################

def createFitResultsOutputLine(norder, p, perr, rms, dmag_name, color1_name, color2_name=''):
    """
    Build one comma-separated data row for the fit results file.

    Layout:  dmag_name, color1_name, color2_name, then 2*norder+1
    (coefficient, error) pairs, then rms.  Pairs are filled from p and
    perr in order; pairs beyond p.size keep the placeholder -9999.
    p must provide a ``size`` attribute (e.g. a numpy array).
    """
    n_pair_slots = 2*norder + 1

    fields = [dmag_name, color1_name, color2_name]
    fields.extend(2*n_pair_slots*[-9999.])
    fields.append(rms)

    for k in range(p.size):
        fields[3 + 2*k] = p[k]
        fields[4 + 2*k] = perr[k]

    return ','.join(str(field) for field in fields)


##################################

def createFitResultsHeaderOutputLine(norder):
    """
    Build the comma-separated header row for the fit results file.

    Columns:  dmag_name, color1_name, color2_name, then c_j, cerr_j for
    j = 0 ... 2*norder, then rms.
    """
    columns = ['dmag_name', 'color1_name', 'color2_name']
    for k in range(2*norder + 1):
        columns.append("""c_%d""" % k)
        columns.append("""cerr_%d""" % k)
    columns.append('rms')
    return ','.join(columns)


##################################

def transform1ColorQAPlots1(dmag, color1, res, norder, title, dmagName, colorLabel1, p, rms, outputFileName):
    """
    Save a four-panel QA figure for a one-color transformation fit.

    Panels (2x2 grid):
      1. text summary:  title, fitted equation, rms and the min/max of color1;
      2. log-scaled hexbin of dmag vs. color1;
      3. histogram of the fit residuals;
      4. log-scaled hexbin of residuals vs. color1.

    Parameters
    ----------
    dmag, color1, res : pandas Series
        Magnitude differences, colors and fit residuals of the fitted
        points (``describe`` is called on each).
    norder : int
        Order of the fitted polynomial; p[0] ... p[norder] are shown in
        the equation of panel 1.
    title : str
        First line of the text panel.
    dmagName : str
        Label for dmag (left-hand side of the equation and y-axis of panel 2).
    colorLabel1 : str
        Label for the color (equation and x-axes).
    p : sequence of float
        Fitted coefficients, lowest order first.
    rms : float
        RMS of the fit, shown in the text panel.
    outputFileName : str
        File to which the figure is saved.

    Returns
    -------
    int
        Always 0.
    """

    # Prepare QA plots...
    fig = plt.figure(figsize=(20,10))
    fig.subplots_adjust(hspace=0.3)
    # NOTE:  rcParams is global matplotlib state, so this font size also
    # applies to anything drawn after this function returns.
    plt.rcParams.update({'font.size': 24})

    # We will exclude the lowest and highest 0.01% of color1,
    #  dmag, and residuals when plotting the QA figures...
    percentiles = [0.0001, 0.001, 0.01, 0.99, 0.999, 0.9999]
    color1_desc = color1.describe(percentiles=percentiles)
    dmag_desc = dmag.describe(percentiles=percentiles)
    res_desc = res.describe(percentiles=percentiles)
    color1_min = color1_desc['0.01%']
    color1_max = color1_desc['99.99%']
    dmag_min = dmag_desc['0.01%']
    dmag_max = dmag_desc['99.99%']
    res_min = res_desc['0.01%']
    res_max = res_desc['99.99%']

    # Plot 1:  Descriptive text...
    plt.subplot(221)
    # Build the right-hand side of the fitted equation, one term per
    # line, for any polynomial order:  c_0 + c_1*color + c_2*color^2 + ...
    terms = ["""%.3f""" % (p[0])]
    for k in range(1, norder+1):
        term = """%.3f*%s""" % (p[k], colorLabel1)
        if k > 1:
            term = term + """^%d""" % (k)
        terms.append(term)
    plot1Text = """%s \n\n%s = \n %s \n\n [rms: %.3f] \n\n [%.1f < %s < %.1f]""" % \
        (title, dmagName, """ + \n """.join(terms), rms,
         color1_desc['min'], colorLabel1, color1_desc['max'])
    plt.text(0.1,0.00,plot1Text)
    plt.axis('off')


    # Plot 2:  2D hexbin histogram of dmag vs. color1...
    plt.subplot(222)
    hb=plt.hexbin(color1, dmag, gridsize=100, bins='log', cmap='inferno')
    plt.axis([color1_min, color1_max, dmag_min, dmag_max])
    plt.xlabel(colorLabel1)
    plt.ylabel(dmagName)
    cb = fig.colorbar(hb)
    cb.set_label('log10(N)')
    plt.grid(color='white')
    plt.grid(True)


    # Plot 3:  1d histogram of residuals...
    plt.subplot(223)
    plt.hist(res,bins=100)
    plt.xlabel('residuals [mag]')
    plt.ylabel('Number')
    plt.grid(True)
    plt.grid(color='black')


    # Plot 4:  2d hexbin histogram of residuals vs. color1...
    plt.subplot(224)
    hb = plt.hexbin(color1, res, gridsize=100, bins='log', cmap='inferno')
    plt.axis([color1_min, color1_max, res_min, res_max])
    plt.xlabel(colorLabel1)
    plt.ylabel('residuals [mag]')
    cb = plt.colorbar(hb)
    cb.set_label('log10(N)')
    plt.grid(True)
    plt.grid(color='white')


    # Write the figure to disk (it is not shown explicitly here)...
    plt.tight_layout()
    plt.savefig(outputFileName)

    return 0


##################################


## Read in Matched Catalog

In [None]:
# Build the full path to the match file.  NOTE:  this rebinds matchFile, so
# from here on matchFile holds the joined path rather than the bare file name.
matchFile = os.path.join(matchDir,matchFile)
print( matchFile)

In [None]:
# Verify that matchFile is an existing regular file.  A missing file is only
# reported with a message; execution continues either way...
if not os.path.isfile(matchFile):
    print( """ERROR:  matchFile %s does not exist...""" % (matchFile))
if verbose > 0:
    print( 'matchFile: ', matchFile)


In [None]:
# Read the match file (FITS format) into an astropy Table...
tab = Table.read(matchFile, format='fits')
# Bare name as the last expression so the notebook displays the table.
tab

In [None]:
# Convert the astropy Table into a pandas DataFrame for the rest of the notebook...
df = tab.to_pandas()
# Bare name as the last expression so the notebook displays the data frame.
df

In [None]:
# Convert the PSF fluxes to magnitudes:  mag = -2.5*log10(flux) + 31.4
# NOTE(review):  a zero point of 31.4 presumably means the fluxes are in nJy
# (AB system) -- confirm against the catalog documentation.
# (Add 'u' to the list to include the u band.)
for _band in ['g', 'r', 'i', 'z', 'y']:
    df[_band + '_psfMag'] = -2.5*np.log10(df[_band + '_psfFlux']) + 31.4

In [None]:
# Convert the PSF flux errors to magnitude errors:
#   magErr = 1.086 * fluxErr / flux      (1.086 ~ 2.5/ln(10))
# (Add 'u' to the list to include the u band.)
for _band in ['g', 'r', 'i', 'z', 'y']:
    df[_band + '_psfMagErr'] = 1.086*df[_band + '_psfFluxErr']/df[_band + '_psfFlux']

In [None]:
# List every column name of the data frame, one per line...
for col in df.columns:
    print(col)

In [None]:
# Rename columns to the <band>_<system> / <band>_err_<system> convention
# used by the rest of the notebook (systems:  ComCam and ps1)...
_bands = ['g', 'r', 'i', 'z', 'y']

# Coordinates...
_renames = {'coord_ra':'RA_ComCam', 'coord_dec':'DEC_ComCam'}

# ComCam PSF magnitudes and their errors...
_renames.update({'%s_psfMag' % b: '%s_ComCam' % b for b in _bands})
_renames.update({'%s_psfMagErr' % b: '%s_err_ComCam' % b for b in _bands})

# PS1 mean PSF magnitudes and their errors...
_renames.update({'%sMeanPSFMag' % b: '%s_ps1' % b for b in _bands})
_renames.update({'%sMeanPSFMagErr' % b: '%s_err_ps1' % b for b in _bands})

df.rename(columns=_renames, inplace=True)

df.head(5)

## Add Columns to Matched Catalog Data Frame

In [None]:
# Add color columns (<blue><red>_<system> = <blue>_<system> - <red>_<system>)
# for both photometric systems...
for _system in ['ComCam', 'ps1']:
    for _blue, _red in [('g','r'), ('r','i'), ('i','z'), ('z','y'), ('g','i')]:
        _colorName = '%s%s_%s' % (_blue, _red, _system)
        _blueName = '%s_%s' % (_blue, _system)
        _redName = '%s_%s' % (_red, _system)
        df.loc[:,_colorName] = df.loc[:,_blueName] - df.loc[:,_redName]


In [None]:
# Insert a 'dmag' column filled with the placeholder value -9999.; the fitting
# cells below overwrite it with the per-band magnitude difference...
df.loc[:,'dmag'] = -9999.

## Create Initial Mask

In [None]:
# Keep only rows whose fit color (column color_name_1) lies strictly between
# -1.0 and 4.0; the per-band cuts are AND-ed onto this mask in the fitting cells...
mask = ( ( df[color_name_1] > -1. ) & ( df[color_name_1] < 4.0 ) )

## Make Backup Copies of Initial Mask and Original Data Frame

In [None]:
# The fitting cells overwrite df and mask, so keep pristine copies from which
# each band's fit can start again...

# Make a backup copy of original df...
df_orig = df.copy()

# Make a backup copy of original mask...
mask_orig = mask.copy()

In [None]:
# NOTE(review):  "XXXX" is an undefined name, so running this cell raises a
# NameError -- presumably a deliberate stop so that "Run All" halts before
# the fitting cells below; confirm before removing.
XXXX

In [None]:
# Create results output file...
resultsFile = """%s.dmag.%s.norder%d.csv""" % (resultsFileBaseName, color_name_1, norder)

# Open fit results output file...
try:
    fout = open(resultsFile, 'w')
except IOError:
    sys.exit('Unable to write to file ' + resultsFile)

# The "with" block guarantees that the results file is closed even if a fit
# or a QA plot raises part-way through the band loop...
with fout:

    # Write header to fit results output file...
    hdr = createFitResultsHeaderOutputLine(norder)
    fout.write(hdr+'\n')

    for band in bandList:

        print()
        print()
        print()
        print( "# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # ")
        print( band)
        print( "# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # ")
        print()

        magName_ComCam = """%s_ComCam""" % (band)
        magErrName_ComCam = """%s_err_ComCam""" % (band)
        magName_ps1 = """%s_ps1""" % (band)
        magErrName_ps1 = """%s_err_ps1""" % (band)

        # Grab the original version of df from the backup copy...
        df = df_orig.copy()

        # Grab the original version of mask from the backup copy...
        mask = mask_orig.copy()

        # Update dmag column:  target-system magnitude minus source-system
        # magnitude, i.e. {band}_ComCam - {band}_ps1 for PS1-->ComCam and
        # {band}_ps1 - {band}_ComCam for ComCam-->PS1...
        if toComCam:
            df.loc[:,'dmag'] = df.loc[:,magName_ComCam] - df.loc[:,magName_ps1]
        else:
            df.loc[:,'dmag'] = df.loc[:,magName_ps1] - df.loc[:,magName_ComCam]

        # Update mask:  drop wild magnitude differences and objects with
        # large magnitude errors in either catalog...
        mask1 = abs(df['dmag']) <= 10.
        mask2 = abs(df[magErrName_ComCam]) <= 0.02
        mask3 = abs(df[magErrName_ps1]) <= 0.01
        mask = mask & mask1 & mask2 & mask3

        # Iterate, with sigma-clipping...
        for i in range(niter):

            iiter = i + 1
            if verbose > 0:
                print( """   iter%d...""" % ( iiter ))

            # Keep only the rows accepted by the current mask (the initial
            # cuts on the first pass, the sigma clip of the previous fit
            # afterwards), overwriting the old df...
            df = df[mask].copy()

            # Identify dmag and color1 series...
            dmag =  df.loc[:,'dmag']
            color1 = df.loc[:,color_name_1]

            # Dump the fit inputs only at the higher verbosity levels...
            if verbose > 1:
                print(color1)
                print(dmag)

            # Perform fit...
            p,perr,rms = transformFit1(color1, dmag, norder, verbose)
            df.loc[:,'res'] = residuals1(p, color1, dmag)

            # Identify outliers (applied at the start of the next pass;
            # the mask from the final pass is not applied)...
            stddev = df['res'].std()
            mask = (np.abs(df.res)< nsigma*stddev)


        # Output results to the results file, using the same norder as the
        # header so that both rows have the same number of columns...
        outputLine = createFitResultsOutputLine(norder, p, perr, rms, band, color_name_1)
        fout.write(outputLine+'\n')

        # Create title/names for use in QA plots...
        if toComCam:
            title = """$%s_{ps1}$ --> $%s_{ComCam}$""" % (band, band)
            dmagName = """$%s_{ComCam} - %s_{ps1}$""" % (band, band)
        else:
            title = """$%s_{ComCam}$ --> $%s_{ps1}$""" % (band, band)
            dmagName = """$%s_{ps1} - %s_{ComCam}$""" % (band, band)

        # Create QA plots...
        res =  df.loc[:,'res']
        outputFileName = """%s.dmag_%s.%s.norder%d.qa1.png""" % (qaFileBaseName, band, color_name_1, norder)
        status = transform1ColorQAPlots1(dmag, color1, res, norder, title, dmagName, colorLabel_1, p, rms, outputFileName)

In [None]:
# Example:  sigma-clipped polynomial fit for a single band, with a plot
# of the accepted points, the rejected points and the fitted line
import matplotlib.pyplot as plt

band = 'g'

# Column names for this band in each photometric system...
magName_ComCam = f'{band}_ComCam'
magErrName_ComCam = f'{band}_err_ComCam'
magName_ps1 = f'{band}_ps1'
magErrName_ps1 = f'{band}_err_ps1'

# Start again from the backup copies of the data frame and the mask...
df = df_orig.copy()
mask = mask_orig.copy()

# dmag is the target-system magnitude minus the source-system magnitude...
df.loc[:,'dmag'] = (df.loc[:,magName_ComCam] - df.loc[:,magName_ps1] if toComCam
                    else df.loc[:,magName_ps1] - df.loc[:,magName_ComCam])

# Quality cuts, AND-ed onto the initial color mask...
mask1 = df['dmag'].abs() <= 10.
mask2 = df[magErrName_ComCam].abs() <= 0.02
mask3 = df[magErrName_ps1].abs() <= 0.01
mask = mask1 & mask2 & mask3 & mask

# Keep only the rows that pass all cuts...
df = df.loc[mask].copy()

# Series to fit:  dmag as a function of color1...
dmag = df['dmag']
color1 = df[color_name_1]

color1_array = color1
dmag_array = dmag

# Perform the fit (note that mask is rebound to the fit's acceptance mask)
coeffs, mask, rms = poly_fit_with_sigma_clip(color1_array, dmag_array, degree=1)

# Evaluate the fit on an even grid spanning the color range, for plotting
x_smooth = np.linspace(min(color1_array), max(color1_array), 100)
y_smooth = np.polyval(coeffs, x_smooth)

# Plot accepted points, rejected points and the fitted curve
plt.figure(figsize=(10, 6))
plt.title(f'Polynomial fit (RMS = {rms:.3f})')
plt.xlabel('Color')
plt.ylabel('dmag')
plt.scatter(color1_array[mask], dmag_array[mask], label='Used points')
plt.scatter(color1_array[~mask], dmag_array[~mask], color='red', label='Rejected points')
plt.plot(x_smooth, y_smooth, 'k-', label='Fit')
plt.legend()
plt.show()

# Print coefficients (coeffs is ordered highest power first)
for i, c in enumerate(coeffs):
    print(f'c_{len(coeffs)-1-i} = {c:.6f}')