# Transformation Equations:  LSSTCam <--> SDSS DR18

_Meagan N. Porter, Douglas L. Tucker, Christina L. Adair_

_2025.08.05_

## 1. Import Modules

In [None]:
import numpy as np
import pandas as pd

from lsst.daf.butler import Butler
import lsst.geom as geom

import pyvo

import os
import sys
import glob
import math
import datetime

from collections import OrderedDict as odict

from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord
import astropy.units as u

import fitsio

from scipy import interpolate
from scipy.optimize import leastsq

import healpy as hp

import plotly
from plotly.offline import download_plotlyjs, plot, iplot
import plotly.graph_objs as go

import matplotlib.pyplot as plt

%matplotlib inline

## 2. Input Parameters

In [None]:
# Direction of the transformation:
#   toLSST = True  : SDSS DR18 --> LSST
#   toLSST = False : LSST --> SDSS DR18
toLSST = True
#toLSST = False

# Per-band lookup tables.  Every table is keyed by the entries of bandList
#  (the bands being fit); the magnitude and color fit against always come
#  from the *other* survey.
if toLSST:
    # LSST bands (plus the g-i color) on which to run the transform fit...
    bandList = ['u_LSST', 'g_LSST', 'r_LSST', 'i_LSST', 'z_LSST', 'y_LSST', 'gi_LSST']
    # Corresponding band from the other survey:  the same-named SDSS band...
    matchBand_dict = {band: band.replace('_LSST', '_sdss') for band in bandList}
    # ...except LSST y, which is fit against SDSS z.
    matchBand_dict['y_LSST'] = 'z_sdss'
    # Color to fit against, and its label in the QA plots...
    color_name_1_dict = {band: 'gi_sdss' for band in bandList}
    colorLabel_1_dict = {band: '$(g-i)_{sdss}$' for band in bandList}
else:
    # SDSS bands (plus the g-i color) on which to run the transform fit...
    bandList = ['u_sdss', 'g_sdss', 'r_sdss', 'i_sdss', 'z_sdss','gi_sdss']
    # Corresponding band from the other survey:  the same-named LSST band...
    matchBand_dict = {band: band.replace('_sdss', '_LSST') for band in bandList}
    # Color to fit against, and its label in the QA plots...
    color_name_1_dict = {band: 'gi_LSST' for band in bandList}
    colorLabel_1_dict = {band: '$(g-i)_{LSST}$' for band in bandList}

# Color limits defining disjoint branches of the dmag vs. color plots
#  (each branch will be fit separately).  Two limits means one branch.
#  Each band gets its own list object because the lists are edited later.
color_limits_1_dict = {band: [-10., 10.] for band in bandList}


# Order of polynomial fits...
norder = 1

# Sigma-clipping parameters...
nsigma = 3.0
niter = 3

# LSST data
repo = '/repo/embargo'
collection = 'LSSTCam/runs/DRP/20250501_20250609/w_2025_26/DM-51580'
skymap = 'lsst_cells_v1'
instrument = 'LSSTCam'

# Name of the SDSS file (if it exists)
sdssFile = '/home/d/dltucker/DATA/LSST_COSMOS_dtucker.csv'

# Read an existing match file (True), or query and match from scratch (False)?
useMatchFile = False

# Name of the match file
matchFile = '/home/d/dltucker/DATA/match.LSST_COSMOS.DM-51580.SDSS_DR18.csv'

# Base name of QA plot output files...
qaFileBaseName = 'qaPlot.SDSS_to_LSST.fit' if toLSST else 'qaPlot.LSST_to_SDSS.fit'

# Verbosity level (0, 1, 2, 3, ...)
verbose = 2


# COSMOS field:  tract number --> field name, and the list of tracts to process
tract_dict = {9813: 'COSMOS'}
tract_list = [9813]

## 3. Define Useful Functions

In [None]:
# Raise this exception in a cell (the command "raise StopExecution")
#  to stop a "Run All" at that cell.
class StopExecution(Exception):
    """Exception whose traceback renderer deliberately produces nothing."""

    def _render_traceback_(self):
        # Intentionally render no traceback text.
        return None

In [None]:
def cross_match_catalogs(df1, df2, ra_name_1, dec_name_1, ra_name_2, dec_name_2,
                         max_sep_arcsec=3.0):
    """
    Match each row of df1 to its nearest neighbour on the sky in df2.

    Parameters:
    -----------
    df1 : pandas.DataFrame
        Catalog whose rows are matched (one candidate match per row)
    df2 : pandas.DataFrame
        Catalog that is searched for the nearest neighbour
    ra_name_1, dec_name_1 : str
        Names of the RA and DEC columns of df1 (values in degrees)
    ra_name_2, dec_name_2 : str
        Names of the RA and DEC columns of df2 (values in degrees)
    max_sep_arcsec : float
        Largest separation, in arcseconds, accepted as a match
        (default 3.0)

    Returns:
    --------
    matches : pandas.DataFrame
        Rows of df1 whose nearest df2 neighbour lies closer than
        max_sep_arcsec, plus the columns 'match_idx' (positional row
        index into df2), 'separation_arcsec', and one 'match_<col>'
        column for every column of df2
    unmatched : pandas.DataFrame
        Rows of df1 without a neighbour inside max_sep_arcsec

    Notes:
    ------
    The same df2 row can be the nearest neighbour of several df1 rows;
    such repeats are not removed.  If either catalog is empty, no
    matches are returned and every row of df1 is reported as unmatched.
    """

    n1 = len(df1)

    if n1 == 0 or len(df2) == 0:
        # Nothing can match; skip the sky match, which needs a non-empty catalog.
        idx = np.zeros(n1, dtype=int)
        sep_arcsec = np.full(n1, np.inf)
        mask = np.zeros(n1, dtype=bool)
    else:
        # Create SkyCoord objects for both dataframes
        coords1 = SkyCoord(ra=df1[ra_name_1].values*u.degree,
                           dec=df1[dec_name_1].values*u.degree)
        coords2 = SkyCoord(ra=df2[ra_name_2].values*u.degree,
                           dec=df2[dec_name_2].values*u.degree)

        # Nearest neighbour in df2 for every row of df1.  The returned
        #  indices always point at valid rows of df2.
        idx, d2d, _ = coords1.match_to_catalog_sky(coords2)
        sep_arcsec = d2d.arcsec

        # Keep only the neighbours that are close enough
        mask = d2d < max_sep_arcsec * u.arcsec

    # Create a new dataframe with matches
    matches = df1[mask].copy()
    matches['match_idx'] = idx[mask]                # Index of matching object in df2
    matches['separation_arcsec'] = sep_arcsec[mask] # Separation in arcseconds

    # Add columns from df2 for the matches.  The matched rows are selected
    #  once (by position) and then copied over column by column.
    matched_rows = df2.iloc[idx[mask]]
    for col in df2.columns:
        matches[f'match_{col}'] = matched_rows[col].values

    # Each df1 row has exactly one nearest neighbour, so for a uniquely
    #  indexed df1 this selection keeps every row (ordered by index label).
    if not matches.empty:
        matches = matches.loc[matches.groupby(matches.index)['separation_arcsec'].idxmin()]

    # Objects in df1 that had no match:
    unmatched = df1[~mask]

    return matches, unmatched

In [None]:
def transform1ColorQAPlots1a(dmag, color1, res, norder, title, plotText, dmagName, colorLabel1, rms, outputFileName):
    """
    Create a 2x2 grid of QA plots for a one-color transformation fit
    and save it to outputFileName.

    The four panels are:  (1) descriptive text, (2) dmag vs. color1,
    (3) a histogram of the residuals, and (4) residuals vs. color1.
    Panels 2 and 4 are scatter plots for fewer than 10000 points and
    log-scaled hexbin density plots otherwise.

    Parameters:
    -----------
    dmag : pandas.Series
        Magnitude differences that were fit
    color1 : pandas.Series
        Colors that the magnitude differences were fit against
    res : pandas.Series
        Residuals of the fit
    norder : int
        Order of the polynomial fit (not used by this function)
    title : str
        Title shown in the text panel
    plotText : str
        Description of the fit shown in the text panel
    dmagName : str
        Y-axis label of the dmag vs. color1 panel
    colorLabel1 : str
        X-axis label of the two color panels
    rms : float
        RMS of the fit (not used by this function)
    outputFileName : str
        Name of the file the figure is saved to

    Returns:
    --------
    status : int
        Always 0

    Notes:
    ------
    The global matplotlib font size is set to 24 as a side effect, and
    the figure is left open after it has been saved.
    """

    # Prepare QA plots...
    fig = plt.figure(figsize=(20,10))
    fig.subplots_adjust(hspace=0.3)
    plt.rcParams.update({'font.size': 24})

    # We will exclude the lowest and highest 0.01% of color1, dmag,
    #  and residuals when plotting the QA figures...
    percentiles = [0.0001, 0.001, 0.01, 0.99, 0.999, 0.9999]
    color1_desc = color1.describe(percentiles=percentiles)
    dmag_desc = dmag.describe(percentiles=percentiles)
    res_desc = res.describe(percentiles=percentiles)
    color1_min = color1_desc['0.01%']
    color1_max = color1_desc['99.99%']
    dmag_min = dmag_desc['0.01%']
    dmag_max = dmag_desc['99.99%']
    res_min = res_desc['0.01%']
    res_max = res_desc['99.99%']


    # Plot 1:  Descriptive text...
    plt.subplot(221)
    plt.text(0.1,0.80,title,fontsize=24)
    # Use the plotText argument (not a same-purpose global variable), so the
    #  function works no matter what the calling cell named its text.
    plt.text(0.00,0.40,plotText,fontsize=12)
    plt.axis('off')


    # Plot 2:  dmag vs. color1 (scatter plot or 2D hexbin histogram)...
    plt.subplot(222)
    if len(dmag) < 10000:
        plt.scatter(color1, dmag, alpha=0.75)
    else:
        hb=plt.hexbin(color1, dmag, gridsize=100, bins='log', cmap='inferno')
    plt.axis([color1_min, color1_max, dmag_min, dmag_max])
    plt.xlabel(colorLabel1)
    plt.ylabel(dmagName)
    if len(dmag) >= 10000:
        cb = fig.colorbar(hb)
        cb.set_label('log10(N)')
    plt.grid(color='blue')
    plt.grid(True)


    # Plot 3:  1d histogram of residuals (coarser binning for small samples)...
    plt.subplot(223)
    if len(res) < 100:
        plt.hist(res,bins=10)
    else:
        plt.hist(res,bins=100)
    plt.xlabel('residuals [mag]')
    plt.ylabel('Number')
    plt.grid(True)
    plt.grid(color='blue')


    # Plot 4:  residuals vs. color1 (scatter plot or 2D hexbin histogram)...
    plt.subplot(224)
    if len(res) < 10000:
        plt.scatter(color1, res, alpha=0.75)
    else:
        hb = plt.hexbin(color1, res, gridsize=100, bins='log', cmap='inferno')
    plt.axis([color1_min, color1_max, res_min, res_max])
    plt.xlabel(colorLabel1)
    plt.ylabel('residuals [mag]')
    if len(res) >= 10000:
        cb = plt.colorbar(hb)
        cb.set_label('log10(N)')
    plt.grid(True)
    plt.grid(color='blue')


    # Save the figure...
    plt.tight_layout()
    plt.savefig(outputFileName)

    return 0


##################################


In [None]:
# Kudos to Claude-3.5-Sonnet for improving on old outlier rejection code...

def poly_fit_with_sigma_clip(x, y, degree=1, sigma=3.0, maxiters=5, verbose=True):
    """
    Perform polynomial fit with iterative sigma clipping

    Each iteration fits a polynomial to the currently accepted points
    and then sigma-clips the residuals of *all* points about that fit
    (so a point rejected earlier can be accepted again).  Iteration
    stops when the set of accepted points no longer changes, or after
    maxiters iterations.

    Parameters:
    -----------
    x : array-like
        Independent variable
    y : array-like
        Dependent variable
    degree : int
        Degree of polynomial fit
    sigma : float
        Sigma clipping threshold
    maxiters : int
        Maximum number of sigma clipping iterations
        (0 or less gives a single fit without any clipping)
    verbose : bool
        If True (default), print the number of accepted x points,
        accepted y points, and total points at each iteration

    Returns:
    --------
    coeffs : array
        Polynomial coefficients, highest order first (np.polyfit order)
    coeff_errors : array
        Square roots of the diagonal of the covariance matrix of coeffs
    x_used : array
        x values of the non-clipped points
    y_used : array
        y values of the non-clipped points
    residuals : array
        y_used minus the fitted polynomial evaluated at x_used
    rms : float
        RMS of residuals
    """

    # Import relevant modules
    import numpy as np
    from astropy.stats import sigma_clip

    # Start with all points accepted
    x = np.asarray(x)
    y = np.asarray(y)
    mask = np.ones_like(x, dtype=bool)

    for _ in range(maxiters):
        if verbose:
            print(len(x[mask]), len(y[mask]), len(mask))

        # Fit polynomial to non-masked points
        coeffs, cov = np.polyfit(x[mask], y[mask], degree, cov=True)

        # Calculate residuals of all points about this fit
        residuals = y - np.polyval(coeffs, x)

        # Update mask with sigma clipping
        new_mask = ~sigma_clip(residuals, sigma=sigma).mask

        # Converged:  coeffs were fit to exactly the points in mask
        if np.array_equal(mask, new_mask):
            break

        mask = new_mask
    else:
        # Not converged (or no iterations requested):  the mask is newer than
        #  the last fit, so refit once so that the returned coefficients and
        #  errors belong to exactly the points that are returned.
        coeffs, cov = np.polyfit(x[mask], y[mask], degree, cov=True)

    # Calculate final RMS
    final_residuals = y[mask] - np.polyval(coeffs, x[mask])
    rms = np.sqrt(np.mean(final_residuals**2))

    if verbose:
        print(len(x[mask]), len(y[mask]), len(mask))

    # Calculate coefficient errors from diagonal of covariance matrix
    coeff_errors = np.sqrt(np.diag(cov))

    return coeffs, coeff_errors, x[mask], y[mask], final_residuals, rms

## 3a. Read in Matched Catalog (only if `useMatchFile` is True)

In [None]:
#matchFile = os.path.join(matchDir,matchFile)
#print(matchFile)

In [None]:
# When a saved match file is requested, report (without stopping)
#  if it cannot be found...
if useMatchFile:
    if not os.path.isfile(matchFile):
        print("""ERROR:  matchFile %s does not exist...""" % (matchFile))
    if verbose > 0:
        print('matchFile: ', matchFile)


In [None]:
# If a saved match file is being used, read it (CSV format) into
#  an Astropy table and display it...
if useMatchFile:
    tab = Table.read(matchFile, format='csv')
    display(tab)

In [None]:
# Convert the Astropy table of the match file into the pandas DataFrame
#  "matches" that the rest of the notebook works on...
if useMatchFile:
    matches = tab.to_pandas()
    display(matches)

## 4. Query LSST Catalog

In [None]:
# Create two butlers, one including the skymap.
#  (Creating two should not be necessary, but...)
# Only needed when the catalogs are queried instead of read from matchFile.
# NOTE(review): skybutler is not referenced again in the visible part of
#  this notebook -- confirm whether it is still needed.
if not useMatchFile:
    butler = Butler(repo, collections=collection)
    skybutler = Butler(repo, collections=collection, skymap=skymap)

In [None]:
# Choose the subset of the (nearly 1000) LSST ObjectTable columns to download...
if not useMatchFile:
    # Position and tract/patch columns...
    INCOLS = ['coord_ra', 'coord_dec', 'tract', 'patch']
    # ...followed, band by band, by six measurement columns per band.
    bands="ugrizy"
    for band in bands:
        INCOLS.extend(f'{band}_{suffix}' for suffix in ('psfFlux',
                                                         'psfFluxErr',
                                                         'ap12Flux',
                                                         'ap12FluxErr',
                                                         'extendedness',
                                                         'psfFlux_flag'))


In [None]:
# For every tract in tract_list, download the LSST objects, keep those with
#  SNR>5 in the r-band psfFlux and unflagged g,r,i psfFlux, select the likely
#  stars among them, and collect the stars of all tracts into LSST_stars_all...
if not useMatchFile:
    LSST_stars_list = []
    # Bounding (ra,dec) of the selected stars, keyed by tractId
    ra_min = {}
    ra_max = {}
    dec_min = {}
    dec_max = {}


    for tractId in tract_list:

        # Field name -- if known.  Using .get() here keeps a tract that is
        #  missing from tract_dict from raising a KeyError.
        field = tract_dict.get(tractId, 'unknown')
        print(tractId, field)

        try:

            raw_LSST = butler.get('object', dataId={'skymap': skymap, 'tract': tractId},
                                    collections=[collection],
                                    parameters={"columns":INCOLS}).to_pandas()

            # Insert tractId as the first column
            raw_LSST.insert(0, 'tractId', tractId)

            # Insert field name as the second column
            raw_LSST.insert(1, 'field', field)

            # Clean the catalog:  r-band SNR > 5 and no g,r,i psfFlux flags
            sel = (raw_LSST['r_psfFlux']/raw_LSST['r_psfFluxErr'] > 5)
            for band in ['g','r','i']:
                sel &= (raw_LSST[f'{band}_psfFlux_flag'] == 0)

            LSST = raw_LSST[sel]

            # Find just the (most likely) stars...
            sel_LSST_stars = (LSST['g_extendedness'] < 0.5) & (LSST['r_extendedness'] < 0.5)
            LSST_stars = LSST[sel_LSST_stars]
            print(f"Number of objects: {len(LSST)}")
            print(f"Number of stars: {len(LSST_stars)}")

            # Find the bounding (ra,dec)'s of the stars for this tractId:
            ra_min[tractId] = LSST_stars.coord_ra.min()
            ra_max[tractId] = LSST_stars.coord_ra.max()
            dec_min[tractId] = LSST_stars.coord_dec.min()
            dec_max[tractId] = LSST_stars.coord_dec.max()
            print(tractId, ra_min[tractId], ra_max[tractId], dec_min[tractId], dec_max[tractId])

            # Append the dataframe to the list
            LSST_stars_list.append(LSST_stars)

        # Best effort:  report a tract that fails and carry on with the next one
        except Exception as e:

            print(f"An error occurred for tractId {tractId}: {e}")


    # Without a single successful tract there is nothing to concatenate;
    #  say so explicitly instead of failing inside pd.concat.
    if not LSST_stars_list:
        raise ValueError(f"No LSST stars were retrieved for any tract in {tract_list}")

    # Concatenate all dataframes in the list
    LSST_stars_all = pd.concat(LSST_stars_list, ignore_index=True)
    print(f"Total number of stars: {len(LSST_stars_all)}")


In [None]:
# Show the combined table of LSST stars from all tracts...
if not useMatchFile:
    display(LSST_stars_all)


## 5. Query SDSS DR18 Catalog

***THIS SECTION NEEDS WORK!***

Ideally, we would query the SDSS CasJobs directly from this current Jupyter notebook via the SciScript-Python library ( https://github.com/sciserver/SciScript-Python ), following methodology described in this example SciServer Jupyter notebook:  https://github.com/sciserver/Example-Notebooks/blob/main/SciServer%20Components%20-%20Python%20Examples/CasJobs.ipynb


For now, we instead submitted the following query to SDSS CasJobs by hand:

```
SELECT  
  dbo.fIAUFromEq(s.ra, s.dec) as name, 
  s.ra,s.dec,
  s.psfMag_u,s.psfMag_g,s.psfMag_r,s.psfMag_i,s.psfMag_z,
  s.psfMagErr_u,s.psfMagErr_g,s.psfMagErr_r,s.psfMagErr_i,s.psfMagErr_z,
  r.run,r.stripe  
INTO mydb.LSST_DP1_AREAS_2
FROM Star s, Run r
WHERE
  s.run = r.run
  AND ( (s.ra BETWEEN 147.0 AND  153.0 AND s.dec BETWEEN -1.0 AND 5.0) )
  AND ((s.flags_u & 0x10000000) != 0) AND ((s.flags_g & 0x10000000) != 0) AND ((s.flags_r & 0x10000000) != 0) AND ((s.flags_i & 0x10000000) != 0) AND ((s.flags_z & 0x10000000) != 0) 
  AND ((s.flags_u & 0x8100000c00a4) = 0) AND ((s.flags_g & 0x8100000c00a4) = 0) AND ((s.flags_r & 0x8100000c00a4) = 0) AND ((s.flags_i & 0x8100000c00a4) = 0) AND ((s.flags_z & 0x8100000c00a4) = 0)     
  AND (((s.flags_u & 0x400000000000) = 0) or (s.psfmagerr_u <= 0.2)) AND (((s.flags_g & 0x400000000000) = 0) or (s.psfmagerr_g <= 0.2)) AND (((s.flags_r & 0x400000000000) = 0) or (s.psfmagerr_r <= 0.2)) 
  AND (((s.flags_i & 0x400000000000) = 0) or (s.psfmagerr_i <= 0.2)) AND (((s.flags_z & 0x400000000000) = 0) or (s.psfmagerr_z <= 0.2)) 
  AND (((s.flags_u & 0x100000000000) = 0) or (s.flags_u & 0x1000) = 0) AND (((s.flags_g & 0x100000000000) = 0) or (s.flags_g & 0x1000) = 0) AND (((s.flags_r & 0x100000000000) = 0) or (s.flags_r & 0x1000) = 0) 
  AND (((s.flags_i & 0x100000000000) = 0) or (s.flags_i & 0x1000) = 0) AND (((s.flags_z & 0x100000000000) = 0) or (s.flags_z & 0x1000) = 0)
```

and downloaded the result as `LSST_COSMOS_dtucker.csv` (the file that `sdssFile` in Section 2 points to).

In [None]:
# When the catalogs are matched from scratch, report (without stopping)
#  if the SDSS file cannot be found...
if not useMatchFile:
    if not os.path.isfile(sdssFile):
        print("""ERROR:  sdssFile %s does not exist...""" % (sdssFile))
    if verbose > 0:
        print('sdssFile: ', sdssFile)


In [None]:
# Read sdssFile (CSV format) into Astropy table and display it...
#  (skipped when a saved match file is used instead)
if not useMatchFile:
    tab = Table.read(sdssFile, format='csv')
    display(tab)

In [None]:
# Convert Astropy table of sdssFile contents into a Pandas DataFrame...
#  (combined_df is the SDSS catalog that the LSST stars are matched against)
if not useMatchFile:
    combined_df = tab.to_pandas()
    display(combined_df)
    

## 6. Match LSST and SDSS DR18 stars

In [None]:
# Match every LSST star to its nearest SDSS star on the sky.
#  "matches" holds the LSST stars with an SDSS counterpart (SDSS columns are
#  added with a "match_" prefix); "unmatched" holds the LSST stars without one.
if not useMatchFile:
    matches, unmatched = cross_match_catalogs(LSST_stars_all, combined_df, 
                                              'coord_ra', 'coord_dec', 
                                              'ra', 'dec')

In [None]:
# Show the matched LSST/SDSS stars...
if not useMatchFile:
    display(matches)

In [None]:
# Save the matched catalog to matchFile (CSV, without the index column), so
#  that a later run can set useMatchFile = True and read it back instead of
#  repeating the query and match.  An existing matchFile is overwritten.
if not useMatchFile:
    matches.to_csv(matchFile,index=False)

In [None]:
# Show the LSST stars that have no SDSS counterpart...
if not useMatchFile:
    display(unmatched)

##  7. Add ABmag Columns to Matched Catalog Data Frame

In [None]:
# Copy matches to df...
#  (df is the working data frame from here on; matches itself is left untouched)
df = matches.copy()

In [None]:
# Define common parameters
flux_bands = ['u', 'g', 'r', 'i', 'z', 'y']
offset = 31.4 # For magnitude calculation (presumably the zero point for fluxes in nJy -- confirm)
sentinel_value = -9999.0

# For each band, derive <band>_psfMag and <band>_psfMagErr from the PSF flux
#  and its error; rows without a usable flux get the sentinel value in both.
for band in flux_bands:

    flux = df[f'{band}_psfFlux']
    flux_err = df[f'{band}_psfFluxErr']

    # A usable flux is positive and not missing (needed for log10 and division)
    usable = (flux > 0) & flux.notna()

    # Magnitude
    df[f'{band}_psfMag'] = np.where(usable,
                                    -2.5 * np.log10(flux) + offset,
                                    sentinel_value)

    # Magnitude error
    df[f'{band}_psfMagErr'] = np.where(usable,
                                       1.086 * flux_err / flux,
                                       sentinel_value)



In [None]:
# Display df, now including the new magnitude and magnitude error columns...
df

In [None]:
# Rename columns to the <band>_<survey> / <band>_err_<survey> names that
#  the rest of the notebook uses...
column_renames = {'coord_ra':'RA_LSST', 'coord_dec':'DEC_LSST'}
# LSST magnitudes and errors (ugrizy)
column_renames.update({f'{b}_psfMag': f'{b}_LSST' for b in 'ugrizy'})
column_renames.update({f'{b}_psfMagErr': f'{b}_err_LSST' for b in 'ugrizy'})
# Matched SDSS magnitudes and errors (ugriz)
column_renames.update({f'match_psfMag_{b}': f'{b}_sdss' for b in 'ugriz'})
column_renames.update({f'match_psfMagErr_{b}': f'{b}_err_sdss' for b in 'ugriz'})

df.rename(columns=column_renames, inplace=True)

df.head(5)

In [None]:
# List all column names of df, one per line (check of the renaming)...
for col in df.columns:
    print(col)

## 8. Add Color Columns to Matched Catalog Data Frame

In [None]:
# Add color columns, named <blue band><red band>_<survey> and computed as
#  (blue-band magnitude) - (red-band magnitude)...
for survey, band_pairs in (('LSST', ('ug', 'gr', 'ri', 'iz', 'zy', 'gi')),
                           ('sdss', ('ug', 'gr', 'ri', 'iz', 'gi'))):
    for blue, red in band_pairs:
        df.loc[:,f'{blue}{red}_{survey}'] = df.loc[:,f'{blue}_{survey}'] - df.loc[:,f'{red}_{survey}']


In [None]:
# Insert dmag column...
#  (placeholder value only; dmag is recomputed for each band in the fit loop)
df.loc[:,'dmag'] = -9999.

In [None]:
# Display df, now including the color and dmag columns...
df

## 9. Create Initial Mask

In [None]:
# Quality cuts per band, as (mag lower limit, mag upper limit, max mag error).
#  A star passes a band's cut if  lower < mag < upper  and  mag error <= max.
sdss_cuts = {'u': (15., 30., 0.10),
             'g': (15., 30., 0.05),
             'r': (15., 30., 0.05),
             'i': (15., 30., 0.05),
             'z': (15., 30., 0.10)}

lsst_cuts = {'g': (16.5, 30., 0.05),
             'r': (16.5, 30., 0.05),
             'i': (16.5, 30., 0.05),
             'z': (16.5, 30., 0.05),
             'y': (16.0, 30., 0.10)}

# No LSST y-band and little LSST z-band for overlapping SDSS!
#  So only the g, r, and i cuts are applied on the LSST side.
lsst_bands_applied = ['g', 'r', 'i']

# SDSS mask:  all five bands must pass...
mask_sdss = pd.Series(True, index=df.index)
for b, (mag_lo, mag_hi, err_max) in sdss_cuts.items():
    mask_sdss = mask_sdss & (df[f"{b}_sdss"] > mag_lo) & (df[f"{b}_sdss"] < mag_hi) \
                          & (df[f"{b}_err_sdss"] <= err_max)

# LSST mask:  the applied bands must pass...
mask_LSST = pd.Series(True, index=df.index)
for b in lsst_bands_applied:
    mag_lo, mag_hi, err_max = lsst_cuts[b]
    mask_LSST = mask_LSST & (df[f"{b}_LSST"] > mag_lo) & (df[f"{b}_LSST"] < mag_hi) \
                          & (df[f"{b}_err_LSST"] <= err_max)

# Initial mask:  a star must pass both the SDSS and the LSST cuts...
mask = mask_sdss & mask_LSST
#mask = mask_LSST


## 10. Make Backup Copies of Initial Mask and Original Data Frame

In [None]:
# Make a backup copy of original df...
#  (the fit loop starts every band from this copy)
df_orig = df.copy()

# Make a backup copy of original mask...
#  (the fit loop starts every band from this copy)
mask_orig = mask.copy()

## 11. Run Fit in Each Filter Band

In [None]:
# For every band in bandList:  compute dmag = band - matched band, apply the
#  quality mask, fit dmag vs. color with a sigma-clipped polynomial (one fit
#  per color branch), and save a QA plot of the result...
for band in bandList:

    print("")
    print("")
    print("")
    print("# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # ")
    print(band)
    print("# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # ")
    print("")

    magName = band
    magName_match = matchBand_dict[band]
    color_name_1 = color_name_1_dict[band]
    colorLabel_1 = colorLabel_1_dict[band]

    # Create title/names for use in QA plots...
    title = """%s --> %s""" % (magName_match, magName)
    dmagName = """%s - %s""" % (magName, magName_match)

    # Grab the original version of df from the backup copy...
    df = df_orig.copy()

    # Grab the original version of mask from the backup copy...
    mask = mask_orig.copy()

    # Update dmag column for {$band} - {$band}_match...
    df.loc[:,'dmag'] = df.loc[:,magName] - df.loc[:,magName_match]

    # Update mask:  drop wild dmag values (this also drops magnitudes set
    #  to the -9999 sentinel) and missing or extreme colors...
    mask1 = ( abs(df['dmag']) <= 10. )
    mask2 = ( ( df[color_name_1] > -3. ) & ( df[color_name_1] < 6.0 ) & (df[color_name_1].notna()) )
    mask = mask & mask1 & mask2

    # Apply the new mask to df...
    df = df[mask]

    # Without any surviving stars the color limits below are undefined
    #  (NaN), so skip this band instead of failing...
    if df.empty:
        print("WARNING:  no stars survive the cuts for %s; skipping this band..." % (band))
        continue

    # Calculate number of disjoint branches to fit...
    nbranches = len(color_limits_1_dict[band]) - 1
    if verbose > 0: print(band, color_limits_1_dict[band], nbranches)


    # Reset bluest color limit in bluest branch to color1_min
    #  and reddest color limit in reddest branch to color1_max,
    #  after excluding above mask...
    color1_desc = df[color_name_1].describe(percentiles=[0.0001, 0.9999])
    color1_min = math.floor(10*(color1_desc['0.01%']-0.05))/10.
    color1_max = math.ceil(10*(color1_desc['99.99%']+0.05))/10.
    color_limits_1_dict[band][0] = color1_min
    color_limits_1_dict[band][nbranches] = color1_max
    if verbose > 0: print(band, color_limits_1_dict[band], nbranches)

    # Per-branch results (only the points that survived sigma-clipping)...
    color1_list = []
    dmag_list = []
    res_list = []
    plot1Text = ''

    for ibranch in range(nbranches):

        print('*********************************')
        print(ibranch, color_limits_1_dict[band][ibranch], color_limits_1_dict[band][ibranch+1])
        print('*********************************')

        # Extract branch...
        mask_branch = ( (df[color_name_1] >  color_limits_1_dict[band][ibranch]) & \
                        (df[color_name_1] <= color_limits_1_dict[band][ibranch+1]) )

        df_branch = df[mask_branch].copy()

        # ... and extract dmag's and color1's for this branch...
        dmag_branch   = df_branch.loc[:,'dmag']
        color1_branch = df_branch.loc[:,color_name_1]

        # A fit of order norder with a covariance matrix needs at least
        #  norder+2 points; if there are fewer, continue to next branch...
        if len(color1_branch) < norder + 2:
            print("WARNING:  only %d star(s) on this branch; skipping this branch..." % (len(color1_branch)))
            continue

        # Perform fit for each disjoint branch, using the sigma-clipping
        #  parameters (nsigma, niter) set in the input parameters...
        if verbose > 1:
            print("mask_branch length: " , len(mask_branch))
            print("dmag_branch length:  ", len(dmag_branch))
            print("color1_branch length:  ", len(color1_branch))
        p_branch, perr_branch, color1_branch, dmag_branch, res_branch, stddev_branch = \
                                poly_fit_with_sigma_clip(color1_branch, dmag_branch, degree=norder,
                                                         sigma=nsigma, maxiters=niter)

        # Print coefficients and estimated statistical errors in the coefficients
        #  ("+/-" is plain ASCII, so it cannot be garbled by an encoding mix-up)
        for i, (p, perr) in enumerate(zip(p_branch, perr_branch)):
            print(f'p_{len(p_branch)-i-1} = {p:.6f} +/- {perr:.6f}')

        # Prepare some text output for plots...
        #  Recall that np.polyfit returns the coefficients from highest order to lowest
        #  (This is opposite of the order the coefficients in older versions of this notebook
        #   that did not use np.polyfit for the polynomial fits)
        if norder == 1:
            plot1Text1 = """%s = %.3f + %.3f*%s [%.1f < %s <= %.1f] [rms: %.3f]""" % \
                (dmagName, p_branch[1], p_branch[0], colorLabel_1, \
                 color_limits_1_dict[band][ibranch], colorLabel_1, color_limits_1_dict[band][ibranch+1], \
                 stddev_branch)
        elif norder == 2:
            plot1Text1 = """%s = %.3f + %.3f*%s + %.3f*%s^2  [%.1f < %s <= %.1f] [rms: %.3f]""" % \
                (dmagName, p_branch[2], p_branch[1], colorLabel_1, p_branch[0], colorLabel_1, \
                 color_limits_1_dict[band][ibranch], colorLabel_1, color_limits_1_dict[band][ibranch+1], \
                 stddev_branch)
        else:
            # No text is prepared for higher orders
            plot1Text1 = ''

        plot1Text = """%s\n%s""" % (plot1Text, plot1Text1)

        print(plot1Text1)

        # Keep the sigma-clipped points and residuals of this branch...
        color1_list.append(color1_branch.copy())
        dmag_list.append(dmag_branch.copy())
        res_list.append(res_branch.copy())

    # If no branch could be fit, there is nothing to plot for this band...
    if not res_list:
        print("WARNING:  no branch could be fit for %s; skipping this band..." % (band))
        continue

    # Concatenate the color1, dmag, and res lists for all the branches...
    color1 = pd.Series(np.concatenate(color1_list))
    dmag = pd.Series(np.concatenate(dmag_list))
    res = pd.Series(np.concatenate(res_list))

    # Calculate the standard deviation for the full piecewise fit...
    stddev = res.std()


    # Output best fits to screen...
    if verbose > 0:
        print("")
        print(title)
        print(plot1Text)
        print("")

    # Create QA plots...
    outputFileName = """%s.dmag_%s-%s.%s.norder%d.qa1.png""" % \
        (qaFileBaseName, magName, magName_match, color_name_1, norder)
    status = transform1ColorQAPlots1a(dmag, color1, res, norder, title, plot1Text,
                                 dmagName, colorLabel_1, stddev, outputFileName)
    
            



In [None]:
# Stop a "Run All" here, so that the sandbox cells below are not executed...
raise StopExecution

## 12.  Sandbox

In [None]:
# Example with plotting:  redo the fit for a single band and overplot
#  the fit on the data...
import matplotlib.pyplot as plt

band = 'g'

magName_LSST = """%s_LSST""" % (band)
magErrName_LSST = """%s_err_LSST""" % (band)
magName_sdss = """%s_sdss""" % (band)
magErrName_sdss = """%s_err_sdss""" % (band)

# Color to fit against:  look it up for this band from the input
#  parameters, rather than relying on whatever the fit loop left behind...
color_name_1 = color_name_1_dict[magName_LSST if toLSST else magName_sdss]

# Grab the original version of df from the backup copy...
df = df_orig.copy()

# Grab the original version of mask from the backup copy...
mask = mask_orig.copy()

# Update dmag column for the difference between the LSST and SDSS magnitudes...
if toLSST:
    df.loc[:,'dmag'] = df.loc[:,magName_LSST] - df.loc[:,magName_sdss]
else:
    df.loc[:,'dmag'] = df.loc[:,magName_sdss] - df.loc[:,magName_LSST]

# Update mask...
# NOTE(review): these error limits are carried over unchanged from the
#  version of this cell that used another survey; they may need loosening
#  for SDSS -- confirm.
mask1 = abs(df['dmag']) <= 10.
mask2 = abs(df[magErrName_LSST]) <= 0.02
mask3 = abs(df[magErrName_sdss]) <= 0.01
mask = mask & mask1 & mask2 & mask3

# make a copy of original df, overwriting the old one...
df = df[mask].copy()

# Identify dmag and color1 series...
dmag =  df.loc[:,'dmag']
color1 = df.loc[:,color_name_1]


color1_array = color1
dmag_array = dmag

# Perform the fit.  The function returns the coefficients, their errors, the
#  points that survived the sigma-clipping, their residuals, and the rms.
coeffs, coeff_errors, color1_used, dmag_used, res, rms = \
    poly_fit_with_sigma_clip(color1_array, dmag_array, degree=1)

# Generate smooth curve for plotting
x_smooth = np.linspace(min(color1_array), max(color1_array), 100)
y_smooth = np.polyval(coeffs, x_smooth)

# Plot.  All points are drawn in red first and the used points are drawn
#  on top of them, so the points that remain red are the rejected ones.
plt.figure(figsize=(10, 6))
plt.scatter(color1_array, dmag_array, color='red', label='Rejected points')
plt.scatter(color1_used, dmag_used, label='Used points')
plt.plot(x_smooth, y_smooth, 'k-', label='Fit')
plt.xlabel('Color')
plt.ylabel('dmag')
plt.legend()
plt.title(f'Polynomial fit (RMS = {rms:.3f})')
plt.show()

# Print coefficients and their estimated errors
for i, (c, c_err) in enumerate(zip(coeffs, coeff_errors)):
    print(f'c_{len(coeffs)-i-1} = {c:.6f} +/- {c_err:.6f}')