<img align="left" src = https://project.lsst.org/sites/default/files/Rubin-O-Logo_0.png width=250 style="padding: 10px"
alt="Rubin Observatory logo, a graphical representation of turning stars into data."> <br><br>
**Description:** Generate parameters for the _Rubin Observatory DP1_ paper. <br>
**Authors:** Leanne Guy, James Mullaney <br>
**Last verified to run:** 2025-05-14 <br>
**LSST Science Pipelines version:** v29.0.0<br>
**Container size:** medium <br>

# DP1 parameters

## Setup

In [1]:
# Install the `inflect` package, discarding both stdout and stderr.
# NOTE(review): no cell visible in this notebook imports `inflect`; the
# number-to-word conversion uses the hand-written `num2word` dict instead.
# Confirm this install is still needed.
! pip install inflect > /dev/null 2>&1

In [2]:
# Generic python packages
import os
import csv
import requests
import yaml
import numpy as np
import pandas as pd
import numpy as np
from pathlib import Path


# LSST Science Pipelines 
from lsst.daf.butler import Butler

# RSP
from lsst.rsp import get_tap_service

import warnings
warnings.filterwarnings("ignore")

## Define utility functions

In [3]:
# Lookup table from selected integers to their capitalised English words,
# for parameters that read better spelled out than written as digits.
# Only 0-20 and the multiples of ten up to 90 are covered.
num2word = {
    1: 'One', 2: 'Two', 3: 'Three', 4: 'Four', 5: 'Five',
    6: 'Six', 7: 'Seven', 8: 'Eight', 9: 'Nine', 10: 'Ten',
    11: 'Eleven', 12: 'Twelve', 13: 'Thirteen', 14: 'Fourteen',
    15: 'Fifteen', 16: 'Sixteen', 17: 'Seventeen', 18: 'Eighteen',
    19: 'Nineteen', 20: 'Twenty',
    30: 'Thirty', 40: 'Forty', 50: 'Fifty', 60: 'Sixty',
    70: 'Seventy', 80: 'Eighty', 90: 'Ninety',
    0: 'Zero',
}

In [4]:
# Function to round to N significant figures
def round_sf(x, sig=3):
    """
    Round a scalar to a given number of significant figures.

    Parameters
    ----------
    x : int or float
        The value to round.
    sig : int, optional
        Number of significant figures to keep (default 3).

    Returns
    -------
    The rounded value as returned by np.round, or x itself when x is zero.
    """
    # Zero has no order of magnitude: log10(0) is -inf, which int() cannot
    # convert. Zero is already "rounded", so hand it back unchanged.
    if x == 0:
        return x

    # Position of the leading digit relative to the decimal point.
    magnitude = int(np.floor(np.log10(abs(x))))
    return np.round(x, sig - magnitude - 1)

In [5]:
def formatParameter(params, name):
    '''
    Build the LaTeX newcommand line for one named parameter.

    params is the (values, units) tuple of dicts; name is the key to format.
    Float values are written with at most three decimal places, with trailing
    zeros (and a trailing decimal point) removed. Any other value is written
    as-is. Returns the complete line, terminated by a newline.
    '''
    rendered = params[0][name]
    if isinstance(rendered, float):
        fixed = f'{rendered:.3f}'
        rendered = fixed.rstrip('0').rstrip('.')

    suffix = ''
    if name in params[1]:
        suffix = params[1][name]
        # The arcsec/arcmin macros attach directly to the number; every other
        # unit is separated from the value.
        attachesDirectly = suffix in ['\\arcsec', '\\arcmin']
        if not attachesDirectly:
            suffix = f'\\xspace {suffix}'

    return f'\\newcommand{{\\{name}}}{{{rendered}{suffix}\\xspace}}\n'

In [6]:
def addParameter(params, name, value, unit=None, **kwargs):
    '''
    Store a named parameter in the params tuple of dictionaries.

    The value goes into params[0] under name. If a non-empty unit is given it
    goes into params[1] under the same name, so the two dictionaries always
    share their keys. When a 'sig' keyword is supplied, the value is first
    rounded with round_sf, which receives all extra keyword arguments.

    Returns the same params tuple that was passed in.
    '''
    stored = round_sf(value, **kwargs) if 'sig' in kwargs else value
    params[0][name] = stored

    if not unit:
        return params

    params[1][name] = unit
    return params

In [7]:
def imageStats(params, imageType):
    '''
    Record summary statistics for one image dataset type.

    Uses the notebook-level registry and butler. Only the first dataset
    returned by the registry query is opened, so the size, dimensions, plate
    scale and field of view describe that single image.

    Parameters added (with <name> = imageType stripped of underscores):
    - n<name>s: number of datasets of this type found by the registry query;
    - <name>hdd: on-disk size of the first dataset in MB;
    - n<name>pixx, n<name>pixy: number of pixels in x and y;
    - <name>platescale: pixel scale in arcsec per pixel;
    - <name>fovx, <name>fovy, <name>fov: field of view along each axis in
      arcmin, and its area in sq. arcmin.

    Returns the updated params tuple.
    '''
    tag = imageType.replace('_', '')

    # How many datasets of this type exist.
    datasetRefs = list(registry.queryDatasets(imageType))
    params = addParameter(params, f'n{tag}s', len(datasetRefs))

    # Everything below is measured on the first dataset only.
    firstRef = datasetRefs[0]

    # File size on disk, in MB, to two significant figures.
    uri = butler.getURI(firstRef)
    sizeMB = round_sf(os.path.getsize(uri.path) / 1e6, sig=2)
    params = addParameter(params, f'{tag}hdd', f'$\\sim${sizeMB:.0f}', unit='MB')

    # Pixel dimensions.
    image = butler.get(firstRef)
    dims = image.getDimensions()
    params = addParameter(params, f'n{tag}pixx', dims.x)
    params = addParameter(params, f'n{tag}pixy', dims.y)

    # Pixel scale from the image WCS.
    pixelScale = image.getWcs().getPixelScale()
    platescale = pixelScale.asArcseconds()
    params = addParameter(params, f'{tag}platescale', f'$\\sim${platescale:.1f}', unit='arcsec. per pixel')

    # Field of view: pixel count times pixel scale along each axis.
    arcminPerPixel = pixelScale.asArcminutes()
    fovx = dims.x * arcminPerPixel
    fovy = dims.y * arcminPerPixel
    params = addParameter(params, f'{tag}fovx', f'{fovx:.0f}', unit='\\arcmin')
    params = addParameter(params, f'{tag}fovy', f'{fovy:.0f}', unit='\\arcmin')
    roughArea = round_sf(fovx * fovy, sig=3)
    params = addParameter(params, f'{tag}fov', f'{roughArea:.0f}', unit='sq. arcmin.')

    return params

## Set up DP1 Butler

In [8]:
# Names used to configure the Butler for DP1.
instrument = 'LSSTComCam'
skymapName = 'lsst_cells_v1'
collections = [
    'LSSTComCam/DP1/defaults',
    'LSSTComCam/runs/DRP/DP1/v29_0_0/DM-50260',
    'skymaps',
]

# Open the DP1 repository with the instrument, collections and skymap above
# as defaults, then keep handles to its registry and to the skymap itself.
butler = Butler(
    "/repo/dp1",
    instrument=instrument,
    collections=collections,
    skymap=skymapName,
)
registry = butler.registry
skymap = butler.get('skyMap', skymap=skymapName)

In [9]:
# ConsDB: request the TAP service registered under the name "consdbtap".
cdb = get_tap_service("consdbtap")
# Fail immediately if no service object was returned, rather than at first use.
assert cdb is not None

## Set up parameters dictionary

In [10]:
# `params` is a 2-tuple of dicts that accumulates every parameter:
#   params[0] maps parameter name -> value
#   params[1] maps parameter name -> unit
#
# Keys are the parameter names used in the parameter.tex files, without the
# leading backslash.
#
# Add entries by passing `params` to addParameter, which takes the value and
# an optional unit.
params = ({}, {})

## Add the parameters in the manualParameters.csv file.
Some parameters either can't be generated automatically or take a long time to compute. Such parameters are contained in the `../manualParameters.csv` file, which is read in by the following block.

For those parameters that take a long time to compute, the code blocks that generate them can be found toward the bottom of this notebook. To recalculate one of these parameters, remove the corresponding line from the `../manualParameters.csv` file and re-run the notebook.


In [11]:
# Load the hand-maintained parameters. Each row is: name, value[, unit].
# Values are stored as the strings read from the file.
with open('../manualParameters.csv', newline='') as f:
    for rowNumber, row in enumerate(csv.reader(f, delimiter=','), start=1):
        # csv.reader yields an empty list for a blank line (e.g. a trailing
        # newline at the end of the file); there is nothing to add for it.
        if not row:
            continue
        if len(row) < 2:
            raise ValueError(
                f'../manualParameters.csv row {rowNumber} needs at least a name and a value: {row!r}'
            )
        name, value = row[0], row[1]
        # The unit column is optional; a missing or empty unit means "no unit".
        unit = row[2] if len(row) > 2 and row[2] != '' else None
        params = addParameter(params, name, value, unit=unit)

## Compute parameters

### Information about the observing campaign:

In [12]:
# Number of visits.
# Each query result is materialised into a list once, so it can be both
# counted and iterated without relying on the result being re-iterable.
visitRecords = list(registry.queryDimensionRecords('visit'))
params = addParameter(params, 'nvisits', len(visitRecords))

# Number of exposures. This should be the same as the number of visits,
# but it is counted from the exposure records so that any mismatch shows up.
exposureRecords = list(registry.queryDimensionRecords('exposure'))
params = addParameter(params, 'nexposures', len(exposureRecords))

# Number of target fields:
# Note: slew_icrs covered ECDFS, so they are the same field:
fields = {record.target_name for record in exposureRecords if record.target_name != 'slew_icrs'}
params = addParameter(params, 'nfields', num2word[len(fields)].lower())

# First and last date of DP1 observations:
visit_table = butler.get('visit_table')
firstVisitTime = min(visit_table['obsStart'])
lastVisitTime = max(visit_table['obsStart'])
lastVisitNight = lastVisitTime - np.timedelta64(1,'D') # The last night started the date before the last observation datetime.
params = addParameter(params, 'dp1startdate', np.datetime_as_string(firstVisitTime, unit='D'))
params = addParameter(params, 'dp1enddate', np.datetime_as_string(lastVisitNight, unit='D'))

# Median exposure time.
# Computed outside the f-string: reusing the same quote character inside an
# f-string expression is only valid syntax from Python 3.12 onwards.
medianExpTime = np.median(visit_table['expTime'])
params = addParameter(params, 'exposuretime', f'{medianExpTime:.0f}', unit='s')

### Observation quality

In [13]:
# Seeing statistics are taken only over visit/detector rows whose PSF model
# used more than 100 stars.
visit_detector_table = butler.get('visit_detector_table')
wellMeasured = visit_detector_table['nPsfStar']>100
goodSeeing = visit_detector_table['seeing'][wellMeasured]

# Best (smallest) seeing, to 2 significant figures:
minSeeing = np.min(goodSeeing)
params = addParameter(params, 'bestimagequality', minSeeing, unit='\\arcsec', sig=2)

# Median seeing over all bands, to 3 significant figures:
medSeeing = np.median(goodSeeing)
params = addParameter(params, 'medianimagequalityallbands', medSeeing, unit='\\arcsec', sig=3)

### Statistics for different types of image dataset

In [14]:
# Image dataset types to summarise with imageStats.
datasetNames = [
    'raw',
    'visit_image',
    'deep_coadd',
    'template_coadd',
    'difference_image',
]
for datasetName in datasetNames:
    params = imageStats(params, datasetName)

# The total number of pixels in a single deep_coadd image, in millions,
# built from the x/y pixel counts that imageStats stored for deep_coadd.
nCoaddPixX = params[0]['ndeepcoaddpixx']
nCoaddPixY = params[0]['ndeepcoaddpixy']
params = addParameter(params, 'ndeepcoaddpixtotal', nCoaddPixX*nCoaddPixY/1e6, sig=3, unit='million')

### Skymap data 

In [15]:
# Total number of tracts across the entire sky:
params = addParameter(params, 'ntotaltracts', len(skymap))

# The number of tracts covered by DP1:
tractRecords = list(registry.queryDimensionRecords('tract', where='visit > 0'))
tractIds = set([record.id for record in tractRecords])
params = addParameter(params, 'ntracts', len(tractIds))
params = addParameter(params, 'ncoveredtracts', len(tractIds))

# Average area of each tract: the full sky in sq. deg. divided by the number
# of tracts. Keep the unrounded value for the patch calculation below and
# round only the copy that is written out.
skyArea = 4*np.pi*(180./np.pi)**2
tractArea = skyArea / len(skymap)
roughTractArea = round_sf(tractArea, sig=2)
params = addParameter(params, 'tractarea', f'$\\sim${roughTractArea:.1f}', unit='sq. deg.')

# Number of patches:
numXPatches, numYPatches = skymap[0].getNumPatches()
numPatches = numXPatches * numYPatches
params = addParameter(params, 'npatchx', numXPatches)
params = addParameter(params, 'npatchy', numYPatches)
params = addParameter(params, 'npatch', numPatches)

# Area of each patch, converted from sq. deg. to sq. arcmin. (x3600).
# Derived from the unrounded tract area so that rounding happens only once.
patchArea = (tractArea / numPatches) * 3600.
roughPatchArea = round_sf(patchArea, sig=2)
params = addParameter(params, 'patcharea', f'$\\sim${roughPatchArea:.0f}', unit='sq. arcmin.')

### Visit image selection criteria for incorporation into deep coadd

In [16]:
# The seeing criterion is communicated in the config file:
# load the first 'selectDeepCoaddVisits_config' dataset found and record its
# maxPsfFwhm value, rounded to 2 significant figures.
# NOTE(review): assumes maxPsfFwhm is expressed in arcsec, as the unit written
# out below implies — confirm against the task config.
refs = list(registry.queryDatasets('selectDeepCoaddVisits_config'))
config = butler.get(refs[0])
params = addParameter(params, 'deepcoaddmaxfwhm', config.maxPsfFwhm, sig=2, unit='\\arcsec')

### The number of datasets of each type of catalog 

In [17]:
# Catalog dataset type -> name of the parameter holding how many datasets of
# that type the registry query returns.
catalogCountParams = {
    'source': 'nsourcecatalogs',
    'object': 'nobjectcatalogs',
    'dia_object': 'ndiaobjectcatalogs',
    'dia_source': 'ndiasourcecatalogs',
    'ss_source': 'nsolarsystemsourcecatalogs',
    'visit_table': 'nvisitsummarytables',
    'visit_detector_table': 'nvisitdetectorsummarytables',
    'object_forced_source': 'nobjectforcedcatalogs',
    'dia_object_forced_source': 'ndiaobjectforcedcatalogs',
}

for datasetType, paramName in catalogCountParams.items():
    refs = list(registry.queryDatasets(datasetType))
    params = addParameter(params, paramName, len(refs))

### The number of entries in the visit, visit detector, and solar system source tables 

In [18]:
# There is only one of each of these dataset types, so in each case the first
# (index 0) reference is loaded and its length recorded.

# Number of entries in the visit table:
refs = list(registry.queryDatasets('visit_table'))
table = butler.get(refs[0])
params = addParameter(params, 'nvisitsummaries', len(table))

# Number of entries in the visit detector table:
refs = list(registry.queryDatasets('visit_detector_table'))
table = butler.get(refs[0])
params = addParameter(params, 'nvisitdetectorsummaries', len(table))

# Number of entries in the solar system source catalog:
refs = list(registry.queryDatasets('ss_source'))
catalog = butler.get(refs[0]) # There is only 1 ss_source catalog in DP1.
params = addParameter(params, 'nsolarsystemsources', len(catalog))

### Miscellaneous

#### The number of raws that failed processing to visit images

In [19]:
# Report the number of failed raw-to-visit_image processings as the difference
# between the number of raws and the number of visit_images. Both counts must
# already be in params, i.e. imageStats has been run for 'raw' and 'visit_image'.
params = addParameter(params, 'nsfpfails', params[0]['nraws'] - params[0]['nvisitimages'])

## Longer calculations
The following code blocks take more than a few seconds to run, in some cases many minutes. Each block runs only if its parameter does not already exist in the `params` tuple, i.e., if the corresponding line has been removed from the `../manualParameters.csv` file read in above.

### Total area covered by DP1

In [20]:
# Takes around 10 minutes to run:
if 'totalarea' not in params[0]:
    # Count, over every r-band deep coadd, the pixels that are not flagged
    # NO_DATA in the mask plane.
    coaddRefs = list(registry.queryDatasets('deep_coadd', where="band = 'r'"))
    pixcount = 0
    for coaddRef in coaddRefs:
        im = butler.get('deep_coadd.mask', dataId = coaddRef.dataId)
        pixcount +=  im.array.size - np.sum((im.array & im.getPlaneBitMask('NO_DATA')) > 0)

    # Convert the pixel count to sq. deg.
    # NOTE(review): assumes a coadd pixel scale of 0.2 arcsec per pixel —
    # confirm against the coadd WCS.
    area = pixcount * ((0.2 / 3600)**2)

    # The params tuple must be passed as the first argument to addParameter.
    params = addParameter(params, 'totalarea', f'$\\sim${np.round(area):.0f}', unit='sq. deg.')

### Total number of objects

In [21]:
# Takes around 8s to run:
if 'nobjects' not in params[0]:

    # Load only the objectId column of every object catalog and add up the
    # catalog lengths.
    refs = list(registry.queryDatasets('object'))
    runningTotal = sum(
        len(butler.get(ref, parameters = {'columns':'objectId'}))
        for ref in refs
    )

    params = addParameter(params, 'nobjects', runningTotal/1e6, sig=2, unit='million')

### Total number of sources

In [22]:
# Takes around 1m20s to run:
if 'nsources' not in params[0]:

    # Load only the sourceId column of every source catalog and add up the
    # catalog lengths.
    refs = list(registry.queryDatasets('source'))
    runningTotal = sum(
        len(butler.get(ref, parameters = {'columns':'sourceId'}))
        for ref in refs
    )

    params = addParameter(params, 'nsources', runningTotal/1e6, sig=2, unit='million')

### Total number of DIA objects

In [23]:
# Takes less than a second, but placed here to group with similar tasks:
if 'ndiaobjects' not in params[0]:

    # Load only the diaObjectId column of every dia_object catalog and add up
    # the catalog lengths.
    refs = list(registry.queryDatasets('dia_object'))
    runningTotal = sum(
        len(butler.get(ref, parameters = {'columns':'diaObjectId'}))
        for ref in refs
    )

    params = addParameter(params, 'ndiaobjects', runningTotal/1e6, sig=2, unit='million')

### Total number of DIA sources

In [24]:
# Takes less than a second, but placed here to group with similar tasks:
if 'ndiasources' not in params[0]:

    # Load only the diaSourceId column of every dia_source catalog and add up
    # the catalog lengths.
    refs = list(registry.queryDatasets('dia_source'))
    runningTotal = sum(
        len(butler.get(ref, parameters = {'columns':'diaSourceId'}))
        for ref in refs
    )

    params = addParameter(params, 'ndiasources', runningTotal/1e6, sig=2, unit='million')

### Total number of forced objects and sources

In [25]:
# Takes around 15s to run:
if 'nforcedsources' not in params[0]:

    refs = list(registry.queryDatasets('object_forced_source'))
    runningTotalSrc, runningTotalObj = 0, 0
    for ref in refs:
        catalog = butler.get(ref, parameters = {'columns':'objectId'})
        objectIds = catalog.to_pandas()['objectId']
        # Every row is one forced source; distinct objectIds give the number
        # of objects within this catalog.
        runningTotalSrc += len(catalog)
        runningTotalObj += len(objectIds.unique())

    params = addParameter(params, 'nforcedsources', runningTotalSrc/1e6, sig=3, unit='million')
    params = addParameter(params, 'nforcedobjects', runningTotalObj/1e6, sig=2, unit='million')

### Total number of DIA forced objects and sources

In [26]:
# Takes around 1m30s to run:
if 'ndiaforcedsources' not in params[0]:

    refs = list(registry.queryDatasets('dia_object_forced_source'))
    runningTotalSrc, runningTotalObj = 0, 0
    for ref in refs:
        catalog = butler.get(ref, parameters = {'columns':'diaObjectId'})
        diaObjectIds = catalog.to_pandas()['diaObjectId']
        # Every row is one forced source; distinct diaObjectIds give the
        # number of DIA objects within this catalog.
        runningTotalSrc += len(catalog)
        runningTotalObj += len(diaObjectIds.unique())

    params = addParameter(params, 'ndiaforcedsources', runningTotalSrc/1e6, sig=3, unit='million')
    params = addParameter(params, 'ndiaforcedobjects', runningTotalObj/1e6, sig=2, unit='million')

### Total number of extended objects

In [27]:
# Takes around 10s to run:
if 'nextendedobjects' not in params[0]:

    # An object counts as extended if its extendedness is above 0.5, and not
    # masked, in at least one band.
    nGals = 0

    refs = list(registry.queryDatasets('object'))
    columns = [band + '_extendedness' for band in ['u', 'g', 'r', 'i', 'z', 'y']]
    for ref in refs:
        objectTable = butler.get(ref, parameters = {'columns':columns})
        if len(objectTable) > 0:
            # OR the per-band selections together, in band order.
            galSelection = None
            for column in columns:
                bandSelection = (objectTable[column] > 0.5) & ~objectTable[column].mask
                if galSelection is None:
                    galSelection = bandSelection
                else:
                    galSelection = galSelection | bandSelection
            nGals += np.sum(galSelection.data)

    params = addParameter(params, 'nextendedobjects', nGals/1e6, sig=2, unit='million')

## Write the parameters to file

In [28]:
# Comment header written at the top of the generated file.
headerLines = (
    '% These parameters are automatically generated by the dp1_parameters notebook.\n',
    '% Do NOT manually edit this file.\n',
    '% If you need to change/add a parameter, please edit the dp1_parameters notebook and re-run it.\n\n',
)

# Write the header followed by one formatted line per parameter, in the order
# the parameters were added. The with-block closes the file on exit.
with open("../parameters.tex", "w") as f:
    f.writelines(headerLines)
    f.writelines(formatParameter(params, name) for name in params[0])

## ConsDB access 

In [3]:
# JRM: I can't seem to access this. Do I need a token?
# Date range for campaign
# NOTE(review): list_butler is assigned here but never used as a Python value
# in this cell; it only appears as text inside the query string below.
list_butler = []
# NOTE(review): the three '#'-prefixed lines below are inside the string
# literal, so they are sent to the service as part of the query text rather
# than being skipped as Python comments — confirm the service accepts them.
# 'esposure_id' also looks like a typo for 'exposure_id'; check before
# enabling that clause.
query = '''SELECT * FROM cdb_lsstcomcam.exposure
#where esposure_id in list_butler
#ORDER BY day_obs DESC
#        '''
# Run the query through the ConsDB TAP service and convert the result to a
# pandas DataFrame.
exposures = cdb.search(query).to_table().to_pandas()
# Only the final expression of a cell is displayed, so the column listing on
# the next line produces no output.
exposures.columns
exposures['day_obs'].unique()