# Extract LATISS postISRCCD exposures taken in spectroscopy mode, convert them to FITS and view them with Firefly

- Goal: view AuxTel images
- Restart the Kernel first

- author : Sylvie Dagoret-campagne
- affiliation : IJCLab/in2p3/CNRS
- creation date : 2025-02-06
- update : 2025-02-06

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.ticker
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm, SymLogNorm
from matplotlib.colors import ListedColormap
from matplotlib import colors

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.gridspec as gridspec
from spectractor.tools import from_lambda_to_colormap, wavelength_to_rgb

# %matplotlib widget
import h5py
from scipy import interpolate
from astropy.time import Time
from datetime import datetime, timedelta
import seaborn as sns

from itertools import cycle, islice
import os
from astropy.io import fits

In [None]:
# Notebook-wide matplotlib defaults: wide figures, extra-large axis and tick
# labels, 12 pt legend and base font.
notebook_rc_settings = {
    "figure.figsize": (18, 8),
    "axes.labelsize": "xx-large",
    "axes.titlesize": "xx-large",
    "xtick.labelsize": "xx-large",
    "ytick.labelsize": "xx-large",
    "legend.fontsize": 12,
    "font.size": 12,
}
for rc_name, rc_value in notebook_rc_settings.items():
    plt.rcParams[rc_name] = rc_value

In [None]:
from matplotlib.ticker import MultipleLocator, FormatStrFormatter, AutoMinorLocator

from astropy.visualization import (
    MinMaxInterval,
    SqrtStretch,
    ZScaleInterval,
    PercentileInterval,
    ImageNormalize,
    imshow_norm,
)
from astropy.visualization.stretch import (
    SinhStretch,
    LinearStretch,
    AsinhStretch,
    LogStretch,
)

# Composite astropy visualization transform built from an asinh stretch and a
# 99% percentile interval (the order in which the two are applied is decided
# by astropy's `+` operator — TODO confirm against the astropy documentation).
transform = AsinhStretch() + PercentileInterval(99.0)

In [None]:
from lsst.daf.butler import Butler
import astropy.units as u
import numpy as np
import pandas as pd

pd.set_option("display.max_columns", None)
from astropy.time import Time

import scipy.stats

import matplotlib

%matplotlib inline
from matplotlib import pyplot as plt

In [None]:
# LSST Display
# The advantage of using firefly is that the firefly display can handle a lot of images
import lsst.afw.display as afwDisplay

# Every afwDisplay.Display created afterwards uses the firefly backend.
afwDisplay.setDefaultBackend("firefly")

## Configuration

- Use the tag report to guess the collections in which we expect data: https://usdf-rsp-dev.slac.stanford.edu/times-square/github/lsst-dm/vv-team-notebooks/TargetReport


In [None]:
# Butler repository label and instrument name.
repo = "embargo"
instrument = "LATISS"
# First butler, created without any default collection; it is used to browse
# the collections available in the repository.
butler = Butler(repo)

# Define a first collection; its path is checked right after by listing the
# collections present in the registry.
collection = "u/jneveu/holo_202501_BG40_order3_v3.1.0_doGainsPTC_rebin1_sigmaClip5/20250204T104600Z"

## Query about the collections available

- select collections from Jeremy

In [None]:
# Print the registry collections whose name contains "u/jneveu".
# A descriptive loop variable is used instead of `_`: `_` conventionally marks
# a value that is never read, and IPython also uses it for the last output.
for collection_name in butler.registry.queryCollections():
    if "u/jneveu" in collection_name:
        print(collection_name)

- From the list available today, we probably have to look inside the collection `LSSTComCam/nightlyValidation`

## Select the collection

In [None]:
# Collection passed to the dataset queries of the following cells.
the_collection = collection

## Create the butler on the selected collection

In [None]:
# Re-create the butler with the selected collection as its default collection
# and keep a shortcut to its registry.
butler = Butler(repo, collections=the_collection)
registry = butler.registry

### Below I check all the data structures that are inside the selected collection

- it includes the wanted `postISRCCD`
- it includes also all the spectractor data structures

In [None]:
# Print every dataset type that has at least one dataset in the selected
# collection, leaving out the bookkeeping products (configs, logs, task
# metadata and resource-usage reports).
bookkeeping_markers = ("_config", "_log", "_metadata", "_resource_usage")

for datasetType in registry.queryDatasetTypes():
    # Cheap name test first, so that the registry is only queried for the
    # dataset types that would actually be printed.
    if any(marker in datasetType.name for marker in bookkeeping_markers):
        continue
    if registry.queryDatasets(datasetType, collections=the_collection).any(
        execute=False, exact=False
    ):
        print(datasetType)

- Notice that no visit table is available. Thus, to know which visitIds exist, we will use the registry later

### Here I want a list of postISRCCD exposures inside the collection

- can provide the date if wanted
- the required data are stored in the records list and refs list 

In [None]:
# Night to inspect, written as an integer such as 20250114; later cells reuse
# DATE to select the observations and to name the output directory.
DATE = 20250114
# Restrict the registry queries to the LATISS exposures of that night.
# (A first, date-less `where_expr` assignment was dropped: it was overwritten
# by this one before ever being used.)
where_expr = f"instrument='LATISS' AND exposure.day_obs={DATE}"

# Visit dimension records that have a postISRCCD dataset in the collection.
records = list(
    butler.registry.queryDimensionRecords(
        "visit", datasets="postISRCCD", where=where_expr, collections=the_collection
    )
)
# Matching dataset references; going through set() drops duplicated references.
refs = list(
    set(
        butler.registry.queryDatasets(
            "postISRCCD", where=where_expr, collections=the_collection
        )
    )
)

In [None]:
records[0]

In [None]:
refs[0]

In [None]:
print(f"Nexposures = {len(refs)}")

## Extract the list of visitId from the butler's registry 

- The goal just below is to have a mini-database where the exposures are
  listed with a lot of information
- This mini-database of exposures is built from the registry

### Get the list of information that can be extracted from the registry for each exposure

In [None]:
# Show the fields defined on the record class of the "exposure" dimension.
print(registry.dimensions["exposure"].RecordClass.fields)

### Create the pandas dataframe from the information inside the butler's registry

In [None]:
# Column layout of the exposure summary table; the table starts empty and is
# filled one row per exposure afterwards.
exposure_columns = (
    "id obs_id day_obs seq_num time_start time_end type target filter "
    "zenith_angle expos ra dec skyangle azimuth zenith science_program jd mjd"
).split()
df_exposure = pd.DataFrame(columns=exposure_columns)

### Fill the pandas dataframe from the records inside the butler's registry for each exposure

In [None]:
# Selection used to fill the exposure table: LATISS exposures with
# day_obs >= 20250101.
# NOTE: this overwrites the single-night `where_expr` defined earlier, and the
# new value stays in effect for every later cell that reads `where_expr`.
where_expr = "instrument='LATISS' AND exposure.day_obs>=20250101"

In [None]:
# Fill df_exposure with one row per exposure record returned by the registry
# and print a short preview of the first two records.

# Placeholder values stored when the timespan of a record cannot be converted.
fallback_begin_ = pd.to_datetime("2021-01-01 00:00:00.00")
fallback_end_ = pd.to_datetime("2051-01-01 00:00:00.00")

for count, info in enumerate(
    registry.queryDimensionRecords("exposure", where=where_expr)
):
    # Only the timespan conversion is guarded. The handler reports the
    # exception type directly instead of going through sys.exc_info(): `sys`
    # is not imported by the cells of this notebook, so that call raised
    # NameError and the fallback row was never written.
    try:
        timespan_ = info.timespan
        timespan_begin_ = pd.to_datetime(timespan_.begin.to_datetime())
        timespan_end_ = pd.to_datetime(timespan_.end.to_datetime())
        timespan_jd_ = timespan_.begin.jd
        timespan_mjd_ = timespan_.begin.mjd
    except Exception as exc:  # best effort: keep the row, with placeholder times
        print(">>>   Unexpected error:", type(exc))
        timespan_begin_ = fallback_begin_
        timespan_end_ = fallback_end_
        timespan_jd_ = 0
        timespan_mjd_ = 0

    # The order of the values must follow the column order of df_exposure.
    df_exposure.loc[count] = [
        info.id,
        info.obs_id,
        info.day_obs,
        info.seq_num,
        timespan_begin_,
        timespan_end_,
        info.observation_type,
        info.target_name,
        info.physical_filter,
        info.zenith_angle,
        info.exposure_time,
        info.tracking_ra,
        info.tracking_dec,
        info.sky_angle,
        info.azimuth,
        # the "zenith" column receives the same value as "zenith_angle"
        info.zenith_angle,
        info.science_program,
        timespan_jd_,
        timespan_mjd_,
    ]

    # Preview of the first two records only.
    if count < 2:
        print(
            "-----------------------------------------------------",
            count,
            "---------------------------------------------------------",
        )
        print(info)
        print("\t id:                  ", info.id)
        print("\t day_obs:             ", info.day_obs)
        print("\t seq_num:             ", info.seq_num)
        print("\t type-of-observation: ", info.observation_type)
        print("\t target:              ", info.target_name)
        # Reuse the values computed above (placeholders included) rather than
        # converting the timespan a second time without protection.
        print("MJD,JD : ", timespan_mjd_, timespan_jd_)

#### convert some columns into integer

In [None]:
# Cast the id, day_obs and seq_num columns to int.
df_exposure = df_exposure.astype({"id": int, "day_obs": int, "seq_num": int})

### Find the different types of exposures

In [None]:
df_exposure["type"].unique()

### Select the science exposures

In [None]:
# Keep the rows whose observation type is "science".
is_science = df_exposure["type"] == "science"
df_science = df_exposure[is_science]

In [None]:
df_science

In [None]:
df_science["science_program"].unique()

In [None]:
# Narrow the science exposures down to the "spec-survey" science program.
is_spec_survey = df_science["science_program"] == "spec-survey"
df_science = df_science[is_spec_survey]

In [None]:
df_science

### Select the observation corresponding to the date

In [None]:
# Keep the science exposures whose day_obs equals DATE.
taken_on_date = df_science["day_obs"] == DATE
df_dateobs = df_science[taken_on_date]

In [None]:
df_dateobs

### Select the observations taken with holo4

In [None]:
def select_holo(row):
    """Tell whether the filter entry of a table row contains "holo".

    Parameters
    ----------
    row : mapping
        Row-like object (for instance a dict or a pandas Series) that can be
        indexed with the key ``"filter"``.

    Returns
    -------
    bool
        True when ``"holo"`` is found in ``row["filter"]``, False otherwise.
    """
    # The membership test already yields the boolean to return.
    return "holo" in row["filter"]

In [None]:
# Filter names kept for the rest of the notebook.
holo_filter_names = [
    "empty~holo4_003",
    "BG40_65mm_1~holo4_003",
    "OG550_65mm_1~holo4_003",
]
# Boolean mask over df_dateobs, True where the filter is one of the names above.
cut = df_dateobs["filter"].isin(holo_filter_names)

In [None]:
# Alternative selection kept for reference only (not executed):
# df_dateobs["filter"].apply((lambda x : select_holo(x["filter"])),axis=1,result_type='expand')
# Keep the rows of df_dateobs flagged by the boolean mask `cut`.
df_holo = df_dateobs[cut]

In [None]:
df_holo

### Select a date and a visitId

### Extract the postISRCCD exposures from the selected visitId

In [None]:
# NOTE(review): `where_expr` holds whatever the most recently executed cell
# assigned to it, which may cover more than the single night DATE. If only the
# exposures of DATE are wanted, set it first, e.g.
#   where_expr = f"instrument='LATISS' AND exposure.day_obs={DATE}"
# TODO confirm the intended selection.
datasetRefs = registry.queryDatasets(
    "postISRCCD", where=where_expr, collections=the_collection
)
# One dictionary per kind of information, each keyed by the exposure id
title_dict = {}
postisr_dict = {}
header_dict = {}

for ref in datasetRefs:
    exposure = ref.dataId["exposure"]
    detector = ref.dataId["detector"]
    physical_filter = ref.dataId["physical_filter"]
    # retrieve the postISRCCD
    postisrccd = butler.get(ref)
    # The raw image is read for its header. It is looked up with the detector
    # of the postISRCCD reference rather than a hard-coded detector number.
    raw_img = butler.get(
        "raw",
        dataId={"exposure": exposure, "instrument": "LATISS", "detector": detector},
        collections=[
            "LATISS/calib",
            "LATISS/raw/all",
        ],
    )
    header = raw_img.getMetadata().toDict()
    print(ref.dataId, postisrccd)
    the_title = f"id : {exposure}, det = {detector}, b = {physical_filter}"
    title_dict[exposure] = the_title
    postisr_dict[exposure] = postisrccd
    header_dict[exposure] = header
N = len(title_dict)
print(f"Number of images = {N}")

### Simple summary

In [None]:
N

In [None]:
# Exposure ids, in the order in which they were inserted into title_dict.
list_of_exposures = [exposure_id for exposure_id in title_dict]

## View all the images in Firefly

In [None]:
# Send every postISRCCD image to its own Firefly frame, numbered from 0.
for frame_index, exposure_id in enumerate(list_of_exposures):
    image_to_show = postisr_dict[exposure_id].image
    frame_title = title_dict[exposure_id]
    display = afwDisplay.Display(frame=frame_index)
    display.scale("asinh", "zscale")
    display.mtv(image_to_show, title=frame_title)

### Clear Firefly here

In [None]:
# Disabled cell: change the condition to a true value to clear the Firefly
# viewer and select the firefly backend again.
if 0:
    display.clearViewer()
    afwDisplay.setDefaultBackend("firefly")

## Convert into fits

In [None]:
# Output tree: ./output/<DATE>/
top_path_out = "./output"
sub_path_out = f"{top_path_out}/{DATE}"
# makedirs creates the missing parent directory as well, and exist_ok=True
# makes the call a no-op when the directories are already there, so no
# separate existence check is needed.
os.makedirs(sub_path_out, exist_ok=True)

In [None]:
# Write one FITS file per exposure: the header comes from the raw image
# metadata, the pixels from the postISRCCD image rotated by 180 degrees.

# Values written to the header when a weather or seeing keyword is absent or
# has no value.
default_header_values = {
    "AIRTEMP": 10.0,
    "PRESSURE": 744.0,
    "HUMIDITY": 50.0,
    "WINDSPD": 5.0,
    "WINDDIR": 0.0,
    "SEEING": 0.9,
}

for expos in list_of_exposures:
    the_image = postisr_dict[expos].image
    the_md = header_dict[expos]
    rotated_array = the_image.array[::-1, ::-1]  # rotate the array 180 degrees

    # The target MU-COL is exported under the name HD38666 (this also updates
    # the dictionary stored in header_dict).
    if the_md["OBJECT"] == "MU-COL":
        the_md["OBJECT"] = "HD38666"
    the_object = the_md["OBJECT"]

    filename_out = f"exposure_{expos}_dmpostisrccd.fits"
    fullfilename_out = os.path.join(sub_path_out, filename_out)

    print(f">>>>  output filename {filename_out} object {the_object}")

    # Header in output file
    hdr = fits.Header()

    for key, value in the_md.items():
        if key == "OBJECT":
            print(key, value)

        # Only keywords of at most 8 characters are copied; longer ones are
        # reported and skipped.
        if len(key) <= 8:
            hdr[str(key)] = value
        else:
            print(f"Skip key {key}")

    # need this
    hdr["AMEND"] = hdr["AMSTART"]

    # Be aware that weather data may be missing: a keyword that is absent from
    # the header, or present without a value, receives its default.
    for key, default_value in default_header_values.items():
        if key not in hdr or hdr[key] is None:
            hdr[key] = default_value
            print(f"{key} key missing")

    # Be careful: for Spectractor standalone, 2 HDUs are necessary
    # (a primary HDU carrying the header, then an image HDU with the pixels).
    primary_hdu = fits.PrimaryHDU(header=hdr)
    image_hdu = fits.ImageHDU(rotated_array)
    hdu_list = fits.HDUList([primary_hdu, image_hdu])
    hdu_list.writeto(fullfilename_out, overwrite=True)

print("finished")