In [1]:
import json
import numpy as np
from pathlib import Path
import pandas as pd

# All folders of interest

In [2]:
def get_config_information(
        selected_dataset,
        target_name,
        data_type):
    """Collect the meta information of one observation into a flat dict.

    Args:
        selected_dataset: DataFrame with a ten-level row MultiIndex and
            two-level columns. Index level 0 is stored as ``Prog_ID`` and
            level 1 as ``Night_NR``; level 3 is matched against
            ``"OBJECT"`` / ``"SKY"`` and level 4 against ``"SCIENCE"``.
            Once these two levels are dropped, position 3 of the remaining
            index is read as the exposure time and positions 6 and 7 as the
            image size in pixels.
        target_name: Name stored unchanged under ``"target_name"``.
        data_type: One of ``"AGPM_T1"``, ``"AGPM_T2"``, ``"AGPM_T4"``,
            ``"DITHER_T1"`` or ``"DITHER_T2"``.

    Returns:
        dict with the keys ``target_name``, ``data_type``, ``Prog_ID``,
        ``Night_NR``, ``total_time`` (as a string), ``exp_time_science``,
        ``image_size_science`` and the ``cond_*`` observing conditions.
        ``exp_time_psf`` / ``image_size_psf`` are added for every type
        except ``DITHER_T1``; ``image_size_sky`` is added for ``AGPM_T2``
        and ``AGPM_T4``.

    Raises:
        ValueError: If ``data_type`` is not one of the known types.
        IndexError: If a PSF entry is required but fewer than two
            OBJECT/SCIENCE rows exist.
        NotImplementedError: For ``"AGPM_T1"``, which passes the type check
            but has no sky handling.
    """
    if data_type not in ["AGPM_T1", "AGPM_T2", "AGPM_T4", "DITHER_T2",
                         "DITHER_T1"]:
        raise ValueError("data_type unknown.")

    # Create the final dict
    summary_dict = dict()
    summary_dict["target_name"] = target_name
    summary_dict["data_type"] = data_type

    # Global information. Going through set()/list() iterates the index,
    # which yields native Python scalars that stay JSON serializable.
    summary_dict["Prog_ID"] = list(set(selected_dataset.index.get_level_values(0)))[0]
    summary_dict["Night_NR"] = list(set(selected_dataset.index.get_level_values(1)))[0]

    # Find Science and PSF data
    science_datasets = selected_dataset.xs(
        "OBJECT", level=3).xs(
        "SCIENCE", level=3)

    # Rank the rows by their summed time: the largest one is used as the
    # science entry, the second largest one as the PSF entry.
    time_idx_list = list(enumerate(science_datasets[("total_time", "sum")]))
    time_idx_list = sorted(time_idx_list, key=lambda x: x[1])
    summary_dict["total_time"] = str(time_idx_list[-1][1])

    # Check for the PSF data
    if data_type != "DITHER_T1":
        if len(time_idx_list) < 2:
            # Same exception type as the bare list lookup would raise, but
            # with a message that names the actual problem.
            raise IndexError(
                "No PSF entry found: data_type '{}' needs at least two "
                "OBJECT/SCIENCE rows, got {}.".format(
                    data_type, len(time_idx_list)))

        psf_idx = time_idx_list[-2][0]

        _, _, _, summary_dict["exp_time_psf"], _, _, pix_x_psf, pix_y_psf = \
            science_datasets.index[psf_idx]

        summary_dict["image_size_psf"] = (
            int(pix_x_psf),
            int(pix_y_psf))

    # Index entry and table row of the science data
    science_idx = time_idx_list[-1][0]
    _, _, _, summary_dict[
        "exp_time_science"], _, _, pix_x_science, pix_y_science = \
        science_datasets.index[science_idx]

    science_data = science_datasets.iloc[science_idx]

    if data_type in ["DITHER_T2", "DITHER_T1"]:
        # No sky image size is stored for the dither types
        pass

    elif data_type == "AGPM_T4":
        # The sky size is taken from the science entry itself
        summary_dict["image_size_sky"] = (
            int(pix_x_science),
            int(pix_y_science))

    elif data_type == "AGPM_T2":
        # The sky size is taken from the first SKY/SCIENCE row
        sky_datasets = selected_dataset.xs(
            "SKY", level=3).xs(
            "SCIENCE", level=3)
        _, _, _, _, _, _, pix_x_sky, pix_y_sky = \
            sky_datasets.index[0]

        summary_dict["image_size_sky"] = (int(pix_x_sky), int(pix_y_sky))
    else:
        raise NotImplementedError()

    summary_dict["image_size_science"] = (
        int(pix_x_science),
        int(pix_y_science))

    # additional data
    summary_dict["cond_wind_speed"] = science_data[('WIND_SPEED', 'mean')]
    summary_dict["cond_seeing_fwhm"] = science_data[('AMBI_FWHM', 'mean')]
    summary_dict["cond_water_vapor"] = science_data[('Water_Vapor', 'mean')]
    summary_dict["cond_airmass"] = science_data[('Airmass', 'mean')]
    summary_dict["cond_tau_0"] = science_data[('AVG_Tau0', 'mean')]
    summary_dict["cond_adi_rotation"] = science_data[('POSANG_START', 'Total_ADI_rotation')]

    return summary_dict

# 1.) Load the dataset tables

In [4]:
# Directory holding the pickled dataset tables that are read further below.
dataset_table_dir = Path("/fast/mbonse/NACO/50_code/dataset_tables/")
# Directory with one sub-folder per dataset; each sub-folder is expected to
# contain a meta_information.json file.
# NOTE(review): "rood" looks like a typo for "root"; the name is kept because
# the update loop references it.
rood_result_dir = Path("/fast/tgebhard/catnip/data/")

In [5]:
# Pairs of (pickle file name inside dataset_table_dir, data_type string passed
# to get_config_information).
setups = [("agpm_type2_table.pkl", "AGPM_T2"),
          ("agpm_type4_table.pkl", "AGPM_T4"),
          # DITHER_T1 is currently disabled (reason not recorded here):
          #("dither_type1_table.pkl", "DITHER_T1"),
          ("dither_type2_table.pkl", "DITHER_T2")]

In [6]:
# For every table and every (target, program, night) group: derive the
# configuration summary and merge it into the meta_information.json of the
# matching result folder.
for pkl_file, dataset_type in setups:
    tmp_subtable_file = dataset_table_dir / Path(pkl_file)
    # NOTE: unpickling can execute arbitrary code - only load trusted tables.
    tmp_subtable = pd.read_pickle(tmp_subtable_file)
    
    stars_of_interest = list(set(tmp_subtable.index.get_level_values(0)))
    
    for tmp_target_name in stars_of_interest:
        print(tmp_target_name)
        sel_dataset = tmp_subtable.xs(tmp_target_name, level=0)

        for name, tmp_df in sel_dataset.groupby(level=["Program ID", "Night_NR"]):
            tmp_json = get_config_information(tmp_df, tmp_target_name.replace(" ", "-"), dataset_type)
            # Make the program ID usable inside a folder name. The [:-1]
            # below drops the last character, presumably the trailing "_"
            # that replaces a closing parenthesis.
            clean_prog_id = tmp_json["Prog_ID"].replace("(", "_").replace(")", "_").replace(".", "_")
            tmp_dataset_name = tmp_json["target_name"] + "_" + str(tmp_json["Night_NR"]) + "_" + clean_prog_id[:-1]

            tmp_json_file = rood_result_dir / Path(tmp_dataset_name + "/meta_information.json")
            if not tmp_json_file.is_file():
                # No existing meta file for this dataset: report the path
                # and skip it.
                print(tmp_json_file)
                continue

            # Load the current json. It is read as UTF-8 because it is
            # written as UTF-8 with ensure_ascii=False below; the locale
            # default encoding could otherwise fail on non-ASCII content.
            with open(tmp_json_file, encoding='utf-8') as f:
                d = json.load(f)
            # Values already stored in the file take precedence over the
            # freshly derived ones when a key exists in both.
            tmp_json.update(d)

            # save the updated information
            with open(tmp_json_file, 'w', encoding='utf-8') as f:
                json.dump(tmp_json, f, ensure_ascii=False, indent=4)

HD209253
HD37806
HD222368
/fast/tgebhard/catnip/data/HD222368_192_1101_C-0092_G/meta_information.json
HD9672
HD190073
HD36112
/fast/tgebhard/catnip/data/HD36112_17_1101_C-0092_D/meta_information.json
HIP93449
/fast/tgebhard/catnip/data/HIP93449_158_1101_C-0092_A/meta_information.json
/fast/tgebhard/catnip/data/HIP93449_140_199_C-0065_A/meta_information.json
HD95881
HD188228
HD38120
HD870
HD183324
HD92945
HD61005
/fast/tgebhard/catnip/data/HD61005_15_096_C-0571_A/meta_information.json
HD131511
HD101412
HD172555
HD27290
GAIA DR3 3314312226646863488
HD7570
HD16743
HD107146
GAIA DR3 6731196273845607168
HD115892
HD221853
HD191849
HD145964
HD166348
HD163296
HD11171
HD126062
HD40136
HD19994
HD197481
GAIA DR3 6049145880875631744
HD10472
HD72106
HD135379
HD1466
HD152404
HD90905
HD202730
HD1581
/fast/tgebhard/catnip/data/HD1581_294_1101_C-0092_C/meta_information.json
HD100546
HD97048
HD145689
HD38206
GJ699
HD142527
HD36910
HD12039
HD69830
HD223352
HD213398
HD169142
HD5133
HD179520
HD197890
HD342