In [None]:
import logging
from pathlib import Path

import numpy as np
import pandas as pd
from ebas.io.file.nasa_ames import EbasNasaAmes

import avoca
from avoca.export_nas import flags_to_ebas
from avoca.flags import QA_Flag

In [None]:
# Reverse lookup table: ebas flag -> avoca flag (the inverse of `flags_to_ebas`)
ebas_flag_to_avoca = dict(zip(flags_to_ebas.values(), flags_to_ebas.keys()))
ebas_flag_to_avoca

In [None]:
# Reader object for the nas file; reading it lets us check the file is correct
new_nas = EbasNasaAmes()

# Last submitted file, stored in the `data` directory beside the avoca package
file = Path(
    *avoca.__path__,
    "..",
    "data",
    "CH0001G.20240219123300.20240307132229.online_gc.NMHC.air.16d.61mn.CH01L_Agilent_GC-MS-MEDUSA_Medusa-12_JFJ.CH01L_gc_ms.lev0.nas",
)
new_nas.read(str(file), ignore_parameter=True)

In [None]:
"""Read the ebas file into a pandas dataframe for @voc@.

Doing this will remove some specific flag information.
In particular, @voc@ only accepts flags per compound and not per variable as in ebas.

Thus, for each compound, the flag combines the flags of all its variables.

@voc@ also requires a runtype for each run.
We use for that the calibration flag suggested by ebas.
We have to assume that this flag is the same for all compounds.
"""




# Convert the variables of the ebas file (`new_nas`) into a single dataframe
# with one (compound, variable) column per ebas variable, one (compound, "flag")
# column per compound and a ("-", "runtype") column, indexed by datetime.
logger = logging.getLogger(__name__)
clean_for_df = {}

data_level = new_nas.metadata["datalevel"]

compounds = []

for var in new_nas.variables:
    if "metadata" not in var:
        continue

    metadata = var["metadata"]
    if "comp_name" not in metadata:
        continue

    values = var["values_"]

    # Titles are expected to look like "<compound>_<variable>"
    title = metadata["title"].split("_")
    if len(title) != 2:
        print("passing", title)
        continue

    compound, variable = title
    if compound not in compounds:
        compounds.append(compound)

    clean_for_df[(compound, variable)] = values

    # Translate the ebas flags of each row into one avoca bitmask.
    # Bitwise OR (not a sum) so that a repeated flag cannot corrupt the mask.
    row_flags = []
    for flag_row in var["flags"]:
        combined = 0
        for ebas_flag in flag_row:
            combined |= ebas_flag_to_avoca[ebas_flag].value
        row_flags.append(combined)
    flag_serie = pd.Series(row_flags)

    # A compound has a single flag column: merge the flags of all its variables
    flag_col = (compound, "flag")
    if flag_col not in clean_for_df:
        clean_for_df[flag_col] = flag_serie
    else:
        clean_for_df[flag_col] |= flag_serie


# Use the start of the intervals as the datetime (use 1 for the end)
clean_for_df[("-", "datetime")] = [dt[0] for dt in new_nas.sample_times]

if not compounds:
    # Fail with an explicit message instead of an IndexError further down
    raise ValueError("No compound variables were found in the ebas file")

is_calibration = {
    compound: (QA_Flag.CALIBRATION.value & clean_for_df[(compound, "flag")]).astype(
        bool
    )
    for compound in compounds
}

# Check that all the calibration flags are the same as the reference compound
ref_compound = compounds[0]
ref_calib = is_calibration[ref_compound]
for compound in compounds[1:]:
    # Rows at which this compound disagrees with the reference compound.
    # Computed before the warning, which reports these rows.
    mask_different = is_calibration[ref_compound] != is_calibration[compound]
    if not mask_different.any():
        continue
    logger.warning(
        f"Calibration flags are not the same for all compounds: {compound} "
        f"is different from reference compound {ref_compound} "
        f"at rows {np.argwhere(mask_different.to_numpy()).reshape(-1)}"
    )
    # Combine the calibration flags of both compounds
    ref_calib = ref_calib | is_calibration[compound]

# Build the runtype from the combined mask, so that a run flagged as
# calibration for any compound is treated as a standard run.
runtype = ref_calib.map({True: "std", False: "air"})
clean_for_df[("-", "runtype")] = runtype

df_avoca = pd.DataFrame(clean_for_df).set_index(("-", "datetime"))
df_avoca

In [None]:


# Build a test settings object and display it.
# NOTE(review): `AvocaCheckSetting` is not imported in any visible cell, so this
# cell raises NameError on a fresh kernel unless it is imported elsewhere —
# add the import from its defining module.
data = AvocaCheckSetting(start="3d", name="test", assigner="test")

data

In [None]:
# Pick up edits made to imported modules without restarting the kernel.
# NOTE(review): such configuration usually belongs in a cell at the top of the notebook.
%load_ext autoreload
%autoreload 2

In [None]:
from avoca.factory import AssignerFactory
from avoca.settings import read_setting_file

# Locate the configuration relative to the avoca package instead of using a
# user-specific absolute path, so the notebook can run on any machine.
# NOTE(review): assumes the settings live in <repo>/data/.avoca/config.yaml,
# i.e. in the same `data` directory as the nas file read earlier — confirm.
config_file = Path(*avoca.__path__, "..", "data", ".avoca", "config.yaml").resolve()
settings = read_setting_file(str(config_file))

# One assigner per setting found in the configuration file
assigners = [AssignerFactory.create(setting) for setting in settings]

In [None]:
# Display the assigners created from the settings file
assigners