In [None]:
from pathlib import Path

from pandas import (
    read_csv,
)

from post_processing.dataclass.data_aplose import DataAplose
from post_processing.utils.fpod_utils import resample_dpm, build_range, process_tl, filter_tl, preserved_data

### Load data
DPM = Detection Positive Minutes \
FB = Feeding buzzes \
🐬 = input to modify


In [None]:
# Detection data described by the YAML configuration file.
# Change the file path in the yaml sheet. 🐬
yaml_file = Path(r"resource\CPOD-FPOD_yaml.yml")
data_list = DataAplose.from_yaml(file=yaml_file)

# Beginning and end of recording for every phase. 🐬
d_beg_end = read_csv(r"U:\Deb_Fin_CA4.csv")

# Time-lost table produced by process_tl from tl_path; only the first row
# is kept for each distinct 'ChunkEnd' value.
tl_path = Path(r"U:\TimeLost\tl_ca4\phases")
tl_df = process_tl(tl_path).drop_duplicates(subset=["ChunkEnd"], keep="first")

### Data metric
Choose the time resolution at which you want to visualise the positive detections. This is intended to replace the functionality available in Chelonia's software.

In [None]:
# Time resolution at which the data are visualised; passed to resample_dpm
# and build_range, and used to name the f"DP{frq}" column.
# Use "D", "h" or "10min". 🐬
frq = "h"
# %TimeLost threshold passed to filter_tl.
# If you do not want to set a filter, set tl to 100.
tl = 100

In [None]:
# Resample your DPMs according to the chosen frq.
# `cols` pairs each column with the aggregation requested for it
# (presumably applied per time bin by resample_dpm).
resamp = resample_dpm(
    data_list.df,
    frq=frq,
    cols={
        "DPM": "sum",
        "Foraging": "sum",
        "deploy.name": "first",
    },
)

### Feeding buzzes processing
Use "Dauphin", "Marsouin" or "Commerson" to get different ICI processing.

In [None]:
# Create a dataframe from beginning to end of every phase filled with 0s.
d_0 = build_range(d_beg_end, frq)

# Left join: every row of d_0 is kept, and rows without a matching
# (start_datetime, deploy.name) pair in resamp get NaN in resamp's columns.
d_tot = d_0.merge(
    resamp,
    how="left",
    on=["start_datetime", "deploy.name"],
)

### Add additional metrics columns

In [None]:
# Missing counts (NaN) are treated as zero.
d_tot[["DPM", "Foraging"]] = d_tot[["DPM", "Foraging"]].fillna(0)

# The Feeding Buzz Ratio corresponds to the proportion of FB among the
# recorded clicks. Bins where the division yields NaN (0 / 0) are set to 0.
d_tot["FBR"] = d_tot["Foraging"].div(d_tot["DPM"]).fillna(0)

# Detection-positive flag at the chosen resolution:
# 1 if the time bin counts at least 1 DPM, else 0.
d_tot[f"DP{frq}"] = d_tot["DPM"].gt(0).astype(int)

In [None]:
# Feeding-buzz-positive flag: 1 if the time bin has Foraging > 0, else 0.
# (The "DPH_fb" / "FBR_h" column names are kept as-is so downstream
# consumers of the exported file are not affected.)
d_tot["DPH_fb"] = (d_tot["Foraging"] > 0).astype(int)

# Ratio of feeding-buzz-positive bins to detection-positive bins.
# The detection-positive column is created under the name f"DP{frq}"
# (e.g. "DPh" for frq="h", "DPD" for frq="D"), so it must be read back with
# that same name: a hard-coded "DPH" label does not match any of them and
# raises a KeyError.
d_tot["FBR_h"] = d_tot["DPH_fb"] / d_tot[f"DP{frq}"]

# Bins without any detection give 0 / 0 = NaN; report them as 0.
d_tot["FBR_h"] = d_tot["FBR_h"].fillna(0)

### Add time columns

In [None]:
# Calendar components of each bin's start time, plus the bin's month ('YM')
# and day ('Date') expressed as the timestamp at the start of that period.
d_tot = d_tot.assign(
    Year=d_tot["start_datetime"].dt.year,
    Month=d_tot["start_datetime"].dt.month,
    Day=d_tot["start_datetime"].dt.day,
    Hour=d_tot["start_datetime"].dt.hour,
    YM=d_tot["start_datetime"].dt.to_period("M").dt.to_timestamp(),
    Date=d_tot["start_datetime"].dt.to_period("D").dt.to_timestamp(),
)

### Filter your data
Choose a threshold of %TimeLost to remove all data exceeding it.

In [None]:
# Attach the %TimeLost value of each time bin to the metrics table.
# Left join: bins without a matching row in tl_df get NaN in "%TimeLost".
# NOTE(review): the join key is "start_datetime" only (no "deploy.name");
# if tl_df holds several rows with the same start_datetime, the matching
# rows of d_tot will be duplicated — confirm this is intended.
full_df = d_tot.merge(
    tl_df[["start_datetime", "%TimeLost"]],
    how="left",
    on="start_datetime",
)

In [None]:
# Apply the %TimeLost threshold `tl` to the merged table.
# (Presumably drops rows whose %TimeLost exceeds tl — confirm in filter_tl.)
filtered_df = filter_tl(full_df, tl)
# Compare the filtered table with the unfiltered one.
# (Presumably reports how much data was preserved — confirm in preserved_data.)
preserved_data(filtered_df, full_df)

### Extract your processed data
This dataframe is now compatible with analyses in the next notebook and in R.

In [None]:
# Write the filtered table to CSV without the index column.
# Name your file. 🐬
filtered_df.to_csv(r"U:\Hours_DPM_FBUZZ_CA4.csv", index=False)