In [1]:
from pathlib import Path

import pytz
from pandas import (
    concat,
    read_csv,
    to_datetime,
)

from post_processing.dataclass.data_aplose import DataAplose
from post_processing.utils.fpod_utils import csv_folder, cpod2aplose, fpod2aplose, actual_data, meta_cut_aplose, dpm_to_dp10m, dpm_to_dph, dpm_to_dpd,fb_folder, build_range, extract_site, percent_calc, site_percent, year_percent, ym_percent, month_percent, hour_percent
from post_processing.utils.core_utils import json2df,get_season

### Pre-processing



Import your csv files. All files for one site must be stored in the same folder.

In [2]:
# Read the POD exports of one site through the project helper (all files of the site
# are expected in this folder, see the note above).
# NOTE(review): despite its name, `path` is the table returned by csv_folder
# (it is used with .head() and .dropna() below), not a filesystem path.
path = csv_folder(r"U:\Cetiroise")
print(path.head())

# Keep only the rows that have no missing value in any column.
df_0 = path.dropna()

                                                File  podN          ChunkEnd  \
0  CETIROISEPHASE4POINTC 2023 02 03 FPOD_6669 fil...  6669  07/02/2023 05:34   
1  CETIROISEPHASE4POINTC 2023 02 03 FPOD_6669 fil...  6669  07/02/2023 05:35   
2  CETIROISEPHASE4POINTC 2023 02 03 FPOD_6669 fil...  6669  07/02/2023 05:36   
3  CETIROISEPHASE4POINTC 2023 02 03 FPOD_6669 fil...  6669  07/02/2023 22:40   
4  CETIROISEPHASE4POINTC 2023 02 03 FPOD_6669 fil...  6669  24/02/2023 15:32   

     Minute  DPM  MinsOn      deploy.name  
0  64748494    1       1  Point C_Phase 4  
1  64748495    1       1  Point C_Phase 4  
2  64748496    1       1  Point C_Phase 4  
3  64749520    1       1  Point C_Phase 4  
4  64773572    1       1  Point C_Phase 4  


In [3]:
# Summarise the recordings from their "ChunkEnd" timestamps; the result is passed to
# build_range further down to rebuild the full time axis.
# NOTE(review): presumably the begin/end of effective recording per deployment —
# confirm against actual_data in fpod_utils.
d_beg_end = actual_data(df_0, col_timestamp="ChunkEnd")

In [4]:
# Keep only the rows whose DPM value is not zero.
df_1 = df_0.loc[df_0["DPM"].ne(0)]

### APLOSE format

Choose the right function, depending on the instrument you are working with.

#### *CPOD*

In [5]:
# C-POD variant: convert the filtered detections to the APLOSE table layout.
# The two string arguments end up in the `dataset` and `annotation` columns of the
# result (see the printed preview); timestamps are expressed in UTC (pytz.utc).
df_aplose = cpod2aplose(df_1, pytz.utc, "Site D Simone", "Commerson")
print(df_aplose.head())

         dataset filename  start_time  end_time  start_frequency  \
0  Site D Simone                    0        60                0   
1  Site D Simone                    0        60                0   
2  Site D Simone                    0        60                0   
3  Site D Simone                    0        60                0   
4  Site D Simone                    0        60                0   

   end_frequency annotation annotator                start_datetime  \
0              0  Commerson      FPOD  2023-02-07T05:34:00.000+0000   
1              0  Commerson      FPOD  2023-02-07T05:35:00.000+0000   
2              0  Commerson      FPOD  2023-02-07T05:36:00.000+0000   
3              0  Commerson      FPOD  2023-02-07T22:40:00.000+0000   
4              0  Commerson      FPOD  2023-02-24T15:32:00.000+0000   

                   end_datetime  is_box      deploy.name  
0  2023-02-07T05:35:00.000+0000       0  Point C_Phase 4  
1  2023-02-07T05:36:00.000+0000       0  Point

#### *FPOD*

In [None]:
# F-POD variant: convert the filtered detections to the APLOSE table layout.
# This assigns the same `df_aplose` name as the C-POD cell, so running both cells keeps
# only this result — run the one that matches your instrument.
# NOTE(review): the string arguments presumably fill the `dataset` and `annotation`
# columns, as in the C-POD variant — confirm against fpod2aplose.
df_aplose = fpod2aplose(df_1, pytz.utc, "CETIROISE", "Marsouin")
print(df_aplose.head())

### Remove non usable lines

Import the .json file available on metadatax.

In [None]:
# Path to your metadata file. Named `json_path` (not `json`) so that it does not
# shadow the standard-library module of the same name.
json_path = Path(r"C:\Users\fouinel\Downloads\deployment_cetiroise.json")
metadatax = json2df(json_path=json_path)

# Build a "<site>_<campaign>" key in "deploy.name".
# NOTE(review): presumably this must match the "deploy.name" column of the detection
# table (e.g. "Point C_Phase 4") so that meta_cut_aplose can pair both tables — confirm.
metadatax["deploy.name"] = (
    metadatax["site.name"].astype(str)
    + "_"
    + metadatax["campaign.name"].astype(str)
)

# Remove the lines captured outside the instrument submersion period.
cleared = meta_cut_aplose(df_aplose, metadatax)

Export your file to the aplose format. You can change the name of the file to match the project you are working on.

In [None]:
# Write the cleaned APLOSE table to disk, without the index column.
cleared.to_csv(r"U:\APLOSE_CETIROISE_pos.csv", index=False) #You can store all the DPM of a site in a single DataAplose file.

### Load data

Use the yaml file to import your aplose files one at a time.

In [None]:
# Load the APLOSE file(s) described in the yaml configuration.
# NOTE(review): `data_list` is used as a single object exposing a `.df` table, not as a
# Python list — confirm what DataAplose.from_yaml returns when several files are listed.
yaml_file = Path(r"resource\CPOD-FPOD_yaml.yml")
data_list = DataAplose.from_yaml(file=yaml_file)
print(data_list.df.head())

### Format choice


Here you need to choose the format in which you want to visualise the positive detections. This aims to replace the functionality available in CPOD.exe.

In [None]:
# 10-minute format; "deploy.name" is passed as an extra column to carry along.
# NOTE(review): presumably aggregates the minute-level detections into 10-minute bins —
# confirm against dpm_to_dp10m.
dp10 = dpm_to_dp10m(data_list.df, extra_columns=["deploy.name"])

In [None]:
# Hourly format; "deploy.name" is passed as an extra column to carry along.
# NOTE(review): presumably aggregates the minute-level detections into hourly bins —
# confirm against dpm_to_dph.
dph = dpm_to_dph(data_list.df, extra_columns=["deploy.name"])

In [None]:
# Daily format; "deploy.name" is passed as an extra column to carry along.
# NOTE(review): presumably aggregates the minute-level detections into daily bins —
# confirm against dpm_to_dpd.
dpd = dpm_to_dpd(data_list.df, extra_columns=["deploy.name"])

### Add the feeding buzzes

Import your click details files. All files for one site must be stored in the same folder.

In [None]:
# Time bin used below to floor the feeding-buzz timestamps and to rebuild the time axis.
frq = "D" #Determines the format in which you want to visualise your data. Use "D", "h" or "10min".

In [None]:
# Path to your click details folder.
fb_files = Path(r"U:\fb_fpod_cetiroise_c")
fb_all = fb_folder(fb_files, "Marsouin")

# Snap every timestamp to the start of its `frq` bin (done in place on fb_all).
fb_all["start_datetime"] = fb_all["start_datetime"].dt.floor(frq)

# One row per bin with the summed "Foraging" values, timestamps expressed as tz-aware UTC.
fb = (
    fb_all.groupby("start_datetime", as_index=False)["Foraging"]
    .sum()
    .assign(start_datetime=lambda buzzes: to_datetime(buzzes["start_datetime"], utc=True))
)

In [None]:
# Pick the detection table whose time bin matches `frq`, so that its start_datetime
# values line up with the feeding-buzz timestamps floored with the same `frq`.
# Only the selected table needs to exist, so running a single "Format choice" cell is enough.
if frq == "D":
    dp_frq = dpd
elif frq == "h":
    dp_frq = dph
elif frq == "10min":
    dp_frq = dp10
else:
    raise ValueError(f'Unsupported frq {frq!r}: use "D", "h" or "10min".')

# Left merge: every detection bin is kept; bins without feeding buzzes get NaN in "Foraging".
d_tot = dp_frq.merge(fb, on="start_datetime", how="left")

# build_range aims to re-insert the bins without detection between the positive ones
# (they are filled with 0 after the next merge). This is useful to produce first
# visualisation graphs and to use the dataset in R.
d_hour = build_range(d_beg_end, frq)

In [None]:
# Attach the detections/buzzes to the full time axis; bins without a match stay NaN for now.
d_fin = d_hour.merge(
    right=d_tot,
    how="left",
    on=["start_datetime", "deploy.name"],
)

In [None]:
# Bins that found no match in the left merge count as zero detections and zero buzzes.
d_fin = d_fin.fillna({"DPM": 0, "Foraging": 0})
print(d_fin.head())

## Add time columns

In [None]:
# Calendar components of each bin.
d_fin["Year"] = d_fin["start_datetime"].dt.year
d_fin["Month"] = d_fin["start_datetime"].dt.month
# "YM" is the "<year>-<month>" label parsed back into a timestamp.
d_fin["YM"] = to_datetime(
    d_fin["Year"].astype(str).str.cat(d_fin["Month"].astype(str), sep="-")
)
d_fin["Day"] = d_fin["start_datetime"].dt.day
d_fin["Hour"] = d_fin["start_datetime"].dt.hour

# Ratio of "Foraging" to "DPM"; rows with DPM == 0 are dealt with in the next cell.
d_fin["FBR"] = d_fin["Foraging"].div(d_fin["DPM"])
# 1 when the bin holds at least one detection, 0 otherwise.
d_fin["DPH"] = d_fin["DPM"].gt(0).astype(int)

In [None]:
# Missing ratios (e.g. 0 / 0 in bins without detection) are stored as 0.
d_fin = d_fin.fillna({"FBR": 0})
# Save the final table, without the index column.
d_fin.to_csv(r"U:\Hours_DPM_FBUZZ_CETIROISE.csv", index=False)

## Overview

#### *Import datasets*

In [None]:
ca4 = read_csv(r"U:\Hours_DPM_FBUZZ_SiteCA4.csv")
walde = read_csv(r"U:\Hours_DPM_FBUZZ_SiteWalde.csv")

data_c = concat([ca4, walde])
# Parse as tz-aware UTC, then convert the whole column at once to local time
# (vectorised .dt accessor instead of a row-by-row apply).
data_c["start_datetime"] = to_datetime(data_c["start_datetime"], utc=True).dt.tz_convert("Europe/Paris")
# Recompute the hour so that it is expressed in local time.
data_c["Hour"] = data_c["start_datetime"].dt.hour

In [None]:
a = read_csv(r"U:\Hours_DPM_FBUZZ_SiteA.csv")
b = read_csv(r"U:\Hours_DPM_FBUZZ_SiteB.csv")
c = read_csv(r"U:\Hours_DPM_FBUZZ_SiteC.csv")
d = read_csv(r"U:\Hours_DPM_FBUZZ_SiteD.csv")

data_k = concat([a, b, c, d])
# Parse as tz-aware UTC, then convert the whole column at once to local time
# (vectorised .dt accessor instead of a row-by-row apply).
data_k["start_datetime"] = to_datetime(data_k["start_datetime"], utc=True).dt.tz_convert("Indian/Kerguelen")
# Recompute the hour so that it is expressed in local time.
data_k["Hour"] = data_k["start_datetime"].dt.hour

In [None]:
ceti = read_csv(r"U:\Hours_DPM_FBUZZ_CETIROISE.csv")

# Parse as tz-aware UTC, then convert the whole column at once to the "CET" time zone
# (vectorised .dt accessor instead of a row-by-row apply).
ceti["start_datetime"] = to_datetime(ceti["start_datetime"], utc=True).dt.tz_convert("CET") #TimeZone Central European Time
# Recompute the hour so that it is expressed in local time.
ceti["Hour"] = ceti["start_datetime"].dt.hour

#### *Generate graphs*

In [None]:
# Dataset used by all the graphs below; the cells above also load `data_c` and `data_k`.
data = ceti #Specify which dataset you are working with

In [None]:
# NOTE(review): `data` is overwritten with the output of extract_site; re-running this
# cell applies extract_site to its own output.
data = extract_site(data)

# One summary table per grouping column.
y_per = percent_calc(data, "Year")

ym_per = percent_calc(data, "YM")
ym_per["YM"] = to_datetime(ym_per["YM"])
# Only the first element returned by get_season is kept as the season label.
ym_per["Season"] = ym_per["YM"].map(lambda month_start: get_season(month_start)[0])

m_per = percent_calc(data, "Month")
h_per = percent_calc(data, "Hour")

# No grouping column given: this is the table used for the site overview.
s_per = percent_calc(data)

In [None]:
# Graph of the "%click" metric from the table computed without a grouping column.
# NOTE(review): presumably one bar/point per site — confirm against site_percent.
site_percent(s_per, "%click")

In [None]:
# Graph of the "%click" metric from the table grouped by "Year".
year_percent(y_per, "%click")

In [None]:
# Graph of the "%click" metric from the table grouped by "YM" (which also carries "Season").
ym_percent(ym_per, "%click")

In [None]:
# Graph of the "%buzzes" metric from the table grouped by "Month".
# NOTE(review): this is the only graph using "%buzzes" instead of "%click" — confirm it is intended.
month_percent(m_per, "%buzzes")

In [None]:
# Graph of the "%click" metric from the table grouped by "Hour"
# (hours are expressed in the local time zone set when the dataset was imported).
hour_percent(h_per, "%click")