In [1]:
from pathlib import Path

import pytz
from pandas import (
    concat,
    read_csv,
    to_datetime,
)

from post_processing.dataclass.data_aplose import DataAplose
from post_processing.utils.fpod_utils import csv_folder, pod2aplose, actual_data, meta_cut_aplose, resample_dpm, txt_folder, build_range, extract_site, percent_calc, site_percent, year_percent, ym_percent, month_percent, hour_percent, \
    feeding_buzz
from post_processing.utils.core_utils import json2df,get_season

### Pre-processing



Import your csv files. All files for one site must be stored in the same folder.

In [11]:
# Folder holding every POD .csv export of one site.
pod_files = Path(r"U:\Walde")
pod_raw = csv_folder(pod_files)
print(pod_raw.head())

# Keep only the rows without any missing value.
df_0 = pod_raw.dropna()

# Path to your metadata file.
# (Named `json_path` rather than `json` so the stdlib module name is not shadowed.)
json_path = Path(r"C:\Users\fouinel\Downloads\deployment_calais.json")
metadatax = json2df(json_path=json_path)

# Deployment identifier built as "<site.name>_<campaign.name>".
metadatax["deploy.name"] = (
    metadatax["site.name"].astype(str)
    + "_"
    + metadatax["campaign.name"].astype(str)
)

                 File          ChunkEnd  DPM    Nall  MinsOn   deploy.name
0  POD2399 file01.CP3  14/05/2014 08:03  0.0     0.0     1.0  Walde_Phase1
1  POD2399 file01.CP3  14/05/2014 08:04  0.0   799.0     1.0  Walde_Phase1
2  POD2399 file01.CP3  14/05/2014 08:05  0.0     0.0     1.0  Walde_Phase1
3  POD2399 file01.CP3  14/05/2014 08:06  0.0  3361.0     1.0  Walde_Phase1
4  POD2399 file01.CP3  14/05/2014 08:07  0.0   421.0     1.0  Walde_Phase1


In [12]:
# Build `d_beg_end` from the cleaned POD rows and the deployment metadata.
# NOTE(review): presumably the actual begin/end of recording for each deployment —
# confirm in fpod_utils.actual_data.
d_beg_end = actual_data(df_0, metadatax)

In [13]:
# Save the table as CSV without the index column; adapt the output path to your site.
d_beg_end.to_csv(r"U:\Deb_Fin_Walde.csv", index=False)

In [14]:
# Keep only the rows whose DPM value is not zero.
df_1 = df_0.loc[df_0["DPM"].ne(0)]

### APLOSE format

Choose the right function call, depending on the instrument you are working with.

#### *CPOD*

In [15]:
# Convert the non-zero-DPM rows to the APLOSE layout (CPOD variant), timestamps in UTC.
# NOTE(review): the FPOD cell further down assigns the same `df_aplose` name —
# run only the cell matching your instrument.
df_aplose = pod2aplose(
    df_1,
    pytz.utc,
    "Walde",
    "Marsouin",
    "CPOD",
)
print(df_aplose.head())

  dataset filename  start_time  end_time  start_frequency  end_frequency  \
0   Walde                    0        60                0              0   
1   Walde                    0        60                0              0   
2   Walde                    0        60                0              0   
3   Walde                    0        60                0              0   
4   Walde                    0        60                0              0   

  annotation annotator                start_datetime  \
0   Marsouin      FPOD  2014-05-15T16:56:00.000+0000   
1   Marsouin      FPOD  2014-05-17T22:53:00.000+0000   
2   Marsouin      FPOD  2014-05-17T22:54:00.000+0000   
3   Marsouin      FPOD  2014-05-18T14:05:00.000+0000   
4   Marsouin      FPOD  2014-05-21T14:19:00.000+0000   

                   end_datetime  is_box   deploy.name  
0  2014-05-15T16:57:00.000+0000       0  Walde_Phase1  
1  2014-05-17T22:54:00.000+0000       0  Walde_Phase1  
2  2014-05-17T22:55:00.000+0000       

#### *FPOD*

In [7]:
# Convert the non-zero-DPM rows to the APLOSE layout (FPOD variant), timestamps in UTC.
# NOTE(review): this rebinds `df_aplose`, replacing the result of the CPOD cell above —
# run only the cell matching your instrument.
df_aplose = pod2aplose(
    df_1,
    pytz.utc,
    "CETIROISE",
    "Marsouin",
    "FPOD",
)
print(df_aplose.head())

  dataset filename  start_time  end_time  start_frequency  end_frequency  \
0     CA4                    0        60                0              0   
1     CA4                    0        60                0              0   
2     CA4                    0        60                0              0   
3     CA4                    0        60                0              0   
4     CA4                    0        60                0              0   

  annotation annotator                start_datetime  \
0   Marsouin      FPOD  2014-05-17T03:52:00.000+0000   
1   Marsouin      FPOD  2014-05-17T04:47:00.000+0000   
2   Marsouin      FPOD  2014-05-19T17:06:00.000+0000   
3   Marsouin      FPOD  2014-05-20T11:07:00.000+0000   
4   Marsouin      FPOD  2014-05-20T11:16:00.000+0000   

                   end_datetime  is_box deploy.name  
0  2014-05-17T03:53:00.000+0000       0  CA4_Phase1  
1  2014-05-17T04:48:00.000+0000       0  CA4_Phase1  
2  2014-05-19T17:07:00.000+0000       0  CA4

### Remove non usable lines

Import the .json file available on metadatax.

In [16]:


# Remove the lines captured outside the instrument submersion.
cleared = meta_cut_aplose(df_aplose, metadatax)

Export your file to the aplose format. You can change the name of the file to match the project you are working on.

In [17]:
# You can store all the DPM of a site in a DataAplose file.
cleared.to_csv(r"U:\APLOSE_Walde_pos.csv", index=False)

### Load data

Use the yaml file to import your aplose files one at a time.

In [None]:
# Build the path with the `/` operator so it resolves on every OS: a literal
# backslash is a directory separator on Windows only.
yaml_file = Path("resource") / "CPOD-FPOD_yaml.yml"

# Load the APLOSE file(s) described by the yaml configuration.
data_list = DataAplose.from_yaml(file=yaml_file)
print(data_list.df.head())

### Format choice


Here you need to choose the format in which you want to visualise the positive detections. This aims to replace the functionality available in CPOD.exe.

In [None]:
# Time bin in which the positive detections are aggregated and visualised.
# Use "D", "h" or "10min". The same value is passed to resample_dpm, dt.floor
# and build_range in the cells below.
frq = "D"

In [None]:
# Resample the loaded detections to the chosen time bin, keeping the deployment name.
resamp = resample_dpm(
    data_list.df,
    frq=frq,
    extra_columns=["deploy.name"],
)

### Add the feeding buzzes

Import your click details files. All files for one site must be stored in the same folder.

In [None]:
# Path to your click details folder.
fb_files = Path(r"U:\fb_fpod_cetiroise_c")
fb_all = feeding_buzz(txt_folder(fb_files), "Marsouin")

# Snap every row to the start of its `frq` bin, then total "Foraging" per bin
# and make the bin timestamps UTC-aware.
fb_all["start_datetime"] = fb_all["start_datetime"].dt.floor(frq)
fb = (
    fb_all.groupby("start_datetime")["Foraging"]
    .sum()
    .reset_index()
    .assign(start_datetime=lambda bins: to_datetime(bins["start_datetime"], utc=True))
)

In [None]:
# build_range receives the begin/end table and the bin size. Per the original
# author's note, it re-inserts the zero rows between the positive detections,
# which is useful for the first visualisation graphs and for reusing the dataset in R.
d_hour = build_range(d_beg_end, frq)

# Left merge: every resampled bin is kept, with its feeding-buzz count when one exists.
d_tot = resamp.merge(fb, how="left", on="start_datetime")

In [None]:
# Attach the detections to the full range of bins; bins without a match get NaN.
d_fin = d_hour.merge(
    d_tot,
    how="left",
    on=["start_datetime", "deploy.name"],
)

In [None]:
# Bins with no matching detection carry NaN after the left merges: count them as 0.
count_cols = ["DPM", "Foraging"]
d_fin[count_cols] = d_fin[count_cols].fillna(0)
print(d_fin.head())

## Add time columns

In [None]:
# Calendar components of each bin.
d_fin["Year"] = d_fin["start_datetime"].dt.year
d_fin["Month"] = d_fin["start_datetime"].dt.month
# Year-month key, parsed from a "<year>-<month>" string into a datetime.
d_fin["YM"] = to_datetime(d_fin["Year"].astype(str) + "-" + d_fin["Month"].astype(str))
d_fin["Day"] = d_fin["start_datetime"].dt.day
d_fin["Hour"] = d_fin["start_datetime"].dt.hour

# Foraging / DPM ratio. Bins with DPM == 0 are masked first, so the ratio is NaN
# there instead of inf (inf appeared when Foraging > 0 and DPM == 0, and is not
# caught by a later fillna).
d_fin["FBR"] = d_fin["Foraging"] / d_fin["DPM"].where(d_fin["DPM"] != 0)
# 1 when the bin holds at least one positive minute, else 0.
d_fin["DPH"] = (d_fin["DPM"] > 0).astype(int)

In [None]:
# An undefined ratio (NaN) is stored as 0 before the table is exported.
d_fin["FBR"] = d_fin["FBR"].fillna(value=0)
d_fin.to_csv(path_or_buf=r"U:\Hours_DPM_FBUZZ_CETIROISE.csv", index=False)

## Overview

#### *Import datasets*

In [None]:
ca4 = read_csv(r"U:\Hours_DPM_FBUZZ_SiteCA4.csv")
walde = read_csv(r"U:\Hours_DPM_FBUZZ_SiteWalde.csv")

data_c = concat([ca4, walde])
# Parse as UTC (so the column is tz-aware whatever the stored offset), then
# convert the whole column at once to local time.
data_c["start_datetime"] = to_datetime(data_c["start_datetime"], utc=True).dt.tz_convert("Europe/Paris")
# Hour of day recomputed in local time.
data_c["Hour"] = data_c["start_datetime"].dt.hour

In [None]:
a = read_csv(r"U:\Hours_DPM_FBUZZ_SiteA.csv")
b = read_csv(r"U:\Hours_DPM_FBUZZ_SiteB.csv")
c = read_csv(r"U:\Hours_DPM_FBUZZ_SiteC.csv")
d = read_csv(r"U:\Hours_DPM_FBUZZ_SiteD.csv")

data_k = concat([a, b, c, d])
# Parse as UTC (so the column is tz-aware whatever the stored offset), then
# convert the whole column at once to local time.
data_k["start_datetime"] = to_datetime(data_k["start_datetime"], utc=True).dt.tz_convert("Indian/Kerguelen")
# Hour of day recomputed in local time.
data_k["Hour"] = data_k["start_datetime"].dt.hour

In [None]:
ceti = read_csv(r"U:\Hours_DPM_FBUZZ_CETIROISE.csv")

# Parse as UTC (so the column is tz-aware whatever the stored offset), then
# convert the whole column at once to the "CET" zone (Central European Time).
ceti["start_datetime"] = to_datetime(ceti["start_datetime"], utc=True).dt.tz_convert("CET")
# Hour of day recomputed in local time.
ceti["Hour"] = ceti["start_datetime"].dt.hour

#### *Generate graphs*

In [None]:
# Specify which dataset you are working with.
data = ceti

In [None]:
# Bind the result to its own name instead of overwriting `data`: re-running this
# cell then always starts from the dataset selected above rather than from the
# output of a previous extract_site call.
data_sites = extract_site(data)

# One percentage table per grouping column.
y_per = percent_calc(data_sites, "Year")
ym_per = percent_calc(data_sites, "YM")
ym_per["YM"] = to_datetime(ym_per["YM"])
# get_season returns a sequence; only its first element is kept as the season label.
ym_per["Season"] = ym_per["YM"].apply(lambda x: get_season(x)[0])
m_per = percent_calc(data_sites, "Month")
h_per = percent_calc(data_sites, "Hour")
# No grouping column given here.
# NOTE(review): presumably the per-site table — confirm in fpod_utils.percent_calc.
s_per = percent_calc(data_sites)

In [None]:
# Plot the table computed without a grouping column.
# NOTE(review): "%click" presumably selects the metric to draw — confirm in fpod_utils.
site_percent(s_per, "%click")

In [None]:
# Plot the table grouped by "Year".
# NOTE(review): "%click" presumably selects the metric to draw — confirm in fpod_utils.
year_percent(y_per, "%click")

In [None]:
# Plot the table grouped by "YM" (it also carries the "Season" column added above).
# NOTE(review): "%click" presumably selects the metric to draw — confirm in fpod_utils.
ym_percent(ym_per, "%click")

In [None]:
# Plot the table grouped by "Month".
# NOTE(review): this call uses "%buzzes" while the other plots use "%click" —
# confirm this is intended.
month_percent(m_per, "%buzzes")

In [None]:
# Plot the table grouped by "Hour".
# NOTE(review): "%click" presumably selects the metric to draw — confirm in fpod_utils.
hour_percent(h_per, "%click")