In [1]:
from pathlib import Path

import pytz
from pandas import (
    concat,
    read_csv,
    to_datetime,
)

from post_processing.dataclass.data_aplose import DataAplose
from post_processing.utils.fpod_utils import cpod2aplose, fpod2aplose, dpm_to_dp10m, dpm_to_dph, dpm_to_dpd, fb_folder,csv_folder, meta_cut_aplose, date_format,extract_site,percent_calc,site_percent, year_percent, ym_percent, month_percent, hour_percent, actual_data, build_hour_range
from post_processing.utils.core_utils import json2df,get_season

## Pre-processing



In [3]:
# Load the raw detection table from the U:\D folder.
# NOTE(review): csv_folder is a project helper; presumably it gathers every CSV
# found in that folder into one DataFrame — confirm against its implementation.
data = csv_folder(r"U:\D")
# Quick preview; the captured output shows the columns File, ChunkEnd, DPM,
# Nall, MinsOn and deploy.name.
print(data.head())

# Discard every row containing at least one missing value.
df_0 = data.dropna()

                 File          ChunkEnd  DPM  Nall  MinsOn  \
0  POD3042 file01.CP3  21/10/2019 12:11    0     0       0   
1  POD3042 file01.CP3  21/10/2019 12:12    0     0       0   
2  POD3042 file01.CP3  21/10/2019 12:13    0     0       0   
3  POD3042 file01.CP3  21/10/2019 12:14    0     0       0   
4  POD3042 file01.CP3  21/10/2019 12:15    0     0       0   

            deploy.name  
0  Site D Simone_Phase1  
1  Site D Simone_Phase1  
2  Site D Simone_Phase1  
3  Site D Simone_Phase1  
4  Site D Simone_Phase1  


In [5]:
# Summarise the cleaned table using its "ChunkEnd" column as the timestamp.
# NOTE(review): judging by the variable name, the result presumably holds the
# beginning/end of the actual recording period(s) — confirm against actual_data.
d_beg_end = actual_data(df_0, col_timestamp="ChunkEnd")

In [6]:
# Keep only the rows whose "DPM" value differs from zero.
df_1 = df_0.loc[df_0["DPM"].ne(0)]

## APLOSE format

#### *CPOD*

In [None]:
# CPOD variant: reshape the non-zero rows with cpod2aplose, passing the UTC
# timezone, the "Site D Simone" and "Commerson" labels, and carrying the
# "deploy.name" column along.
# NOTE(review): the meaning of the two string arguments (dataset / annotation
# labels?) is not visible here — confirm against cpod2aplose.
df_aplose = cpod2aplose(
    df_1,
    pytz.utc,
    "Site D Simone",
    "Commerson",
    extra_columns=["deploy.name"],
)
print(df_aplose.head())

#### *FPOD*

In [None]:
# FPOD variant: same step as the CPOD cell but through fpod2aplose.
# NOTE: this assigns the same `df_aplose` name as the CPOD cell, so whichever
# of the two cells ran last determines what the following cells receive.
df_aplose = fpod2aplose(
    df_1,
    pytz.utc,
    "CETIROISE",
    "Marsouin",
)
print(df_aplose.head())

## Remove unusable lines

In [None]:
# Location of the deployment metadata export.
# Named `json_path` (not `json`) so the standard-library module name is not
# shadowed in the notebook namespace.
json_path = Path(r"C:\Users\fouinel\Downloads\deployment_kerguelen.json")
metadatax = json2df(json_path=json_path)

# Build a "deploy.name" key of the form "<site.name>_<campaign.name>"; both
# parts are cast to str first so non-string values concatenate cleanly.
metadatax["deploy.name"] = (
    metadatax["site.name"].astype(str)
    + "_"
    + metadatax["campaign.name"].astype(str)
)

# NOTE(review): meta_cut_aplose presumably trims the detections using the
# deployment metadata (hence the section title) — confirm against the helper.
cleared = meta_cut_aplose(df_aplose, metadatax)

In [None]:
# Write the filtered table to disk, without the DataFrame index column.
cleared.to_csv(r"U:\APLOSE_D.csv", index=False)

## Load data


In [None]:
# Path of the YAML configuration, relative to the working directory.
# Built with the `/` operator so the separator is correct on every OS (a
# literal backslash is only a path separator on Windows).
yaml_file = Path("resource") / "CPOD-FPOD_yaml.yml"
# Build the DataAplose object described by the YAML file and preview its table.
data_list = DataAplose.from_yaml(file=yaml_file)
print(data_list.df.head())

## Format choice


In [None]:
# 10-minute resolution: aggregate with dpm_to_dp10m (keeping "deploy.name"),
# then pass the result through date_format.
dp10 = date_format(
    dpm_to_dp10m(data_list.df, extra_columns=["deploy.name"])
)

In [None]:
# Hourly resolution: aggregate with dpm_to_dph (keeping "deploy.name"),
# then pass the result through date_format.
dph = date_format(
    dpm_to_dph(data_list.df, extra_columns=["deploy.name"])
)

In [None]:
# Daily resolution: aggregate with dpm_to_dpd (keeping "deploy.name"),
# then pass the result through date_format.
dpd = date_format(
    dpm_to_dpd(data_list.df, extra_columns=["deploy.name"])
)

## Add the feeding buzzes

In [None]:
# Load the feeding-buzz table from the U:\fb_D_NBHF folder.
fb_all = fb_folder(r"U:\fb_D_NBHF", "Commerson")
# Round every timestamp down to the start of its hour ...
fb_all["start_datetime"] = fb_all["start_datetime"].dt.floor("h")
# ... and total the "Foraging" values per hour, keeping the hour as a column.
fb = fb_all.groupby("start_datetime", as_index=False)["Foraging"].sum()


In [None]:
# Build the hourly table from the begin/end information computed earlier.
# NOTE(review): build_hour_range is a project helper; presumably it yields one
# row per hour of recording — confirm against its implementation.
d_hour = build_hour_range(d_beg_end)
# Left-join the hourly "Foraging" sums onto `dph`; rows of `dph` with no
# matching hour in `fb` get NaN in "Foraging".
# NOTE(review): `d_tot` is not referenced by any later cell shown here, while
# the following cells read "DPM" and "Foraging" from `d_hour` — check whether
# `d_hour` was meant to be combined with `d_tot`.
d_tot = dph.merge(fb, on="start_datetime", how="left")

In [None]:
# Treat missing counts as zero in both count columns.
count_columns = ["DPM", "Foraging"]
d_hour[count_columns] = d_hour[count_columns].fillna(0)
print(d_hour.head())

## Add time columns

In [None]:
# Calendar components extracted from each row's timestamp.
d_hour["Year"] = d_hour["start_datetime"].dt.year
d_hour["Month"] = d_hour["start_datetime"].dt.month
# "YM": a datetime parsed from the "<Year>-<Month>" string.
d_hour['YM'] = to_datetime(d_hour['Year'].astype(str) + '-' + d_hour['Month'].astype(str))
d_hour["Day"] = d_hour["start_datetime"].dt.day
d_hour["Hour"] = d_hour["start_datetime"].dt.hour

# Rows with at least one positive "DPM" value.
has_detection = d_hour["DPM"] > 0
# "FBR" is the Foraging / DPM ratio. The division is only meaningful when
# DPM > 0: with DPM == 0 it would give NaN (0/0) or +/-inf (x/0), and inf is
# not removed by a later fillna(0). Those rows are therefore set to 0 here.
d_hour["FBR"] = (d_hour["Foraging"] / d_hour["DPM"]).where(has_detection, 0)
# "DPH": 1 when the row has a positive DPM, 0 otherwise.
d_hour["DPH"] = has_detection.astype(int)

In [None]:
# Replace missing "FBR" values (e.g. the NaN produced by 0/0) with 0.
# Note that fillna only targets missing values; infinite values are left as is.
d_hour["FBR"] = d_hour["FBR"].fillna(0)
# Save the hourly table for site D, without the DataFrame index column.
d_hour.to_csv(r"U:\Hours_DPM_FBUZZ_SiteD.csv", index=False)

## Overview

In [None]:
# Hourly tables previously exported for the CA4 and Walde sites.
ca4 = read_csv(r"U:\Hours_DPM_FBUZZ_SiteCA4.csv")
walde = read_csv(r"U:\Hours_DPM_FBUZZ_SiteWalde.csv")

# Stack both sites, then rebuild "YM" as a datetime parsed from the
# "<Year>-<Month>" string.
data_k = concat([ca4, walde])
data_k['YM'] = to_datetime(
    data_k['Year'].astype(str) + '-' + data_k['Month'].astype(str)
)

In [None]:
# Hourly tables previously exported for sites A, B, C and D.
a = read_csv(r"U:\Hours_DPM_FBUZZ_SiteA.csv")
b = read_csv(r"U:\Hours_DPM_FBUZZ_SiteB.csv")
c = read_csv(r"U:\Hours_DPM_FBUZZ_SiteC.csv")
d = read_csv(r"U:\Hours_DPM_FBUZZ_SiteD.csv")

# Stack the four sites. This reassigns `data_k`, replacing the CA4/Walde table
# if the previous cell was run.
data_k = concat([a, b, c, d])
# CSV round-trips timestamps as text: parse them back to datetimes, then
# express them in local Kerguelen time. The vectorised `.dt.tz_convert`
# replaces a per-element `apply`; like the original it requires tz-aware
# timestamps and raises TypeError on tz-naive ones.
data_k["start_datetime"] = to_datetime(data_k["start_datetime"])
data_k["start_datetime"] = data_k["start_datetime"].dt.tz_convert("Indian/Kerguelen")
# Recompute the hour of day so that it reflects local time.
data_k["Hour"] = data_k["start_datetime"].dt.hour

In [None]:
# Run the combined table through extract_site before summarising.
# NOTE(review): presumably adds/derives a site column — confirm against extract_site.
data_k = extract_site(data_k)

# One summary table per grouping: overall (no grouping column given), then by
# year, month, hour of day and year-month.
s_per = percent_calc(data_k)
y_per = percent_calc(data_k, "Year")
m_per = percent_calc(data_k, "Month")
h_per = percent_calc(data_k, "Hour")
ym_per = percent_calc(data_k, "YM")

# Make sure "YM" is a datetime, then label each row with the first element
# returned by get_season for that date.
ym_per["YM"] = to_datetime(ym_per["YM"])
ym_per["Season"] = ym_per["YM"].map(lambda ym: get_season(ym)[0])

In [None]:
# Display the overall summary table using its "%click" metric.
# NOTE(review): site_percent is a project helper, presumably a plot — confirm.
site_percent(s_per, "%click")

In [None]:
# Display the per-year summary table using its "%click" metric.
# NOTE(review): year_percent is a project helper, presumably a plot — confirm.
year_percent(y_per, "%click")

In [None]:
# Display the per-year-month summary table using its "%click" metric.
# NOTE(review): ym_percent is a project helper, presumably a plot — confirm.
ym_percent(ym_per, "%click")

In [None]:
# Display the per-month summary table using its "%buzzes" metric.
# NOTE(review): the sibling cells all pass "%click"; confirm "%buzzes" is the
# intended metric here rather than a leftover.
month_percent(m_per, "%buzzes")

In [None]:
# Display the per-hour-of-day summary table using its "%click" metric.
# NOTE(review): hour_percent is a project helper, presumably a plot — confirm.
hour_percent(h_per, "%click")