# Process ferrybox pCO2 data from HydroC on R/V Svea

### First cell loads modules needed for the processing

In [None]:
%load_ext autoreload
%autoreload 2
import warnings
import json
import os
import datetime as dt
import scipy.stats as stats
import numpy as np
import pandas as pd
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.plotting import show, save
from bokeh.io import output_notebook
from bokeh.layouts import column, row, gridplot
from bokeh.models import CrosshairTool
warnings.filterwarnings('ignore')

import datahandler
import plot_data

### Load information about cruise start and stop

The processing is set up to process data from one cruise at a time. The information on start and stop dates is stored in a .json file named linear_regression_log.json. This file needs to be loaded before the processing. The file can be changed either by opening it and editing it, or as shown further down.

In [None]:
# Read the cruise start/stop log. The context manager closes the file handle
# as soon as the JSON has been parsed instead of leaving it open until
# garbage collection.
with open("./linear_regression_log.json") as log_file:
    linear_regression_log = json.load(log_file)
#datahandler.print_linear_regression_log(linear_regression_log)

In [None]:
# Processed files are read from "<data_path>/<today>".
data_path = "D:/data/ferrybox/processed_data"
# Default to the current local date, formatted as YYYY-MM-DD.
today = dt.date.today().isoformat()
# To read data processed on another day, set the date explicitly instead:
# today = "2022-12-05"

In [None]:
### Load data from the processed data directory
# data_collection maps each data source name to a dict of
# {cruise label: DataFrame}, one entry per matching file in
# "<data_path>/<today>".
data_collection = {}
for data_source in ["measurements_df_processed", "zerocycles_mean_df_processed"]:
    data_collection[data_source] = {}
    # os.scandir holds an OS directory handle; the context manager releases it
    # deterministically once the directory has been walked.
    with os.scandir(f"{data_path}/{today}") as entries:
        for obj in entries:
            # Only regular files whose name mentions this data source.
            if not obj.is_file() or data_source not in obj.name:
                continue
            df = pd.read_csv(
                obj.path,
                sep="\t",
                parse_dates=["timestamp"],
                infer_datetime_format=True,
                dtype={"Quality": "int"},
            )
            # Derive the cruise label from the file name. str.strip() must not
            # be used for this: it removes any leading/trailing *characters*
            # found in its argument, which eats into cruise labels that begin
            # or end with letters occurring in the data source name. Instead,
            # drop the extension, remove the data source token as a whole and
            # trim the underscores that joined it to the label.
            stem = os.path.splitext(obj.name)[0]
            cruise_label = stem.replace(data_source, "").strip("_")
            data_collection[data_source][cruise_label] = df
    print(data_source, list(data_collection[data_source].keys()))

LENA: 

- Lägg till så man ser state measurement 2 beam signal tillsammans med zero cycle two beam signal.
- Räta linjen från varje expe ska extrapoleras.



#### First plot zeroing data to check for drift.

In [None]:
figure_title = "mean of zerocycles"
y_data = "two_beam_signal"
p = plot_data.datetime_figure(figure_title, y_data_name=y_data)
color = plot_data.color
data_source = "zerocycles_mean_df_processed"

# One colour per cruise: the zero-cycle means are drawn as dots and, in the
# same colour, the straight line given by the slope/intercept columns stored
# alongside the data.
for cruise_no, (cruise, data) in enumerate(data_collection[data_source].items()):
    p.circle(
        x=data["timestamp"],
        y=data[y_data],
        color=color[cruise_no],
        fill_alpha=0.6,
        legend_label=f"{cruise}",
        size=2,
    )

    # Evaluate slope * timenumeric + intercept for every row.
    fitted = data.assign(
        linearcorrection_y=lambda x: x["slope"] * x["timenumeric"] + x["intercept"]
    )

    p.line(
        x=fitted["timestamp"],
        y=fitted["linearcorrection_y"],
        color=color[cruise_no],
        legend_label=f"linear fit {cruise_no}",
        # Bokeh's line glyph takes its thickness as `line_width`; it has no
        # `width` property.
        # NOTE(review): assumes plot_data.datetime_figure returns a standard
        # Bokeh figure — confirm.
        line_width=1,
    )

# Clicking a legend entry toggles the series; the legend sits outside the plot.
p.legend.click_policy = "hide"
p.add_layout(p.legend[0], 'right')
save(p, f"D:/figures/ferrybox/{figure_title}.html")

## plot measurements

Put all measurement data into one dataframe and show by quality and state

In [None]:
# Stack the per-cruise measurement tables into one DataFrame.
data_source = "measurements_df_processed"
data_list = list(data_collection[data_source].values())
all_data = pd.concat(data_list)

In [None]:
figure_title = "measurements"
y_data = "pco2"
color = plot_data.color


def _scatter_groups(fig, groups, color_index):
    """Add one scatter series per group to *fig* and move its legend to the right.

    fig: figure returned by plot_data.datetime_figure.
    groups: iterable of (label, DataFrame) pairs, e.g. a pandas groupby.
    color_index: dict mapping every label to a position in the ``color`` palette.
    """
    for label, subset in groups:
        fig.circle(
            x=subset["timestamp"],
            y=subset[y_data],
            # Wrap around so many labels cannot index past the end of the palette.
            color=color[color_index[label] % len(color)],
            fill_alpha=0.6,
            legend_label=f"{label}",
            size=2,
        )
    # An empty selection adds no glyphs and therefore no legend to configure.
    if fig.legend:
        fig.legend.click_policy = "hide"
        fig.add_layout(fig.legend[0], 'right')


# Fixed colour slot per quality flag, shared by the first and the third plot.
flagcolor_dict = {flag: i for i, flag in enumerate(all_data['Q_flag'].unique())}
# State colours start after the flag colours. The offset comes from the size
# of the flag table rather than from a variable left over from a previous
# loop, which would be undefined when there are no State_Measure rows.
state_offset = len(flagcolor_dict)
statecolor_dict = {
    state: i + state_offset for i, state in enumerate(all_data['state'].unique())
}

## first a plot with all data from State measure colored by Quality flag
p1 = plot_data.datetime_figure(figure_title = f"{figure_title} only state measure", y_data_name=y_data)
_scatter_groups(
    p1,
    all_data.loc[(all_data["state"] == "State_Measure")].groupby('Q_flag'),
    flagcolor_dict,
)

## next a plot with all data from Quality flag 0 colored by State
p2 = plot_data.datetime_figure(figure_title = f"{figure_title} only flag 0: Operate", y_data_name=y_data)
_scatter_groups(
    p2,
    all_data.loc[(all_data["Q_flag"] == "0: Operate")].groupby('state'),
    statecolor_dict,
)

## next a plot with all data from all states colored by Quality flag
p3 = plot_data.datetime_figure(figure_title = f"{figure_title} all states", y_data_name=y_data)
_scatter_groups(p3, all_data.groupby('Q_flag'), flagcolor_dict)

## put the three plots into the same grid with shared axes
p3.x_range = p2.x_range = p1.x_range
p3.y_range = p2.y_range = p1.y_range

grid = gridplot([p1, p2, p3], ncols=1)

save(grid, f"D:/figures/ferrybox/{figure_title}.html")