# Process ferrybox pCO2 data from HydroC on R/V Svea

### First cell loads modules needed for the processing

In [None]:
%load_ext autoreload
%autoreload 2
import warnings
import json
warnings.filterwarnings('ignore')

import datahandler
from read_data import Cruise
from read_data import ProcessData

### Load information about cruise start and stop

The processing is set up to process data from one cruise at a time. The start and stop dates are stored in a JSON file named linear_regression_log.json, which must be loaded before the processing. The file can be changed either by opening it and editing it directly, or as shown further down.

In [None]:
# Load the log of cruise periods. The context manager closes the file handle
# as soon as the content has been parsed.
with open("./linear_regression_log.json") as log_file:
    linear_regression_log = json.load(log_file)
# if you want to view the content of the linear regression log uncomment this line (remove the #)
# datahandler.print_linear_regression_log(linear_regression_log)

### Change period of a cruise

To edit linear_regression_log without opening the JSON file, uncomment the code below and adjust it to your needs.

Här visas hur du ändrar i linear_regression_log.

- För att lägga till en rad:
    - *datahandler.modify_linear_regression_log(linear_regression_log, year, id, start, stop, action='add')*
- För att ta bort en rad:
    - *datahandler.modify_linear_regression_log(linear_regression_log, year, id, start, stop, action='remove')*

Linear regression log säger hur data ska delas upp efter datum och i nästa steg när data läses in kommer den att ge ett fel om data för någon period inte finns i mappen som du läser data ifrån.

In [None]:
# year = '2022'
# id = '10_01'
# start = '2021-01-03'
# stop = '2021-01-14'

# datahandler.modify_linear_regression_log(linear_regression_log, year, id, start, stop, action='remove')
# with open("./linear_regression_log.json", 'w') as log_file:
#     json.dump(linear_regression_log, log_file)
# datahandler.print_linear_regression_log(linear_regression_log)

### Read, process and save.

- Read raw data (default path is D:/data/ferrybox)
- Save raw data with all parameters together, one file for zeroing data (1 sec resolution) and one for measurements (1 min resolution) (default path is D:/data/ferrybox/processed_data).
- Process raw data and save processed data

In [None]:
# Where the raw files are read from and where the results are written.
data_path = "D:/data/ferrybox" # change this if your raw data is stored elsewhere
path_co2 = "DeviceData/CO2FT_A"
save_processed_data = f"{data_path}/processed_data"

# Running count of cruise periods whose processing was flagged as valid.
processed_data = 0

for year, year_log in linear_regression_log.items():
    # Only entries logged under 2022 are handled in this run.
    if int(year) != 2022:
        continue
    # Raw-data folder for this year: <data_path>/<year>/<path_co2>
    path = "/".join((data_path, str(year), path_co2))

    for cruise_key, my_expedition in year_log.items():
        # Entries whose key contains "10_0x" are left out.
        if "10_0x" in cruise_key:
            continue

        for cruise_period in my_expedition["cruise_period"]:
            try:
                cruise = Cruise(path, cruise_period, data_save_path=save_processed_data)
                measurement_data, ZeroCycle = cruise.get_data_package()
            except AssertionError as e:
                # Report the failed assertion and move on to the next period.
                print(e)
            else:
                process = ProcessData(
                    data_save_path=save_processed_data,
                    measurements=measurement_data,
                    zerocycles=ZeroCycle,
                    regression_period=cruise_period,
                )
                process.process_data()
                if not process.valid:
                    print(f"invalid data for {year}, {cruise_key}, {cruise_period}")
                    continue
                processed_data += 1
