# Load data from WBTS and save in output_dir

Set the `output_dir` in `config.yaml` before starting.

## Import all necessary packages

In [None]:
import numpy as np
import pandas as pd
import os
import xarray as xr
import datetime
import matplotlib.pyplot as plt
import yaml

Note: This assumes you have already installed the package using
``` 
pip install -e .
```

An alternative could be:
```
import pathlib
import sys
script_dir = pathlib.Path().parent.absolute()
parent_dir = script_dir.parents[0]
sys.path.append(str(parent_dir))
sys.path.append(str(parent_dir / 'load_data'))
```

which assumes that you're running the notebook from within `WBTSdata/notebooks/`.


In [None]:
### Import the functions to load the calibration (CTD) files and the velocity (ADCP) files, plus the merge helpers and shared tools
from load_data import load_cal_files, load_vel_files, merge_datasets, tools
from load_data import missing_datetime_2005_05 as mdt

## Print the input and output directories that are defined in the configuration file. The input directory should contain the WBTS data, and the output directory is where the created files will be stored.

In [None]:
# Read the project configuration, pull out the two directories used by the
# rest of the notebook, and echo them so the setup can be checked by eye.
config = tools.get_config()
input_dir, output_dir = config['input_dir'], config['output_dir']
print('Input directory: ', input_dir, '\nOutput directory: ', output_dir)

### Define all directories of the velocity (ADCP) and calibration (CTD) data

In [None]:
# Ask merge_datasets for the ADCP and CTD directory lists derived from
# input_dir. Both lists are iterated by the cells below.
# NOTE(review): the later cells assume every entry is a path string containing
# 'GC_' followed by a 7-character identifier -- confirm against merge_datasets.
dir_list_ADCP = merge_datasets.dir_list_ADCP(input_dir)
dir_list_CTD = merge_datasets.dir_list_CTD(input_dir)


### Make directories within the output directory for the ADCP, CTD and merged datasets

In [None]:
# Ensure one sub-folder per product exists inside output_dir. exist_ok=True
# makes re-running this cell harmless when the folders are already present.
for subfolder in ('CTD', 'ADCP', 'Merged'):
    os.makedirs(os.path.join(output_dir, subfolder), exist_ok=True)

## Create the dataset for each year and save it to the dedicated folder in the output directory

In [None]:
### Write one CTD netCDF file per entry in dir_list_CTD
# Note carried over from the original cell: set the directory for yaml files
# as the root directory + 'load_data/' --> could be in 'config/' instead.
# The `if 0:` guard keeps this cell switched off; change it to `if 1:` to
# regenerate the per-year CTD files.
if 0:
    for ctd_dir in dir_list_CTD:
        ds = load_cal_files.create_Dataset(ctd_dir, config)
        # The 7 characters following 'GC_' in the path label the output file.
        file_name = 'WBTS_' + ctd_dir.split('GC_')[1][:7] + '_CTD.nc'
        target = os.path.join(output_dir, 'CTD', file_name)
        # Remove any previous copy before writing the new file.
        if os.path.exists(target):
            os.remove(target)
        ds.to_netcdf(target)
        print('Saved: ', file_name)

In [None]:
# Write one ADCP netCDF file per entry in dir_list_ADCP.
# The `if 0:` guard keeps this cell switched off; change it to `if 1:` to
# regenerate the per-year ADCP files.
if 0:
    for adcp_dir in dir_list_ADCP:
        ds = load_vel_files.create_Dataset(adcp_dir, config)
        # The 7 characters following 'GC_' in the path label the output file.
        file_name = 'WBTS_' + adcp_dir.split('GC_')[1][:7] + '_ADCP.nc'
        target = os.path.join(output_dir, 'ADCP', file_name)
        # Remove any previous copy before writing the new file.
        if os.path.exists(target):
            os.remove(target)
        ds.to_netcdf(target)
        print('Saved: ', file_name)

### merge and save the datasets of ADCP and CTD for each year

In [None]:
### Pair every CTD directory with the ADCP directories sharing its identifier,
### then merge each pair and write the result to the 'Merged' folder
for cal_dir in dir_list_CTD:
    # 7-character tag that follows 'GC_' in the CTD path; used both to find the
    # matching ADCP directory and to name the output file.
    year = cal_dir.split('GC_')[1][:7]
    for vel_dir in dir_list_ADCP:
        # Skip ADCP directories whose path does not contain this tag.
        if year not in vel_dir:
            continue
        print('Merging CTD and ADCP data for year: ', year)
        merged_ds = merge_datasets.merge_datasets(cal_dir, vel_dir)
        file_name = 'WBTS_' + year + '_CTD_LADCP.nc'
        merged_path = os.path.join(output_dir, 'Merged', file_name)
        # Remove a stale file first and report that it was replaced.
        if os.path.exists(merged_path):
            os.remove(merged_path)
            print(f"Deleted existing file: {file_name}")
        merged_ds.to_netcdf(merged_path)