# This notebook loads all data files from the Western Boundary Time Series (WBTS) and saves them to a new directory

## Import all necessary packages

In [1]:
import numpy as np
import pandas as pd
import os
import xarray as xr
import datetime
import matplotlib.pyplot as plt
import yaml

In [2]:
### Import the functions to load the calibration (CTD) files and the velocity (ADCP) files, and to merge the resulting datasets
from load_data import load_cal_files, load_vel_files, merge_datasets
from load_data import missing_datetime_2005_05 as mdt

## Print the basepath and safepath that are defined in the configuration file. The basepath should contain the WBTS data, and the safepath is the directory where the created files will be stored

In [3]:
### Import basepath and safepath from load_data/config.yaml
# basepath is handed to the directory-listing helpers of merge_datasets;
# safepath is the output directory that receives the created files.
# The encoding is pinned so that parsing does not depend on the platform
# locale (e.g. for paths containing non-ASCII characters).
with open('load_data/config.yaml', 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)
basepath = config['basepath']
safepath = config['safepath']
print('Basepath: ', basepath, '\nSafepath: ', safepath)

Basepath:  /Users/tillmoritz/Desktop/Work/WBTSData 
Safepath:  /Users/tillmoritz/Desktop/Work/WBTSData/Created_files


### Define all directories of the ADCP (velocity) and CTD (calibration) data

In [4]:
# Ask the merge_datasets helpers for the input directories of both
# instruments, derived from basepath. Each entry is later used as a string
# (it is split on 'GC_' to build the output file name).
dir_list_ADCP = merge_datasets.dir_list_ADCP(basepath)  # fed to load_vel_files.create_Dataset
dir_list_CTD = merge_datasets.dir_list_CTD(basepath)  # fed to load_cal_files.create_Dataset


### Make directories within the safepath for the ADCP, CTD and merged datasets

In [5]:
# One output sub-folder per product; exist_ok=True lets the cell be re-run
# without failing on folders that already exist.
for subfolder in ('CTD', 'ADCP', 'Merged'):
    os.makedirs(os.path.join(safepath, subfolder), exist_ok=True)

## Create the datasets for each year and save each one in its dedicated file in safepath

In [6]:
### Save the CTD data of each input directory as its own NetCDF file
for cal_dir in dir_list_CTD:
    ds = load_cal_files.create_Dataset(cal_dir)
    # The seven characters following 'GC_' in the directory name form the
    # tag (e.g. '2001_04') that identifies the output file.
    file_name = 'WBTS_' + cal_dir.split('GC_')[1][:7] + '_CTD.nc'
    # Build the target path once, with os.path.join as used for makedirs.
    file_path = os.path.join(safepath, 'CTD', file_name)
    # Drop the output of an earlier run before writing. Trying the removal
    # directly avoids the separate exists() lookup.
    try:
        os.remove(file_path)
    except FileNotFoundError:
        pass  # nothing to replace on a first run
    ds.to_netcdf(file_path)
    print('Saved: ', file_name)

Saved:  WBTS_2001_04_CTD.nc
Saved:  WBTS_2002_06_CTD.nc
Saved:  WBTS_2003_02_CTD.nc
Saved:  WBTS_2004_09_CTD.nc
Saved:  WBTS_2005_05_CTD.nc
Saved:  WBTS_2005_09_CTD.nc
Saved:  WBTS_2006_03_CTD.nc
Saved:  WBTS_2006_09_CTD.nc
Saved:  WBTS_2007_03_CTD.nc
Saved:  WBTS_2007_09_CTD.nc
Saved:  WBTS_2008_04_CTD.nc
Saved:  WBTS_2008_09_CTD.nc
Saved:  WBTS_2009_04_CTD.nc
Saved:  WBTS_2009_11_CTD.nc
Saved:  WBTS_2010_03_CTD.nc
Saved:  WBTS_2011_04_CTD.nc
Saved:  WBTS_2012_02_CTD.nc
Saved:  WBTS_2012_09_CTD.nc
Saved:  WBTS_2013_02_CTD.nc
Saved:  WBTS_2014_03_CTD.nc
Saved:  WBTS_2015_02_CTD.nc
Saved:  WBTS_2015_10_CTD.nc
Saved:  WBTS_2016_02_CTD.nc
Saved:  WBTS_2017_05_CTD.nc
Saved:  WBTS_2018_02_CTD.nc
Saved:  WBTS_2018_11_CTD.nc
Saved:  WBTS_2019_12_CTD.nc
Saved:  WBTS_2021_02_CTD.nc
Saved:  WBTS_2023_02_CTD.nc


In [7]:
### Save the ADCP data of each input directory as its own NetCDF file
for vel_dir in dir_list_ADCP:
    ds = load_vel_files.create_Dataset(vel_dir)
    # The seven characters following 'GC_' in the directory name form the
    # tag (e.g. '2001_04') that identifies the output file.
    file_name = 'WBTS_' + vel_dir.split('GC_')[1][:7] + '_ADCP.nc'
    # Build the target path once, with os.path.join as used for makedirs.
    file_path = os.path.join(safepath, 'ADCP', file_name)
    # Drop the output of an earlier run before writing. Trying the removal
    # directly avoids the separate exists() lookup.
    try:
        os.remove(file_path)
    except FileNotFoundError:
        pass  # nothing to replace on a first run
    ds.to_netcdf(file_path)
    print('Saved: ', file_name)

Saved:  WBTS_2001_04_ADCP.nc
Saved:  WBTS_2002_06_ADCP.nc
Saved:  WBTS_2003_02_ADCP.nc
Saved:  WBTS_2004_09_ADCP.nc
Saved:  WBTS_2005_05_ADCP.nc
Saved:  WBTS_2005_09_ADCP.nc
Saved:  WBTS_2006_03_ADCP.nc
Saved:  WBTS_2006_09_ADCP.nc
Saved:  WBTS_2007_03_ADCP.nc
Saved:  WBTS_2007_09_ADCP.nc
Saved:  WBTS_2008_04_ADCP.nc
Saved:  WBTS_2008_09_ADCP.nc
Saved:  WBTS_2009_04_ADCP.nc
Saved:  WBTS_2009_11_ADCP.nc
Saved:  WBTS_2010_03_ADCP.nc
Saved:  WBTS_2011_04_ADCP.nc
Saved:  WBTS_2012_02_ADCP.nc
Saved:  WBTS_2012_09_ADCP.nc
Saved:  WBTS_2013_02_ADCP.nc
Saved:  WBTS_2014_03_ADCP.nc
Saved:  WBTS_2015_02_ADCP.nc
Saved:  WBTS_2015_10_ADCP.nc
Saved:  WBTS_2016_02_ADCP.nc
Saved:  WBTS_2017_05_ADCP.nc
Saved:  WBTS_2018_02_ADCP.nc
Saved:  WBTS_2018_11_ADCP.nc


### merge and save the datasets of ADCP and CTD for each year

In [8]:
### Load the CTD and ADCP datasets for years having both data and merge them
for cal_dir in dir_list_CTD:
    # Tag taken from the seven characters after 'GC_', e.g. '2001_04'.
    year = cal_dir.split('GC_')[1][:7]
    for vel_dir in dir_list_ADCP:
        # A CTD directory is paired with every ADCP directory whose path
        # contains the same tag; all others are skipped.
        if year not in vel_dir:
            continue
        print('Merging CTD and ADCP data for year: ', year)
        merged_ds = merge_datasets.merge_datasets(cal_dir, vel_dir)
        file_name = 'WBTS_' + year + '_CTD_LADCP.nc'
        file_path = os.path.join(safepath, 'Merged', file_name)
        # Remove the output of an earlier run before writing, as the CTD
        # and ADCP export loops do for their files.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass  # nothing to replace on a first run
        merged_ds.to_netcdf(file_path)

Merging CTD and ADCP data for year:  2001_04
Merging CTD and ADCP data for year:  2002_06
Merging CTD and ADCP data for year:  2003_02
Merging CTD and ADCP data for year:  2004_09
Merging CTD and ADCP data for year:  2005_05
Merging CTD and ADCP data for year:  2005_09
Merging CTD and ADCP data for year:  2006_03
Merging CTD and ADCP data for year:  2006_09
Merging CTD and ADCP data for year:  2007_03
Merging CTD and ADCP data for year:  2007_09
Merging CTD and ADCP data for year:  2008_04
Merging CTD and ADCP data for year:  2008_09
Merging CTD and ADCP data for year:  2009_04
Merging CTD and ADCP data for year:  2009_11
Merging CTD and ADCP data for year:  2010_03
Merging CTD and ADCP data for year:  2011_04
Merging CTD and ADCP data for year:  2012_02
Merging CTD and ADCP data for year:  2012_09
Merging CTD and ADCP data for year:  2013_02
Merging CTD and ADCP data for year:  2014_03
Merging CTD and ADCP data for year:  2015_02
Merging CTD and ADCP data for year:  2015_10
Merging CT