## Join the updated CDP netCDF with the updated CIP netCDF

The two netCDF files are joined on the time steps of the CIP file (which is stored at the chosen sample rate), and a new netCDF file is created.

In [1]:
import xarray as xr
import os

In [8]:
# Function for joining the CDP and CIP datasets.
# TODO: move to a separate file (Preprocessing)
def join_cdp_cip_ds(flight,sample_rate, cip_path, cdp_path):
    """Join the CIP and CDP netCDFs of one flight on the CIP time steps.

    The CDP file ``CDP_updated_{flight}.nc`` and the CIP file
    ``CIP_update_{sample_rate}s_{flight}.nc`` are opened, merged with a left
    join on the CIP ``time`` coordinate, tagged with provenance attributes and
    returned as one in-memory dataset. Nothing is written to disk here.

    Parameters
    ----------
    flight
        A string representing the flightid (islasid) of the files
    sample_rate
        The sample rate (in seconds) of the CIP file to join on; only used to
        build the CIP file name
    cip_path
        The path to where the CIP-netCDFs are located (the file name is
        appended directly, so it must end with a path separator)
    cdp_path
        The path to where the CDP-netCDFs are located (the file name is
        appended directly, so it must end with a path separator)

    Returns
    -------
    microphy_ds
        An xarray Dataset with updated attributes of the joined CIP and CDP
        netCDF, joined on sample time from the CIP netCDF file. The data is
        loaded into memory, so it stays usable after the input files have
        been closed.

    Raises
    ------
    FileNotFoundError
        If no CDP or no CIP file matches the expected file name.
    KeyError
        If the CIP file lacks the global attributes ``safireid``, ``islasid``
        or ``RATE``, the CDP file lacks ``Sample Area (mm^2)``, or the merged
        dataset has no ``TAS`` variable.
    """

    # Import packages
    import glob
    import os
    from datetime import date

    import numpy as np
    import xarray as xr

    # Import local functions
    import functions

    # locate the input files
    # TODO: handle more than one file for flight
    cdp_pattern = cdp_path + f'CDP_updated_{flight}.nc'
    cip_pattern = cip_path + f'CIP_update_{sample_rate}s_{flight}.nc'

    cdp_file = glob.glob(cdp_pattern)
    cip_file = glob.glob(cip_pattern)

    # fail with a clear message instead of an IndexError on cdp_file[0]
    if not cdp_file:
        raise FileNotFoundError(f'No CDP file found matching: {cdp_pattern}')
    if not cip_file:
        raise FileNotFoundError(f'No CIP file found matching: {cip_pattern}')

    print(f'Joining: {cdp_file[0]} and {cip_file[0]}')

    # file names without directories, stored as provenance in the attributes
    cdp_name = os.path.basename(cdp_file[0])
    cip_name = os.path.basename(cip_file[0])

    # The context managers hold on to the datasets that actually own the file
    # handles, so both files are closed on exit, also if an error occurs.
    with xr.open_dataset(cdp_file[0]) as cdp_raw, xr.open_dataset(cip_file[0]) as cip_raw:

        # Remove milliseconds to ease joining
        # NOTE(review): relies on functions.floor_to_sec_res returning the
        # dataset with its 'time' values floored to whole seconds -- confirm.
        cdp_ds = functions.floor_to_sec_res(cdp_raw, 'time')
        cip_ds = functions.floor_to_sec_res(cip_raw, 'time')

        # drop duplicate time steps (keeps the first occurrence of each time)
        index = np.unique(cdp_ds.time, return_index = True)[1]
        cdp_ds = cdp_ds.isel(time=index)

        # merge the two xarrays on the times from cip.
        # join='left' keeps the index of the first dataset (CIP);
        # compat='override' takes a variable from CIP without comparison when
        # both files contain it.
        microphy_ds = xr.merge([cip_ds, cdp_ds],compat='override',join='left')

        # update attrs for variables with parent file.
        # CIP is tagged last, so variables present in both files end up
        # labelled as CIP, matching the data kept by the merge above.
        for var_name in cdp_ds.data_vars:
            microphy_ds[var_name].attrs.update({"parent file":cdp_name})
            microphy_ds[var_name].attrs.update({"instrument":"CDP"})
        for var_name in cip_ds.data_vars:
            microphy_ds[var_name].attrs.update({"parent file":cip_name})
            microphy_ds[var_name].attrs.update({"instrument":"CIP"})

        # remove dataset attributes (deep=False: variable attributes are kept)
        microphy_ds = microphy_ds.drop_attrs(deep = False)

        # set new dataset attributes
        microphy_ds.attrs['safireid']=cip_ds.attrs['safireid']
        microphy_ds.attrs['islasid']=cip_ds.attrs['islasid'] #NB! duplicated! (also set as coordinate below)
        microphy_ds.attrs['parent files']=[cip_name,cdp_name]
        microphy_ds.attrs['date_modified'] = date.today().strftime("%Y-%m-%d")
        microphy_ds.attrs['Joint sample rate (sek)'] = cip_ds.attrs['RATE'] # Todo make check to use the largest value (should always be CIP though)

        # calculate new SV for CDP (that covers the sample rate used for CIP)
        # Sample volume: sample area SA * TAS * sample time
        sa = float(cdp_ds.attrs['Sample Area (mm^2)'])/(1000*1000) # adjust from mm² to m²
        st = float(microphy_ds.attrs['Joint sample rate (sek)']) # sample time for the joint dataset
        microphy_ds['SV_CDP'] = sa * microphy_ds['TAS'] * st
        microphy_ds['SV_CDP'].attrs['name'] = 'Sample volume, joint sample rate'
        microphy_ds['SV_CDP'].attrs['unit'] = 'm^3'
        microphy_ds['SV_CDP'].attrs['description'] = 'Sample volume calculated with joint sample rate (sample area SA * TAS redused * sample time)'
        microphy_ds['SV_CDP'].attrs['parent variables'] = ['TAS']
        microphy_ds['SV_CDP'].attrs['parent attributes'] =  ['Joint sample rate (sek)', 'CDP sample area']
        microphy_ds['SV_CDP'].attrs['CDP sample area'] = cdp_ds.attrs['Sample Area (mm^2)']
        microphy_ds['SV_CDP'].attrs['CDP sample area unit'] = 'mm²'

        # set the islas id as a (scalar) coordinate
        islasid = cip_ds.attrs['islasid']
        microphy_ds = microphy_ds.assign_coords({'islasid':islasid})

        # open_dataset reads lazily: pull all values into memory while the
        # files are still open, otherwise the result is unreadable afterwards
        microphy_ds = microphy_ds.load()

    # both netcdf files are closed here by the context managers
    print('...done')

    return microphy_ds

In [9]:
# --- File paths

# Root folder of the project; every data folder below is located under it.
main_path = '/home/ninalar/Documents/MC2'

# Input folders with the processed CDP and CIP netCDFs, and the output folder
# for the joined files. The trailing '/' is required because file names are
# appended to these strings directly.
cdp_path = f'{main_path}/Results_2022-islas/Processed/CDP_processed/'
cip_path = f'{main_path}/Results_2022-islas/Processed/CIP_processed/'
savepath = f'{main_path}/Results_2022-islas/Processed/ISLAS_processed/'

# Sample rate to join on
sample_rate = 5

In [10]:
# Flight ids IS22-02 up to and including IS22-11
flights = [f'IS22-{number:02d}' for number in range(2, 12)]

for flight in flights:
    print(flight)

    # Join the CDP and the CIP file for this flight
    microphy_ds = join_cdp_cip_ds(flight, sample_rate, cip_path, cdp_path)
    print(sample_rate)

    # Write the joined dataset to one netCDF file per flight, overwriting any
    # existing file (mode='w') and compressing the time variable with zlib
    filename = f'microphy_{sample_rate}s_{flight}.nc'
    file_save_path = savepath + filename
    microphy_ds.to_netcdf(
        path=file_save_path,
        mode='w',
        encoding={'time': {'zlib': True}},
        format='NETCDF4',
        engine='netcdf4',
    )

IS22-02
Joining: /home/ninalar/Documents/MC2/Results_2022-islas/Processed/CDP_processed/CDP_updated_IS22-02.nc and /home/ninalar/Documents/MC2/Results_2022-islas/Processed/CIP_processed/CIP_update_5s_IS22-02.nc
...done
5
IS22-03
Joining: /home/ninalar/Documents/MC2/Results_2022-islas/Processed/CDP_processed/CDP_updated_IS22-03.nc and /home/ninalar/Documents/MC2/Results_2022-islas/Processed/CIP_processed/CIP_update_5s_IS22-03.nc
...done
5
IS22-04
Joining: /home/ninalar/Documents/MC2/Results_2022-islas/Processed/CDP_processed/CDP_updated_IS22-04.nc and /home/ninalar/Documents/MC2/Results_2022-islas/Processed/CIP_processed/CIP_update_5s_IS22-04.nc
...done
5
IS22-05
Joining: /home/ninalar/Documents/MC2/Results_2022-islas/Processed/CDP_processed/CDP_updated_IS22-05.nc and /home/ninalar/Documents/MC2/Results_2022-islas/Processed/CIP_processed/CIP_update_5s_IS22-05.nc
...done
5
IS22-06
Joining: /home/ninalar/Documents/MC2/Results_2022-islas/Processed/CDP_processed/CDP_updated_IS22-06.nc and /

In [5]:
# Read one of the written files back in as a check.
# file_save_path still holds the value from the last iteration of the write
# loop, so this opens the file of the last flight processed.
test = xr.open_dataset(file_save_path)
test