# Create netcdf from cdp data

Status of the 'Turn into script' task:

- read_chunky_csv OK
- resolve_date OK
- imports OK
- read_nav removed
- cdp_df_to_netcdf OK
- separate paths and struct information for reuse OK


TODO: 
- update metadata
- pull the metadata definitions outside of this program

In [1]:
# imports from packages
import pandas as pd
import xarray as xr
import numpy as np
import warnings
import glob # allows for wildcards in filemanagement
import os # get a list of all directories/files
import re
from datetime import datetime

# imports from files
from utils.flight_utils import get_safire_flightid
from utils.nc_utils import read_chunky_csv, binned_cdp_to_xds, cdp_df_to_netcdf, cdp_to_df, add_cdp_df_to_xds
from utils.func_nc import resolve_date, floor_to_sec_res
from utils.cdp_utils import create_derived_vars

# Suppress two known, harmless UserWarnings:
#  1. the one connected to timezone-less np.datetime64 values
#  2. the one connected to boolean Series indexing (creating dataframe with null values)
for _msg in (
    "no explicit representation of timezones available for np.datetime64",
    "Boolean Series key will be reindexed to match DataFrame index.",
):
    warnings.filterwarnings("ignore", message=_msg)


In [2]:
# --- Input/output locations and file-name patterns used when reading data

# Root of the campaign data on local disk (the nav data is found below this)
main_path = '/home/ninalar/Documents/MC2/2022-islas/'
# Sub-path to the pads output (CIP and CDP data)
pads_path = '/microphy/pads/'
cdp_main_path = f'{main_path}{pads_path}'

# Where to store the netcdfs. Currently a test folder (remove when checked);
# the regular destination is
# '/home/ninalar/Documents/MC2/Results_2022-islas/Processed/CDP_processed/'
path_store = '/home/ninalar/Documents/MC2/Results_2022-islas/Processed/CDP_processed_test/'

# Glob patterns describing the file names of each data source (for access)
file_struct = dict(
    cip='/*CIP.nc',
    cdp='/02CDP*.csv',
    nav_tdyn='/*_TDYN_*.nc',
    nav_nav='/*_NAV_*.nc',
    flight_rep='/*MAIN*.csv',  # flight report file name
)

# Flight list and the safireid -> islasid lookup used by the later cells
flights, safire_to_islas = get_safire_flightid(main_path)

In [3]:
# Load everything needed for one flight:
#   1. the nav (TDYN) dataset, reduced to TAS1 plus coordinates and a few global attributes
#   2. the CDP csv files, split into binned and non-binned columns
#   3. the flight report, from which takeoff and landing rows are extracted
# - for a safireid with more than one islasid, the data is separated in a later cell

flight = 'as220008'

# -- Get NAV files
# get the nav file from the given flight
nav_file = glob.glob(main_path + flight + file_struct['nav_tdyn'])
if not nav_file:
    # fail with a readable message instead of an IndexError on nav_file[0]
    raise FileNotFoundError(
        f"No nav file matching {main_path + flight + file_struct['nav_tdyn']!r}"
    )

nav_xds = xr.open_dataset(nav_file[0]) # the nav file xarray (first match is used)

# NAV preparations: drop duplicate time steps
# (np.unique returns the index of the first occurrence of each unique time)
index = np.unique(nav_xds.time, return_index = True)[1]
nav_xds = nav_xds.isel(time=index)
nav_xds = floor_to_sec_res(nav_xds,'time') # floor the times to sec for easier joining

# drop variables from nav
nav_xds = nav_xds[['TAS1']] # only keeps TAS

# update attributes of data variables and coordinates with original file id
source_comment = f'source id: {nav_xds.attrs["id"]}'

for var_name in nav_xds.data_vars:
    nav_xds[var_name].attrs['comment'] = source_comment # update data variables

for coord_name in nav_xds.coords:
    nav_xds[coord_name].attrs['comment'] = source_comment # update coordinates

# filter out relevant attributes for resulting dataset
attrs_relevant = ['flight_id','project','platform','source','product_version','Conventions']

filtered_attrs = {k: v for k, v in nav_xds.attrs.items() if k in attrs_relevant} # new dictionary of relevant attributes
nav_xds.attrs = filtered_attrs # set new attrs

# ---- Get CDP data
# path to CDP data
path_in = main_path + flight + pads_path

# Get a list of all the CDP files in the directory (also look in subdirectories)
filelist = glob.glob(path_in + '**' + file_struct['cdp'], recursive=True)

cdp_df, filenames, meta_df, chan_list, pads_df, bins_df = cdp_to_df(filelist, flight)

# separate out the CDP_Bin columns and the time for separate handling
is_bin_col = cdp_df.columns.str.startswith('CDP Bin')
cdp_bin_df = cdp_df.loc[:, is_bin_col | (cdp_df.columns == 'time')]

# separate out the other columns
cdp_df = cdp_df.loc[:, ~is_bin_col]

# --- Get the flight report for the given flight
# Double quotes inside the f-string: reusing the outer quote character is a
# SyntaxError on Python versions before 3.12.
pattern = os.path.join(main_path, f'{flight}/CRvol{file_struct["flight_rep"]}')
file = glob.glob(pattern)
if not file:
    # fail with a readable message instead of an IndexError on file[0]
    raise FileNotFoundError(f"No flight report matching {pattern!r}")

# Find landing and takeoff time. Will in most instances result in
# single row dataframes: takeoffs, landings
fr_list = read_chunky_csv(file[0])                                  # get information from first file as chunks
report_chunk = pd.DataFrame(fr_list[1])                             # second chunk; its first row is used as the headers
headers = report_chunk.iloc[0]                                      # Extract headers
report_df = pd.DataFrame(report_chunk.values[1:], columns=headers)  # Create df from the remaining rows
landings = report_df[report_df['title']=='landing']                 # extract landing(s)
takeoffs = report_df[report_df['title']=='takeoff']                 # extract takeoff(s)


Reading: /home/ninalar/Documents/MC2/2022-islas/as220008/microphy/pads/20220324080247/02CDP 20220324080247.csv
Reading: /home/ninalar/Documents/MC2/2022-islas/as220008/microphy/pads/20220324113123/02CDP 20220324113123.csv
Reading: /home/ninalar/Documents/MC2/2022-islas/as220008/microphy/pads/20220324130909/02CDP 20220324130909.csv


In [4]:
# Build one CDP netcdf file per islasid belonging to the current safireid.
#
# safire_to_islas[flight] is either a list of islasids or a single islasid.
# - list: one takeoff/landing pair is read per islasid, and the nav and CDP
#   data are trimmed to that takeoff-landing window before being combined.
# - single value: the complete nav and CDP data are combined without trimming.
#   NOTE(review): the single-id case is not trimmed to takeoff-landing,
#   unlike the list case - confirm this asymmetry is intended.

REPORT_TIME_FMT = "%Y-%m-%dT%H:%M:%S.%fZ"  # time format of the flight report 'date' field

islas_ids = safire_to_islas[flight]
split_by_flight = isinstance(islas_ids, list)
if not split_by_flight:
    islas_ids = [islas_ids]

# Row i of takeoffs/landings is used for islasid i, so both need at least
# as many rows as there are islasids.
# NOTE(review): assumes the report rows are in the same order as the islasids - confirm.
if len(takeoffs) < len(islas_ids) or len(landings) < len(islas_ids):
    raise ValueError(
        f"Flight {flight}: found {len(takeoffs)} takeoff(s) and {len(landings)} landing(s) "
        f"in the flight report, but {len(islas_ids)} islasid(s) are expected"
    )

# make sure the output folder exists before writing
os.makedirs(path_store, exist_ok=True)

for i, islasid in enumerate(islas_ids):

    # Get takeoff and landing
    landing = datetime.strptime(landings.iloc[i].date, REPORT_TIME_FMT)
    takeoff = datetime.strptime(takeoffs.iloc[i].date, REPORT_TIME_FMT)

    if split_by_flight:
        # Get nav_xds and cdp_dfs with only the data between takeoff and landing
        nav_filtered_xds = nav_xds.sel(time=slice(takeoff,landing))
        cdp_filtered_df = cdp_df[(cdp_df.index >= takeoff) & (cdp_df.index <= landing)]
        cdp_filtered_bin_df = cdp_bin_df[(cdp_bin_df.index >= takeoff) & (cdp_bin_df.index <= landing)]
    else:
        # only one islasid for the safireid: use the data as is
        nav_filtered_xds = nav_xds
        cdp_filtered_df = cdp_df
        cdp_filtered_bin_df = cdp_bin_df

    bins_xds = binned_cdp_to_xds(bins_df, cdp_filtered_bin_df) # turn the binned information into an xarray

    # add cdp information to nav_xds to create cdp_xds
    cdp_xds = add_cdp_df_to_xds(nav_filtered_xds, cdp_filtered_df, meta_df, pads_df) # variables with dimension 'time'
    full_cdp_xds = xr.merge([cdp_xds,bins_xds])

    # update with derived parameters
    full_cdp_xds = create_derived_vars(full_cdp_xds)

    # update attributes with takeoff, landing and islasid
    full_cdp_xds.attrs['takeoff'] = takeoff.strftime(REPORT_TIME_FMT)
    full_cdp_xds.attrs['landing'] = landing.strftime(REPORT_TIME_FMT)
    full_cdp_xds.attrs['islasid'] = islasid

    # save as netcdf
    filename = path_store + f'CDP_updated_{full_cdp_xds.attrs["islasid"]}.nc'
    print(filename)
    full_cdp_xds.to_netcdf(filename,'w')



/home/ninalar/Documents/MC2/Results_2022-islas/Processed/CDP_processed_test/CDP_updated_IS22-03.nc
/home/ninalar/Documents/MC2/Results_2022-islas/Processed/CDP_processed_test/CDP_updated_IS22-04.nc


In [6]:
# Re-open one of the written netcdf files to check how it looks
filepath = path_store + 'CDP_updated_IS22-04.nc'
cdp_ds = xr.open_dataset(filepath)

In [7]:
# Display the dataset opened from the written netcdf file
cdp_ds