In [1]:
import xarray as xr
import pandas as pd
from datetime import date
import read_flight_report as rfr
import functions

In [2]:
# --- Data input
flight = 'IS22-11'

cdp_file = f'../Results_2022-islas/Processed/CDP_processed/CDP_updated_{flight}.nc'
#cip_file = f'../Results_2022-islas/Processed/CIP_processed/CIP_update_5s_{flight}.nc'
cip_file = f'../Results_2022-islas/temp/allin_off/Processed/CIP_update_notallin_{flight}.nc'
cip_file = f'../Results_2022-islas/temp/with_MET/{flight}.nc'

#savepath = f'../Results_2022-islas/Processed/ISLAS_processed/microphy_5s_{flight}.nc'
savepath = f'../Results_2022-islas/temp/with_MET/ISLAS_processed/microphy_with_met_{flight}.nc'

cdp_ds = xr.open_dataset(cdp_file)
cip_ds = xr.open_dataset(cip_file)

In [4]:
cdp_ds

In [5]:
#  Remove milliseconds to ease joining
cdp_ds = functions.floor_to_sec_res(cdp_ds, 'time')
cip_ds = functions.floor_to_sec_res(cip_ds, 'time')
# remove any unwanted duplicate values from cdp
cdp_ds = cdp_ds.sel(time=~cdp_ds.get_index("time").duplicated())

In [6]:
# check for duplicates
#cdp_ds.sel(time=cdp_ds.get_index('time').duplicated())

In [7]:

# merge the two xarrays on the times from cip.
microphy_ds = xr.merge([cip_ds, cdp_ds],compat='override',join='left')

# update attrs for variables with parent file
for var_name in cdp_ds.data_vars:
    microphy_ds[var_name].attrs.update({"parent file":cdp_file.split('/')[-1]})
for var_name in cip_ds.data_vars:
    microphy_ds[var_name].attrs.update({"parent file":cip_file.split('/')[-1]})

# check if SV (sample volume for CIP exists, drop if true) KEEP FOR NOW!!
#if 'SV' in microphy_ds:
 #   microphy_ds = microphy_ds.drop_vars('SV')

In [8]:
microphy_ds

In [180]:
# remove dataset attributes
microphy_ds = microphy_ds.drop_attrs(deep = False)

# set new dataset attributes
microphy_ds.attrs['safireid']=cip_ds.attrs['safireid']
microphy_ds.attrs['islasid']=cip_ds.attrs['islasid']
microphy_ds.attrs['parent files']=[cip_file.split('/')[-1],cdp_file.split('/')[-1]]
microphy_ds.attrs['date_modified'] = date.today().strftime("%Y-%m-%d")
microphy_ds.attrs['Joint dataset sample rate'] = cip_ds.attrs['RATE'] # Todo make check to use the largest value (should always be CIP though)
microphy_ds.attrs['CDP sample area'] = cdp_ds.attrs['Sample Area (mm^2)']
microphy_ds.attrs['CDP sample area unit'] = 'mm²'

In [181]:
# calculate new SV for CDP (that covers the sample rate used for CIP)
# Sample volume: sample area SA * TAS redused * sample time
sa = float(microphy_ds.attrs['CDP sample area'])/(1000*1000) # adjust from mm² to m²
st = float(microphy_ds.attrs['Joint dataset sample rate']) # sample time for the joint dataset
microphy_ds['SV_CDP'] = sa * microphy_ds['TAS reduce'] * st

In [182]:



# calculate Total Water content
microphy_ds['TWC'] = microphy_ds['LWC corr']+microphy_ds['IWC100']
microphy_ds['TWC'].attrs['longname']='Total Water Content'
microphy_ds['TWC'].attrs['unit']=microphy_ds['LWC corr'].attrs['unit']
microphy_ds['TWC'].attrs['description']='TWC calculated from CIP IWC and CDP LWC, assuming all WC from CIP is ice and all WC from CDP is water'
microphy_ds['TWC'].attrs['calculated from']=['LWC corr','IWC100']



# Calculate supercooled liquid fraction for in-cloud values
microphy_ds.attrs['incloud_thres']= 0.01
import numpy as np
microphy_ds['SLF_all'] = microphy_ds['LWC corr']/microphy_ds['TWC']*100
microphy_ds['SLF_all'].attrs['longname']='Supercooled liquid fraction, all points'
microphy_ds['SLF_all'].attrs['unit']='percent'
microphy_ds['SLF_all'].attrs['description']='Supercooled liquid fraction for all observation points, calculated from the Total water content and the Liquid water content'
microphy_ds['SLF_all'].attrs['calculated from']=['LWC corr', 'TWC']

microphy_ds['SLF'] = xr.where(microphy_ds['LWC corr']>microphy_ds.attrs['incloud_thres'],microphy_ds['SLF_all'],np.nan)
microphy_ds['SLF'].attrs['longname']='Supercooled liquid fraction, incloud LWC threshold'
microphy_ds['SLF'].attrs['unit']='percent'
microphy_ds['SLF'].attrs['description']='Supercooled liquid fraction for points where LWC above threshold, calculated from the Total water content and the Liquid water content'
microphy_ds['SLF'].attrs['calculated from']=['LWC corr', 'SLF_all', 'incloud_thres']

microphy_ds['SLF_twc'] = xr.where(microphy_ds['TWC']>microphy_ds.attrs['incloud_thres'],microphy_ds['SLF_all'],np.nan)
microphy_ds['SLF_twc'].attrs['longname']='Supercooled liquid fraction, incloud TWC threshold'
microphy_ds['SLF_twc'].attrs['unit']='percent'
microphy_ds['SLF_twc'].attrs['description']='Supercooled liquid fraction for points where TWC above threshold, calculated from the Total water content and the Liquid water content'
microphy_ds['SLF_twc'].attrs['calculated from']=['SLF_all', 'TWC', 'incloud_thres']

In [183]:
# setting surface conditions

from global_land_mask import globe
sea_ice_file = ''

surface_cond = [] # to store the surface information
#find surface condition at each point in time
for time_index in range(microphy_ds.sizes['time']):
    # Extract lat/lon values at current time
    current_lat = microphy_ds['lat'].isel(time=time_index).values
    current_lon = microphy_ds['lon'].isel(time=time_index).values
    # Determine land/sea condition for current time
    condition = globe.is_land(current_lat, current_lon)
    current_surface_cond = xr.where(condition, 'land', 'sea')
    
    surface_cond.append(current_surface_cond)

# Convert the list to a numpy array with time as the first dimension
surface_cond = np.array(surface_cond)

# Add the data to the dataset with appropriate dimensions
microphy_ds['surface_cond'] = (('time'), surface_cond)

# determine lan/sea distinction from global_land_mask
#condition = globe.is_land(microphy_ds['lat'].values[:, np.newaxis], microphy_ds['lon'].values)
#surface_cond = xr.where(condition, 'land', 'sea')
#microphy_ds['surface_cond']= (('lat', 'lon'), surface_cond)

# set attributes for the surface conditions
microphy_ds['surface_cond'].attrs['longname']='Surface conditions'
microphy_ds['surface_cond'].attrs['standard_name']='area_type'
microphy_ds['surface_cond'].attrs['description']= 'Nature of surface below measurements. Land and sea is determined by global_land_mask package, sea ice from satellite observations'
microphy_ds['surface_cond'].attrs['sea_ice_file']= sea_ice_file



In [184]:
# Get sea ice concentation
#date = str(microphy_ds.time[0].values.astype('datetime64[D]')).replace('-', '')
#date

#sic_ds = xr.open_dataset('sea_ice_satellite/asi-n6250-' + str(date) + '-5.4_regridded.nc')
#sic_ds.close()

# rename data variable and update attributes
#sic_ds['sic'] = sic_ds['__xarray_dataarray_variable__'].assign_attrs(units="Percent", description="Sea Ice Concentration")
#sic_ds = sic_ds.drop_vars(['__xarray_dataarray_variable__'])

#test = sic_ds['sic'].sel(lat=('lat', microphy_ds.lat),lon=('lon',microphy_ds.lon), method='nearest')
#test
#microphy_ds['sic']=sic_ds.sel(lat=microphy_ds.lat, lon=microphy_ds.lon, method='nearest')

In [185]:
microphy_ds['Size'].shape

(30,)

In [186]:
# Set selection variables

In [187]:
# --- in cloud
# in-cloud set based on LWC > threshold
microphy_ds['in_cloud_lwc']=microphy_ds['LWC corr']>microphy_ds.attrs['incloud_thres']
microphy_ds['in_cloud_lwc'].attrs['longname']='In cloud determination, LWC based'
microphy_ds['in_cloud_lwc'].attrs['description']= 'Boolean to determine if in cloud, based on LWC and Korolev et. al. 2022'
microphy_ds['in_cloud_lwc'].attrs['calculated from']=['LWC corr', 'incloud_thres']

# in-cloud set based on TWC > threshold
microphy_ds['in_cloud_twc']=microphy_ds['TWC']>microphy_ds.attrs['incloud_thres']
microphy_ds['in_cloud_twc'].attrs['longname']='In cloud determination, TWC based'
microphy_ds['in_cloud_twc'].attrs['description']= 'Boolean to determine if in cloud, based on TWC and Korolev et. al. 2022'
microphy_ds['in_cloud_twc'].attrs['calculated from']=['TWC', 'incloud_thres']

# in-cloud set based on LWC > threshold OR IWC > threshold
microphy_ds['in_cloud'] = (microphy_ds['LWC corr']>microphy_ds.attrs['incloud_thres'])|(microphy_ds['IWC']>microphy_ds.attrs['incloud_thres'])
microphy_ds['in_cloud'].attrs['longname']='In cloud determination, LWC and IWC based'
microphy_ds['in_cloud'].attrs['description']= 'Boolean to determine if in cloud, based on LWC, IWC and Korolev et. al. 2022'
microphy_ds['in_cloud'].attrs['calculated from']=['LWC corr','IWC', 'incloud_thres']

In [188]:
# --- relevance (relevant cloud or not: mp low cloud)
ds = microphy_ds

relevance_array = np.full(ds.sizes['time'], 'No relevance', dtype=object) # initialize np-array for relevance information


# Set conditions for the relevant clouds per flight
if ds.attrs['islasid']=='IS22-02':
    print('IS22-02')
    conds = [(ds['time'] < np.datetime64('2022-03-22 11:58')),
             (ds['time']>= np.datetime64('2022-03-22 11:58')) & (ds['time'] < np.datetime64('2022-03-22 13:55')),
             (ds['time']>= np.datetime64('2022-03-22 13:55'))]

    relevance = ['Upper clouds',
                 'Lower clouds',
                 'Upper clouds']
    
elif ds.attrs['islasid']=='IS22-03':
    print('IS22-03')
    conds = [(ds['time'] < np.datetime64('2022-03-24 08:15')),
             (ds['time']>= np.datetime64('2022-03-24 08:15')) & (ds['time'] < np.datetime64('2022-03-24 10:45')),
             (ds['time']>= np.datetime64('2022-03-24 10:45')) & (ds['time'] < np.datetime64('2022-03-24 11:45')),
             (ds['time']>= np.datetime64('2022-03-24 11:45'))]
    
    relevance = ['Lower clouds, endpoints',
                 'Upper clouds',
                 'Lower clouds',
                 'Lower clouds, endpoints']
    
elif ds.attrs['islasid']=='IS22-04':
    print('IS22-04')
    conds = [(ds['time'] < np.datetime64('2022-03-24 13:29')),
             (ds['time']>= np.datetime64('2022-03-24 13:29')) & (ds['time'] < np.datetime64('2022-03-24 14:34')),
             (ds['time']>= np.datetime64('2022-03-24 14:34'))]
    
    relevance = ['Lower clouds, endpoints',
                 'Lower clouds',
                 'Upper clouds']
    
elif ds.attrs['islasid']=='IS22-05':
    print('IS22-05')
    conds = [(ds['time'] < np.datetime64('2022-03-26 08:20')),
             (ds['time']>= np.datetime64('2022-03-26 08:30')) & (ds['time'] < np.datetime64('2022-03-26 09:42')),
             (ds['time']>= np.datetime64('2022-03-26 09:42'))]
    
    relevance = ['Lower clouds, endpoints',
                 'Upper clouds',
                 'Lower clouds']
    
elif ds.attrs['islasid']=='IS22-06':
    print('IS22-06')
    conds = [(ds['time'] < np.datetime64('2022-03-26 17:00')),
             (ds['time']>= np.datetime64('2022-03-26 17:00')) & (ds['time'] < np.datetime64('2022-03-26 17:30')),
             (ds['time']>= np.datetime64('2022-03-26 17:30'))]
    
    relevance = ['Lower clouds',
                 'Upper clouds',
                 'Lower clouds, endpoints']
    
elif ds.attrs['islasid']=='IS22-07':
    print('IS22-07')
    conds = [(ds['time'] < np.datetime64('2022-03-29 09:50')),
             (ds['time']>= np.datetime64('2022-03-29 09:50')) & (ds['time'] < np.datetime64('2022-03-29 11:40')),
             (ds['time']>= np.datetime64('2022-03-29 11:40'))]

    relevance = ['Upper clouds',
                 'Lower clouds',
                 'Upper clouds']
    
elif ds.attrs['islasid']=='IS22-08':
    print('IS22-08')
    conds = [(ds['time']>= np.datetime64('2022-03-30 14:00')) & (ds['time'] < np.datetime64('2022-03-30 15:00')),
             (ds['time']>= np.datetime64('2022-03-30 15:00')) & (ds['time'] < np.datetime64('2022-03-30 16:00')),
             (ds['time']>= np.datetime64('2022-03-30 16:00'))]
    
    relevance = ['Upper clouds',
                 'Lower clouds',
                 'Upper clouds']

    
elif ds.attrs['islasid']=='IS22-09':
    print('IS22-09')
    conds = [(ds['time'] < np.datetime64('2022-03-31 09:47')),
             (ds['time']>= np.datetime64('2022-03-31 09:47')) & (ds['time'] < np.datetime64('2022-03-31 11:00')),
             (ds['time']>= np.datetime64('2022-03-31 11:00')) & (ds['time'] < np.datetime64('2022-03-31 13:10')),
             (ds['time'] > np.datetime64('2022-03-31 13:10'))]
    
    relevance = ['Lower clouds, endpoints',
                 'Upper clouds',
                 'Lower clouds',
                 'Lower clouds, endpoints']
    
elif ds.attrs['islasid']=='IS22-10':
    print('IS22-10')
    conds = [(ds['time'] < np.datetime64('2022-04-03 07:30')),
             (ds['time']>= np.datetime64('2022-04-03 07:30')) & (ds['time'] < np.datetime64('2022-04-03 09:00')),
             (ds['time']>= np.datetime64('2022-04-03 09:00')) & (ds['time'] < np.datetime64('2022-04-03 10:54')),
             (ds['time']>= np.datetime64('2022-04-03 10:54'))]
    
    relevance = ['Lower clouds, endpoints',
                 'Upper clouds',
                 'Lower clouds',
                 'Lower clouds, endpoints']
    
elif ds.attrs['islasid']=='IS22-11':
    print('IS22-11')
    
    conds = [(ds['time'] < np.datetime64('2022-04-03 12:45')),
             (ds['time']>= np.datetime64('2022-04-03 12:56')) & (ds['time'] < np.datetime64('2022-04-03 15:00')),
             (ds['time']>= np.datetime64('2022-04-03 15:00'))]
    
    relevance = ['Lower clouds, endpoints',
                 'Lower clouds',
                 'Upper clouds']

    
else:
    print('conditions not set')

# Apply conditions to fill in the relevance_array
for cond, rel in zip(conds, relevance):
    relevance_array = np.where(cond, rel, relevance_array)

# Add the 'Relevance' data variable to the dataset
microphy_ds['Relevance'] = ('time', relevance_array)


IS22-11


In [189]:
microphy_ds['Size'].shape

(30,)

In [190]:
# write to netcdf file
microphy_ds.to_netcdf(path=savepath, mode='w', encoding={'time':{'zlib':True}}, format = 'netCDF4', engine='netcdf4')
cip_ds.close
cdp_ds.close

<bound method DataWithCoords.close of <xarray.Dataset> Size: 7MB
Dimensions:                 (time: 14447, CDP_Bin: 30)
Coordinates:
    lat                     (time) float32 58kB ...
    lon                     (time) float32 58kB ...
    alt                     (time) float32 58kB ...
  * CDP_Bin                 (CDP_Bin) int64 240B 1 2 3 4 5 6 ... 26 27 28 29 30
  * time                    (time) datetime64[ns] 116kB 2022-04-03T12:35:35 ....
Data variables: (12/34)
    End Seconds             (time) float64 116kB ...
    Day of Year             (time) float64 116kB ...
    Year                    (time) float64 116kB ...
    Status                  (time) float64 116kB ...
    DOF Reject Counts       (time) float64 116kB ...
    Avg Transit Reject      (time) float64 116kB ...
    ...                      ...
    CDP Bin Particle Count  (time, CDP_Bin) float64 3MB ...
    TAS reduce              (time) float32 58kB ...
    TAS correction factor   (time) float64 116kB ...
    Number

In [191]:
# read in again one of the files:
test = xr.open_dataset(savepath)
test