## Phase determination

In [1]:
# imports from packages

#import pandas as pd
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
#from matplotlib.gridspec import GridSpec # gridspec for nested subfigures
import matplotlib.dates as mdates
import glob
import os

import data_selection
import create_vars
import numb_conc # functions relating to number concentration calculations


In [2]:
# --- Global formatting/settings/variables

sample_rate = 5 # alternatives: 1 s, 5 s, 12 s.

# formatting for only showing time on the x-axis for plots per flight
# Use by setting axs[1,0].xaxis.set_major_formatter(formatter) for each axis where only time should show
formatter = mdates.DateFormatter('%H:%M')

In [3]:
# --- Data import

main_path = '/home/ninalar/Documents/MC2/Results_2022-islas/Processed/ISLAS_processed' # regular path
file_struct = f'/microphy_{sample_rate}s*.nc' # structure of cip text-file names

# get all the .nc files in the main path
files = glob.glob(main_path + file_struct)

# Exclude the file containing 'IS22-09' (flew over land)
files_to_exclude = [f for f in files if 'IS22-09' in os.path.basename(f)]
files_to_include = [f for f in files if f not in files_to_exclude]


ds = xr.open_mfdataset(files_to_include, combine='by_coords', combine_attrs='drop_conflicts') # drop the IS22-09 flight

In [18]:
# Main selection of data
pre_ds, info = data_selection.preselect_ds(ds) # do preselection with default values (T>-30, lat>70)
incloud_ds, incloud_desc = data_selection.sel_incloud_values(pre_ds) # select incloud values with default method (LWC_TWC_th)

# Get short and long descriptions
short_desc = info[0]+'_'+ incloud_desc[0]
long_desc = info[1]+incloud_desc[1]

In [21]:
# --- Preparations for altitude based plots

# need to add altitude binning information(based on in-cloud values9
incloud_ds, altitude_bins, bin_labels = create_vars.add_alt_bins(incloud_ds) # add altitude binning information

In [24]:
incloud_ds['IWC100']

Unnamed: 0,Array,Chunk
Bytes,22.09 kiB,7.72 kiB
Shape,"(5655,)","(1976,)"
Dask graph,3 chunks in 31 graph layers,3 chunks in 31 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 22.09 kiB 7.72 kiB Shape (5655,) (1976,) Dask graph 3 chunks in 31 graph layers Data type float32 numpy.ndarray",5655  1,

Unnamed: 0,Array,Chunk
Bytes,22.09 kiB,7.72 kiB
Shape,"(5655,)","(1976,)"
Dask graph,3 chunks in 31 graph layers,3 chunks in 31 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## Calculating SLF and phase determination

In [25]:
# calculating SLF
incloud_ds['SLF'] = incloud_ds['LWC corr']/(incloud_ds['LWC corr']+incloud_ds['IWC100'])
incloud_ds['LWC corr'].values

array([1.64504930e-05, 0.00000000e+00, 0.00000000e+00, ...,
       1.09485513e-01, 1.38962296e-05, 2.52436811e-05], shape=(5655,))

In [58]:
# Phase determination: mass-based
# categorize cloud phase based on Korolev et. al. 2003

# list of conditions to select categories
SLF_conds = [
    (incloud_ds['SLF'] < 0.1),
    (incloud_ds['SLF'] > 0.9),
    (incloud_ds['SLF'] >= 0.1) & (incloud_ds['SLF'] <= 0.9)
]
# list of categories to return
phase = [
    'ice',
    'liquid',
    'mixed-phase'
]
# set phase
incloud_ds['phase_mass']=np.select(SLF_conds,phase,"ERROR")
incloud_ds['phase_mass']

# count phase
phase_count_mass = incloud_ds['phase_mass'].to_pandas().value_counts()
print(phase_count_mass)

phase_mass
ice            4262
mixed-phase     905
liquid          488
Name: count, dtype: int64


In [59]:
# Phase determination: Concentration-based
# categorize cloud phase based on Evans et. al. 2003
# CDP NT > 2 cm-3
# CIP NT > 0.1 L-1 converted to m-3: multiply by 1000 = 100 m-3

# list of conditions to select categories
SLF_conds = [
    (incloud_ds['NT100'] > 100),
    (incloud_ds['Number Conc corr'] > 2),
    (incloud_ds['NT100'] > 100) & (incloud_ds['Number Conc corr'] > 2)
]
# list of categories to return
phase = [
    'ice',
    'liquid',
    'mixed-phase'
]
# set phase
incloud_ds['phase_conc']=np.select(SLF_conds,phase,"ERROR")
incloud_ds['phase_conc']

# count phase
phase_count_mass = incloud_ds['phase_conc'].to_pandas().value_counts()
print(phase_count_mass)


phase_conc
ice       5580
liquid      75
Name: count, dtype: int64


In [None]:
# Checking the NT values
# CDP
print(f'CDP NT units: {incloud_ds['Number Conc corr'].attrs['unit']}')
print(f'max: {incloud_ds['Number Conc corr'].values.max()}')
print(f'min: {incloud_ds['Number Conc corr'].values.min()}')



max: 603.1792324653054
min: 0.0
units: #/cm^3


In [6]:
#TODO: move this to file/module. the version in create_vars does not work right now

def lat_2band_select(lat_bands, ds):
    """Function to create masks for two latitude bands defined by the latitudes in lat_bands
      Parameters
    ----------
    ds
        xarray dataset with all microphy values
    lat_bands
        array of 3 values: lat_min, lat_mid, lat_max, defining the edges of the two lat bands

    Returns
    -------
    lat_mask_north
        mask to select values in ds that is within the northern latitude band
    lat_mask_south
        mask to select values in ds that is within the southern latitude band
    count_dict
        dictionary with following information:
            - count_south: number of observations in the southern region
            - count_north: number of observations in the northern region
            - lat_bands: array of the latitudes used to define the two regions

    """

    lat_min = lat_bands[0] # unpack lat selection
    lat_mid = lat_bands[1] # unpack lat selection
    lat_max = lat_bands[2] # unpack lat selection

    # count number of values between different latitudes and add to dictionary
    lat_values = ds['lat'].values  # Access the latitude coordinate
    
    count_dict = {'count_south': ((lat_values >= lat_min) & (lat_values <= lat_mid)).sum().item(),
                  'count_north': ((lat_values >= lat_mid) & (lat_values <= lat_max)).sum().item(),
                 'lat_bands': lat_bands}

    #return count_dict
    # Compute the boolean masks for latitude conditions
    lat_mask_north = (ds['lat'].values < lat_max) & (ds['lat'].values >= lat_mid)
    lat_mask_south = (ds['lat'].values < lat_mid) & (ds['lat'].values >= lat_min)

    # Example of filtering a dataset on masks
    #ds_filtered_north = ds.where(lat_mask_north, drop=True)
    #ds_filtered_south = ds.where(lat_mask_south, drop=True)

    #print(f'number of values in 2 bands defined by {lat_bands}:')
    #print(f'count_south: {len(ds_filtered_south.lat)},count_north: {len(ds_filtered_north.lat)}')
    
    return lat_mask_north, lat_mask_south, count_dict


In [7]:
# Mapping variables and set region

# dataset coordinate limits
lon_min = incloud_ds.lon.values.min() 
lon_max = incloud_ds.lon.values.max()
lat_min = incloud_ds.lat.values.min() 
lat_max = incloud_ds.lat.values.max()
lat_mid = lat_min + ((lat_max-lat_min)/2) # separation point for the southern marine and northern marine

# --- Split for southern marine, northern marine and land datasets (in cloud)

# set lat-bands as boundaries for regions (as an array)
lat_bands = [lat_min, lat_mid, lat_max]

# Get the mask to use to select northern and southern band data
lat_mask_north, lat_mask_south, set_val_count_dict = lat_2band_select(lat_bands, ds)

# set the value of region based on the two masks
incloud_ds['region'] = xr.where(lat_mask_north, 'north', xr.where(lat_mask_south, 'south', 'none'))

# drop values where region is 'none'
no_region_mask = (incloud_ds['region']=='none').compute()
incloud_ds = incloud_ds.where(~no_region_mask, drop=True)

In [8]:
# separate Northern and Southern Marine data
# - Northern marine
north_mask = (incloud_ds['region']=='north').compute()
north_ds = incloud_ds.where(north_mask, drop = True)

# - Southern marine
south_ds = incloud_ds.where(incloud_ds['region']=='south', drop = True)
