This notebook aims at processing the data used to plot the relative humidity (RH) profiles.

In [10]:
import intake
import os
import numpy as np
import xarray as xr
import dask.array as da
import pandas as pd
import glob
from dask.diagnostics import ProgressBar

## Get the RH data and the clouds data processed in the code process_LW_BCO_data

In [2]:
RH = xr.open_dataset('./../data/RH.nc')['rh'] #this contains relative humidity data from Jan-Feb 2020  (EUREC4A period)

start_time = np.datetime64('2020-01-12T00:00:00')
end_time   = np.datetime64('2020-02-28T23:00:00')
windspeed = xr.open_dataset('./../data/windspeed_eurec4a.nc')['wind_speed']
windspeed_avg = windspeed.sel(range = 1000, method = 'nearest').mean(dim = 'time')

In [29]:
#We load the cloud borders array that has been processed in the code "process_LW_BCO_data.ipynb"
clouds = np.loadtxt('./../data/processed_data/cloud_borders_eurec4a.txt', delimiter=',', dtype='object') 
clouds[:, 0] = np.array(clouds[:, 0], dtype='datetime64[s]')
clouds[:, 1] = np.array(clouds[:, 1], dtype='datetime64[s]')
clouds[:, 2] = np.array(clouds[:, 2], dtype='float64')
clouds[:, 3] = np.array(clouds[:, 3], dtype='float64')
clouds[:, 4] = np.array(clouds[:, 4], dtype='float64')
clouds[:, 5] = np.array(clouds[:, 5], dtype='float64')

#Since MR data is only available during Januray February (EUREC4A period), we restric our clouds during that time
#clouds_time = clouds[(clouds[:, 0] >= start_time) & (clouds[:, 0] <= end_time)]


## anomalies 1d profiles at the specific heights: SCL, CL, TCL

The aim is to prepares the data to plot profiles of RH anomaly as a function of distances for the three defined heights: SCL, CL, TCL

In [5]:
# this function puts NaN where the are clouds to avoid contamination of the profiles by clouds, and be sure that we only look at sourrounding of clouds.
def mask_clouds(RH, clouds):
    for cloud in clouds:
        start_time = np.datetime64(cloud[0])
        end_time = np.datetime64(cloud[1])
        

        # Mask the values in MR for the given time range
        RH = RH.where(~((RH.time >= start_time) & (RH.time <= end_time)), np.nan)
        
    return RH

In [6]:
#We do a pre-selection of clouds such that it runs faster. We take clouds that are longer than 40m and with a cloud base below 1500 (to stay in the boundary layer)
clouds = clouds_time[(clouds_time[:,2] >= 40) & (clouds_time[:,3] <= 1500)]

In [7]:
%%time
masked_RH = mask_clouds(RH, clouds)

CPU times: user 27min 49s, sys: 7.04 s, total: 27min 56s
Wall time: 28min 3s


In [8]:
masked_RH.to_netcdf('./../data/processed_data/1d_anomalies/masked_RH.nc')

In [30]:
masked_RH = xr.open_dataset('./../data/processed_data/1d_anomalies/masked_RH.nc')['rh']

In [31]:
#definition of shallow and deep clouds:
clouds_shallow = clouds[((clouds[:,4] - clouds[:,3]) >= 100) & ((clouds[:,4] - clouds[:,3]) <= 600)]
clouds_deep = clouds[((clouds[:,4] - clouds[:,3]) >= 400) & ((clouds[:,4] - clouds[:,3]) <= 2500)]

In [32]:
# this function is used later to interpolate data where there are gaps.
def interpolate_time(RH, start_time):
    RH['time'] = np.round(np.abs((RH.time - start_time) / np.timedelta64(1, 's')))
    RH_filled = RH.interpolate_na(dim='alt', method='linear', limit=None)

    # Interpolate along the time and altitude dimension
    new_time = np.arange(0, 60*60, 4)
    RH_interpolated = RH_filled.interp(time=new_time)
    return RH_interpolated

In [22]:
def profiles_1_d_alt(RH, clouds):
    """
    This function goes through all clouds. I first computes the RH anomaly at each level It separates data into 6 boxes: first up/downwind, and then it takes the 
    subcloud layer (200m below), the cloud layer and  the topcloud layer (200m above).
    Then it concatenates the data by including info on the cloud (length, base, top,...)


    inputs:
    - RH
    - clouds

    Outputs:
    
    - 6 xarrays (before, after; and bottom, mid, top) with the profiles of RH anomalies as a function of distance to the cloud
    """

    
    delta_time = np.timedelta64(60, 'm')  # time we look before and after the cloud 

    #define the different array where we will store the anomaly profiles of RH
    RH_before_bottom_arrays = [] 
    RH_after_bottom_arrays = [] 
    RH_before_mid_arrays = [] 
    RH_after_mid_arrays = [] 
    RH_before_top_arrays = [] 
    RH_after_top_arrays = [] 
    
    cloud_length = []
    cloud_bottom = []
    cloud_top = []
    cloud_mid = []
    cloud_depth = []

    #Iterate over all the clouds to compute their anomly profile at the three levels
    for cloud_index, cloud_times in enumerate(clouds):
        
        start_time = np.datetime64(cloud_times[0])
        end_time = np.datetime64(cloud_times[1])
        cloud_bottom_i = cloud_times[3]
        cloud_top_i = cloud_times[4]
        cloud_mid_i = cloud_times[5]
        cloud_depth_i = cloud_top_i - cloud_mid_i
        timedelta_before = start_time - delta_time
        timedelta_after = end_time + delta_time
        
        # Create the anomaly (subtracting the mean)
        RH_cloud = RH.sel(time=slice(timedelta_before, timedelta_after))
        RH_anomaly = RH_cloud - RH_cloud.mean(dim='time')

        # For each cloud, we separate into 6: up/down, bottom, mid, top, and then fill the arrays with numbers over time that are means over 
        # the altitude of the box:
        # before the cloud/downwind
        RH_before = RH_anomaly.sel(time=slice(timedelta_before, start_time))
        RH_before_bottom = RH_before.sel(alt=slice(cloud_bottom_i - 200, cloud_bottom_i))
        RH_before_bottom = interpolate_time(RH_before_bottom, start_time)
        RH_before_bottom = RH_before_bottom.mean(dim='alt', skipna=True)
        
        RH_before_mid = RH_before.sel(alt=slice(cloud_bottom_i, cloud_top_i))
        RH_before_mid = interpolate_time(RH_before_mid, start_time)
        RH_before_mid = RH_before_mid.mean(dim='alt', skipna=True)
        
        RH_before_top = RH_before.sel(alt=slice(cloud_top_i, cloud_top_i + 200))
        RH_before_top = interpolate_time(RH_before_top, start_time)
        RH_before_top = RH_before_top.mean(dim='alt', skipna=True)

        RH_before_bottom_arrays.append(lw_down_clouds_dist_approx(RH_before_bottom, windspeed_avg))
        RH_before_mid_arrays.append(lw_down_clouds_dist_approx(RH_before_mid, windspeed_avg))
        RH_before_top_arrays.append(lw_down_clouds_dist_approx(RH_before_top, windspeed_avg))

        # upwind
        RH_after = RH_anomaly.sel(time=slice(end_time, timedelta_after))
        
        RH_after_bottom = RH_after.sel(alt=slice(cloud_bottom_i - 200, cloud_bottom_i))
        RH_after_bottom = interpolate_time(RH_after_bottom, start_time)
        RH_after_bottom = RH_after_bottom.mean(dim='alt', skipna=True)
    
        RH_after_mid = RH_after.sel(alt=slice(cloud_bottom_i, cloud_top_i))
        RH_after_mid = interpolate_time(RH_after_mid, start_time)
        RH_after_mid = RH_after_mid.mean(dim='alt', skipna=True)
        
        RH_after_top = RH_after.sel(alt=slice(cloud_top_i, cloud_top_i + 200))
        RH_after_top = interpolate_time(RH_after_top, start_time)
        RH_after_top = RH_after_top.mean(dim='alt', skipna=True)

        RH_after_bottom_arrays.append(lw_down_clouds_dist_approx(RH_after_bottom, windspeed_avg))
        RH_after_mid_arrays.append(lw_down_clouds_dist_approx(RH_after_mid, windspeed_avg))
        RH_after_top_arrays.append(lw_down_clouds_dist_approx(RH_after_top, windspeed_avg))

        cloud_length.append(clouds[cloud_index][2])
        cloud_bottom.append(clouds[cloud_index][3])
        cloud_top.append(clouds[cloud_index][4])
        cloud_mid.append(clouds[cloud_index][5])
        cloud_depth.append(cloud_depth_i)
            
    #create a new function that combines all the arrays together and includes the informations on the clouds
    def create_combined_xarray(before_arrays, after_arrays):
        combined_before = xr.concat(before_arrays, dim='cloud')
        combined_before.coords['cloud_length'] = ('cloud', cloud_length)
        combined_before.coords['cloud_bottom'] = ('cloud', cloud_bottom)
        combined_before.coords['cloud_mid'] = ('cloud', cloud_mid)
        combined_before.coords['cloud_top'] = ('cloud', cloud_top)
        combined_before.coords['cloud_depth'] = ('cloud', cloud_depth)
    
        combined_after = xr.concat(after_arrays, dim='cloud')
        combined_after.coords['cloud_length'] = ('cloud', cloud_length)
        combined_after.coords['cloud_bottom'] = ('cloud', cloud_bottom)
        combined_after.coords['cloud_mid'] = ('cloud', cloud_mid)
        combined_after.coords['cloud_top'] = ('cloud', cloud_top)
        combined_after.coords['cloud_depth'] = ('cloud', cloud_depth)
    
        return combined_before, combined_after
    
    RH_combined_before_bottom, RH_combined_after_bottom = create_combined_xarray(RH_before_bottom_arrays, RH_after_bottom_arrays)
    RH_combined_before_mid, RH_combined_after_mid = create_combined_xarray(RH_before_mid_arrays, RH_after_mid_arrays)
    RH_combined_before_top, RH_combined_after_top = create_combined_xarray(RH_before_top_arrays, RH_after_top_arrays)
    
    return RH_combined_before_bottom, RH_combined_before_mid, RH_combined_before_top, RH_combined_after_bottom, RH_combined_after_mid, RH_combined_after_top


In [33]:
"""
This function goes through all clouds. I first computes the RH anomaly at each level It separates data into 6 boxes: first up/downwind, and then it takes the 
subcloud layer (200m below), the cloud layer and  the topcloud layer (200m above).
Then it concatenates the data by including info on the cloud (length, base, top,...)

Returns 6 xarrays (before,after; and bottom,mid,top)
"""
def profiles_1_d_alt(MR, clouds):
    delta_time = np.timedelta64(60, 'm') #time we look before and after the cloud 
    
    MR_before_bottom_arrays = [] 
    MR_after_bottom_arrays = [] 
    MR_before_mid_arrays = [] 
    MR_after_mid_arrays = [] 
    MR_before_top_arrays = [] 
    MR_after_top_arrays = [] 
    
    cloud_length = []
    cloud_bottom = []
    cloud_top = []
    cloud_mid = []
    cloud_depth = []
    
    for cloud_index, cloud_times in enumerate(clouds):
        
        start_time = np.datetime64(cloud_times[0])
        end_time = np.datetime64(cloud_times[1])
        cloud_bottom_i = cloud_times[3]
        cloud_top_i = cloud_times[4]
        cloud_mid_i = cloud_times[5]
        cloud_depth_i = cloud_top_i - cloud_mid_i
        timedelta_before = start_time - delta_time
        timedelta_after = end_time + delta_time

        #create the anomaly (subtracting the mean)
        MR_cloud = MR.sel(time = slice(timedelta_before, timedelta_after))
        MR_anomaly = MR_cloud - MR_cloud.mean(dim='time')

        #for each cloud, we separate into 6: up/down, bottom, mid, top, and then fill the arrays with numbers over time that are means over 
        #the altitude of the box:
        #before the cloud/downwind
        MR_before = MR_anomaly.sel(time = slice(timedelta_before, start_time))
        
        MR_before_bottom = MR_before.sel(alt = slice(cloud_bottom_i - 200, cloud_bottom_i))
        MR_before_bottom = interpolate_time(MR_before_bottom,start_time)
        MR_before_bottom = MR_before_bottom.mean(dim='alt', skipna = True)
        
        MR_before_mid = MR_before.sel(alt = slice(cloud_bottom_i , cloud_top_i))
        MR_before_mid = interpolate_time(MR_before_mid,start_time)
        MR_before_mid = MR_before_mid.mean(dim='alt', skipna = True)
        
        MR_before_top = MR_before.sel(alt = slice(cloud_top_i, cloud_top_i + 200))
        MR_before_top = interpolate_time(MR_before_top,start_time)
        MR_before_top = MR_before_top.mean(dim='alt', skipna = True)


        MR_before_bottom_arrays.append(lw_down_clouds_dist_approx(MR_before_bottom, windspeed_avg))
        MR_before_mid_arrays.append(lw_down_clouds_dist_approx(MR_before_mid, windspeed_avg))
        MR_before_top_arrays.append(lw_down_clouds_dist_approx(MR_before_top, windspeed_avg))


        #upwind
        MR_after = MR_anomaly.sel(time = slice(end_time, timedelta_after))
        
        MR_after_bottom = MR_after.sel(alt = slice(cloud_bottom_i - 200, cloud_bottom_i))
        MR_after_bottom = interpolate_time(MR_after_bottom,start_time)
        MR_after_bottom = MR_after_bottom.mean(dim='alt', skipna = True)
        
        MR_after_mid = MR_after.sel(alt = slice(cloud_bottom_i , cloud_top_i))
        MR_after_mid = interpolate_time(MR_after_mid,start_time)
        MR_after_mid = MR_after_mid.mean(dim='alt', skipna = True)
        
        MR_after_top = MR_after.sel(alt = slice(cloud_top_i, cloud_top_i + 200))
        MR_after_top = interpolate_time(MR_after_top,start_time)
        MR_after_top = MR_after_top.mean(dim='alt', skipna = True)


        MR_after_bottom_arrays.append(lw_down_clouds_dist_approx(MR_after_bottom, windspeed_avg))
        MR_after_mid_arrays.append(lw_down_clouds_dist_approx(MR_after_mid, windspeed_avg))
        MR_after_top_arrays.append(lw_down_clouds_dist_approx(MR_after_top, windspeed_avg))

        cloud_length.append(clouds[cloud_index][2])
        cloud_bottom.append(clouds[cloud_index][3])
        cloud_top.append(clouds[cloud_index][4])
        cloud_mid.append(clouds[cloud_index][5])
        cloud_depth.append(cloud_depth_i)


        def create_combined_xarray(before_arrays, after_arrays):
                combined_before = xr.concat(before_arrays, dim='cloud')
                combined_before.coords['cloud_length'] = ('cloud', cloud_length)
                combined_before.coords['cloud_bottom'] = ('cloud', cloud_bottom)
                combined_before.coords['cloud_mid'] = ('cloud', cloud_mid)
                combined_before.coords['cloud_top'] = ('cloud', cloud_top)
                combined_before.coords['cloud_depth'] = ('cloud', cloud_depth)
        
                combined_after = xr.concat(after_arrays, dim='cloud')
                combined_after.coords['cloud_length'] = ('cloud', cloud_length)
                combined_after.coords['cloud_bottom'] = ('cloud', cloud_bottom)
                combined_after.coords['cloud_mid'] = ('cloud', cloud_mid)
                combined_after.coords['cloud_top'] = ('cloud', cloud_top)
                combined_after.coords['cloud_depth'] = ('cloud', cloud_depth)
        
                return combined_before, combined_after
        
    MR_combined_before_bottom, MR_combined_after_bottom = create_combined_xarray(MR_before_bottom_arrays, MR_after_bottom_arrays)
    MR_combined_before_mid, MR_combined_after_mid = create_combined_xarray(MR_before_mid_arrays, MR_after_mid_arrays)
    MR_combined_before_top, MR_combined_after_top = create_combined_xarray(MR_before_top_arrays, MR_after_top_arrays)
    
    return MR_combined_before_bottom, MR_combined_before_mid, MR_combined_before_top, MR_combined_after_bottom, MR_combined_after_mid, MR_combined_after_top

In [27]:
# we convert times into distances with th efollowing function. It has already been defined in the "process_LW_BC_data.ipynb" notebook:
def lw_down_clouds_dist_approx(LW_down_clouds, windspeed_avg):
    """
    Converts the time dimension into distance using the average windspeed. This approximation is justified since the focus is not on a few meters of 
    precision. This average windspeed is enough.
    """
    # Calculate distance from cloud based on time and windspeed
    distance_from_cloud = LW_down_clouds.time * windspeed_avg.item()
    
    # Rename the 'time' dimension to 'distance_from_cloud'
    LW_down_clouds = LW_down_clouds.rename({'time': 'distance_from_cloud'})
    
    # Create a new coordinate 'distance' using the calculated distances
    LW_down_clouds.coords['distance'] = ('distance_from_cloud', distance_from_cloud.data)
    
    # Replace the existing 'time' coordinate with the new 'distance' coordinate
    LW_down_clouds = LW_down_clouds.swap_dims({'distance_from_cloud': 'distance'})
    
    # Add units attribute to the distance coordinate
    LW_down_clouds.coords['distance'].attrs['units'] = 'm'
    
    return LW_down_clouds


In [35]:
%%time
RH_combined_before_bottom, RH_combined_before_mid, RH_combined_before_top, RH_combined_after_bottom, RH_combined_after_mid, RH_combined_after_top = profiles_1_d_alt(masked_RH, clouds_shallow)
dataset = xr.Dataset({
    'RH_combined_before_bottom': RH_combined_before_bottom,
    'RH_combined_before_mid': RH_combined_before_mid,
    'RH_combined_before_top': RH_combined_before_top,
    'RH_combined_after_bottom': RH_combined_after_bottom,
    'RH_combined_after_mid': RH_combined_after_mid,
    'RH_combined_after_top': RH_combined_after_top,
})

dataset.to_netcdf('./../data/processed_data/1d_anomalies/clouds_diff_shallow.nc')

CPU times: user 6min 50s, sys: 2.74 s, total: 6min 53s
Wall time: 7min


In [34]:
%%time
RH_combined_before_bottom, RH_combined_before_mid, RH_combined_before_top, RH_combined_after_bottom, RH_combined_after_mid, RH_combined_after_top = profiles_1_d_alt(masked_RH, clouds_deep)
dataset = xr.Dataset({
    'RH_combined_before_bottom': RH_combined_before_bottom,
    'RH_combined_before_mid': RH_combined_before_mid,
    'RH_combined_before_top': RH_combined_before_top,
    'RH_combined_after_bottom': RH_combined_after_bottom,
    'RH_combined_after_mid': RH_combined_after_mid,
    'RH_combined_after_top': RH_combined_after_top,
})

dataset.to_netcdf('./../data/processed_data/1d_anomalies/clouds_diff_deep.nc')

CPU times: user 2min 41s, sys: 1.4 s, total: 2min 42s
Wall time: 3min
