Re-calculate bimodal parameters for each CCN_observation window. (We want NSD1_sum and NSD2_sum for each window)

In [1]:
import numpy as np
import os
import pandas as pd


In [2]:
# load data:
obs_dir = 'input_data'

# Fitted bimodal parameters, which we are recalculating because this file
# does not contain scaled number concentration values.
bimodal_params = pd.read_csv(
    os.path.join(obs_dir, 'Bimodal_parameters.csv'),
)

# Bimodal parameters scaled to aerosol observations at 10min resolution.
NSD_params_all = pd.read_csv(
    os.path.join(obs_dir, 'NSD_PARAMS_SCALED.csv'),
    parse_dates=['datetime'],
)

# Observed CCN data at 2hr resolution; 'start_time' and 'end_time' hold the
# CCN-obs window information.
CCN_all = pd.read_csv(
    os.path.join(obs_dir, 'CCN_all.csv'),
    parse_dates=['datetime', 'start_time', 'end_time'],
)

In [3]:
# function to calculate the median absolute deviation (MAD):

def mad(series):
    """Return the median absolute deviation (MAD) of a pandas Series.

    The MAD is the median of the absolute differences between each value and
    the median of the series. Missing values (NaN) are ignored in both median
    steps, so a single NaN does not turn the whole result into NaN.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to summarise.

    Returns
    -------
    float
        The median absolute deviation; NaN if the series has no valid values.
    """
    median = series.median()
    # Use the pandas (NaN-skipping) median here as well; np.median would
    # propagate any NaN left in the deviations.
    return (series - median).abs().median()

In [19]:
# Re-calculate the median bimodal parameters for each CCN observation window,
# using the scaled number concentrations from NSD_params_all.
#
# Results of this cell:
#   NSD_params         - rows of NSD_params_all that fall inside a window, with
#                        'datetime' replaced by the window mid-point and the
#                        extra columns 'CCN_window' and 'window_id'.
#   NSD_params_windows - one row per window: per-column medians plus
#                        max / min / MAD columns and 'n_measurements'.

NSD_params = NSD_params_all.copy(deep=True)

# Only CCN windows from 2016-08-16 onwards are used; select them once and reuse.
ccn_windows = CCN_all.loc[CCN_all['datetime'] >= '2016-08-16 00:00:00'].reset_index(drop=True)
tw_start = ccn_windows['start_time']  # time window start
tw_end = ccn_windows['end_time']  # time window end
tw_mid = ccn_windows['datetime']  # time window mid
window_idx = 0

# Create the column up front so that dropna() below also works when no row
# falls inside any window.
NSD_params['CCN_window'] = np.nan

# Identify which time window each parameter row belongs to. The window is
# half-open: start <= datetime < end.
for start, end, mid in zip(tw_start, tw_end, tw_mid):
    mask = (NSD_params['datetime'] >= start) & (NSD_params['datetime'] < end)

    if mask.any():
        NSD_params.loc[mask, 'CCN_window'] = window_idx  # index of the time window, used for grouping
        NSD_params.loc[mask, 'datetime'] = mid  # assign the mid-point of the time window to the datetime column
        window_idx += 1  # only windows that contain data consume an index

# drop rows with NaN in CCN_window column (row is not inside any time window):
NSD_params = NSD_params.dropna(subset=['CCN_window'])

# drop where the datetime does not match original datetimes in bimodal_params:
NSD_params = NSD_params[NSD_params['datetime'].isin(pd.to_datetime(bimodal_params['datetime']))].reset_index(drop=True)

# THERE IS ONE OBS WINDOW WHERE THE NSD PARAMETERS ARE VERY STRANGE AND RESULT IN UNREALISTIC MASS VALUES:
# here we filter these out.
# NOTE(review): the index 8564 is tied to the window numbering produced by the
# loop above for the current input files - re-check it if the inputs change.
# .copy() makes the result an independent frame so that adding 'window_id'
# below does not trigger a chained-assignment warning.
NSD_params = NSD_params[~((NSD_params['CCN_window'] == 8564.0) & (NSD_params['mode2_d'] > 100))].copy()

# Group once and reuse the grouping for every statistic.
grouped = NSD_params.groupby('CCN_window')

# Take median of parameters within each time window:
NSD_params_windows = grouped.median().reset_index(drop=True)

# add columns for max/min/median abs. dev. in each time window.
# (source column in NSD_params, prefix of the output columns)
spread_columns = (
    ('mode1_d', 'mode1_d'),
    ('mode2_d', 'mode2_d'),
    ('NSD1_sum', 'mode1_NSD'),
    ('NSD2_sum', 'mode2_NSD'),
)
for source_col, prefix in spread_columns:
    values = grouped[source_col]
    # reset_index(drop=True) puts each statistic on the same 0..k-1 positional
    # index as NSD_params_windows, so the assignment lines up row by row.
    NSD_params_windows[prefix + '_max'] = values.max().reset_index(drop=True)
    NSD_params_windows[prefix + '_min'] = values.min().reset_index(drop=True)
    NSD_params_windows[prefix + '_mad'] = values.agg(mad).reset_index(drop=True)

# add column for number of measurements in each time window.
# The index must be reset here too: the counts are indexed by CCN_window
# label, which no longer matches the row positions once any window has been
# filtered out above.
NSD_params_windows['n_measurements'] = grouped['mode1_d'].count().reset_index(drop=True)

# create a unique ID for each time window
NSD_params['window_id'] = pd.factorize(NSD_params['CCN_window'])[0]

In [21]:
# save to CSV: write both result tables into the observation directory.
# The first file holds the per-window statistics, the second the individual
# rows with their window assignment.
for frame, filename in (
    (NSD_params_windows, 'bimodal_params_windows.csv'),
    (NSD_params, 'NSD_params_withwindows.csv'),
):
    frame.to_csv(os.path.join(obs_dir, filename))