In [73]:
# Purpose: count the samples that fall below LOD and compute summary statistics,
# then check whether the TTS result changes under different below-LOD (<LOD) treatments.
# date created: 02/26/2020
# author: sofia chelpon

In [83]:
import xarray as xr
import numpy as np 
from matplotlib import pyplot as plt
import pandas as pd

In [84]:
### ---------------------- READ IN DATA  ---------------------- ###
# location of the netCDF file with the TOGA trace gases
mypath = '/mnt/home/sofia/TTS/paper_2020/contrast_readin/toga_lodhalf/toga_trace_gases.nc'
# open the merged (all flights) dataset; the bare name on the last line
# makes the notebook display its summary
toga_trc = xr.open_dataset(filename_or_obj=mypath)
toga_trc

<xarray.Dataset>
Dimensions:               (GGALT: 2231, GGLAT: 2231, GGLON: 2231, Time: 2231)
Coordinates:
  * Time                  (Time) datetime64[ns] 2014-01-17T00:18:06 ... 2014-02-22T09:20:05
  * GGALT                 (GGALT) float32 3127.166 4212.603 ... 3358.9587
  * GGLAT                 (GGLAT) float32 13.379817 13.258845 ... 12.707678
  * GGLON                 (GGLON) float32 144.9935 145.1362 ... 144.72903
Data variables:
    Acetaldehyde          (Time) float32 ...
    n_Butane              (Time) float32 ...
    i_Butane              (Time) float32 ...
    Benzene               (Time) float32 ...
    C3H8                  (Time) float32 ...
    Methanol              (Time) float32 ...
    Bromoform             (Time) float32 ...
    Tetrachloroethylene   (Time) float32 ...
    Dibromomethane        (Time) float32 ...
    Dichloromethane       (Time) float32 ...
    Chloroform            (Time) float32 ...
    Carbon_Tetrachloride  (Time) float32 ...

In [85]:
### ---------------------- COUNT FILLS ---------------------- ###
# For each tracer, count the samples in the UT and BL altitude bands that hold
# the below-LOD fill value (LOD/2) and those that lie above it. The two bands
# are pooled into a single pair of counts per tracer.

# altitude band limits, in the units of GGALT
UT_ALT_MIN, UT_ALT_MAX = 12000, 14000
BL_ALT_MAX = 2000

ggalt = toga_trc.GGALT.values
ggalt_ut = np.where((ggalt > UT_ALT_MIN) & (ggalt < UT_ALT_MAX))
ggalt_bl = np.where(ggalt < BL_ALT_MAX)

count_fills = []
count_meas = []
tracer_name = []
for ii in toga_trc.data_vars:
    # get this tracer and its fill value
    trc_vals = toga_trc[ii].values
    trc_name = toga_trc[ii].name
    # Cast LOD/2 to the dtype of the data before testing for equality. The
    # fills live in the data array at its own precision, while LOD/2 derived
    # from the attribute may be a float64 scalar; NumPy >= 2 then compares in
    # float64 and can miss every fill. Casting reproduces the comparison that
    # NumPy 1.x performed implicitly.
    trc_fill = np.asarray(toga_trc[ii].LOD / 2, dtype=trc_vals.dtype)
    # NaN samples compare False in both tests below, so they are left out of
    # both counts.
    # -------- UT
    trc_ut = trc_vals[ggalt_ut]
    ulod_ut = (trc_ut == trc_fill).sum()
    olod_ut = (trc_ut > trc_fill).sum()
    # -------- BL
    trc_bl = trc_vals[ggalt_bl]
    ulod_bl = (trc_bl == trc_fill).sum()
    olod_bl = (trc_bl > trc_fill).sum()
    # pooled UT + BL counts for this tracer
    count_fills.append(ulod_ut + ulod_bl)
    count_meas.append(olod_ut + olod_bl)
    tracer_name.append(trc_name)



In [86]:
# collect the per-tracer counts in a table; 'Percent ULOD' is the share of the
# counted samples (fills + measurements) that hold the fill value
my_counts = pd.DataFrame(
    {'Trace Gas': tracer_name, 'num fills': count_fills, 'num meas': count_meas}
).assign(
    **{'Percent ULOD': lambda tbl: tbl['num fills'] / (tbl['num meas'] + tbl['num fills']) * 100}
)
my_counts

Unnamed: 0,Trace Gas,num fills,num meas,Percent ULOD
0,Acetaldehyde,323,429,42.952128
1,n_Butane,188,849,18.129219
2,i_Butane,189,848,18.225651
3,Benzene,35,1001,3.378378
4,C3H8,91,946,8.775313
5,Methanol,70,958,6.809339
6,Bromoform,102,935,9.836066
7,Tetrachloroethylene,77,959,7.432432
8,Dibromomethane,0,1034,0.0
9,Dichloromethane,0,1033,0.0


In [87]:
### ---------------------- DIFFERENT DATA TREATMENTS ---------------------- ###
# The input data mark below-LOD samples with LOD/2. Build two alternative
# versions of the tracer data in which those samples are set to LOD and to zero.

def replace_lod_fills(dataset, new_value=None):
    """Return a new Dataset in which the LOD/2 fill samples are replaced.

    Parameters
    ----------
    dataset : xarray.Dataset
        Tracer data. Every data variable must carry an ``LOD`` attribute;
        samples equal to ``LOD / 2`` are treated as fills.
    new_value : scalar, optional
        Value written over the fill samples. ``None`` (the default) uses each
        tracer's own ``LOD``.

    Returns
    -------
    xarray.Dataset
        A fresh Dataset holding one variable per data variable of ``dataset``
        (same name and attributes) with the fills replaced. ``dataset`` itself
        is left unmodified.

    Raises
    ------
    AttributeError
        If a data variable has no ``LOD`` attribute.
    """
    filled = xr.Dataset()
    for name in dataset.data_vars:
        tracer = dataset[name]
        lod = tracer.LOD
        # Work on a copy: ``.values`` can hand back the array that backs the
        # dataset, so writing into it directly would also change ``dataset``.
        values = tracer.values.copy()
        # Compare at the precision of the data; a float64 LOD/2 tested against
        # float32 samples can miss every fill under NumPy >= 2 promotion rules.
        fill = np.asarray(lod / 2, dtype=values.dtype)
        values[values == fill] = lod if new_value is None else new_value
        # same name, dims, coords and attrs as the input variable, new data
        filled[name] = tracer.copy(data=values)
    return filled


# The helper never modifies its input, so the file does not need to be
# re-opened before each treatment.

### ---------------------- FILL WITH LOD
toga_trc_lod = replace_lod_fills(toga_trc)

### ---------------------- FILL WITH ZERO
toga_trc_zero = replace_lod_fills(toga_trc, new_value=0)

In [88]:
# Attach the aircraft position arrays to both treated datasets as coordinates.
# Each 1-D array is keyed by its own name, so it shows up under
# ``toga_param.coords``. Only the bare values are taken from toga_trc.
toga_param = xr.Dataset(
    {
        'GGALT': toga_trc.GGALT.values,
        'GGLAT': toga_trc.GGLAT.values,
        'GGLON': toga_trc.GGLON.values,
    }
)
for coord_name, coord in toga_param.coords.items():
    for treated in (toga_trc_zero, toga_trc_lod):
        treated.coords[coord_name] = coord

In [91]:
### write the two treated versions to netCDF files
svpath_zero = '/mnt/home/sofia/TTS/paper_2020/contrast_readin/toga_lodhalf/testing_lod_treatments/lod_replace_zero.nc'
svpath_lod = '/mnt/home/sofia/TTS/paper_2020/contrast_readin/toga_lodhalf/testing_lod_treatments/lod_replace_lod.nc'

# zero-filled version first, then the LOD-filled one
for treated, svpath in ((toga_trc_zero, svpath_zero), (toga_trc_lod, svpath_lod)):
    treated.to_netcdf(path=svpath)
