In [1]:
#libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import erf
import netCDF4
import xarray as xr

## Inputs


Dimensions specified below

$\textit{m}$: length of the measurement vector

$\textit{n}$: length of the state (parameter) vector

$\textit{r}$: length of the model error vector


To perform this ICA, you'll need a few things:

1. Jacobian matrix, defined as the $ K_{j,i}(\textbf{x}) = \frac{\partial F_i (\textbf{x})}{\partial x_j}$ where $\textit{i}$ has length $\textit{m}$ and $\textit{j}$ has length $\textit{n}$. $F(\textbf{x})$ is the forward model at the state defined by $\textbf{x}$.

    jac [n x m]
    
2. Model error Jacobian matrix, defined as the $ K_{b,u,i}(\textbf{x}) = \frac{\partial F_i (\textbf{x})}{\partial b_u}$ where $\textit{i}$ has length $\textit{m}$ and $\textit{u}$ has length $\textit{r}$. $F(\textbf{x})$ is the forward model at the state defined by $\textbf{x}$. Note there are options in the Rodgers function to omit consideration of model errors if these are not known.

    jac_me [r x m]
    
3. Measurement error coveriance matrix, $S_{\epsilon}$. This can be specificed as either an $\textit{m}$ length vector of sigma squared measurement uncertainties or an $\textit{m} x \textit{m}$ full covariance matrix. In the former case, it is assumed that measurement uncertainty is uncorrelated. 

    err [m x m] or [m]   

4. Model error coveriance matrix, $S_{b}$. This can be specificed as either an $\textit{r}$ length vector of sigma squared measurement uncertainties or an $\textit{r} x \textit{r}$ full covariance matrix. In the former case, it is assumed that measurement uncertainty is uncorrelated. Note there are options below to omit consideration of model errors if these are not known.

    err_me [m x m] or [m]   
    
5. A priori error coveriance matrix, $S_{a}$. This can be specificed as either an $\textit{n}$ length vector of sigma squared measurement uncertainties or an $\textit{n} x \textit{n}$ full covariance matrix. In the former case, it is assumed that measurement uncertainty is uncorrelated. 

    ap [n x n] or [n]   
    
6. If calculating the detection probability: the value of the parameter in question

    mu [scalar]
    
        

# Outputs


### from rodgers()
1. Error covariance matrix, $\hat{S}$.

    S_hat [n x n]
    
2. Shannon Information Content, $SIC$.

    SIC [scalar]
    
3. Averaging kernel matrix, $A$. 

    AvgK [n x n]

4. Degrees of Freedom for Signal, $DFS$. 

    DFS [scalar]  

    
### from detect_prob()
1. Probability of detection, $P_d$. Note this requires inputs mu (parameter value in question) and sigma ($\sqrt{\hat{S}_{p,p}}$ where $p$ is the parameter index.)

    Pd [scalar]
    
    Pd_pcnt_str [percentage as string]


## from Amir:
    Relative:
σ_sys = np.array([0.01587401, 0.01244266, 0.00938955, 0.01092051, 0.00302288,
                  0.00544271, 0.00999068, 0.01467843, 0.0080387, 0.00944394,
                  0.0193447, 0.0224503, 0.02386379])

Absolute:
σ_sys = np.array([2.80453033e-03, 1.52152435e-03, 1.04028473e-03, 9.98138237e-04,
                  1.12356970e-04, 2.68196657e-04, 4.69710954e-04, 4.32004600e-04,
                  2.49862337e-04, 2.53730871e-04, 8.83851738e-05, 1.47447273e-04,
                  1.57220696e-04])

This is based on real modisa data using our OE algorithm, where we use the full spectrum to optimize things, which means that these values work better when you use all bands in MODIS. The systematic uncertainty dominates the random noise, so I feel comfortable ignoring the random component.

In [2]:
#function to calculate the parameter error covariance matrix.

#input Jacobian, K, [n x m], error covariance matrix Se, [m x m] and a priori matrix Sa, [n x n]
#jac_me and me are associated with model uncertainty - that parameterized uncertainty and its jacobian
def rodgers(jac, err, ap, model_error={}, model_error_jacobian={}): 
    #todo
    # consider microplastic simulation jacobian in the following manner: Se' = Se + Kb Sb Kbt, where 
    #  Se is the same as above, Sb is the microplastic parameter uncertainty, Kb the microplastic jacobian
    
        #check if error covariance matrix is square, or just diagonal values. If latter make full matrix
    if err.ndim == 1:
        ln=np.shape(err)
        err2d = np.zeros((ln[0], ln[0]))
        np.fill_diagonal(err2d, err)
        err=err2d

        #check if a priori covariance matrix is square, or just diagonal values. If latter make full matrix
    if ap.ndim == 1:
        ln=np.shape(ap)
        ap2d = np.zeros((ln[0], ln[0]))
        np.fill_diagonal(ap2d, ap)
        ap=ap2d        
            
        #section to verify compatable dimensions ------------------------------------------------------
    sh_jac = np.shape(jac)
    sh_err = np.shape(err)
    sh_ap = np.shape(ap)
    
    n_dim = sh_jac[0]
    m_dim = sh_jac[1]
    
    if not((sh_err[0] == sh_err[1]) and (sh_ap[0] == sh_ap[1])):
        print('ERROR: error covariance matrix or a priori matrix are not square')
        print('Error covariance matrix dimensions')
        print(sh_err)
        print('A priori matrix dimensions')
        print(sh_ap)
        return -1, -1, -1, -1
    
    if not(sh_jac[0] == sh_ap[0]):
        print('ERROR: n dimensions inconsistent, should be Jacobian [n x m]; a priori [n x n]')
        print('Jacobian matrix dimensions')
        print(sh_jac)
        print('A priori matrix dimensions')
        print(sh_ap)
        return -1, -1, -1, -1
    
    if not(sh_jac[1] == sh_err[0]):
        print('ERROR: m dimensions inconsistent, should be Jacobian [n x m]; error covariance [m x m]')
        print('Jacobian matrix dimensions')
        print(sh_jac)
        print('Error covariance matrix dimensions')
        print(sh_err)
        return -1, -1, -1, -1
        
    #section to generate model derived error -------------------------------------------------------
    
    if len(model_error) > 0:
        me=model_error
        jac_me=model_error_jacobian
        
        ln_me=np.shape(me)
        errme_2d = np.zeros((ln_me[0], ln_me[0]))
        np.fill_diagonal(errme_2d, me)
        err_me=errme_2d
    
        jac_me_t=np.transpose(jac_me)      
    
        JacmetMeJacme = np.matmul(jac_me_t,np.matmul(err_me,jac_me))
        err = err + JacmetMeJacme
    
        #perform inverse and matrix multiplication calculations ----------------------------------------
    jac_t=np.transpose(jac) #transpose of Jacobian (KT)
    
    try: 
        err_i=np.linalg.inv(err) #inverse of error covariance matrix (Se-1)
    except:
        print("ERROR: problem inverting error covariance matrix")
        return -1, -1, -1, -1
    
    try: 
        ap_i=np.linalg.inv(ap) #inverse of a priori error covariance matrix
    except:
        print("ERROR: problem inverting a priori covariance matrix")
        return -1, -1, -1, -1

    KtSK = np.matmul(jac,np.matmul(err_i,jac_t)) #calcuates KT Se-1 K

    try: 
        S_hat = np.linalg.inv(KtSK+ap_i) #calculate the inverse of (above + Sa-1)
    except:
        print("ERROR: problem inverting retrieval error covariance matrix")
        return -1, -1, -1, -1
    
    SIC = 0.5*np.log(np.linalg.det(np.matmul((KtSK+ap_i),ap))) #calculate Shannon Information Content    
    AvgK = np.matmul(S_hat,KtSK) #averaging kernel
    DFS = np.trace(AvgK) #degrees of freedom for signal (DFS) which is trace of averaging kernel
    
    return S_hat, SIC, AvgK, DFS  #returns retrieval error covariance matrix and the Shannon Information Content

In [3]:
#calculates the probability of detection given the parameter value (mu) and uncertainty (sigma)
#assumes PDF is gaussian normally distributed
def detect_prob(mu, sigma, doprint=0): 

    Pd = 1-0.5*(1+erf((-1*mu)/(sigma*np.sqrt(2))))  #detection probability, modified from CDF function

    Pd_pcnt_str=str(np.around(Pd*100,decimals=1))+'% positive probability' #string output version

    if doprint > 0:
        print(Pd_pcnt_str)

    return Pd, Pd_pcnt_str

In [4]:
#calculate detection probability metrics for a full range of fractional plastic coverage. 
#Also, return result at 95% or whatever is specified in fraction_threshold variable
def detect_prob_all(plastic_uncertainty,plastic_fraction,fraction_threshold=0.95):    

    #make array of values to assess (val) and dummy array to fill (det_prob)
    inc=np.arange(0, 10000, 1)
    val=inc/10000
    det_prob=np.arange(0, 10000, 1) / 10000
       
    #interpolate plastic_uncertainty to assessment values
    plastic_uncertainty_int = np.interp(val,plastic_fraction,plastic_uncertainty)    
    
    for x in inc:
        Pd, Pd_pcnt_str = detect_prob(val[x], plastic_uncertainty_int[x], doprint=0)
        det_prob[x] = Pd

    #get plastic fraction for a fraction_threshold detection probability 
    fraction_meeting_threshold = np.interp(fraction_threshold,det_prob,val)
    
    return det_prob, fraction_meeting_threshold
            

In [5]:
def print_out(S_hat, SIC, AvgK, DFS, jac, err, ap, me_err, numpts, params, me_params ):

    S_hat_diag=np.diagonal(S_hat)
    uncert=np.sqrt(S_hat_diag)

    np.set_printoptions(formatter={'float': '{: 0.2f}'.format})
    print('Error covariance matrix:')
    print(S_hat)
    print()

    np.set_printoptions(formatter={'float': '{: 0.5f}'.format})
    print('Averaging kernel matrix:')
    print(AvgK)
    print()
    np.set_printoptions(formatter={'float': '{: 0.5f}'.format})
    print('Model Parameters:       ', params)
    print('Number of observations: ', numpts)
    print('A priori uncertainty:   ', np.sqrt(ap))
    print('Uncertainties:          ', uncert)
    print('Shannon Information Content:      ', SIC)
    print('Degrees of freedom for signal:    ', DFS)

## Section to read simulated dataset

In [6]:
#read netcdf4 file with simulation
f = netCDF4.Dataset('../plastics_toa_simulations_modisa_permutations.nc')

#read jacobian and data into jac and meas, respectively. Also get z (#cases,), m (measurent), n (param) lengths
jac_all=np.asarray(f.variables['K_Jac'])
meas=np.asarray(f.variables['rhot'])
param_order=np.asarray(f.variables['parameter'])
waveln=np.asarray(f.variables['wavelength'])
g=jac_all.shape
z_len=g[0]
m_len=g[1]
n_len=g[2]

#read the simulation specific parameters into a dataframe
df = pd.DataFrame({'Windspeed(m_s)': np.asarray(f.variables['Windspeed(m_s)']),
                   'Humidity(%)': np.asarray(f.variables['Humidity(%)']),
                   'FMF': np.asarray(f.variables['FMF']),
                   'AOD(869)': np.asarray(f.variables['AOD(869)']),
                   'chla(mg_m3)': np.asarray(f.variables['chla(mg_m3)']),
                   'plastic_fraction': np.asarray(f.variables['plastic_fraction']), 
                   'solz': np.asarray(f.variables['solz']),     
                   'relaz': np.asarray(f.variables['relaz']),  
                   'senz': np.asarray(f.variables['senz']),  
                  })

df["plastic_uncertainty"] = np.nan
df["SIC"] = np.nan
df["plastic_avgK"] = np.nan
df["DFS"] = np.nan

#close netcdf file
f.close()

In [7]:
#make a priori covariance matrix

#ap=(np.linspace(10.0,10.0,4))**2 #generate a priori error covariance matrix
WS_range=df['Windspeed(m_s)'].max() - df['Windspeed(m_s)'].min()
RH_range=df['Humidity(%)'].max() - df['Humidity(%)'].min()
FMF_range=df['FMF'].max() - df['FMF'].min()
AOD_range=df['AOD(869)'].max() - df['AOD(869)'].min()
CHL_range=df['chla(mg_m3)'].max() - df['chla(mg_m3)'].min()
PF_range=df['plastic_fraction'].max() - df['plastic_fraction'].min()


ap=np.asarray([WS_range,RH_range,FMF_range,AOD_range,CHL_range,PF_range]) #generate a priori error covariance matrix
ap=(ap/2)**2 #generate a priori error covariance matrix diagonals


In [8]:
#make error covariance matrix and prepare jacobian

for idx in range(0, z_len):
#for idx in range(0, 1000):
    #rel_err=0.003  #relative error
    #sys_err=0.005
    #err=((meas[idx]*rel_err) + sys_err)**2 #generate error covariance matrix diagonals (code also takes 2d input)
    
    rel_err = np.array([0.01587401, 0.01244266, 0.00938955, 0.01092051, 0.00302288,
        0.00544271, 0.00999068, 0.01467843, 0.0080387, 0.00944394,
        0.0193447, 0.0224503, 0.02386379])
    
    err=(meas[idx]*rel_err)**2
    
    #prepare jacobian
    this_jac=jac_all[idx]
    jac=this_jac.transpose()

    #calculate rodgers stuff
    S_hat, SIC, AvgK, DFS = rodgers(jac, err, ap)

    df.loc[idx]["plastic_uncertainty"]=np.sqrt(S_hat[5,5]) 
    df.loc[idx]["SIC"]=SIC
    df.loc[idx]["plastic_avgK"]=AvgK[5,5]
    df.loc[idx]["DFS"]=DFS

df_orig=df.copy()    
    

In [9]:
#iterate through each set of conditions that have all the same parameter value except for plastic fraction. 
#create a new dataframe, and save one row for each set, with median values for plastic_uncertainty, SIC, plastic_avgK and DFS
#also, assess detection probability and save the plastic fraction value for 90% confident probability.

df.sort_values(by=['Windspeed(m_s)','Humidity(%)','FMF','AOD(869)','chla(mg_m3)','solz','relaz','senz'], inplace=True)

df.rename(columns={'plastic_fraction': 'plastic_threshold', 'plastic_uncertainty': 'plastic_unc_median', 'SIC': 'SIC_median', \
                   'plastic_avgK':'plastic_avgK_median', 'DFS': 'DFS_median'}, inplace=True)


fdf = df.copy()
fdfe = fdf[0:0]

#for idx in range(0, 100, 10):
for idx in range(0, z_len-10, 10):
    print(idx)
    
    #get start and end points for this iteration
    st=idx
    ed=idx+10

    this=df.iloc[st:ed]

    this_plastic_fraction = np.asarray(df.iloc[st:ed]['plastic_threshold'])   
    this_plastic_uncertainty = np.asarray(df.iloc[st:ed]['plastic_unc_median'])   

    this_detect_prob, fraction_meeting_threshold = \
        detect_prob_all(this_plastic_uncertainty,this_plastic_fraction,fraction_threshold=0.95)

    fdfe = fdfe.append(df.iloc[st], ignore_index = True)
        
    fdfe.loc[fdfe.index[-1], 'plastic_threshold']= fraction_meeting_threshold
    
    #section to update field with median values for set
    fdfe.loc[fdfe.index[-1], 'plastic_unc_median']= np.median(this_plastic_uncertainty)
    fdfe.loc[fdfe.index[-1], 'SIC_median']= np.median(np.asarray(df.iloc[st:ed]['SIC_median']))
    fdfe.loc[fdfe.index[-1], 'plastic_avgK_median']= np.median(np.asarray(df.iloc[st:ed]['plastic_avgK_median']))
    fdfe.loc[fdfe.index[-1], 'DFS_median']= np.median(np.asarray(df.iloc[st:ed]['DFS_median']))
    


#change names in original df back
df.rename(columns={'plastic_threshold':'plastic_fraction', 'plastic_unc_median':'plastic_uncertainty', 'SIC_median':'SIC', \
                   'plastic_avgK_median':'plastic_avgK', 'DFS_median':'DFS'}, inplace=True)


fdfe

0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
510
520
530
540
550
560
570
580
590
600
610
620
630
640
650
660
670
680
690
700
710
720
730
740
750
760
770
780
790
800
810
820
830
840
850
860
870
880
890
900
910
920
930
940
950
960
970
980
990
1000
1010
1020
1030
1040
1050
1060
1070
1080
1090
1100
1110
1120
1130
1140
1150
1160
1170
1180
1190
1200
1210
1220
1230
1240
1250
1260
1270
1280
1290
1300
1310
1320
1330
1340
1350
1360
1370
1380
1390
1400
1410
1420
1430
1440
1450
1460
1470
1480
1490
1500
1510
1520
1530
1540
1550
1560
1570
1580
1590
1600
1610
1620
1630
1640
1650
1660
1670
1680
1690
1700
1710
1720
1730
1740
1750
1760
1770
1780
1790
1800
1810
1820
1830
1840
1850
1860
1870
1880
1890
1900
1910
1920
1930
1940
1950
1960
1970
1980
1990
2000
2010
2020
2030
2040
2050
2060
2070
2080
2090
2100
2110
2120
2130
2140
2150
2160
2170
2180
2190
2200
2210
2

15520
15530
15540
15550
15560
15570
15580
15590
15600
15610
15620
15630
15640
15650
15660
15670
15680
15690
15700
15710
15720
15730
15740
15750
15760
15770
15780
15790
15800
15810
15820
15830
15840
15850
15860
15870
15880
15890
15900
15910
15920
15930
15940
15950
15960
15970
15980
15990
16000
16010
16020
16030
16040
16050
16060
16070
16080
16090
16100
16110
16120
16130
16140
16150
16160
16170
16180
16190
16200
16210
16220
16230
16240
16250
16260
16270
16280
16290
16300
16310
16320
16330
16340
16350
16360
16370
16380
16390
16400
16410
16420
16430
16440
16450
16460
16470
16480
16490
16500
16510
16520
16530
16540
16550
16560
16570
16580
16590
16600
16610
16620
16630
16640
16650
16660
16670
16680
16690
16700
16710
16720
16730
16740
16750
16760
16770
16780
16790
16800
16810
16820
16830
16840
16850
16860
16870
16880
16890
16900
16910
16920
16930
16940
16950
16960
16970
16980
16990
17000
17010
17020
17030
17040
17050
17060
17070
17080
17090
17100
17110
17120
17130
17140
17150
17160
17170
1718

29190
29200
29210
29220
29230
29240
29250
29260
29270
29280
29290
29300
29310
29320
29330
29340
29350
29360
29370
29380
29390
29400
29410
29420
29430
29440
29450
29460
29470
29480
29490
29500
29510
29520
29530
29540
29550
29560
29570
29580
29590
29600
29610
29620
29630
29640
29650
29660
29670
29680
29690
29700
29710
29720
29730
29740
29750
29760
29770
29780
29790
29800
29810
29820
29830
29840
29850
29860
29870
29880
29890
29900
29910
29920
29930
29940
29950
29960
29970
29980
29990
30000
30010
30020
30030
30040
30050
30060
30070
30080
30090
30100
30110
30120
30130
30140
30150
30160
30170
30180
30190
30200
30210
30220
30230
30240
30250
30260
30270
30280
30290
30300
30310
30320
30330
30340
30350
30360
30370
30380
30390
30400
30410
30420
30430
30440
30450
30460
30470
30480
30490
30500
30510
30520
30530
30540
30550
30560
30570
30580
30590
30600
30610
30620
30630
30640
30650
30660
30670
30680
30690
30700
30710
30720
30730
30740
30750
30760
30770
30780
30790
30800
30810
30820
30830
30840
3085

42860
42870
42880
42890
42900
42910
42920
42930
42940
42950
42960
42970
42980
42990
43000
43010
43020
43030
43040
43050
43060
43070
43080
43090
43100
43110
43120
43130
43140
43150
43160
43170
43180
43190
43200
43210
43220
43230
43240
43250
43260
43270
43280
43290
43300
43310
43320
43330
43340
43350
43360
43370
43380
43390
43400
43410
43420
43430
43440
43450
43460
43470
43480
43490
43500
43510
43520
43530
43540
43550
43560
43570
43580
43590
43600
43610
43620
43630
43640
43650
43660
43670
43680
43690
43700
43710
43720
43730
43740
43750
43760
43770
43780
43790
43800
43810
43820
43830
43840
43850
43860
43870
43880
43890
43900
43910
43920
43930
43940
43950
43960
43970
43980
43990
44000
44010
44020
44030
44040
44050
44060
44070
44080
44090
44100
44110
44120
44130
44140
44150
44160
44170
44180
44190
44200
44210
44220
44230
44240
44250
44260
44270
44280
44290
44300
44310
44320
44330
44340
44350
44360
44370
44380
44390
44400
44410
44420
44430
44440
44450
44460
44470
44480
44490
44500
44510
4452

56520
56530
56540
56550
56560
56570
56580
56590
56600
56610
56620
56630
56640
56650
56660
56670
56680
56690
56700
56710
56720
56730
56740
56750
56760
56770
56780
56790
56800
56810
56820
56830
56840
56850
56860
56870
56880
56890
56900
56910
56920
56930
56940
56950
56960
56970
56980
56990
57000
57010
57020
57030
57040
57050
57060
57070
57080
57090
57100
57110
57120
57130
57140
57150
57160
57170
57180
57190
57200
57210
57220
57230
57240
57250
57260
57270
57280
57290
57300
57310
57320
57330
57340
57350
57360
57370
57380
57390
57400
57410
57420
57430
57440
57450
57460
57470
57480
57490
57500
57510
57520
57530
57540
57550
57560
57570
57580
57590
57600
57610
57620
57630
57640
57650
57660
57670
57680
57690
57700
57710
57720
57730
57740
57750
57760
57770
57780
57790
57800
57810
57820
57830
57840
57850
57860
57870
57880
57890
57900
57910
57920
57930
57940
57950
57960
57970
57980
57990
58000
58010
58020
58030
58040
58050
58060
58070
58080
58090
58100
58110
58120
58130
58140
58150
58160
58170
5818

70190
70200
70210
70220
70230
70240
70250
70260
70270
70280
70290
70300
70310
70320
70330
70340
70350
70360
70370
70380
70390
70400
70410
70420
70430
70440
70450
70460
70470
70480
70490
70500
70510
70520
70530
70540
70550
70560
70570
70580
70590
70600
70610
70620
70630
70640
70650
70660
70670
70680
70690
70700
70710
70720
70730
70740
70750
70760
70770
70780
70790
70800
70810
70820
70830
70840
70850
70860
70870
70880
70890
70900
70910
70920
70930
70940
70950
70960
70970
70980
70990
71000
71010
71020
71030
71040
71050
71060
71070
71080
71090
71100
71110
71120
71130
71140
71150
71160
71170
71180
71190
71200
71210
71220
71230
71240
71250
71260
71270
71280
71290
71300
71310
71320
71330
71340
71350
71360
71370
71380
71390
71400
71410
71420
71430
71440
71450
71460
71470
71480
71490
71500
71510
71520
71530
71540
71550
71560
71570
71580
71590
71600
71610
71620
71630
71640
71650
71660
71670
71680
71690
71700
71710
71720
71730
71740
71750
71760
71770
71780
71790
71800
71810
71820
71830
71840
7185

83850
83860
83870
83880
83890
83900
83910
83920
83930
83940
83950
83960
83970
83980
83990
84000
84010
84020
84030
84040
84050
84060
84070
84080
84090
84100
84110
84120
84130
84140
84150
84160
84170
84180
84190
84200
84210
84220
84230
84240
84250
84260
84270
84280
84290
84300
84310
84320
84330
84340
84350
84360
84370
84380
84390
84400
84410
84420
84430
84440
84450
84460
84470
84480
84490
84500
84510
84520
84530
84540
84550
84560
84570
84580
84590
84600
84610
84620
84630
84640
84650
84660
84670
84680
84690
84700
84710
84720
84730
84740
84750
84760
84770
84780
84790
84800
84810
84820
84830
84840
84850
84860
84870
84880
84890
84900
84910
84920
84930
84940
84950
84960
84970
84980
84990
85000
85010
85020
85030
85040
85050
85060
85070
85080
85090
85100
85110
85120
85130
85140
85150
85160
85170
85180
85190
85200
85210
85220
85230
85240
85250
85260
85270
85280
85290
85300
85310
85320
85330
85340
85350
85360
85370
85380
85390
85400
85410
85420
85430
85440
85450
85460
85470
85480
85490
85500
8551

97520
97530
97540
97550
97560
97570
97580
97590
97600
97610
97620
97630
97640
97650
97660
97670
97680
97690
97700
97710
97720
97730
97740
97750
97760
97770
97780
97790
97800
97810
97820
97830
97840
97850
97860
97870
97880
97890
97900
97910
97920
97930
97940
97950
97960
97970
97980
97990
98000
98010
98020
98030
98040
98050
98060
98070
98080
98090
98100
98110
98120
98130
98140
98150
98160
98170
98180
98190
98200
98210
98220
98230
98240
98250
98260
98270
98280
98290
98300
98310
98320
98330
98340
98350
98360
98370
98380
98390
98400
98410
98420
98430
98440
98450
98460
98470
98480
98490
98500
98510
98520
98530
98540
98550
98560
98570
98580
98590
98600
98610
98620
98630
98640
98650
98660
98670
98680
98690
98700
98710
98720
98730
98740
98750
98760
98770
98780
98790
98800
98810
98820
98830
98840
98850
98860
98870
98880
98890
98900
98910
98920
98930
98940
98950
98960
98970
98980
98990
99000
99010
99020
99030
99040
99050
99060
99070
99080
99090
99100
99110
99120
99130
99140
99150
99160
99170
9918

109590
109600
109610
109620
109630
109640
109650
109660
109670
109680
109690
109700
109710
109720
109730
109740
109750
109760
109770
109780
109790
109800
109810
109820
109830
109840
109850
109860
109870
109880
109890
109900
109910
109920
109930
109940
109950
109960
109970
109980
109990
110000
110010
110020
110030
110040
110050
110060
110070
110080
110090
110100
110110
110120
110130
110140
110150
110160
110170
110180
110190
110200
110210
110220
110230
110240
110250
110260
110270
110280
110290
110300
110310
110320
110330
110340
110350
110360
110370
110380
110390
110400
110410
110420
110430
110440
110450
110460
110470
110480
110490
110500
110510
110520
110530
110540
110550
110560
110570
110580
110590
110600
110610
110620
110630
110640
110650
110660
110670
110680
110690
110700
110710
110720
110730
110740
110750
110760
110770
110780
110790
110800
110810
110820
110830
110840
110850
110860
110870
110880
110890
110900
110910
110920
110930
110940
110950
110960
110970
110980
110990
111000
111010

121300
121310
121320
121330
121340
121350
121360
121370
121380
121390
121400
121410
121420
121430
121440
121450
121460
121470
121480
121490
121500
121510
121520
121530
121540
121550
121560
121570
121580
121590
121600
121610
121620
121630
121640
121650
121660
121670
121680
121690
121700
121710
121720
121730
121740
121750
121760
121770
121780
121790
121800
121810
121820
121830
121840
121850
121860
121870
121880
121890
121900
121910
121920
121930
121940
121950
121960
121970
121980
121990
122000
122010
122020
122030
122040
122050
122060
122070
122080
122090
122100
122110
122120
122130
122140
122150
122160
122170
122180
122190
122200
122210
122220
122230
122240
122250
122260
122270
122280
122290
122300
122310
122320
122330
122340
122350
122360
122370
122380
122390
122400
122410
122420
122430
122440
122450
122460
122470
122480
122490
122500
122510
122520
122530
122540
122550
122560
122570
122580
122590
122600
122610
122620
122630
122640
122650
122660
122670
122680
122690
122700
122710
122720

133010
133020
133030
133040
133050
133060
133070
133080
133090
133100
133110
133120
133130
133140
133150
133160
133170
133180
133190
133200
133210
133220
133230
133240
133250
133260
133270
133280
133290
133300
133310
133320
133330
133340
133350
133360
133370
133380
133390
133400
133410
133420
133430
133440
133450
133460
133470
133480
133490
133500
133510
133520
133530
133540
133550
133560
133570
133580
133590
133600
133610
133620
133630
133640
133650
133660
133670
133680
133690
133700
133710
133720
133730
133740
133750
133760
133770
133780
133790
133800
133810
133820
133830
133840
133850
133860
133870
133880
133890
133900
133910
133920
133930
133940
133950
133960
133970
133980
133990
134000
134010
134020
134030
134040
134050
134060
134070
134080
134090
134100
134110
134120
134130
134140
134150
134160
134170
134180
134190
134200
134210
134220
134230
134240
134250
134260
134270
134280
134290
134300
134310
134320
134330
134340
134350
134360
134370
134380
134390
134400
134410
134420
134430

144720
144730
144740
144750
144760
144770
144780
144790
144800
144810
144820
144830
144840
144850
144860
144870
144880
144890
144900
144910
144920
144930
144940
144950
144960
144970
144980
144990
145000
145010
145020
145030
145040
145050
145060
145070
145080
145090
145100
145110
145120
145130
145140
145150
145160
145170
145180
145190
145200
145210
145220
145230
145240
145250
145260
145270
145280
145290
145300
145310
145320
145330
145340
145350
145360
145370
145380
145390
145400
145410
145420
145430
145440
145450
145460
145470
145480
145490
145500
145510
145520
145530
145540
145550
145560
145570
145580
145590
145600
145610
145620
145630
145640
145650
145660
145670
145680
145690
145700
145710
145720
145730
145740
145750
145760
145770
145780
145790
145800
145810
145820
145830
145840
145850
145860
145870
145880
145890
145900
145910
145920
145930
145940
145950
145960
145970
145980
145990
146000
146010
146020
146030
146040
146050
146060
146070
146080
146090
146100
146110
146120
146130
146140

Unnamed: 0,Windspeed(m_s),Humidity(%),FMF,AOD(869),chla(mg_m3),plastic_threshold,solz,relaz,senz,plastic_unc_median,SIC_median,plastic_avgK_median,DFS_median
0,0.5,30.0,0.01,0.04,0.05,0.061213,15.0,40.0,15.0,0.034150,18.323713,0.995334,3.806892
1,0.5,30.0,0.01,0.04,0.05,0.046424,15.0,40.0,30.0,0.026122,15.604201,0.997270,3.221448
2,0.5,30.0,0.01,0.04,0.05,0.036759,15.0,40.0,60.0,0.017642,15.001254,0.998750,3.893049
3,0.5,30.0,0.01,0.04,0.05,0.021841,15.0,110.0,15.0,0.011863,16.696411,0.999435,3.950530
4,0.5,30.0,0.01,0.04,0.05,0.031967,15.0,110.0,30.0,0.016817,15.966736,0.998866,3.714407
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15303,15.0,95.0,0.95,0.40,2.00,0.020029,60.0,110.0,15.0,0.011475,10.200517,0.999473,3.007352
15304,15.0,95.0,0.95,0.40,2.00,0.018606,60.0,110.0,30.0,0.010998,10.012692,0.999516,2.973358
15305,15.0,95.0,0.95,0.40,2.00,0.047686,60.0,110.0,60.0,0.028003,9.181176,0.996863,3.017742
15306,15.0,95.0,0.95,0.40,2.00,0.022100,60.0,170.0,15.0,0.013489,10.542571,0.999272,3.220736


In [10]:
#save file or restart from here:

# save dataframe to pickle file
df.to_pickle('SQOOP_df.pkl')
fdfe.to_pickle('SQOOP_fdfe.pkl')



In [None]:
#plots probability for plastic fraction distribution

    #font1 = {'family':'serif','color':'black','size':20}
    #font2 = {'family':'serif','color':'black','size':15}
    #font3 = {'family':'courier new','color':'black','size':20}
    #plt.scatter(val,this_detect_prob)
    #plt.title("Plastic detection probability",fontdict = font1)
    #plt.xlabel('Plastic fraction',fontdict = font2)
    #plt.ylabel('Probability for plastic fraction', fontdict = font2)
    #plt.xscale('log')
    #plt.xlim(0.0001,1.0)

In [38]:
#make error covariance matrix and prepare jacobian for

#for idx in range(0, z_len):
for idx in range(0, 400):
    print(idx)
#    rel_err=0.003  #relative error
#    sys_err=0.005
#    err=((meas[idx]*rel_err) + sys_err)**2 #generate error covariance matrix diagonals (code also takes 2d input)

    rel_err = np.array([0.01587401, 0.01244266, 0.00938955, 0.01092051, 0.00302288,
                  0.00544271, 0.00999068, 0.01467843, 0.0080387, 0.00944394,
                  0.0193447, 0.0224503, 0.02386379])
    
    err=(meas[idx]*rel_err)**2

    
    #plotting specifics
    font1 = {'family':'serif','color':'black','size':20}
    font2 = {'family':'serif','color':'black','size':15}
    font3 = {'family':'courier new','color':'black','size':20}
    plt.figure(figsize=(16,16)) 
    plt.subplot(2, 2, 1)


    
    #plot simulated values
    plt.scatter(waveln,meas[idx])
    plt.errorbar(waveln,meas[idx],yerr=err,fmt='-')
    plt.title("Simulated data with error bars",fontdict = font1)
    plt.xlabel('Wavelength (nm)',fontdict = font2)
    plt.ylabel('$rho_t$', fontdict = font2)
    plt.ylim(0.0,0.4)

    #prepare jacobian
    this_jac=jac_all[idx]
    jac=this_jac.transpose()

    #plot jacobians
    plt.subplot(2, 2, 2)
    plt.plot(waveln,this_jac[:,0],'-',label='WS')
    plt.plot(waveln,this_jac[:,1],'-',label='RH')
    plt.plot(waveln,this_jac[:,2],'-',label='FMF')
    plt.plot(waveln,this_jac[:,3],'-',label='AOD')
    plt.plot(waveln,this_jac[:,4],'-',label='CHL')
    plt.plot(waveln,this_jac[:,5],'-',label='PF')
    plt.title("Jacobians",fontdict = font1)
    plt.xlabel('Wavelength (nm)',fontdict = font2)
    plt.ylabel('Parameter partial derivative', fontdict = font2)
    plt.ylim(-0.3,0.8)
    plt.legend()

    #calculate rodgers stuff
    S_hat, SIC, AvgK, DFS = rodgers(jac, err, ap)
    #print(df.iloc[idx])
    #print_out(S_hat, SIC, AvgK, DFS, jac, err, ap, 0, m_len, n_len, 0 )

    #calculate detection probability metrics
    inc=np.arange(0, 10000, 1)
    val=inc/10000
    det_prob=np.arange(0, 10000, 1) / 10000
    for x in inc:
        Pd, Pd_pcnt_str = detect_prob(val[x], np.sqrt(S_hat[5,5]), doprint=0)
        det_prob[x] = Pd
        

    df.loc[idx]["plastic_uncertainty"]=np.sqrt(S_hat[5,5])  
       
    plt.subplot(2, 2, 3)
    plt.plot(val,det_prob)
    plt.xscale('log')
    plt.title("Plastic detection probability",fontdict = font1)
    plt.xlabel('Plastic fraction',fontdict = font2)
    plt.ylabel('Probability for plastic fraction', fontdict = font2)

    plt.subplot(2, 2, 4)
    plt.axis('off')

    txt='SQOOP simulation # '+str(idx)
    plt.text(.05,0.95,txt,horizontalalignment='left',
         verticalalignment='center',fontdict=font3)
    plt.text(.05,0.55,str(df.loc[idx]),horizontalalignment='left',
         verticalalignment='center',fontdict=font3)    
    
    outname='figures/SQOOP'+str(idx)+'.png'
    plt.savefig(outname)
    plt.close()



0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27