In [1]:
# Load the autoreload extension so edited modules can be re-imported without restarting the kernel.
%load_ext autoreload
# Populates the interactive namespace from numpy and matplotlib; later cells rely on this
# for the names `np` and `ma`, which are not imported explicitly anywhere else.
%pylab inline

import sys
# Put the parent directory first on the import path
# (presumably so the local seaflux checkout is the one imported -- confirm).
sys.path.insert(0, '../')

import pandas as pd
import seaflux as sf

# Mode 2: reload all modules before executing each cell.
%autoreload 2

Populating the interactive namespace from numpy and matplotlib


# Overview

The limit checker is a decorator that checks the input arguments of the wrapped function against allowed ranges.  
The keyword arguments passed to the limit checker must match the argument names of the decorated function.   

The function will return a new type that behaves like a `numpy.ndarray`, with the exception that masks are only ornamental.  
This means that arithmetic and ufuncs are applied to the masked data.  
This differs from `numpy.ma.masked_array`, which does not apply arithmetic and functions to the masked data; here the user can still view the processed "bad" outputs.

The output also carries an additional metadata element (`meta`). The examples below show the behaviour of this new class.

This is still very buggy and needs a lot of tests, but I feel this will be a useful tool for different applications where a variable range sanity check should be performed (basically all of science)!

# Simple example

In [95]:
@sf.unit_checks.check_limits(a=[5, 10], b=[3, 8])
def add(a, b):
    """Return the sum of ``a`` and ``b``.

    The decorator is given the limits [5, 10] for ``a`` and [3, 8] for ``b``;
    its keyword names match this function's argument names.
    """
    total = a + b
    return total

In [96]:
# Call the limit-checked function. The second element has a=3 (< 5) and the
# third has b=9 (> 8), so both are flagged in the result shown below.
a = add([6, 3, 5], [4, 3, 9])
a

meta_array(data=[10, --, --],
           mask=[False,  True,  True],
           meta=['', 'add: a < 5; ', 'add: b > 8; '],
     fill_value=999999)

In [97]:
# Underlying values: the flagged entries are still computed, only hidden in the repr.
a.data

array([10,  6, 14])

In [98]:
# Boolean mask: True for the elements whose inputs violated a limit.
a.mask

array([False,  True,  True])

In [99]:
# Per-element messages naming the function, the argument and the limit that was violated.
a.meta

array(['', 'add: a < 5; ', 'add: b > 8; '], dtype=object)

In [100]:
# to_dict() exposes data, mask and meta; pandas turns each into a column.
pd.DataFrame(a.to_dict())

Unnamed: 0,data,mask,meta
0,10,False,
1,6,True,add: a < 5;
2,14,True,add: b > 8;


## Mask Behaviour
Operations are still applied to the masked elements (unlike `numpy.ma.masked_array`)

In [102]:
# Arithmetic returns a meta_array again; mask and meta are carried over.
a + 3

meta_array(data=[13, --, --],
           mask=[False,  True,  True],
           meta=['', 'add: a < 5; ', 'add: b > 8; '],
     fill_value=999999)

In [103]:
# The addition was applied to the masked entries as well (6 -> 9, 14 -> 17).
(a + 3).data

array([13,  9, 17])

In [104]:
# For comparison: viewed as a numpy masked array, the masked entries are left
# untouched by the addition (`ma` is presumably numpy.ma, provided by %pylab).
(a.view(ma.masked_array) + 3).data

array([13,  6, 14])

## Meta behaviour
When an operation combines two arrays, the metadata messages of both operands are joined.  
Known issue: duplicate messages are not removed.

In [118]:
# Second array: the middle element violates both lower limits (a=3 < 5 and b=2 < 3).
b = add([8, 3, 5], [5, 2, 6])
b

meta_array(data=[13, --, 11],
           mask=[False,  True, False],
           meta=['', 'add: a < 5; add: b < 3; ', ''],
     fill_value=999999)

In [119]:
# Combining two meta_arrays: an element is masked if it is masked in either operand,
# and the meta messages of both operands are concatenated (duplicates are kept).
a * b

meta_array(data=[130, --, --],
           mask=[False,  True,  True],
           meta=['', 'add: a < 5; add: b < 3; add: a < 5; ',
                 'add: b > 8; '],
     fill_value=999999)

# Seaflux example

In [120]:
# Synthetic inputs: n normally distributed samples per variable. The spreads
# are wide, so some samples fall outside the limits that seaflux checks
# (see the meta messages in the output).
# NOTE: no random seed is set, so the values differ on every run.
n = 10
fco2 = np.random.normal(200, 100, size=n)
sst = np.random.normal(10, 10, size=n)
pres = np.random.normal(1000, 150, size=n)

# The limit checks run inside the call; the result is a meta_array.
output = sf.fCO2_to_pCO2(fco2, sst, pres)

output

meta_array(data=[46.68377573273359, --, 85.22136749260231,
                 500.83063909683716, --, 82.1118217718081, --, --, --,
                 246.65664858060794],
           mask=[False,  True, False, False,  True, False,  True,  True,
                  True, False],
           meta=['', 'fCO2_to_pCO2: pres_hPa > 1200; ', '', '',
                 'virial_coeff: temp_K < 271.15; fCO2_to_pCO2: tempSW_C < -2; ',
                 '', 'virial_coeff: xCO2_mol < 5e-05; ',
                 'virial_coeff: temp_K < 271.15; fCO2_to_pCO2: tempSW_C < -2; ',
                 'virial_coeff: xCO2_mol < 5e-05; ', ''],
     fill_value=1e+20)

In [121]:
# Tabular view: the data column also shows the values computed for flagged rows.
pd.DataFrame(output.to_dict())

Unnamed: 0,data,mask,meta
0,46.683776,False,
1,232.46917,True,fCO2_to_pCO2: pres_hPa > 1200;
2,85.221367,False,
3,500.830639,False,
4,266.470041,True,virial_coeff: temp_K < 271.15; fCO2_to_pCO2: t...
5,82.111822,False,
6,44.023839,True,virial_coeff: xCO2_mol < 5e-05;
7,312.971045,True,virial_coeff: temp_K < 271.15; fCO2_to_pCO2: t...
8,20.030579,True,virial_coeff: xCO2_mol < 5e-05;
9,246.656649,False,
