# mat_handler.py

The `mat_handler.py` module contains the `matrix` class, which is the backbone of `pyemu`.  The `matrix` class overloads all common mathematical operators and also uses an "auto-align" functionality to line up matrix objects for multiplication, addition, etc.



In [1]:
from __future__ import print_function
import os
import numpy as np
from mat_handler import matrix, cov

Here is the most basic instantiation of the `matrix` class:

In [2]:
# Construct a matrix without passing an array or any names.
m = matrix()

Here we will generate a `matrix` object with a random ndarray

In [3]:
# Wrap a 5x5 array of random values in a labelled matrix object.
a = np.random.random((5, 5))
# Zero-padded labels ("row_00" ... "row_04", "col_00" ... "col_04").  Built
# directly as comprehensions instead of running append() for its side effect,
# which also avoids creating throwaway lists of None.
row_names = ["row_{0:02d}".format(i) for i in range(5)]
col_names = ["col_{0:02d}".format(i) for i in range(5)]
m = matrix(x=a, row_names=row_names, col_names=col_names)
print(m)

row names: ['row_00', 'row_01', 'row_02', 'row_03', 'row_04']
col names: ['col_00', 'col_01', 'col_02', 'col_03', 'col_04']
[[ 0.3891801   0.21358206  0.28282072  0.96567931  0.91630759]
 [ 0.61026065  0.34818247  0.10960376  0.38404832  0.43975549]
 [ 0.57480228  0.74114482  0.64707643  0.8269736   0.25457062]
 [ 0.61926554  0.2221458   0.19403584  0.0922503   0.89717117]
 [ 0.36093455  0.90270032  0.73479865  0.43622756  0.32514638]]


# File I/O with `matrix`
`matrix` supports several PEST-compatible I/O routines as well as some others:

In [4]:
# Round-trip m through an ASCII matrix file: write it, then load it into a
# freshly constructed matrix and print the result.
ascii_name = "mat_test.mat"
m.to_ascii(ascii_name)
m2 = matrix()
m2.from_ascii(ascii_name)
print(m2)  # the recorded output shows the same names and values as m

row names: ['row_00', 'row_01', 'row_02', 'row_03', 'row_04']
col names: ['col_00', 'col_01', 'col_02', 'col_03', 'col_04']
[[ 0.3891801   0.21358206  0.28282072  0.96567931  0.91630759]
 [ 0.61026065  0.34818247  0.10960376  0.38404832  0.43975549]
 [ 0.57480228  0.74114482  0.64707643  0.8269736   0.25457062]
 [ 0.61926554  0.2221458   0.19403584  0.0922503   0.89717117]
 [ 0.36093455  0.90270032  0.73479865  0.43622756  0.32514638]]


In [5]:
# Round-trip m through a binary matrix file: write it, then load it into a
# freshly constructed matrix and print the result.
bin_name = "mat_test.bin"
m.to_binary(bin_name)
m3 = matrix()
m3.from_binary(bin_name)
print(m3)  # in the recorded output the names come back as unicode (u'...') strings

row names: [u'row_00', u'row_01', u'row_02', u'row_03', u'row_04']
col names: [u'col_00', u'col_01', u'col_02', u'col_03', u'col_04']
[[ 0.3891801   0.21358206  0.28282072  0.96567931  0.91630759]
 [ 0.61026065  0.34818247  0.10960376  0.38404832  0.43975549]
 [ 0.57480228  0.74114482  0.64707643  0.8269736   0.25457062]
 [ 0.61926554  0.2221458   0.19403584  0.0922503   0.89717117]
 [ 0.36093455  0.90270032  0.73479865  0.43622756  0.32514638]]


`matrix` also implements `to_dataframe()` and `to_sparse()`, which return a `pandas` `DataFrame` and a `scipy.sparse` (compressed sparse row) matrix, respectively:

In [6]:
# Print the concrete types returned by the two converters, then display the
# DataFrame itself as the cell's result.
print(type(m.to_dataframe()))
print(type(m.to_sparse()))
m.to_dataframe()  # looks really nice in the notebook!

<class 'pandas.core.frame.DataFrame'>
<class 'scipy.sparse.csr.csr_matrix'>


Unnamed: 0,col_00,col_01,col_02,col_03,col_04
row_00,0.38918,0.213582,0.282821,0.965679,0.916308
row_01,0.610261,0.348182,0.109604,0.384048,0.439755
row_02,0.574802,0.741145,0.647076,0.826974,0.254571
row_03,0.619266,0.222146,0.194036,0.09225,0.897171
row_04,0.360935,0.9027,0.734799,0.436228,0.325146


# Convenience methods of `matrix`

Several cool things are implemented in `matrix` and accessed through `@property` decorated methods.  For example, the SVD components of a `matrix` object are simply accessed by name.  The SVD routine is called on demand and the components are cast to `matrix` objects, all hidden from the user:

In [7]:
# m.s is read as an attribute (no call parentheses); the recorded output shows
# a 5x1 column of values in decreasing order.
print(m.s)  # the singular values of m cast into a matrix object.  the SVD() is called on demand...
m.s.to_ascii("test_sv.mat")  # save the singular values to a PEST-compatible ASCII file

row names: ['sing_val_1', 'sing_val_2', 'sing_val_3', 'sing_val_4', 'sing_val_5']
col names: ['sing_val_1', 'sing_val_2', 'sing_val_3', 'sing_val_4', 'sing_val_5']
[[ 2.55222664]
 [ 0.98224359]
 [ 0.62997123]
 [ 0.35092161]
 [ 0.09582257]]


In [8]:
# m.v and m.u are read as attributes, each exposing the same to_ascii() /
# to_dataframe() methods used on m itself.
m.v.to_ascii("test_v.mat")  # write the right singular vectors of m to an ASCII file
m.u.to_dataframe()  # a data frame of the left singular vectors of m (displayed as the cell result)

Unnamed: 0,left_sing_vec_1,left_sing_vec_2,left_sing_vec_3,left_sing_vec_4,left_sing_vec_5
row_00,-0.506929,-0.445779,0.658687,-0.301888,0.138922
row_01,-0.337476,-0.172843,-0.179034,0.698265,0.580179
row_02,-0.529925,0.434585,0.187757,0.387544,-0.587259
row_03,-0.357159,-0.547851,-0.633476,-0.150676,-0.3851
row_04,-0.469841,0.531415,-0.312303,-0.498393,0.388482


The matrix inverse operation is accessed the same way, but requires a square matrix:

In [9]:
# m.inv is read as an attribute; m is 5x5 here, and the recorded result keeps
# m's row and column names.
m.inv.to_dataframe()

Unnamed: 0,col_00,col_01,col_02,col_03,col_04
row_00,-1.299805,-0.084516,2.475368,1.405178,-2.038017
row_01,0.478852,3.847852,-3.762376,-2.469871,3.207165
row_02,-0.855937,-5.063198,3.998166,2.887513,-1.837752
row_03,0.875066,0.526934,0.245296,-1.097845,-0.341515
row_04,0.873754,0.146444,-1.66694,0.244645,1.045186


# Manipulating `matrix` shape
`matrix` has lots of functionality to support getting submatrices by row and col names:

In [10]:

# row_names is passed as a single string and col_names as a list; the recorded
# output is the 1x2 submatrix for row_00 / (col_01, col_03).
print(m.get(row_names="row_00",col_names=["col_01","col_03"]))

row names: ['row_00']
col names: ['col_01', 'col_03']
[[ 0.21358206  0.96567931]]


`extract()` calls `get()` then `drop()`:

In [11]:
from copy import deepcopy
# extract() is described in the text as get() followed by drop(), so it is run
# on a deep copy and m is still available unchanged for the later cells.
m_copy = deepcopy(m)
sub_m = m_copy.extract(row_names="row_00",col_names=["col_01","col_03"])
# NOTE: this value is discarded - the recorded output shows only the frame
# produced by the last expression in the cell (sub_m).
m_copy.to_dataframe()
sub_m.to_dataframe()

Unnamed: 0,col_01,col_03
row_00,0.213582,0.965679


# Operator overloading
The operator overloading uses the auto-align functionality as well as the `isdiagonal` flag for super easy linear algebra.  The "inner join" of the two objects is found and the rows and cols are aligned appropriately:

In [12]:
# a new matrix object that is not "aligned" with m:
#  - its rows are a reordered subset of m's row names
#  - only "col_01" is shared with m's column names
row_names = ["row_03","row_02","row_00"]
col_names = ["col_01","col_10","col_100"]
m_mix = matrix(x=np.random.random((3,3)),row_names=row_names,col_names=col_names)
m_mix.to_dataframe()


Unnamed: 0,col_01,col_10,col_100
row_03,0.45629,0.066816,0.572185
row_02,0.680062,0.867469,0.908227
row_00,0.272466,0.478904,0.344739


In [13]:
# Display m again so it can be compared side by side with m_mix.
m.to_dataframe()

Unnamed: 0,col_00,col_01,col_02,col_03,col_04
row_00,0.38918,0.213582,0.282821,0.965679,0.916308
row_01,0.610261,0.348182,0.109604,0.384048,0.439755
row_02,0.574802,0.741145,0.647076,0.826974,0.254571
row_03,0.619266,0.222146,0.194036,0.09225,0.897171
row_04,0.360935,0.9027,0.734799,0.436228,0.325146


In [14]:
# m times the transpose of m_mix.  The only name shared between m's columns
# and m_mix.T's rows is "col_01", and the recorded 5x3 result equals the
# product over that single shared name.
prod = m * m_mix.T
prod.to_dataframe()

Unnamed: 0,row_03,row_02,row_00
row_00,0.097455,0.145249,0.058194
row_01,0.158872,0.236786,0.094868
row_02,0.338177,0.504024,0.201937
row_03,0.101363,0.151073,0.060527
row_04,0.411893,0.613892,0.245955


In [15]:
# Operands swapped: the shared names are now the three rows row_03, row_02 and
# row_00, and the recorded result is 3x5 (m_mix's columns by m's columns).
prod2 = m_mix.T * m
prod2.to_dataframe()

Unnamed: 0,col_00,col_01,col_02,col_03,col_04
col_01,0.779504,0.663581,0.605648,0.867601,0.832157
col_10,0.72638,0.760048,0.709727,1.186005,0.7196
col_100,1.010552,0.873867,0.796217,1.136772,1.060444


In [16]:
# Addition keeps only what both operands share: rows row_03, row_02, row_00
# and the single column col_01 (see the recorded 3x1 result).
(m_mix + m).to_dataframe()

Unnamed: 0,col_01
row_03,0.678436
row_02,1.421206
row_00,0.486048


# The `cov` derived type
The `cov` type of `mat_handler` is designed specifically to handle covariance matrices.  It makes some assumptions, such as symmetry (and, accordingly, that `row_names == col_names`).

In [17]:
# Build a cov from m's values, passing only one list of names.
# m.newx presumably returns a copy of m's array - TODO confirm in mat_handler.
# NOTE: m holds random, non-symmetric values, so c is not a genuine covariance
# matrix; it is used here only to demonstrate the I/O routines.
c = cov(m.newx,m.row_names)

The `cov` class supports several additional I/O routines, including the PEST uncertainty file (.unc):

In [18]:
# Write c to a PEST uncertainty (.unc) file.
c.to_uncfile("test.unc")

In [19]:
# Read the uncertainty file back into a freshly constructed cov and print it;
# in the recorded output the row and column names are identical.
c1 = cov()
c1.from_uncfile("test.unc")
print(c1)

row names: ['row_00', 'row_01', 'row_02', 'row_03', 'row_04']
col names: ['row_00', 'row_01', 'row_02', 'row_03', 'row_04']
[[ 0.3891801   0.21358206  0.28282072  0.96567931  0.91630759]
 [ 0.61026065  0.34818247  0.10960376  0.38404832  0.43975549]
 [ 0.57480228  0.74114482  0.64707643  0.8269736   0.25457062]
 [ 0.61926554  0.2221458   0.19403584  0.0922503   0.89717117]
 [ 0.36093455  0.90270032  0.73479865  0.43622756  0.32514638]]


We can also build `cov` objects implied by PEST control file parameter bounds or observation weights:

In [20]:
# Both covariance objects are loaded from the same control file, henry/pest.pst.
# Parameter covariance, from the parameter bounds:
parcov = cov()
parcov.from_parbounds(os.path.join("henry","pest.pst"))
# Observation covariance, from the observation weights:
obscov = cov()
obscov.from_obsweights(os.path.join("henry","pest.pst"))

In [21]:
# The recorded output is non-zero only on the diagonal.
parcov.to_dataframe()  # to_dataframe() for diagonal types builds a full matrix dataframe - can be costly

Unnamed: 0,global_k,mult1,mult2,kr01c01,kr01c02,kr01c03,kr01c04,kr01c05,kr01c06,kr01c07,...,kr10c51,kr10c52,kr10c53,kr10c54,kr10c55,kr10c56,kr10c57,kr10c58,kr10c59,kr10c60
global_k,0.003076,0.000000,0.000000,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
mult1,0.000000,0.003076,0.000000,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
mult2,0.000000,0.000000,0.022655,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c01,0.000000,0.000000,0.000000,0.25,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c02,0.000000,0.000000,0.000000,0.00,0.25,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c03,0.000000,0.000000,0.000000,0.00,0.00,0.25,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c04,0.000000,0.000000,0.000000,0.00,0.00,0.00,0.25,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c05,0.000000,0.000000,0.000000,0.00,0.00,0.00,0.00,0.25,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c06,0.000000,0.000000,0.000000,0.00,0.00,0.00,0.00,0.00,0.25,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c07,0.000000,0.000000,0.000000,0.00,0.00,0.00,0.00,0.00,0.00,0.25,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00


In [22]:
# In the recorded output the very large diagonal entries are 1e+60.
obscov.to_dataframe()  # notice the zero-weight obs have been assigned a really large uncertainty

Unnamed: 0,h_obs01_1,h_obs01_2,h_obs02_1,h_obs02_2,h_obs03_1,h_obs03_2,h_obs04_1,h_obs04_2,h_obs05_1,h_obs05_2,...,c_obs12_2,c_obs13_1,c_obs13_2,c_obs14_1,c_obs14_2,c_obs15_1,c_obs15_2,pd_one,pd_ten,pd_half
h_obs01_1,0.000043,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs01_2,0.000000,1.000000e+60,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs02_1,0.000000,0.000000e+00,0.000043,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs02_2,0.000000,0.000000e+00,0.000000,1.000000e+60,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs03_1,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000043,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs03_2,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,1.000000e+60,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs04_1,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000043,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs04_2,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,1.000000e+60,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs05_1,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000043,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs05_2,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,1.000000e+60,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
