In [57]:
import pathlib
import os
from scipy.io import loadmat

import pandas as pd
import numpy as np

from seaborn import heatmap
import matplotlib.pyplot as pl

From the doc:
<br>
<u>group</u>: Denotes a grouping based on a ph (440) values (12 groups)
<br>
<u>class</u>: Denotes a spectral shape grouping (4 groups)

* atot – total absorption coefficient (m -1 )
* ad – detrital absorption coefficient (m -1 )
* ag – CDOM absorption coefficient (m -1 )
* aph – phytoplankton absorption coefficient (m -1 )
* bbtot – total backscattering coefficient (m -1 )
* bbdm – detrital backscattering coefficient (m -1 )
* bbph – phytoplankton backscattering coefficient (m -1 )

In [59]:
def make_matfiles_list(path_string):
    path = pathlib.Path(path_string)
    return [file.as_posix() for file in path.glob('*.mat')]

In [157]:
def extract_data_from(this, wave_prefix):
    """Extracts matlab arrays from .mat"""
    group, class_, wave, data = this
    group = group.squeeze()
    class_ = class_.squeeze()
    wave = wave.squeeze()
    cols = ['group', 'class'] + ['%s_%d' %(wave_prefix, w) for w in wave]
    df = pd.DataFrame(np.c_[group, class_, data], columns=cols)
    return df

In [171]:
def read_files(matfiles_list):
    """This function reads Lt data, categorized by aerosol optical index tau.
        The data is then put into a pandas dataframe and written into the picklejar
        directory."""
    for file in matfiles_list:
        print('processing %s' % file)
        basename = os.path.basename(file).split('.')[0]
        mat_ = loadmat(file)
        if 'tau' in basename:
            df_t = extract_data_from(mat_[basename][0][0], wave_prefix='Lt')
            df_t.to_pickle('./pickleJar/SyntheticDataSets/df_%s.pkl' % basename)
        elif 'IOP' in basename:
            mat_keys = [k for k in mat_.keys() if '__' not in k]
            for key in mat_keys:
                df_iop = extract_data_from(mat_[key][0, 0], wave_prefix=key)
                df_iop.to_pickle('./pickleJar/SyntheticDataSets/df_%s.pkl' %key)
        else:
            print('cannot process file %s' %file)
            

In [172]:
p_str = './data/synthetic_data/'
matfiles = make_matfiles_list(p_str)

In [173]:
matfiles

['data/synthetic_data/tau_05.mat',
 'data/synthetic_data/tau_03.mat',
 'data/synthetic_data/tau_08.mat',
 'data/synthetic_data/tau_01.mat',
 'data/synthetic_data/synthIOPs.mat']

In [174]:
read_files(matfiles)

processing data/synthetic_data/tau_05.mat
processing data/synthetic_data/tau_03.mat
processing data/synthetic_data/tau_08.mat
processing data/synthetic_data/tau_01.mat
processing data/synthetic_data/synthIOPs.mat
