In [10]:
import numpy as np
from pyearth import Earth
import pandas as pd

In [56]:
def find_nearest(array, value):
    """
    Find the index of the item in an array that is closest to a given value.

    NaN entries are skipped, so a missing value in the data is never
    reported as the nearest element. When several entries are equally
    close, the first one wins.

    Args:
        array: numpy array, or any sequence np.asarray can convert to a
            numeric array
        value: int or float

    Returns:
        int: position of the closest element (a plain Python int)

    Raises:
        ValueError: if the array is empty or no distance is comparable
            (every entry, or the value itself, is NaN)

    """
    # np.asarray lets plain lists/tuples through; they do not support
    # element-wise subtraction on their own.
    distances = np.abs(np.asarray(array) - value)

    # nanargmin ignores NaN distances; plain argmin would select them.
    return int(np.nanargmin(distances))


class _LoaderTypeError(AssertionError, TypeError):
    """A loader argument has the wrong type.

    Also derives from AssertionError: these checks were once ``assert``
    statements, so callers catching AssertionError keep working.
    """


class _LoaderValueError(AssertionError, ValueError):
    """The requested window lies outside the axes stored in the file.

    Also derives from AssertionError for the same compatibility reason
    as _LoaderTypeError.
    """


def _is_real_number(value):
    """Return True for int/float scalars (Python or NumPy), but not bool."""
    return (isinstance(value, (int, float, np.integer, np.floating))
            and not isinstance(value, bool))


def _load_matrix(data_filename, startnm, endnm, starttime,
                 delimiter, skip_footer=0, replace_nan=False):
    """Validate the arguments, read the matrix file and trim it."""
    # Explicit raises instead of assert: assert is removed under ``-O``.
    if not isinstance(data_filename, str):
        raise _LoaderTypeError(
            'data_filename must be a str, got {}'.format(
                type(data_filename).__name__))
    for arg_name, arg in (('startnm', startnm),
                          ('endnm', endnm),
                          ('starttime', starttime)):
        if not _is_real_number(arg):
            raise _LoaderTypeError(
                '{} must be an int or float, got {}'.format(
                    arg_name, type(arg).__name__))

    data = np.genfromtxt(data_filename, delimiter=delimiter,
                         skip_footer=skip_footer)
    data_nm = data[1:, 0]    # first column, header row excluded
    data_time = data[0, 1:]  # first row, corner cell excluded
    data_z = data[1:, 1:]
    if replace_nan:
        data_nm = np.nan_to_num(data_nm)
        data_time = np.nan_to_num(data_time)
        data_z = np.nan_to_num(data_z)

    # ``not (a >= b)`` rather than ``a < b`` so that a NaN bound in the
    # file is rejected instead of silently passing the check.
    if not data_time[0] <= starttime <= data_time[-1]:
        raise _LoaderValueError(
            'starttime {!r} is outside the time axis [{}, {}]'.format(
                starttime, data_time[0], data_time[-1]))
    if not startnm >= data_nm[0]:
        raise _LoaderValueError(
            'startnm {!r} is below the first wavelength {}'.format(
                startnm, data_nm[0]))
    if not endnm <= data_nm[-1]:
        raise _LoaderValueError(
            'endnm {!r} is above the last wavelength {}'.format(
                endnm, data_nm[-1]))

    # Look up each cut position once and reuse it for every slice.
    nm_start = find_nearest(data_nm, startnm)
    nm_stop = find_nearest(data_nm, endnm)
    time_start = find_nearest(data_time, starttime)

    # trim wavelength array based on starting and ending wavelength
    # (half-open slice: the row nearest to endnm is itself left out)
    data_nm_use = data_nm[nm_start:nm_stop]

    # trim time array based on starting time
    data_time_use = data_time[time_start:]

    # trim data matrix accordingly
    data_z_use = data_z[nm_start:nm_stop, time_start:]

    return data_nm_use, data_time_use, data_z_use


# load .txt data
def load_data(data_filename, startnm, endnm, starttime):
    """
    Load a tab-delimited matrix file and trim it to a window.

    The first row of the file (minus its first cell) is read as the
    time-delay axis, the first column (minus its first cell) as the
    wavelength axis, and the remaining cells as the data matrix.

    Args:
        data_filename: string, path of the tab-delimited file
        startnm: int or float, first wavelength to keep; must not be
            below the first wavelength in the file
        endnm: int or float, wavelength at which to stop; must not be
            above the last wavelength in the file. The row nearest to
            it is not included in the output.
        starttime: int or float, first time-delay to keep; must lie
            between the first and last time-delay in the file

    Returns:
        data_nm: an array of wavelengths, numpy array
        data_time: an array of time-delay, numpy array
        data_z: the trimmed 2-D data, numpy array

    Raises:
        TypeError: if an argument has the wrong type
        ValueError: if startnm, endnm or starttime falls outside the
            axes stored in the file
        (both are also AssertionError instances)

    """
    return _load_matrix(data_filename, startnm, endnm, starttime,
                        delimiter='\t')


# load .csv data
def load_data_csv(data_filename, startnm, endnm, starttime, skip_footer=20):
    """
    Load a comma-delimited matrix file and trim it to a window.

    Same layout and trimming rules as load_data, with two differences:
    the last ``skip_footer`` lines of the file are ignored, and NaN
    cells are replaced through np.nan_to_num before validation.

    Args:
        data_filename: string, path of the .csv file
        startnm: int or float, first wavelength to keep
        endnm: int or float, wavelength at which to stop (the row
            nearest to it is not included)
        starttime: int or float, first time-delay to keep
        skip_footer: int, number of trailing lines handed to
            np.genfromtxt to skip; defaults to 20

    Returns:
        data_nm: an array of wavelengths, numpy array
        data_time: an array of time-delay, numpy array
        data_z: the trimmed 2-D data, numpy array

    Raises:
        TypeError: if an argument has the wrong type
        ValueError: if startnm, endnm or starttime falls outside the
            axes stored in the file
        (both are also AssertionError instances)

    """
    return _load_matrix(data_filename, startnm, endnm, starttime,
                        delimiter=',', skip_footer=skip_footer,
                        replace_nan=True)


In [57]:
def test_load_data_csv():
    """
    Check that load_data_csv rejects invalid arguments.

    Every call listed below is expected to raise. A call that returns
    normally fails the test with an AssertionError naming the case.
    All but the first case pass 'test.csv' as the file name.
    """
    file_2 = 'test.csv'

    # A wrong argument type may surface either from the loader's own
    # validation or from a failed comparison inside it.
    type_errors = (AssertionError, TypeError)
    value_errors = (AssertionError,)

    cases = [
        ((1, 900, 1400, 1), type_errors, 'TypeError not handled'),
        ((file_2, '1', 1400, 1), type_errors, 'TypeError not handled'),
        ((file_2, 700, '5', 1), type_errors, 'TypeError not handled'),
        ((file_2, 700, 1400, '1'), type_errors, 'TypeError not handled'),
        ((file_2, 700, 1400, 1), value_errors, 'ValueError not handled'),
        ((file_2, 900, 1800, 1), value_errors, 'ValueError not handled'),
        ((file_2, 900, 1400, -1), value_errors, 'ValueError not handled'),
    ]

    for call_args, expected, message in cases:
        try:
            load_data_csv(*call_args)
        except expected:
            continue
        # Reached only when the loader accepted the bad input.
        raise AssertionError(
            '{}: load_data_csv{!r}'.format(message, call_args))

    return

In [58]:
# Run the load_data_csv argument checks immediately.
test_load_data_csv()

In [59]:
def test_load_data():
    """
    Check that load_data rejects invalid arguments.

    Every call listed below is expected to raise. A call that returns
    normally fails the test with an AssertionError naming the case.
    All but the first case pass 'test.txt' as the file name.
    """
    file_2 = 'test.txt'

    # A wrong argument type may surface either from the loader's own
    # validation or from a failed comparison inside it.
    type_errors = (AssertionError, TypeError)
    value_errors = (AssertionError,)

    cases = [
        ((1, 900, 1400, 1), type_errors, 'TypeError not handled'),
        ((file_2, '1', 1400, 1), type_errors, 'TypeError not handled'),
        ((file_2, 700, '5', 1), type_errors, 'TypeError not handled'),
        ((file_2, 700, 1400, '1'), type_errors, 'TypeError not handled'),
        ((file_2, 700, 1400, 1), value_errors, 'ValueError not handled'),
        ((file_2, 900, 1800, 1), value_errors, 'ValueError not handled'),
        ((file_2, 900, 1400, -1), value_errors, 'ValueError not handled'),
    ]

    for call_args, expected, message in cases:
        try:
            load_data(*call_args)
        except expected:
            continue
        # Reached only when the loader accepted the bad input.
        raise AssertionError(
            '{}: load_data{!r}'.format(message, call_args))

    return

In [60]:
# Run the load_data argument checks immediately.
test_load_data()