# Unit tests for `data_book`

In [1]:
from data_book import DataBook, v_norm_formula, col_names
import pandas as pd
import numpy as np

In [2]:
# Test fixture: five cells in column B (numCol 2) of a single sheet, indexed by
# their 'sheet!cell' reference. B1 is an empty cell (every descriptive field is
# NaN); B2-B5 carry a formula, its cached value, a type tag and the formula
# with row numbers replaced by '*'.
# `np.nan` is used rather than the `np.NaN` alias, which NumPy 2.0 removed.
test_df = pd.DataFrame(
    {
        'numRow': [1, 2, 3, 4, 5],
        'numCol': [2, 2, 2, 2, 2],
        'sheetName': ['sheet1', 'sheet1', 'sheet1', 'sheet1', 'sheet1'],
        'cellFormula': [np.nan, 'Z2/K2', 'Z3/K3', 'Z4+K4', 'SUM(B2:B4)'],
        'cellValue': [np.nan, '2', '3', '4.0', '9.0'],
        'cellType': [np.nan, 'int', 'int', 'float', 'float'],
        'vNormFormula': [np.nan, 'Z*/K*', 'Z*/K*', 'Z*+K*', 'SUM(B*:B*)'],
    },
    index=['sheet1!B1', 'sheet1!B2', 'sheet1!B3', 'sheet1!B4', 'sheet1!B5'],
)

In [3]:
# Build the DataBook under test: hand it the fixture frame, then run its
# pre-processing step. `db` is reused by every test cell below.
db=DataBook()
db.load_data(test_df)
db.pre_process_data()

In [4]:
# After pre-processing, the frame is expected to hold B2-B5 only: the empty
# cell B1 from the fixture should no longer be present.
df = db.get_data()
# Compare as plain lists. An elementwise `df.index == [...]` raises ValueError
# when the lengths differ, which hides the real failure; a list comparison
# fails the assertion and the message shows the index that was produced.
assert df.index.tolist() == ['sheet1!B2', 'sheet1!B3', 'sheet1!B4', 'sheet1!B5'], df.index.tolist()

In [5]:
# Request negative cases for the single key sheet1!B3. The assertions in the
# later cell expect this to leave two rows under that key whose `Label`
# values differ.
db._add_negative_cases(keys=['sheet1!B3'])

In [6]:
# Display the feature row for sheet1!B3. `df` here is still the object
# returned by get_data() before the negative cases were requested; it is
# re-fetched before the assertions in the next cell.
df.loc['sheet1!B3']

Label                            False
up1_isBlank                      False
up1_isFormula                     True
up1_isSameType                    True
up1_isWeaklyFormulaConsistent     True
up2_isWeaklyFormulaConsistent    False
dw1_isBlank                      False
dw1_isFormula                     True
dw1_isSameType                   False
dw1_isWeaklyFormulaConsistent    False
dw2_isWeaklyFormulaConsistent    False
nb1_isWeaklyFormulaConsistent    False
dw1_isSum                        False
Name: sheet1!B3, dtype: bool

In [9]:
# Re-fetch the data so the rows added by _add_negative_cases are visible.
df = db.get_data()
b3_rows = df.loc['sheet1!B3']

# B3 should now appear twice, once per distinct Label value.
assert len(b3_rows) == 2
assert len(b3_rows['Label'].unique()) == 2
# The two rows are expected to agree on the "down" consistency feature
# and to differ on the "up" one.
assert len(b3_rows['dw1_isWeaklyFormulaConsistent'].unique()) == 1
assert len(b3_rows['up1_isWeaklyFormulaConsistent'].unique()) == 2

In [10]:
# v_norm_formula: row numbers in the cell references of a formula are expected
# to be replaced by '*', leaving the column letters in place.
assert v_norm_formula("SUM(D1:D123)") == 'SUM(D*:D*)'

In [11]:
# col_names: spot-check three positions of the returned sequence. Index 0 is
# expected to hold 'na', index 2 the single-letter name 'B', and index 28 the
# two-letter name 'AB'.
for position, expected_name in ((0, 'na'), (2, 'B'), (28, 'AB')):
    assert col_names()[position] == expected_name

In [12]:
# _get_v_cell_ref: with arguments (10, 1, 2, 'sheet') the expected reference is
# 'sheet!A12'; with (1, 1, -1, 'sheet') no reference is expected at all.
# NOTE(review): presumably the arguments are (row, column, row offset, sheet);
# confirm against the DataBook implementation.
assert db._get_v_cell_ref(10, 1, 2, 'sheet') == 'sheet!A12'
assert db._get_v_cell_ref(1, 1, -1, 'sheet') is None

In [13]:
# _get_that_from_this: starting from a fixture row, an offset of -1 is expected
# to yield the row for the cell above and +1 the row for the cell below.
assert db._get_that_from_this(test_df.loc['sheet1!B2'], -1).name == 'sheet1!B1'
assert db._get_that_from_this(test_df.loc['sheet1!B1'], 1).name == 'sheet1!B2'
# Nothing exists above B1, so None is expected. Use an identity check: if a
# Series were returned by mistake, `== None` would compare elementwise and the
# assert would raise an ambiguous-truth ValueError instead of failing cleanly.
assert db._get_that_from_this(test_df.loc['sheet1!B1'], -1) is None

In [14]:
# _isBlank: a None second argument and the empty fixture row B1 (iloc[0]) are
# both expected to count as blank; B2 (iloc[1]), which has a formula and a
# value, is not.
assert db._isBlank(None, None)
assert db._isBlank(None, test_df.iloc[0])
assert not db._isBlank(None, test_df.iloc[1])

In [15]:
# _isFormula: neither a None second argument nor the empty row B1 (iloc[0]) is
# expected to count as a formula; B2 (iloc[1], formula 'Z2/K2') is.
assert not db._isFormula(None, None)
assert not db._isFormula(None, test_df.iloc[0])
assert db._isFormula(None, test_df.iloc[1])

In [16]:
# _isSameType: a None on either side is expected to give False.
assert not db._isSameType(None, test_df.iloc[0])
assert not db._isSameType(test_df.iloc[0], None)
# B2 and B3 are both tagged 'int' in the fixture, so they should match;
# B2 ('int') and B4 ('float') should not.
assert db._isSameType(test_df.iloc[1], test_df.iloc[2])
assert not db._isSameType(test_df.iloc[1], test_df.iloc[3])

In [17]:
# _isWeaklyFormulaConsistent: a None on either side is expected to give True.
assert db._isWeaklyFormulaConsistent(None, test_df.iloc[1])
assert db._isWeaklyFormulaConsistent(test_df.iloc[1], None)
# B2 and B3 share the normalised formula 'Z*/K*', so they should be
# consistent; B2 ('Z*/K*') and B4 ('Z*+K*') should not.
assert db._isWeaklyFormulaConsistent(test_df.iloc[1], test_df.iloc[2])
assert not db._isWeaklyFormulaConsistent(test_df.iloc[1], test_df.iloc[3])

In [20]:
# _isSum: B5 (iloc[4], formula 'SUM(B2:B4)') is expected to be recognised as a
# sum; B4 (iloc[3], formula 'Z4+K4') is not.
assert db._isSum(None, test_df.iloc[4])
assert not db._isSum(None, test_df.iloc[3])