In [1]:
import pandas as pd
import numpy as np
import csv
import os
from pandas import ExcelWriter
from GLSettingsByState import GLTaxSettingsByState

In [2]:
def excelSave(df_dict):    
    def write_df(dataframe, df_key):
        if dataframe is None:
            print("error w/ df")
        writer = ExcelWriter(df_key + ".xlsx", engine="xlsxwriter")
        dataframe.to_excel(writer, sheet_name=df_key + "_")
        writer.save()
    if isinstance(df_dict, type(dict())):
        for df_key in df_dict.keys():
            write_df(df_dict.get(df_key, None), df_key)
            
    
def currency_to_float(curr_str):
    """
    input -> currency: str
    output -> currency value: float
    
    Converts str of currency into a float.
    Will be used to apply to columns in DF.
    Note on behavior of round() from pyton docs:-
        'The behavior of round() for floats can be surprising: for example, 
        round(2.675, 2) gives 2.67 instead of the expected 2.68. 
        This is not a bug.'

    
    $1 -> 1.0
    ($1) -> -1.0
    $0 -> 0.00
    $50.50 -> 50.50
    $52,555.87 -> 52555.87
    float('nan') - > 0.0
    525.00 -> 525.00
    """
    # checks if val is already a float, if it is a nan float return 0.0 
    # otherwise return float val
    if isinstance(curr_str, float): 
        if isnan(curr_str): 
            return 0.0
        else: 
            return curr_str
    
    # check if str value is not valid, if so return 0
    if len(curr_str) < 1: 
        # print('Len less than 1: {}'.format(curr_str))
        return 0.0
    
    float_str = ''
    neg_val = False
    # check for negative value
    if '(' in curr_str : neg_val = True 
    # iterate over string, remove unwanted charachters
    for char in curr_str:
        if char in ['(', ')', '$', ',']:
            continue
        else:
            float_str+= char
    
    #print('str check:', float_str)
    # See notes on round() func behavior
    if neg_val == True:
        return float(float_str) * -1
    else:
        return float(float_str)

In [3]:
__author__ = 'nabeelh-dev'

class TaxRates(object):
    """
    Reads tax rate data per Alavara.com
    Dict will be contained in self.tax_rates.
    Will be able to query by zipcode.

    e.g
    TaxRates.query_by_zipcode('99501')
    >>>{
        "region_name": "ALASKA STATE",
        "state_rate" : 0.000000,
        "est_combined_rate": 0.000000,
        "est_country_rate": 0.000000,
        "est_city_rate": 0.000000,
        "est_special_rate": 0.000000,
        "risk_level": 1,
        "zipcode": "99501",
        "state": "AK"
        }
    """
    def __init__(self, csv_path):
        self.filepath = csv_path
        self.tax_rates = self.parse_csv()

    def parse_csv(self):
        """
        Takes str value for directory stored in self.filepath and will process
        all tax rate csv files within. Will return a dict that will be set to
        TaxRates objects self.tax_rates so we can query by zipcode.
        self.tax_rates will contain a dict that has two main keys:
        'state_tax_rates': stores all State -> Zipcode -> TaxRates dict
        'zipcode_to_state': will be a list of lists, sorted by all states zipcode ranges

        zipcode_to_states list will be used for fast querying when looking for zipcodes tax rates.
        It will quickly indicate the state we will need to look inside to find the zipcode tax rates,
        isntead of looping through state_tax_rates dict which would be inefficient.

        CSV file names to parse are in this format: TAXRATES_ZIP5_AK201901.csv
        We will want to extract the state name from the filename.

        e.g states_tax_rates:
        {
            'state_tax_rates': {
                                'AK': {
                                        '99501': {
                                                "region_name": "ALASKA STATE",
                                                "state_rate" : 0.000000,
                                                "est_combined_rate": 0.000000,
                                                "est_country_rate": 0.000000,
                                                "est_city_rate": 0.000000,
                                                "est_special_rate": 0.000000,
                                                "risk_level": 1
                                                },
                                        },
                                },
            'zipcode_to_state': {
                                '82397': 'WY',
                                '83414': 'WY',
                                '89001': 'NV',
                                '89883': 'NV',
                                '99501': 'AK',
                                '99950': 'AK',
                                }
        }

        :param: self.filepath: str
        :return: states_tax_rates: dict
        """
        states_tax_rates = dict()
        zipcode_to_state = dict()

        # directory input, process all .csv contained within directory
        dirpath = os.path.abspath('')
        csv_dp = os.path.join(dirpath, self.filepath)
        for filename in os.listdir(csv_dp):
            # splits filename to identify state: TAXRATES_ZIP5_AK201901.csv
            csv_state_abbr = filename.split("ZIP5_")[1][:2]
            if filename.endswith(".csv"):
                state_tax_csv = os.path.join(csv_dp, filename)
                # Read and get data from csv
                state_tax_data = TaxRates._parse_single_csv(state_tax_csv)
                states_tax_rates[csv_state_abbr] = state_tax_data['zipcode_rates']

        # Create and return a dict mapping zipcode to state
        # This reverse lookup is used by self.query_by_zipcode()
        for state, zipcode_tax_rates in states_tax_rates.items():
            for zipcode, tax_rates in zipcode_tax_rates.items():
                zipcode_to_state[zipcode] = state

        print("Zipcode Tax Rates loaded successfully.")
        return {
                "state_tax_rates": states_tax_rates,
                "zipcode_to_state": zipcode_to_state
                }

    @staticmethod
    def _parse_single_csv(csv_fp):
        """
        Parses csv at file path. Collects and stores all zipcode data as a dict.
        When being read by csv.reader, each row will be a list as follows:
        ['WY', '83118', 'LINCOLN COUNTY', '0.040000', '0.050000', '0.010000', '0', '0', '1']
        Data will be converted to a dict.

        Will also keep track of zipcode values and return
        the 'lowest' zipcode value and highest in the csv file, under the key 'low_high_zipcode'.
        This will be used to help to make query_by_zipcode class function perform faster.
        Will take advantage of the fact that the csv files are all in zipcode value descending order.

        e.g
        {
            'zipcode_rates' : {
                                '83118': {
                                        "region_name": "LINCOLN COUNTY",
                                        "state_rate" : 0.040000,
                                        "est_combined_rate": 0.050000,
                                        "est_country_rate": 0.010000,
                                        "est_city_rate": 0.000000,
                                        "est_special_rate": 0.000000,
                                        "risk_level": 1
                                        },
                                },
            'low_high_zipcode' : [82001, 83414]

        :param csv_fp: os path object
        :return: state_dict: dict
        """
        zipcode_rates = dict()
        with open(csv_fp) as csv_file:
            #print("Opened Tax CSV Successfully: {}".format(str(csv_fp)))
            csv_reader = csv.reader(csv_file)
            # Skip the first line of csv file due to header
            ## Keep header information in the future??
            for i in range(0, 1, 1):
                next(csv_reader, None)

            for zipcode_tax in csv_reader:
                zipcode = zipcode_tax[1]
                region_name = zipcode_tax[2]
                state_rate = float(zipcode_tax[3])
                est_combined_rate = float(zipcode_tax[4])
                est_country_rate = float(zipcode_tax[5])
                est_city_rate = float(zipcode_tax[6])
                est_special_rate = float(zipcode_tax[7])
                risk_level = int(zipcode_tax[8])

                zipcode_rates[zipcode] = {
                                        "region_name": region_name,
                                        "state_rate" : state_rate,
                                        "est_combined_rate": est_combined_rate,
                                        "est_country_rate": est_country_rate,
                                        "est_city_rate": est_city_rate,
                                        "est_special_rate": est_special_rate,
                                        "risk_level": risk_level
                                        }

        return {
                'zipcode_rates': zipcode_rates
                }

    def query_by_zipcode(self, zipcode_str):
        """
        Given a zipcode string, retrieve tax rates for that zipcode.
        First accesses 'zipcodes_to_state' dict contained in self.tax_rates to get
        the State the zipcode is in.
        If zipcode_str does not exist, then print error and return None.

        If it exists then uses State key and Zipcode key to obtain correct tax rates from
        'state_tax_rates' dict in self.tax_rates

        :param zipcode_str: str
        :return: zipcode_tax_rates: dict
        """

        # dict get method returns None if key is not found
        query_state = self.tax_rates['zipcode_to_state'].get(zipcode_str)
        if query_state is None:
            print("{} - zipcode not found!".format(zipcode_str))
            return None
        query_results = self.tax_rates['state_tax_rates'][query_state][zipcode_str]
        query_results['zipcode'] = zipcode_str
        query_results['state'] = str(query_state)
        return query_results

In [4]:
taxRates = TaxRates("TAXRATES_ZIP5/")
taxRates.query_by_zipcode("90247")

Zipcode Tax Rates loaded successfully.


{'est_city_rate': 0.0,
 'est_combined_rate': 0.095,
 'est_country_rate': 0.0025,
 'est_special_rate': 0.0325,
 'region_name': 'GARDENA',
 'risk_level': 1,
 'state': 'CA',
 'state_rate': 0.06,
 'zipcode': '90247'}

In [5]:
with open("REGION_TO_ZIP.csv") as csv_file:
    df = pd.read_csv(csv_file, delimiter=",").set_index("REGION")
    regionToZip = df.to_dict('index')

In [6]:
folder_dir = "test_NAPGLDATA"

df_list = list()
for file in os.listdir(folder_dir):
    if file.endswith(".csv"):
        fp_ = os.path.join(folder_dir, file)
        with open(fp_) as csv_file:
            for i in range(0, 5):
                next(csv_file, i)
            df_list.append(pd.read_csv(csv_file, delimiter=","))
        
core_df = pd.concat(df_list, axis=0, ignore_index=True)

In [7]:
"""
with open("test_NAPGLDATA/CORE_12-18.csv") as csv_file:
    for i in range(0, 5):
        next(csv_file, i)
    core_df = pd.read_csv(csv_file, delimiter=",")
"""

nap_csv_colMap = {
                    'Segment3': 'section',
                    'Segment4': 'area',
                    'Segment5': 'region',
                    'Account Description': 'glAcctDesc',
                    'Record Type::Number': 'Record Type::Number',
                    'CM Trx Type': 'refNum',
                    'TRX Timestamp Date': 'trxDate',
                    'GL Posting Date': 'glDate',
                    'Description': 'trxDesc',
                    'Main Account Segment': 'glAcct',
                    'PaidToRcvd From': 'paidToRcvd',
                    'TRX Amount': 'trxAmount',
                    'Originating Debit Amount': 'debitAmt',
                    'Originating Credit Amount': 'creditAmt'
                    }

core_df.rename(mapper=nap_csv_colMap, inplace=True, axis=1)

In [8]:
core_df["trxAmount"] = core_df["trxAmount"].apply(lambda x: currency_to_float(x))
core_df["debitAmt"] = core_df["debitAmt"].apply(lambda x: currency_to_float(x))
core_df["creditAmt"] = core_df["creditAmt"].apply(lambda x: currency_to_float(x))
core_df["glAcct"] = core_df["glAcct"].astype(np.int64)

In [9]:
taxRates.query_by_zipcode("92040")

{'est_city_rate': 0.0,
 'est_combined_rate': 0.0775,
 'est_country_rate': 0.0025,
 'est_special_rate': 0.015,
 'region_name': 'SAN DIEGO COUNTY',
 'risk_level': 1,
 'state': 'CA',
 'state_rate': 0.06,
 'zipcode': '92040'}

In [10]:
regionToZip[234]

{'CITY': 'Lakeside',
 'CNTCPRSN': 'Kriste Redman',
 'CUSTNAME': 'REGION 234',
 'SAR': '11-V-234',
 'STATE': 'CA',
 'ZIP': 92040.0}

In [11]:
core_df.shape

(12216, 17)

In [12]:
core_df.head(1)

Unnamed: 0,Record Type::Number,refNum,trxDate,glDate,Trx Number,paidToRcvd,trxDesc,trxAmount,debitAmt,creditAmt,glAcct,Segment2,section,area,region,Segment6,glAcctDesc
0,20::8,Supplier Invoice,2/2/2019,12/9/2018,8,NOCRA,NOCRA Inv ADULT-03 Fall 2018 assignor and forf...,0.0,0.0,162.0,2010,0,0A11,Q,0,0,Accounts Payable/AP Trade


In [217]:
# get rid of rows that don't have a region value or have a value of 0
region_df = core_df.loc[core_df["region"] != 0]
# get rid of rows that have a 0 for Trx Amount, these are reversed or voided journal lines.
region_df = region_df.loc[region_df["trxAmount"] != 0.0]

region_df["state"] = pd.Series(region_df["region"].apply(lambda x: regionToZip[x]['STATE']))
region_df["city"] = pd.Series(region_df["region"].apply(lambda x: regionToZip[x]['CITY']))
region_df["zipcode"] = pd.Series(region_df["region"].apply(lambda x: regionToZip[x]["ZIP"]))

In [218]:
region_df.drop(["Segment6", "Segment2", "trxDate"], axis=1, inplace=True)
region_df.head(2)

Unnamed: 0,Record Type::Number,refNum,glDate,Trx Number,paidToRcvd,trxDesc,trxAmount,debitAmt,creditAmt,glAcct,section,area,region,glAcctDesc,state,city,zipcode
9,1::8,Bank Payment,12/1/2018,505,GIOVANNI G,,30.0,30.0,0.0,5101,0A14,L,1408,Uniforms-Players-TAX PAID,FL,Wildwood,34785.0
10,1::9,Bank Payment,12/1/2018,506,VICTOR G,,30.0,30.0,0.0,5101,0A14,L,1408,Uniforms-Players-TAX PAID,FL,Wildwood,34785.0


Anything with a GL code less than 5000 is a Revenue account.
Anything with a GL code above 5000 is an Expense account

In [219]:
"""conditions = [region_df["glAcct"] < 5000,
             region_df["glAcct"] > 5000]
outputs = ["Expense", "Revenue"]

res = np.select(conditions, outputs)
region_df["Exp/Rev"] = pd.Series(res)
"""
region_df["Exp/Rev"] = np.where(region_df['glAcct']>=5000, "Expense", "Revenue")

In [220]:
region_df.head(1)

Unnamed: 0,Record Type::Number,refNum,glDate,Trx Number,paidToRcvd,trxDesc,trxAmount,debitAmt,creditAmt,glAcct,section,area,region,glAcctDesc,state,city,zipcode,Exp/Rev
9,1::8,Bank Payment,12/1/2018,505,GIOVANNI G,,30.0,30.0,0.0,5101,0A14,L,1408,Uniforms-Players-TAX PAID,FL,Wildwood,34785.0,Expense


In [221]:
region_df["taxableAmt"] = region_df["debitAmt"] + (region_df["creditAmt"] * -1)

In [222]:
region_df.head(3)

Unnamed: 0,Record Type::Number,refNum,glDate,Trx Number,paidToRcvd,trxDesc,trxAmount,debitAmt,creditAmt,glAcct,section,area,region,glAcctDesc,state,city,zipcode,Exp/Rev,taxableAmt
9,1::8,Bank Payment,12/1/2018,505,GIOVANNI G,,30.0,30.0,0.0,5101,0A14,L,1408,Uniforms-Players-TAX PAID,FL,Wildwood,34785.0,Expense,30.0
10,1::9,Bank Payment,12/1/2018,506,VICTOR G,,30.0,30.0,0.0,5101,0A14,L,1408,Uniforms-Players-TAX PAID,FL,Wildwood,34785.0,Expense,30.0
11,1::10,Bank Payment,12/1/2018,507,MARY P,,30.0,30.0,0.0,5101,0A14,L,1408,Uniforms-Players-TAX PAID,FL,Wildwood,34785.0,Expense,30.0


In [223]:
state_ = "AL"
period = ["start_date", "end_date"]

In [224]:
state_df = region_df.loc[region_df["state"]==state_]

In [225]:
state_settings = GLTaxSettingsByState[state_]
state_settings

{'4010': True,
 '4012': True,
 '4024': True,
 '4027': True,
 '5101': False,
 '5102': False,
 '5103': False,
 '5104': False,
 '5105': True,
 '5106': True,
 '5107': True,
 '5108': True,
 '5135': True}

In [226]:
taxable_gl = [int(x) for x,y in state_settings.items() if y]
taxable_gl

[4010, 4012, 4024, 4027, 5105, 5106, 5107, 5108, 5135]

In [227]:
# if glAcct is in Taxable_gl code list then mark as Taxable and if not then mark Non-Taxable
state_df["Taxable?"] = np.where(state_df['glAcct'].isin(taxable_gl), "Taxable", "Non-Taxable")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [228]:
# If Taxable then update taxableAmt to zero.
# UPDATE: tried doing so but too annoying to seperate out.
#state_df["taxableAmt"] = state_df["taxableAmt"] * np.where(state_df["Taxable?"]=="Taxable", 1, 0)

In [229]:
#state_df

In [230]:
parsed_state_df = state_df.sort_values(by=["Taxable?", "Trx Number"], ascending=False)

In [231]:
parsed_state_df.head(1)

Unnamed: 0,Record Type::Number,refNum,glDate,Trx Number,paidToRcvd,trxDesc,trxAmount,debitAmt,creditAmt,glAcct,section,area,region,glAcctDesc,state,city,zipcode,Exp/Rev,taxableAmt,Taxable?
1081,1::702,Bank Payment,12/20/2018,5254,John Price,,421.19,387.7,0.0,5135,5,C,894,Equipment-NO TAX PAID,AL,Huntsville,35806.0,Expense,387.7,Taxable


In [232]:
#parsed_state_df.set_index(["section", "area", "region", "Taxable?"])

In [233]:
parsed_state_df.loc[parsed_state_df["Taxable?"]=="Taxable"].groupby(by=["section", "area", "city", "region", "Taxable?"])["taxableAmt"].sum()

section  area  city        region  Taxable?
5        C     Huntsville  894     Taxable     1795.36
               Madison     498     Taxable     -210.50
         H     Wetumpka    1586    Taxable      554.04
Name: taxableAmt, dtype: float64

In [234]:
#state_df[state_df["taxableAmt"]==1402.79]

In [250]:
# sums for record number for only Taxable ones
trx_sums = parsed_state_df.loc[parsed_state_df["Taxable?"]=="Taxable"].groupby(by="Record Type::Number")["taxableAmt"].sum().sort_values()
trx_sums

Record Type::Number
2::271    -210.50
1::655    -125.80
1::222      22.55
1::700      39.56
1::679      56.90
1::701     237.04
1::702     387.70
1::226     531.49
1::703    1199.96
Name: taxableAmt, dtype: float64

In [251]:
# just group index by record number
parsed_state_df_byRecs = parsed_state_df.set_index("Record Type::Number")
parsed_state_df_byRecs.head(10)

Unnamed: 0_level_0,refNum,glDate,Trx Number,paidToRcvd,trxDesc,trxAmount,debitAmt,creditAmt,glAcct,section,area,region,glAcctDesc,state,city,zipcode,Exp/Rev,taxableAmt,Taxable?
Record Type::Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1::702,Bank Payment,12/20/2018,5254,John Price,,421.19,387.7,0.0,5135,5,C,894,Equipment-NO TAX PAID,AL,Huntsville,35806.0,Expense,387.7,Taxable
1::701,Bank Payment,12/14/2018,5241,American Soccer Company,,237.04,237.04,0.0,5105,5,C,894,Uniforms-Players-NO TAX PAID,AL,Huntsville,35806.0,Expense,237.04,Taxable
1::700,Bank Payment,12/11/2018,5240,John Price,,56.44,39.56,0.0,5135,5,C,894,Equipment-NO TAX PAID,AL,Huntsville,35806.0,Expense,39.56,Taxable
1::703,Bank Payment,12/11/2018,5239,John Price,,1199.96,1199.96,0.0,5135,5,C,894,Equipment-NO TAX PAID,AL,Huntsville,35806.0,Expense,1199.96,Taxable
1::679,Bank Payment,12/1/2018,5237,DAVID RUMNEY,,89.23,56.9,0.0,5107,5,C,894,Uniforms-Referee-NO TAX PAID,AL,Huntsville,35806.0,Expense,56.9,Taxable
1::655,Bank Payment,12/1/2018,5218,Jeremy Raper,// REVERSED:,-265.05,0.0,125.8,5107,5,C,894,Uniforms-Referee-NO TAX PAID,AL,Huntsville,35806.0,Expense,-125.8,Taxable
2::271,Bank Deposit,12/12/2018,251,AYSO 498,Concessions SalesMiscellaneous Gear Sales\tChe...,-429.5,0.0,210.5,4024,5,C,498,Concessions,AL,Madison,35758.0,Revenue,-210.5,Taxable
1::222,Bank Payment,12/1/2018,1686,"AMERICAN SOCCER COMPANY, INC.",,22.55,22.55,0.0,5106,5,H,1586,Uniforms-Coaches-NO TAX PAID,AL,Wetumpka,36092.0,Expense,22.55,Taxable
1::226,Bank Payment,12/21/2018,1612,"AMERICAN SOCCER COMPANY, INC.",,531.49,531.49,0.0,5108,5,H,1586,Uniforms-Other-NO TAX PAID,AL,Wetumpka,36092.0,Expense,531.49,Taxable
2::147,Bank Deposit,12/31/2018,99,INTEREST,,-0.24,0.0,0.24,9105,5,F,414,Interest Income,AL,CULLMAN,35056.0,Expense,-0.24,Non-Taxable


In [280]:
recordTaxableTotals = parsed_state_df_byRecs.groupby("Record Type::Number")["taxableAmt"].sum()

recordTaxableTotals[0:10]

Record Type::Number
1::116    1250.00
1::117     211.29
1::118     303.52
1::119     460.00
1::120     450.00
1::222     122.55
1::223     297.94
1::224    1489.48
1::225    1277.00
1::226    1145.33
Name: taxableAmt, dtype: float64

In [277]:
#recordTaxableTotals.align(trx_sums, join="outer")[0]

In [278]:
recordTaxableTotals.loc['1::702']

421.19

In [281]:
r_dict = dict()

r_values = list()
r_taxableSum = list()
r_index = list()
for item in parsed_state_df_byRecs.index:
    if r_dict.get(str(item)):
        r_values.append(False)
        r_index.append(item)
        r_taxableSum.append(0)
        #print([item, False])
        continue
    
    # if row has record number that doesn't exist in sum taxable record pivot abot
    r_values.append(True)
    r_index.append(item)
    r_taxableSum.append(recordTaxableTotals.loc[item])
    #print([item, True])
    r_dict[str(item)] = True

firstUniqueRecordPosition = pd.Series(r_values, index= r_index)
firstUniqueRecordPosition[0:10]

1::702    True
1::701    True
1::700    True
1::703    True
1::679    True
1::655    True
2::271    True
1::222    True
1::226    True
2::147    True
dtype: bool

In [282]:
recSumsByRecordPosition = pd.Series(r_taxableSum, index= r_index)
recSumsByRecordPosition

1::702     421.19
1::701     237.04
1::700      56.44
1::703    1199.96
1::679      89.23
1::655    -265.05
2::271    -429.50
1::222     122.55
1::226    1145.33
2::147      -0.24
1::946     370.00
1::945      15.97
1::68      390.00
1::68        0.00
1::68        0.00
1::67      123.70
1::66      348.81
1::66        0.00
1::63      180.00
1::63        0.00
1::63        0.00
1::63        0.00
1::63        0.00
1::63        0.00
22::26    1402.79
22::25    6081.00
2::83       -2.01
2::82       -1.79
22::24     226.86
2::61     -331.54
           ...   
2::271       0.00
2::271       0.00
2::270    -927.85
2::269   -3744.63
2::268   -3124.98
2::267   -1076.49
1::335     -25.00
2::266    -337.77
2::301     -15.38
2::302   -2352.24
1::318       5.00
1::353      41.91
1::352      23.93
1::351    2000.00
1::350      29.98
1::231     276.71
1::230    1677.50
1::229      25.85
1::228     280.75
1::227     227.58
1::226       0.00
1::225    1277.00
1::224    1489.48
1::223     297.94
1::222    

In [283]:
#pd.Series(r_values, index= r_index) & parsed_state_df.set_index("Record Type::Number")["taxableAmt"]

parsed_state_df_byRecs["netTax"] = recSumsByRecordPosition

In [287]:
parsed_state_df_byRecs.sort_index()

Unnamed: 0_level_0,refNum,glDate,Trx Number,paidToRcvd,trxDesc,trxAmount,debitAmt,creditAmt,glAcct,section,area,region,glAcctDesc,state,city,zipcode,Exp/Rev,taxableAmt,Taxable?,netTax
Record Type::Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1::116,Bank Payment,12/1/2018,260,AYSO AREA 5C,,1250.00,1250.00,0.00,5228,5,F,778,Tournament/National Games Entry Fees,AL,arab,35016.0,Expense,1250.00,Non-Taxable,1250.00
1::117,Bank Payment,12/19/2018,261,SCORE,,211.29,211.29,0.00,5101,5,F,778,Uniforms-Players-TAX PAID,AL,arab,35016.0,Expense,211.29,Non-Taxable,211.29
1::118,Bank Payment,12/19/2018,262,TIMOTHY HOLT,,303.52,303.52,0.00,5274,5,F,778,Awards & Volunteer Recognition,AL,arab,35016.0,Expense,303.52,Non-Taxable,303.52
1::119,Bank Payment,12/13/2018,264,FIRE GRILL 231,,460.00,460.00,0.00,5274,5,F,778,Awards & Volunteer Recognition,AL,arab,35016.0,Expense,460.00,Non-Taxable,460.00
1::120,Bank Payment,12/19/2018,266,TIMOTHY HOLT,,450.00,450.00,0.00,7431,5,F,778,Section/NAGM,AL,arab,35016.0,Expense,450.00,Non-Taxable,450.00
1::222,Bank Payment,12/1/2018,1692,Check(s) Not Enclosed,,100.00,100.00,0.00,5111,5,H,297,Field Expenses,AL,Montgomery,36117.0,Expense,100.00,Non-Taxable,0.00
1::222,Bank Payment,12/1/2018,1686,"AMERICAN SOCCER COMPANY, INC.",,22.55,22.55,0.00,5106,5,H,1586,Uniforms-Coaches-NO TAX PAID,AL,Wetumpka,36092.0,Expense,22.55,Taxable,122.55
1::223,Bank Payment,12/1/2018,1694,Check(s) Not Enclosed,,62.94,62.94,0.00,5111,5,H,297,Field Expenses,AL,Montgomery,36117.0,Expense,62.94,Non-Taxable,297.94
1::223,Bank Payment,12/1/2018,13,AYSO SECTION 5K,,235.00,235.00,0.00,5228,5,H,1586,Tournament/National Games Entry Fees,AL,Wetumpka,36092.0,Expense,235.00,Non-Taxable,0.00
1::224,Bank Payment,12/1/2018,14,AYSO SECTION 5K,,1175.00,1175.00,0.00,5228,5,H,1586,Tournament/National Games Entry Fees,AL,Wetumpka,36092.0,Expense,1175.00,Non-Taxable,0.00


In [55]:
excelSave({state_+"_test": parsed_state_df})

In [None]:
"""
ASK about: 1::702 taxable and non taxable

LOOK AT : 2::271

"""