In [1]:
import pandas as pd
import datetime
import numpy as np
import os

month_inv_by_base_file = 'PL_INV_BY_BASE-2022_11_01.xlsx'
month_txn_file = 'PL_INVENTORY_TRANSACTIONS_CY2022_OCT.xlsx'

inv_converter = {'Report Date': str,
                 'Unit' : str,
                 'Item': str,
                 'Qty On Hand': float,
                 'On Hand Value': float}

txn_converter = {'TXN - Transaction Type': str,
                 'TXN - Transaction Date': str,
                 'TXN - Item ID': str,
                 'TXN - Unit': str,
                 'TXN - Qty': float,
                 'TXN - Total Cost': float,
                 'TXN - Adjust Type': str}

#RC: add TXN-Sequence Nbr
transaction_col = ['TXN - Sequence Nbr', 'TXN - Transaction Type', 'TXN - Transaction Date', 'TXN - Unit',
                   'TXN - Item ID', 'TXN - Qty', 'TXN - Total Cost', 'TXN - Adjust Type']

out_types = ['051', '054,', '030', '031', '012']
positive_types = ['041', '022', '024', '050', '010', '020']
drop_types = ['053', '060']


# return all excel files in the directory as a list
def get_lof(directory):
    lof = []
    for file in os.listdir(directory):
        if file.endswith('.xlsx'):
            lof.append(directory + file)
    return lof


# handle the increase/decrease column
def handle_I_D(txn_df):
    txn_df['TXN - Qty'] = np.where(txn_df['TXN - Adjust Type'] == 'D', 0 - abs(txn_df['TXN - Qty']),
                                   txn_df['TXN - Qty'])
    txn_df['TXN - Qty'] = np.where(txn_df['TXN - Adjust Type'] == 'M', 0 - abs(txn_df['TXN - Qty']),
                                   txn_df['TXN - Qty'])
    return txn_df

In [17]:
inv_by_base = pd.read_excel(month_inv_by_base_file, converters=inv_converter)

inv_starting_vals = inv_by_base.groupby(['IOH - Item ID']).agg({'IOH - Qty On Hand': 'sum'}).reset_index()
inv_starting_vals.set_index('IOH - Item ID', inplace = True)

inv_starting_vals

Unnamed: 0_level_0,IOH - Qty On Hand
IOH - Item ID,Unnamed: 1_level_1
00118002,0.0
00118003,0.0
00118004,10.0
00118005,0.0
00118006,10.0
...,...
YAV70M10,0.0
YAV70M8,0.0
YAV95M6,0.0
Z53921,0.0


In [7]:
txns_raw = pd.read_excel(month_txn_file, converters=txn_converter, parse_dates=['TXN - Transaction Date'])

# filter dataframe
txn_df = txns_raw.loc[txns_raw['TXN - Transaction Type'].isin(['010', '020', '012', '022', '024', '030', '031', '041', '050', '051', '054', '053', '060'])]

# drop duplication
txn_df = txn_df.drop_duplicates()

#keep relevant rows
txn_df = txn_df[transaction_col]

#dataframe = read_txn_by_date(dataframe,d)
txn_df = handle_I_D(txn_df)
txn_df

Unnamed: 0,TXN - Sequence Nbr,TXN - Transaction Type,TXN - Transaction Date,TXN - Unit,TXN - Item ID,TXN - Qty,TXN - Total Cost,TXN - Adjust Type
0,173810600000002,030,2022-10-01,BS017,04581009,1.0,87.75,
1,173824300000006,030,2022-10-01,BS017,04290272,6.0,12.17,
2,173824300000002,030,2022-10-01,BS017,04281028,6.0,826.49,
3,173665200000001,030,2022-10-01,BS017,04581003,1.0,209.96,
4,173672200000003,030,2022-10-01,BS011,05282061,1.0,42.09,
...,...,...,...,...,...,...,...,...
54977,185945300000012,030,2022-10-31,BS018,05282061,1.0,42.09,
54978,185945300000013,030,2022-10-31,BS018,05282061,1.0,42.09,
54979,185945300000007,030,2022-10-31,BS018,04233026,1.0,6.65,
54980,186016600000029,030,2022-10-31,BS019,04295001,1.0,7.76,


In [8]:
# calculate outs
df_outgoing = txn_df.loc[txn_df['TXN - Transaction Type'].isin(out_types)]
# group by date and item ID fields
df_outgoing = df_outgoing.groupby(['TXN - Item ID', 'TXN - Transaction Date']).agg(
                        {'TXN - Qty': 'sum' }).reset_index()

# calculate ins
df_incoming = txn_df.loc[txn_df['TXN - Transaction Type'].isin(positive_types)]
# group by date and item ID fields
df_incoming = df_incoming.groupby(['TXN - Item ID', 'TXN - Transaction Date']).agg(
                        {'TXN - Qty': 'sum' }).reset_index()

In [18]:
inv_starting_vals.loc['02593057']['IOH - Qty On Hand']

7.0

In [10]:
# combine ingoing and outgoing dfs by item and date
df_txn_merged = pd.merge(df_outgoing, df_incoming, on=['TXN - Item ID', 'TXN - Transaction Date'], how='outer')
df_txn_merged.update(df_txn_merged[['TXN - Qty_x', 'TXN - Qty_y']].fillna(0))

rename_dict = {'TXN - Qty_x':'TXN - Outgoing Qty', 'TXN - Qty_y':'TXN - Incoming Qty'}

# call rename () method
df_txn_merged.rename(columns= rename_dict, inplace=True)

df_byitem = df_txn_merged.sort_values(by = ['TXN - Item ID', 'TXN - Transaction Date'], ascending = [True, True]).reset_index(drop=True)

In [19]:
original_len = len(df_byitem)

for i in range(0, original_len):
    cur_item = df_byitem.loc[i, 'TXN - Item ID']
    if i > 0 and cur_item == df_byitem.loc[i-1, 'TXN - Item ID']:
        df_byitem.loc[i, 'TXN - Overall Qty'] = df_byitem.loc[i-1, 'TXN - Overall Qty'] + df_byitem.loc[i, 'TXN - Incoming Qty'] - df_byitem.loc[i, 'TXN - Outgoing Qty']
    else:
        df_byitem.loc[i, 'TXN - Overall Qty'] = inv_starting_vals.loc[cur_item]['IOH - Qty On Hand'] + df_byitem.loc[i, 'TXN - Incoming Qty'] - df_byitem.loc[i, 'TXN - Outgoing Qty']
    if i % 100000 == 0:
        print(original_len - i)

34826


In [20]:
df_byitem

Unnamed: 0,TXN - Item ID,TXN - Transaction Date,TXN - Outgoing Qty,TXN - Incoming Qty,TXN - Overall Qty
0,00118006,2022-10-21,0.0,0.0,10.0
1,00118007,2022-10-12,0.0,0.0,3.0
2,00170005,2022-10-26,0.0,0.0,5.0
3,00170005,2022-10-28,0.0,0.0,5.0
4,00170009,2022-10-03,0.0,0.0,88.0
...,...,...,...,...,...
34821,513887,2022-10-31,2.0,0.0,8.0
34822,602692,2022-10-20,2.0,0.0,29.0
34823,617551,2022-10-06,1.0,0.0,13.0
34824,643100,2022-10-31,1.0,0.0,7.0


In [None]:
df_byitem.to_csv('output/append.csv', index = None)