### A script for generating data for input to Material Drive Trending

***
#### Process steps
***
<ol>
    <li>Search for current file</li>
    <li>Load WSP CSV data</li>
    <li>GROUP BY subset of columns to compress data</li>
    <li>Perform data wrangling</li>
    <li>Map Product info, lob and make/buy</li>
    <li>Check no nulls on make/buy</li>
            * if no nulls:: Write to file <br>
            * if nulls___:: Print table of nulls to screen <br>
</ol>

***



In [1]:
# import modules
import glob
import os
import datetime
import re

import pandas as pd

In [2]:
"""
Functions and Classes


Functions:
    search_wsps:                --> get list of wsp candidate filenames
    get_wsp_filenames:          --> generate list of wsp filenames
    create_dataframes:          --> generate list of wsp dataframes
    generate_uid:               --> generates a uid based on source, class and unit
    format_wsp_for_grouping:    --> format dataframe for grouping (remove nulls)    
    group_wsp:                  --> groups the wsp by subset of columns
    write_final_data_to_csv:    --> outputs final dataframe to file
    map_products_and_makebuy:   --> maps products fields, make/buy and LOB


Classes:
    WSPFields:                  --> key fields information for grouping, aggregating and formatting

"""
# functions
def search_wsps(search_criteria: str = None) -> list:
    """ get list of wsp candidate filenames """
    path = r'.\wsp_csv'
    if search_criteria:
        files = glob.glob(os.path.join(path, search_criteria))
        files
    return files


def get_wsp_filenames(wsp_files: list = None) -> list:
    """ generate list of wsp filenames """
    pattern = re.compile('Wk\d+')
    wsp_files_to_keep = []
    if wsp_files:
        print("Getting filenames...")
        for i, wsp in enumerate(wsp_files):
            wsp_found = re.search(pattern, wsp)
            if wsp_found:
                wsp_files_to_keep.append(wsp)
                file_name = wsp.split('\\')[-1]
                print(i, wsp_found.group(0), ": ", file_name)
                print(f"{file_name} added.")
    return wsp_files_to_keep

def create_dataframes(wsp_files: list = None) -> list:
    """ generate list of wsp dataframes """
    pattern = re.compile('Wk\d+')
    wsp_dataframes = []
    if wsp_files:
        print("Compiling dataframes...")
        for wsp in wsp_files:
            wsp_data = pd.read_csv(wsp, thousands=',', low_memory=False)
            wsp_data.columns = [c.lower().strip() for c in wsp_data.columns]
            wsp_data['fileref'] = re.search(pattern, wsp).group(0) + os.path.splitext(wsp)[0][-2:]
            wsp_dataframes.append(wsp_data)
            print(f"{wsp} added.")
        return wsp_dataframes

def generate_uid(wsp: pd.DataFrame = None) -> pd.DataFrame:
    """ generates a uid based on source, class and unit """
    if not isinstance(wsp, type(None)):
        print("Generating uid...")
        cols = ['source', 'class', 'unit']
        wsp[cols] = wsp[cols].astype(str)
        wsp['uid'] = wsp[cols].apply(lambda x: x.str.strip()).apply(lambda x: '-'.join(x), axis=1)
    return wsp

class WSPFields:
    """ key fields information for grouping, aggregating and formatting """
    agg_key = {
        'quantity': 'sum',
        'total rev': 'sum',
        'total mcc': 'sum',
        'std cost': 'sum'
    }

    group_key = [
        'fileref',
        'region',
        'source',
        'order type',
        'order category',
        'order category2',
        'order category3',
        'order category4',
#         'Master Customer Name',
        'offer pf',
        'prod grp',
        'class',
        'item type',
        'unit',
        'ssd_qtr',
        'ssd mth',
        'prq (final)',
        'excl',
        'excl2',
        'lob',
        'lob_'
    ]

    table_key = [
        'fileref',
        'region',
        'source',
        'order type',
        'order category',
        'order category2',
        'order category3',
        'order category4',
#         'Master Customer Name',
        'offer pf',
        'prod grp',
        'class',
        'item type',
        'unit',
        'ssd_qtr',
        'ssd mth',
        'prq (final)',
        'excl',
        'excl2',
        'lob',
        'lob_',
        'quantity',
        'total rev',
        'total mcc',
        'std cost'
    ]

    table_key_final = [
        'fileref',
        'region',
        'source',
        'order type',
        'order category',
        'order category2',
        'order category3',
        'order category4',
#         'Master Customer Name',
        'offer pf',
        'prod grp',
        'products qty',
        'products cost',
        'class',
        'item type',
        'unit',
        'ssd_qtr',
        'ssd mth',
        'prq (final)',
        'excl',
        'excl2',
        'lob',
        'lob_',
        'quantity',
        'total rev',
        'total mcc',
        'std cost',
        'make/buy'
    ]

def format_wsp_for_grouping(wsp: pd.DataFrame = None, wsp_fields: object = None) -> pd.DataFrame:
    """ format dataframe for grouping (remove nulls) """
    group_key = wsp_fields.group_key
    value_key = ['quantity', 'total rev', 'total mcc','std cost']
    if not isinstance(wsp, type(None)):
        print("Munging WSP for grouping...")
        wsp[group_key] = wsp[group_key].apply(lambda x: x.fillna('na'))
        wsp[value_key] = wsp[value_key].apply(lambda x: pd.to_numeric(x, errors='coerce').fillna(0))
    return wsp

def group_wsp(wsp: pd.DataFrame = None, wsp_fields: object = None) -> pd.DataFrame:
    """ group wsp """
    if not isinstance(wsp, type(None)):
        print("Grouping WSP...")
        group_key = wsp_fields.group_key
        table_key = wsp_fields.table_key
        agg_key = wsp_fields.agg_key
        wsp_grouped = wsp[table_key].groupby(group_key).agg(agg_key).reset_index()
    return wsp_grouped

def write_final_data_to_csv(wsp: pd.DataFrame = None, qtr_start:str=None, curr_wk:bool=False) -> None:
    """ outputs final dataframe to file """
    if not isinstance(wsp, type(None)):
        # ensure there are no leading/trailing spaces on the below columns
        cols = ['excl', 'excl2', 'lob']
        wsp[cols] = wsp[cols].apply(lambda x: x.str.strip())
        # set all values in excl, excl2 to 'show'
        wsp.loc[:,'excl':'excl2'] = 'Show'

        # add new columns for Qtr Start, Duplicate Make/Buy and WkPeriod
        wsp.loc[:,'make-buy'] = wsp['make/buy']
        wsp.loc[:,'QtrPeriod'] = '' if not qtr_start else qtr_start
        wsp.loc[:,'WkPeriod'] = 'CurrWk' if curr_wk else ''

        # set file date
        file_date = datetime.datetime.now().strftime('%Y%m%d')
        filename = f'material_drive_trending_weekly_data_{file_date}.csv'
        print(f"Writing WSP data to --> {filename}...")
        # write to csv
        wsp.to_csv(f'.\\mdt\\{filename}', index=False)

def map_products_and_makebuy(wsp: pd.DataFrame = None, wsp_fields: object = None) -> pd.DataFrame:
    """ maps products fields, make/buy and LOB """
    if not isinstance(wsp, type(None)):
        # load pmap table
        print(f"Merging data with pmap...")
        product_mapping_table = pd.read_csv('./product_range_map_for_material_trending.csv', thousands=',', low_memory=False)
        # tidy up wsp data
        wsp['uid'] = wsp['uid'].str.lower()
        wsp.drop(columns='lob', inplace=True)
        # tidy up pmap data
        product_mapping_table['uid'] = product_mapping_table['uid'].str.lower()
        pmap= product_mapping_table[['uid', 'products qty', 'products cost', 'lob', 'make/buy']]
        # merge data
        print(pmap.iloc[:10,:])
        wsp_map = pd.merge(wsp, pmap, how='left')
        return wsp_map[wsp_fields.table_key_final]

In [3]:
# specify search string
wsp_search_string = "2025*Wk51*.csv"

In [4]:
"""
Main script runs functions and generates the final data
for material drive trending file

Steps
    1. search for wsp files
    2. concatentate wsp files into a single dataframe
    3. format wsps and group by subset of columns
    4. generate a uid and merge with product maps table
    5. test for nulls from step.4 
        5.1 print to screen if nulls, else
        5.2 write final output to file

"""

# load wsp and perform initial munging
wsps = create_dataframes(get_wsp_filenames(search_wsps(wsp_search_string)))
wsps

wsp_data = pd.concat(wsps)
wsp_fields = WSPFields()

wsp_data = format_wsp_for_grouping(wsp_data, wsp_fields)

wsp_data_g = group_wsp(wsp_data, wsp_fields)
# print(wsp_data_g)
# input('Break....')
wsp_data_g = generate_uid(wsp_data_g)
wsp_data_g_map = map_products_and_makebuy(wsp_data_g, wsp_fields)

# finally check if there are any null entries from mapping...
ignore_missing_rows = True # set this to true if the missing rows can be ignored

# specify filt and col for checking missing rows
filt = wsp_data_g_map['make/buy'].isna()
cols = ['source', 'class', 'unit']
if all([wsp_data_g_map['make/buy'].isna().sum() > 0, not ignore_missing_rows]):
    print("Values missing make/buy...")
    print(wsp_data_g_map[filt][cols].drop_duplicates())
    wsp_data_g_map[filt][cols].drop_duplicates().to_csv('./uid_missing_mapping.csv')
    print("Done - check the nulls, update the pmap file and try again!")
else:
    write_final_data_to_csv(wsp_data_g_map)
    print("Values missing make/buy...")
    print(wsp_data_g_map[filt][cols].drop_duplicates())
    print("Done.")

Getting filenames...
0 Wk51 :  2025_12_16 Wk51 NCR Atleos Weekly Supply Plan.csv
2025_12_16 Wk51 NCR Atleos Weekly Supply Plan.csv added.
Compiling dataframes...
.\wsp_csv\2025_12_16 Wk51 NCR Atleos Weekly Supply Plan.csv added.
Munging WSP for grouping...
Grouping WSP...
Generating uid...
Merging data with pmap...
                 uid products qty products cost       lob make/buy
0      bri-1924-unit        Other      Commerce  Commerce     make
1      bri-1612-unit        Other      Commerce  Commerce     make
2      enn-7607-unit          POS           POS  Commerce     make
3      enn-7772-unit          POS           POS  Commerce     make
4      enn-7702-unit          POS           POS  Commerce     make
5      enn-7879-unit     Scanners      Scanners  Commerce     make
6      bri-7736-unit          POS           POS  Commerce      buy
7  bri-7772-non-unit          POS           POS  Commerce     make
8  bri-5915-non-unit        Other      Commerce  Commerce      buy
9      bri-59

#### PRQ Calculation

<p>Need to create functions for the below and tidy up for production</p>

In [5]:
"""
Generate the PRQ proport splits for each SSD_Qtr

Steps
    1. calculate the proport splits for prqs in ssd_qtr
    2. if there are null proports fill them with 1
    3. perform the join onto the fcst kin with the fcst kin with prq splits
    4. weight the value columns by the prq splits
    5. perform check to before and after values
    6. update the prq final column with the prq final values from the splits
    7. replace the original fcst kin values with the weighted fcst kin values

"""

ssd_prqs_col = ['fileref', 'ssd_qtr', 'prod grp', 'prq (final)', 'lob_']
ssd_only_col = ['fileref', 'ssd_qtr', 'prod grp']

#fcst_dem = wsp_data_g_map[wsp_data_g_map['order category4'] == 'Fcst-DEM'] #jose commented

#left = pd.pivot_table(fcst_dem, index=ssd_prqs_col,values='total rev', aggfunc='sum', fill_value=0).reset_index()
#right = pd.pivot_table(fcst_dem, index=ssd_only_col,values='total rev', aggfunc='sum', fill_value=0).reset_index()

#prq_table = left.merge(right, how='left', on=['fileref', 'ssd_qtr', 'prod grp'], validate='m:1')
#prq_table['proport'] = prq_table['total rev_x'] / prq_table['total rev_y']
#print("==" * 30)
#print("Check if any proports fall outside acceptable limits")
#print("--" * 30)
#proport_check = prq_table[(prq_table['proport'] < 0) | (prq_table['proport'] > 1)]
#if proport_check.empty:
    #print("No proports outside acceptable limits")
    #print("Test Pass")
    #print("--" * 30)
#else:
    #print("Test Fail")
    #print("--" * 30)
    #print(proport_check)

In [6]:
# filter on forecast kinaxis
fcst_kin = wsp_data_g_map[wsp_data_g_map['order category4'] == 'Fcst-KIN']

# generate forecast splits and fill blanks
#fcst_kin_split = fcst_kin.merge(prq_table, how='left', on=['fileref', 'ssd_qtr', 'prod grp'])
fcst_kin_split = fcst_kin.merge(how='left', on=['fileref', 'ssd_qtr', 'prod grp'], validate='m:1')
fcst_kin_split['proport'] = fcst_kin_split['proport'].fillna(1)

# value_columns
measures = ['quantity', 'total rev', 'total mcc', 'std cost']

# generate a test between the original unadjusted values and the adjusted values
check_original_vals__ = fcst_kin[measures].apply(lambda x: x / 1_000_000).sum().round(2)
check_post_split_vals = fcst_kin_split[measures].multiply(fcst_kin_split['proport'], axis='index').apply(lambda x: x / 1_000_000).sum().round(2)

# check results
print('If all values are true then pass else fail')
if all((check_original_vals__ == check_post_split_vals).to_list()):
    print('Test pass')
    generate_prq_splits = True
    fcst_kin_split[measures] = fcst_kin_split[measures].multiply(fcst_kin_split['proport'], axis='index')
else:
    generate_prq_splits = False
    print('Test fail')
print('--'*30)

print("Original Vals:", tuple(check_original_vals__))
print("Post-PRQ Vals:", tuple(check_post_split_vals))

TypeError: DataFrame.merge() missing 1 required positional argument: 'right'

In [None]:
# if the prq split calculation results in an accurate adjustment then 
# create the final output with the prq splits
if generate_prq_splits:
    # generate a dictionary of allowable prq values for ssd qtr
    ssd_qtr = {}
    for i in range(2018, 2030):
        for j in range(1, 5):
            prq_val = []
            ssd_key = str(i) + "Q" + str(j)
            if j == 4:
                prq_val.append(str(i) + "Q" + str(j))
                prq_val.append(str(i+1) + "Q1")
            else:
                prq_val.append(str(i) + "Q" + str(j))
                prq_val.append(str(i) + "Q" + str(j+1))                
            ssd_qtr[ssd_key] = prq_val

    # filter wsp to remove fcst kin and then combine fcst kin with splits
    wsp_wo_fcst_kin = wsp_data_g_map[wsp_data_g_map['order category4'] != 'Fcst-KIN']

    # get the columns from the wsp view and rename the prq final column
    standard_columns = wsp_wo_fcst_kin.columns
#     print(standard_columns)
#     print(fcst_kin_split.columns)
    fcst_kin_split.rename(columns={'prq (final)_y': 'prq (final)', 'lob__y': 'lob_'}, inplace=True)

    # generate final output with prq splits
    final_output = pd.concat([wsp_wo_fcst_kin, fcst_kin_split[standard_columns]])

    # generate the list of prq values as either prq eg 2021Q1, 2021Q2, ... or Future
    prq_final2 = []
    for ssd, prq in final_output[['ssd_qtr', 'prq (final)']].itertuples(index=False, name=None):
        try:
            if prq in ssd_qtr.get(ssd):
                prq_final2.append(prq)
            else:
                prq_final2.append('Future')
        except TypeError:
            prq_final2.append('Future')
    final_output['prq_final'] = prq_final2

    # write final output
    write_final_data_to_csv(final_output)
else:
    print('Writing WSP data to --> Test fail: Output will not be written')    

In [None]:
wsps[0][['order category3']].drop_duplicates()

In [None]:
filts = (wsps[0]['order category3'].isin(['Completions', 'Backlog', 'PBO', 'Planned Order'])) & (wsps[0]['ssd_qtr'].isin(['2021Q2', '2021Q3']) & (wsps[0]['lob_'].isna()))
wsps[0][['order category3', 'item','master customer name', 'lob_']][filts].drop_duplicates().to_csv('./missing_lob_item.csv')

In [None]:
for c in wsps[0].columns:
    print(c)