In [None]:
import pandas as pd
import os

# Build the article portfolio for one FDA team member:
#   1. look up the merchandise-category nodes mapped to `fda_member`,
#   2. keep the products whose MERCH_CAT_NODE is one of those nodes,
#   3. attach the component lookup and write the result to CSV.

# Define input and output paths
fda_member = "Lee Zheng"
portfolio_mapping_file = r"G:\Corporate Logistics & Services\General\Forecast and Data Analytics Team\4. Dp4a\SAP Development\CG Porfolio Mapping.xlsx"
comp_lookup_path = r"G:\Corporate Logistics & Services\General\Forecast and Data Analytics Team\Steady-state Demands\Master Dashboard v2\Dashboard Extracts\comp_mapping.csv"
product_info_file = r"G:\Corporate Logistics & Services\General\Forecast and Data Analytics Team\Steady-state Demands\Master Dashboard v2\Dashboard Extracts\prod_info.csv"
portfolio_output = r"C:\Users\lzheng\OneDrive - ALDI-HOFER\Desktop\portfolio_lee.csv"

# Read the component lookup; DISPLAY is renamed so it joins to the product table on ARTICLE
comp_df = pd.read_csv(comp_lookup_path, usecols=["DISPLAY", "COMPONENT"]).rename(columns={"DISPLAY": "ARTICLE"})

# Find the merch-category nodes assigned to this FDA member
mapping_df = pd.read_excel(portfolio_mapping_file, sheet_name="Details", usecols=["FDA", "MERCH_CAT_NODE"])
mapping_filtered = mapping_df[mapping_df["FDA"] == fda_member]
merch_list = mapping_filtered["MERCH_CAT_NODE"].unique().tolist()

# Read product file and take useful columns
product_df = pd.read_csv(
    product_info_file,
    usecols=[
        "DISPLAY", "DESCRIPTION", "CLASS", "SELLINGCLASS", "PRODUCTCLASS",
        "COMM_GROUP_DESC", "CAT_DESC", "CG_DESC", "SCG_DESC", "MERCH_CAT_NODE"
    ]
).rename(columns={"DISPLAY": "ARTICLE"})

# Normalise both sides of the node comparison the same way: string, no
# surrounding whitespace, no leading zeros.
# NOTE(review): if either source parses MERCH_CAT_NODE as float, str() yields
# e.g. '1234.0' and the match will silently miss — confirm the dtypes.
product_df['MERCH_CAT_NODE'] = product_df['MERCH_CAT_NODE'].astype(str).str.strip().str.lstrip('0')
merch_list = [str(x).strip().lstrip('0') for x in merch_list]

# Filter product_df to include only rows with MERCH_CAT_NODE in merch_list
condition_article = product_df["MERCH_CAT_NODE"].isin(merch_list)
product_df_filtered = product_df[condition_article]

# Combine tables for portfolio file (inner join: articles without a component row are dropped)
portfolio_df = product_df_filtered.merge(comp_df, on="ARTICLE", how="inner")

# Sort descending by 'FORECAST' and 'FC_PRICE' when those columns are available.
# Neither column is loaded from the two input files above, so sorting on them
# unconditionally raised KeyError; only the columns that exist are used.
requested_sort_cols = ['FORECAST', 'FC_PRICE']
sort_cols = [col for col in requested_sort_cols if col in portfolio_df.columns]
if sort_cols:
    portfolio_df_sorted = portfolio_df.sort_values(by=sort_cols, ascending=False)
else:
    portfolio_df_sorted = portfolio_df

# Save the resulting DataFrame to a CSV file
portfolio_df_sorted.to_csv(portfolio_output, index=False)

# Print the first few rows of the resulting DataFrame
print(portfolio_df_sorted.head())


In [None]:
# Aggregate promo_df per group key (this variant also groups on DISPLAY_STATUS_STRING).
# NOTE(review): promo_df is not created in this cell; it must already exist in the kernel.
promo_group_keys = [
    'FDA', 'DISPLAY', 'REGION', 'DESCRIPTION',
    'SELLINGCLASS', 'CAT_DESC', 'CG_DESC', 'DISPLAY_STATUS_STRING',
]

# Column -> aggregation(s). DATE takes a list, so the aggregated frame comes
# back with two-level column labels.
promo_agg_spec = {
    'DIF_COUNT_STORE': 'sum',
    'SMAPE': 'mean',
    'APE %': 'mean',
    'FLAG_COUNT': 'mean',
    'BIAS %': 'mean',
    'UNDER_BIAS': 'sum',
    'OVER_BIAS': 'sum',
    'DATE': ['min', 'max'],  # earliest and latest date per group
}

promo_grouped = promo_df.groupby(promo_group_keys)
promo_df = promo_grouped.agg(promo_agg_spec).reset_index()


In [None]:
#-------------------------------Break CSV-----------------------------
import pandas as pd

# Split one Excel sheet into at most `num_files` CSV files of equal size
# (the last file may be shorter). Every output file carries the header row.

# Read the Excel file
file_path = r"C:\Users\lzheng\OneDrive - ALDI-HOFER\Desktop\ABS adjustments\16-09\Catchweight_10.09.xlsx"
df = pd.read_excel(file_path)  # Use pd.read_excel for Excel files

# Determine the number of rows per split file (ceiling division)
num_rows = len(df)
num_files = 20
rows_per_file = -(-num_rows // num_files)

# Loop through and create smaller files
files_written = 0
for i in range(num_files):
    start_row = i * rows_per_file
    end_row = start_row + rows_per_file
    df_chunk = df.iloc[start_row:end_row]

    # When the row count is not a multiple of num_files the trailing chunks are
    # empty; stop instead of writing header-only files.
    if df_chunk.empty:
        break

    # Save each chunk to a new CSV file with headers
    output_file = rf'C:\Users\lzheng\OneDrive - ALDI-HOFER\Desktop\ABS adjustments\16-09\Catchweight_NewConfig_{i+1}.csv'
    df_chunk.to_csv(output_file, index=False, header=True)
    files_written += 1

# Report the number of files actually produced, not the requested maximum
print(f"CSV file split into {files_written} smaller files successfully, each with headers.")


In [None]:
#---------------------------------------------Store Order and Merge----------------------------------
import pyodbc
import pandas as pd
import os
import sys

# Extract ordered vs. received quantities per delivery date / article / site for
# the portfolio articles, convert them with SALES_UNIT_FACTOR, and write a CSV.
#
# Kernel prerequisites (not created in this cell):
#   - portfolio_df : must contain an 'ARTICLE' column
#   - fc_df        : must contain 'DISPLAY' and 'SALES_UNIT_FACTOR'
#                    (produced by the Forecast cell, which appears further down —
#                    run it before this cell)

# Add your custom function path
sys.path.insert(0, r"C:\Git\fda\python_functions")
import extract_functions as extract  # type: ignore

# Define the output file path
order_file = r"C:\Users\lzheng\OneDrive - ALDI-HOFER\Desktop\Problem Statement-Markdowns\Store Order.csv"

# Generate the article list and format it for the SQL IN (...) clause
article_list = portfolio_df['ARTICLE'].unique().tolist()
if not article_list:
    # An empty list would render as "IN ()", which is not valid SQL
    raise ValueError("portfolio_df contains no articles; nothing to query.")
article_list_str = ', '.join([f"'{article}'" for article in article_list])

# SQL Query with embedded article list.
# The pdoc CTE exposes the trimmed material number as DISPLAY, so the outer
# query must reference pdoc.DISPLAY (pdoc has no MATNR column).
sql_query = f"""
WITH
--get purchase document information this will provide proposed POs and their associated dates and stores
--PK is PDOC_NO
pdoc AS 
    (SELECT DISTINCT
    ekko.EBELN AS PDOC_NO,
    CAST(eket.EINDT AS DATE) AS DELIVERY_DATE ,
    EKPO.EBELP AS ITEM_NO,
    ltrim(EKPO.MATNR,0) AS DISPLAY,
    EKPO.WERKS AS SITE,
    ekko.LIFNR AS Region,
    EKPO.KTMNG AS TARGET_QTY,
    EKPO.MENGE AS PO_QTY,
    EKPO.MEINS AS PO_UOM
    --header
    FROM CONSUMPTION_AU.AU_E41_EKKO_CC_V ekko
    --detail
    INNER JOIN CONSUMPTION_AU.AU_e41_ekpo_CC_V ekpo ON ekko.EBELN=ekpo.EBELN
    --schedule agreement
    INNER JOIN CONSUMPTION_AU.AU_e41_eket_CC_V eket ON ekko.EBELN=eket.EBELN AND eket.ebelp=ekpo.ebelp
    
    ORDER BY  CAST(eket.EINDT AS DATE) desc)
,
--get actual delivered data at store level
del_data AS 
    (SELECT 
    lips.VGBEL AS DOC_NO_REF_DOC,
    LIPS.VBELN AS DELIVERY,
    LIPS.POSNR AS DEL_ITEM,
    CAST(LIKP.LFDAT AS DATE) AS DELIVERY_DATE,
    ltrim(LIPS.MATNR,0) AS MATNR,
    LIPS.MATKL AS MERCH_CAT,
    LIPS.WERKS AS SITE,
    LIPS.ERNAM AS USER_CREATED,
    CAST(LIPS.ERDAT AS DATE) AS RECORD_CREATION_DATE,
    LIPS.LFIMG AS ACTUAL_DELIVERED
    
    FROM CONSUMPTION_AU.AU_e41_likp_CC_V LIKP
    INNER JOIN CONSUMPTION_AU.AU_e41_LIPS_CC_V LIPS ON LIKP.VBELN=LIPS.VBELN
    WHERE 1=1
    AND LIPS.WERKS NOT LIKE 'GD%%'
    ORDER BY  CAST(LIKP.LFDAT  AS DATE) desc)

SELECT DISTINCT
pdoc.delivery_date AS DELIVERY_DATE,
pdoc.DISPLAY AS DISPLAY,
pdoc.site AS SITE,
pdoc.region AS REGION,
sum(pdoc.PO_QTY) AS ORDERED,
sum(del_data.actual_delivered) AS RECEIVED

FROM pdoc 
FULL OUTER JOIN del_data 
    ON pdoc.PDOC_NO=del_data.DOC_NO_REF_DOC AND del_data.matnr=pdoc.DISPLAY
WHERE 1=1
AND pdoc.DISPLAY IN ({article_list_str})
AND pdoc.region = 'GD07'
AND pdoc.delivery_date BETWEEN '2024-07-22' AND '2024-08-18'
GROUP BY pdoc.delivery_date,
pdoc.DISPLAY,
pdoc.site,
pdoc.region
"""

# Establish the database connection and make sure it is released even when
# the query fails
conn = extract.connect_to_NDS()
try:
    # Execute the query and fetch the data into a DataFrame
    order_df = pd.read_sql_query(sql_query, conn)
finally:
    # Close the database connection
    conn.close()

# Retrieve 'SALES_UNIT_FACTOR' from the forecast dataframe.
# fc_df holds many rows per DISPLAY, so the lookup is reduced to one row per
# article first; otherwise the merge below would duplicate every order row.
# NOTE(review): if an article carries more than one SALES_UNIT_FACTOR, the
# first one encountered is used — confirm that is acceptable.
fc_subset = fc_df[['DISPLAY', 'SALES_UNIT_FACTOR']].drop_duplicates(subset='DISPLAY')
merged_df = order_df.merge(fc_subset, on='DISPLAY', how='left')

# Multiply ordered/received quantities by the sales unit factor
merged_df['POQTY'] = merged_df['ORDERED'] * merged_df['SALES_UNIT_FACTOR']
merged_df['RECD'] = merged_df['RECEIVED'] * merged_df['SALES_UNIT_FACTOR']

# Select only the necessary columns
order_df = merged_df[['DELIVERY_DATE', 'DISPLAY', 'SITE', 'REGION', 'POQTY', 'RECD']]

# Save the result to a CSV file
order_df.to_csv(order_file, index=False)


In [None]:
#------------------------------------Forecast---------------------------------------------------
import pyodbc
import pandas as pd
import os
import sys

# Extract the summed forecast (quantity and quantity * SALES_UNIT_FACTOR) for
# the portfolio articles and write it to CSV. The result stays in the kernel
# as `fc_df`.
# Kernel prerequisite (not created in this cell): portfolio_df with an 'ARTICLE' column.

# Define output path
fc_file = r"C:\Users\lzheng\OneDrive - ALDI-HOFER\Desktop\Problem Statement-Markdowns\Forecast.csv"

# Convert article_list to a string format for SQL condition
article_list = portfolio_df['ARTICLE'].unique().tolist()
if not article_list:
    # An empty list would render as "in ()", which is not valid SQL
    raise ValueError("portfolio_df contains no articles; nothing to query.")
article_list_str = ', '.join([f"'{article}'" for article in article_list])

# Add your custom function path
sys.path.insert(0, r"C:\Git\fda\python_functions")
import extract_functions as extract  # type: ignore

# SQL query with the article list embedded
sql_query = f"""
SELECT DISTINCT
VKBUR_WRK AS REGION,
fcst.werks AS STORE,
Ltrim(fcst.matnr,0) AS DISPLAY,
aem.maktx AS DESCRIPTION,
SALES_UNIT_FACTOR,
to_varchar(fcst.fcstdate,'dd/mm/yyyy') AS FC_Date,
sum(fcst.quantity) AS FC,
sum(fcst.quantity*SALES_UNIT_FACTOR) AS UNIT_FC

FROM
    CONSUMPTION_AU.AU_E41_ZD2FTS_SFCST AS fcst
INNER JOIN
    CONSUMPTION_AU.AU_E41_MAKT AS aem
    ON fcst.MATNR = aem.MATNR 
INNER JOIN
    CONSUMPTION_AU.AU_E41_WRF1 AS wrf
    ON fcst.mandt = wrf.mandt
    AND fcst.werks = wrf.locnr
INNER JOIN 
    CONSUMPTION_AU.AU_E41_MARA AS mara
    ON fcst.mandt = mara.mandt
    AND fcst.matnr = mara.matnr
INNER JOIN
    CONSUMPTION_AU.AU_E41_MARC AS marc
    ON fcst.mandt = marc.mandt
    AND fcst.matnr = marc.matnr 
    AND fcst.werks = marc.werks
INNER JOIN
    CONSUMPTION_AU.AU_E41_ZD2FBOMSUNIT AS bom
    ON fcst.mandt = bom.mandt
    AND fcst.matnr = bom.matnr

WHERE 1=1
AND VKBUR_WRK in ('GR07')--,'GR06')
--AND fcst.werks = 'G175'
AND fcst.MANDT='100'
AND fcst.source = 'FC_MEAN'
AND fcst.fcstdate BETWEEN '20240722000000' AND '20240818000000'
--AND fcst.fcstdate BETWEEN add_days(current_date,-30) AND add_days(current_date,0) 
AND ltrim(fcst.matnr,0) in ({article_list_str})
AND fcst.source = 'FC_MEAN'
AND LEFT(marc.dismm,1) IN ('M','S')
AND aem.SPRAS = 'E'

GROUP BY
VKBUR_WRK,
fcst.werks,
Ltrim(fcst.matnr,0),
aem.maktx,
SALES_UNIT_FACTOR,
to_varchar(fcst.fcstdate,'dd/mm/yyyy')
"""

# Establish the database connection; the finally clause guarantees it is
# closed even if the query or the CSV write raises
conn = extract.connect_to_NDS()
try:
    # Execute the query
    fc_df = pd.read_sql_query(sql_query, conn)

    # Save the result to a CSV file
    fc_df.to_csv(fc_file, index=False)
finally:
    # Close the database connection
    conn.close()


In [None]:
#-------------------------------------------Promo-------------------------
# Aggregate promo_df per article/region and, for 'Fruits & Vegetables' only,
# keep the groups whose DATE span overlaps the window [today-12d, today-6d].
# NOTE(review): promo_df is not created in this cell; it must already exist in
# the kernel. The cell is not re-runnable on its own output because the
# aggregation renames the value columns.

# Group by with aggregation, including min and max dates
promo_df = promo_df.groupby(['FDA', 'DISPLAY', 'REGION', 'DESCRIPTION', 'SELLINGCLASS', 'CAT_DESC', 'CG_DESC']).agg({
    'DIF_COUNT_STORE':'sum',
    'SMAPE':'mean',
    'APE %':'mean',
    'FLAG_COUNT':'mean',
    'BIAS %': 'mean',
    'UNDER_BIAS': 'sum',
    'OVER_BIAS': 'sum',
    'DATE': ['min', 'max']  # Get the earliest and latest dates
}).reset_index()

# Show the two-level column labels produced by the aggregation
print(promo_df.columns)

# Flatten the two-level labels into 'COLUMN_agg'. After reset_index() the group
# keys are tuples such as ('CAT_DESC', ''); joining them leaves a trailing '_'
# ('CAT_DESC_'), which must be stripped or the lookup of 'CAT_DESC' below fails.
promo_df.columns = ['_'.join(col).strip('_') if isinstance(col, tuple) else col for col in promo_df.columns]

# Show the flattened column names
print(promo_df.columns)

# Rows belonging to the produce category
category_condition = promo_df['CAT_DESC'] == 'Fruits & Vegetables'

# Define the date range specific to "Fruits & Vegetables" (midnight-based, relative to today)
today = pd.Timestamp.today().normalize()
end_date_produce = today - pd.Timedelta(days=6)
start_date_produce = today - pd.Timedelta(days=12)

# True where the group's [DATE_min, DATE_max] span overlaps the produce window
date_condition_produce = (promo_df['DATE_min'] <= end_date_produce) & (promo_df['DATE_max'] >= start_date_produce)

# Apply the date filter only for 'Fruits & Vegetables'; every other category is kept
keep_row = ~category_condition | date_condition_produce
promo_df = promo_df[keep_row]


In [None]:
#-------------------------------------------------------DIF-----------------------------------------------------------
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Build the Promo extract: load the last two weeks of DIF data, join it to
# rank_all_region, aggregate per article/region, apply the produce-specific
# date window, then write Promo.csv and the WILO_14 workbook.
# NOTE(review): rank_all_region and rank_filter_2 are not created in this cell;
# they must already exist in the kernel.

# Load DIF data (malformed lines are skipped silently by on_bad_lines='skip')
dif_path = r"C:\Users\lzheng\OneDrive - ALDI-HOFER\Documents\DIF.csv"
dif = pd.read_csv(dif_path, on_bad_lines='skip', engine='python')

# Convert the 'DATE' column to datetime format; unparseable values become NaT
dif['DATE'] = pd.to_datetime(dif['DATE'], errors='coerce', format='%Y-%m-%d')

# Define date range for the DIF data
end_date = pd.to_datetime(datetime.today().date())
start_date = end_date - timedelta(weeks=2)

# Filter DIF data based on the date range (NaT rows fail both comparisons and are dropped)
date_condition = (dif['DATE'] <= end_date) & (dif['DATE'] >= start_date)
dif = dif[date_condition]

# Drop specific columns; errors='ignore' keeps the cell working when the CSV
# was written without them (e.g. no 'Unnamed: 0' index column)
dif = dif.drop(columns=['USER_DIF_TAG', 'Unnamed: 0'], errors='ignore')

# Aggregate DIF data by DISPLAY, REGION and DATE
grouped_dif = dif.groupby(['DISPLAY', 'REGION', 'DATE'], as_index=False).agg({'DIF_COUNT_STORE': 'sum'})

# Merge DIF data with the main file
promo_df = rank_all_region.merge(grouped_dif, on=["DISPLAY", "REGION"], how="inner")

# Filter out rows with zero or NaN DIF_COUNT_STORE values
promo_df = promo_df[(promo_df['DIF_COUNT_STORE'] != 0) & (promo_df['DIF_COUNT_STORE'].notna())]

# Convert 'DATE' column to datetime
promo_df['DATE'] = pd.to_datetime(promo_df['DATE'])

# Group by with aggregation, including min and max dates
promo_df = promo_df.groupby(['FDA', 'DISPLAY', 'REGION', 'DESCRIPTION', 'SELLINGCLASS', 'CAT_DESC', 'CG_DESC']).agg({
    'DIF_COUNT_STORE':'sum',
    'SMAPE':'mean',
    'APE %':'mean',
    'FLAG_COUNT':'mean',
    'BIAS %': 'mean',
    'UNDER_BIAS': 'sum',
    'OVER_BIAS': 'sum',
    'DATE': ['min', 'max']  # Get the earliest and latest dates
}).reset_index()

# Flatten the two-level labels into 'COLUMN_agg'. After reset_index() the group
# keys are tuples such as ('CAT_DESC', ''); joining them leaves a trailing '_'
# ('CAT_DESC_'), which must be stripped or the lookup of 'CAT_DESC' below fails.
promo_df.columns = ['_'.join(col).strip('_') if isinstance(col, tuple) else col for col in promo_df.columns]

# Define the date range specific to "Fruits & Vegetables"
end_date_produce = pd.to_datetime(datetime.today().date()) - timedelta(days=6)
start_date_produce = pd.to_datetime(datetime.today().date()) - timedelta(days=12)

# True where the group's [DATE_min, DATE_max] span overlaps the produce window
date_condition_produce = (promo_df['DATE_min'] <= end_date_produce) & (promo_df['DATE_max'] >= start_date_produce)
category_condition = promo_df['CAT_DESC'] == 'Fruits & Vegetables'

# Apply the date filter only for 'Fruits & Vegetables'; every other category is kept
promo_df['Filtered'] = np.where(category_condition, date_condition_produce, True)

# Filter the DataFrame based on the 'Filtered' column
promo_df = promo_df[promo_df['Filtered']].drop(columns=['Filtered'])

# Save the result to a CSV file
promo_df.to_csv(r"G:\Corporate Logistics & Services\General\Forecast and Data Analytics Team\Steady-state Demands\Master Dashboard v2\Dashboard Extracts\Promo.csv", index=False)

# Define the output path for the Excel file
output_path = r"G:\Corporate Logistics & Services\General\Forecast and Data Analytics Team\Steady-state Demands\Master Dashboard v2\WILO\Extracts\WILO_14.xlsx"

# Save final files to Excel
with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
    # Write rank_filter_2 DataFrame to the "WILO_14" sheet
    rank_filter_2.to_excel(writer, sheet_name='WILO_14', index=False)

    # Write promo_df DataFrame to the "Promo" sheet
    promo_df.to_excel(writer, sheet_name='Promo', index=False)


In [None]:
#------------------------------SOH, going through folders-----------------------------
import os
import pandas as pd

# Collect the daily 'N-3_GD07.csv' files from the REF_<yyyymmdd> folders in the
# date range, keep the portfolio articles, add an INDEX key and write one CSV.
# Kernel prerequisite (not created in this cell): portfolio_df with an 'ARTICLE' column.

# Define the base path
base_path = r'G:\Corporate Logistics & Services\General\RDA\13.Daily_OP_DataFeed\2024'
# Output file path
SOH_file = r"C:\Users\lzheng\OneDrive - ALDI-HOFER\Desktop\Problem Statement-Markdowns\SOH.csv"

# Columns read from every daily file
soh_columns = ['PLANNING_DATE', 'SAP_DISPLAY', 'LOCATION', 'STOCK', 'SHLF_LFE_INDAYS']

# Define the date range (both ends inclusive)
start_date = pd.to_datetime('20240818', format='%Y%m%d')
end_date = pd.to_datetime('20240901', format='%Y%m%d')

# Read each day's file if it exists; days without a file are skipped.
# Frames are collected in a list and concatenated once, instead of growing a
# DataFrame inside the loop.
daily_frames = []
for current_date in pd.date_range(start_date, end_date, freq='D'):
    # Folder name pattern: REF_<yyyymmdd>
    folder_name = f"REF_{current_date.strftime('%Y%m%d')}"
    file_path = os.path.join(base_path, folder_name, 'N-3_GD07.csv')

    if os.path.exists(file_path):
        daily_frames.append(pd.read_csv(file_path, usecols=soh_columns))

if daily_frames:
    soh_df = pd.concat(daily_frames, ignore_index=True)
else:
    # No file found in the range: keep the expected columns so the steps below
    # still run and produce an empty output file
    soh_df = pd.DataFrame(columns=soh_columns)

# Filter to the portfolio articles.
# NOTE(review): isin() matches on value and type — confirm SAP_DISPLAY and
# portfolio_df['ARTICLE'] are parsed with the same dtype.
article_list = portfolio_df['ARTICLE'].unique().tolist()
condition_article = soh_df['SAP_DISPLAY'].isin(article_list)
# .copy() so the column assignment below works on an independent frame
# (avoids SettingWithCopyWarning)
soh_df = soh_df[condition_article].copy()

# Create the 'INDEX' key as '<SAP_DISPLAY>-<LOCATION>-<PLANNING_DATE>'.
# Every part is cast to str so the concatenation also works when a column was
# parsed as a number.
soh_df['INDEX'] = (
    soh_df['SAP_DISPLAY'].astype(str)
    + '-' + soh_df['LOCATION'].astype(str)
    + '-' + soh_df['PLANNING_DATE'].astype(str)
)

# Save the final DataFrame
soh_df.to_csv(SOH_file, index=False)
