In [1]:
import pandas as pd  
import numpy as np  
import os  
import pyodbc  
import pandasql as ps  
from datetime import datetime
from openpyxl import load_workbook

# Paths to all cleaned stock files  
file_paths = {  
    'CJ_Stock': r'D:\Data for Stock Report\cleaned_CJ_Stock_Report.xlsx',  
    'Daily_SO': r'D:\Data for Stock Report\appended_cleaned_SellOut.xlsx',  
    'PO_HBA': r'D:\Data for Stock Report\cleaned_PO_pending_HBA.xlsx',  
    'PO_Import': r'D:\Data for Stock Report\cleaned_PO_pending_import_party.xlsx',  
    'Access_PO': r'D:\Data for Stock Report\cleaned_PO_pending_other.xlsx',  
    'Daily_Stock_DC': r'D:\Data for Stock Report\cleaned_DC_daily_stock.xlsx',
    'Master_Owner&LT': r'C:\Users\Thanawit C\OneDrive - Sahamit Product Co.,Ltd\Data for Stock Report\COPY_MasterLeadTime.xlsx'  
}  

# Load Excel files into DataFrames
def load_data(file_paths):
    dataframes = {}
    for name, path in file_paths.items():
        try:
            if name == "Master_Owner&LT":
                # Load the specified sheet
                owner_scm_df = pd.read_excel(path, sheet_name='All_Product', header=1) 
                
                # Select specific columns
                owner_scm_df_selected = owner_scm_df[['SHM_Item', 'CJ_Item', 'OwnerSCM', 'Base Lead Time (Days)']]
                owner_scm_df_selected.rename(columns={'Base Lead Time (Days)': 'LeadTime'}, inplace=True)

                # Cast CJ_Item to str
                owner_scm_df_selected['CJ_Item'] = owner_scm_df_selected['CJ_Item'].astype(str)
                # Clean CJ_Item column
                owner_scm_df_selected['CJ_Item'] = owner_scm_df_selected['CJ_Item'].fillna('')
                owner_scm_df_selected['CJ_Item'] = owner_scm_df_selected['CJ_Item'].apply(lambda x: x.split('.')[0] if '.0' in str(x) else x)

                # Store the processed DataFrame
                dataframes[name] = owner_scm_df_selected
            else:
                # Load other sheets  
                sheet_name = {
                    'CJ_Stock': 'CJ Stock',
                    'Daily_SO': 'Pivot SO',
                    'PO_HBA': 'Pivot HBA',  
                    'PO_Import': 'Pivot Import',  
                    'Access_PO': 'Pivot All PO pending',  
                    'Daily_Stock_DC': 'Pivot_DC_stock'
                }.get(name)  

                dataframes[name] = pd.read_excel(path, sheet_name=sheet_name)
        except Exception as e:  
            print(f"Error loading {name}: {e}")
    return dataframes

# Connect to Access database and fetch product details  
def load_access_data():  
    access_db_path = r'D:\DataBase Access\SHM_TMS_001_Master_Copy.accdb'  
    conn_str = (  
        r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'  
        f'DBQ={access_db_path};'  
    )  
    try:  
        conn = pyodbc.connect(conn_str)  
        print("Connection successful")  

        query = """  
        SELECT CJ_Item,  
               SHM_Item,  
               Description,  
               Devision,  
               [Group],  
               PC_Cartons,  
               CJ_Status,  
               Cat,  
               Sub_Cat,  
               Brand  
          FROM qry_Product_List  
        """

        query2 = """  
        SELECT 
            t2.Item,
            t2.Unit
        FROM (
            SELECT Item, Unit, [PO Date] AS po_date
            FROM tbl_AllPO_Details
            ) AS t2
        INNER JOIN (
            SELECT Item, MAX([PO Date]) AS max_date
            FROM tbl_AllPO_Details
            Group by Item
            ) AS t1
        ON t1.Item = t2.item AND t2.po_date = t1.max_date
        GROUP BY t2.Item, t2.Unit
        """

        query3 = """  
        SELECT 
            qry_pdl.CJ_Item,
            qry_pdl.SHM_Item,
            qry_pdl.Supplier_Code,  
            qry_pdl.[Supplier Name]
        FROM qry_Product_List AS qry_pdl
        """    

        # Execute the queries
        access_df = pd.read_sql(query, conn)
        query2_df = pd.read_sql(query2, conn)
        query3_df = pd.read_sql(query3, conn)

        # merge for the unit of purchase product info
        access_df2 = pd.merge(query3_df, query2_df, left_on='SHM_Item', right_on='Item', how='left')

        conn.close()  
        print("Connection to Access database is closed.")  
        return access_df, access_df2
    
    except Exception as e:  
        print(f"Connection failed: {e}")  
        return pd.DataFrame(), pd.DataFrame()


# Convert 'CJ_Item' to string format in all DataFrames  
def convert_cj_item_to_string(dataframes, access_df, access_df2):
    # Process each DataFrame in the dictionary
    for name, df in dataframes.items():
        if 'CJ_Item' in df.columns:
            dataframes[name]['CJ_Item'] = df['CJ_Item'].astype(str).str.split('.').str[0]
        if 'SHM_Item' in df.columns:
            dataframes[name]['SHM_Item'] = df['SHM_Item'].astype(str).str.split('.').str[0]
    
    # Apply the same for access_df
    if 'CJ_Item' in access_df.columns:
        access_df['CJ_Item'] = access_df['CJ_Item'].astype(str).str.split('.').str[0]
    
    # Apply the same for access_df2
    if 'CJ_Item' in access_df2.columns:
        access_df2['CJ_Item'] = access_df2['CJ_Item'].astype(str).str.split('.').str[0]
    
    return access_df, access_df2

# Merge all DataFrames  
def merge_dataframes(dfs, access_df):  
    # Merge the 'CJ_Stock' DataFrame with 'Pivot SO' DataFrame  
    merged_df = dfs['CJ_Stock'].merge(dfs['Daily_SO'],on='CJ_Item',how='outer',suffixes=('_from-CJ', '_from-DailySO'))  
    
    # Continue merging with other DataFrames  
    merged_df = merged_df.merge(dfs['PO_HBA'], on='CJ_Item', how='outer', suffixes=('', '_from-HBA'))
    merged_df = merged_df.merge(dfs['PO_Import'], on='CJ_Item', how='outer', suffixes=('_from-HBA', '_from-Import'))

    # Rename SHM_Item for clarity  
    dfs['Access_PO'] = dfs['Access_PO'].rename(columns={'SHM_Item': 'SHM_Item_from-All_PO'})  

    # Merge with Access PO DataFrame  
    merged_df = merged_df.merge(dfs['Access_PO'], on='CJ_Item', how='outer', suffixes=('_from-Import', '_from-All_PO'))  
    
    # Merge with Daily Stock DC DataFrame  
    merged_df = merged_df.merge(dfs['Daily_Stock_DC'], on='CJ_Item', how='outer', suffixes=('_from-All_PO', '_from-DailyDC'))  

    # Merge with Access database DataFrame  
    merged_df = merged_df.merge(access_df, on='CJ_Item', how='left')

    # Rename columns
    merged_df = merged_df.rename(columns={  
        'SHM_Item': 'SHM_Item_from_qry_Product_List',
        'Division': 'Division_CJ_stock',  
        'Devision': 'Division_SHM'  
    })  

    # Create a new column for NPD Status by First_SO_Date 
    today = pd.to_datetime(datetime.now().date())  
    merged_df['days_from_first_ATP'] = (today - pd.to_datetime(merged_df['First_SO_Date'])).dt.days  
    merged_df['NPD_Status'] = np.where(merged_df['days_from_first_ATP'] <= 31, 'NPD', '-')  

    # Fill in missing values for descriptive columns  
    merged_df['Name'] = merged_df['Name'].fillna(merged_df['Description'])  
    merged_df['Category'] = merged_df['Category'].fillna(merged_df['Cat'])  
    merged_df['Subcate'] = merged_df['Subcate'].fillna(merged_df['Sub_Cat'])  

    # export merged_df to excel
    merged_df.to_excel(r'D:\Data for Stock Report\debug.xlsx', index=False)
    return merged_df

# Create the 'SHM_Item' column based on priority from various sources
def create_shm_item_column(merged_df):
    merged_df['SHM_Item_from-All_PO'] = merged_df['SHM_Item_from-All_PO'].replace('', np.nan)
    merged_df['SHM_Item_from-HBA'] = merged_df['SHM_Item_from-HBA'].replace('', np.nan)
    merged_df['SHM_Item_from-Import'] = merged_df['SHM_Item_from-Import'].replace('', np.nan)
    merged_df['SHM_Item_from_qry_Product_List'] = merged_df['SHM_Item_from_qry_Product_List'].replace('', np.nan)

    merged_df['SHM_Item'] = np.where(
        merged_df['SHM_Item_from-All_PO'].notna(), merged_df['SHM_Item_from-All_PO'],
        np.where(merged_df['SHM_Item_from-HBA'].notna(), merged_df['SHM_Item_from-HBA'],
                 np.where(merged_df['SHM_Item_from-Import'].notna(), merged_df['SHM_Item_from-Import'],
                          merged_df['SHM_Item_from_qry_Product_List'].fillna('NO SHM Item in Access')))
    )

    return merged_df

# Fill NaN values in numeric columns with 0
def fill_na_with_zero(df):
    df[df.select_dtypes(include=[np.number]).columns] = df.select_dtypes(include=[np.number]).fillna(0)
    return df

def rearrange_columns(merged_df):
    first_columns = ['CJ_Item','SHM_Item','SHM_Item_from-All_PO','SHM_Item_from-HBA','SHM_Item_from-Import','SHM_Item_from_qry_Product_List']
    remaining_columns = [col for col in merged_df.columns if col not in first_columns]
    final_column_order = first_columns + remaining_columns
    merged_df = merged_df[final_column_order]

    return merged_df

# Create new column to sum ALL PO Pending
def calculate_totals(merged_df):
    dc_columns = [1, 2, 4]
    for dc in dc_columns:
        merged_df[f'Total-PO_qty_to_DC{dc}'] = (
            merged_df[f'PO_Qty_to_DC{dc}'] +
            merged_df[f'PO_Qty_to_DC{dc}_from-Import'] + 
            merged_df[f'PO_Qty_to_DC{dc}_from-HBA'])

        # Calculate %Ratio with error handling
        merged_df[f'%Ratio_AvgSalesQty90D_DC{dc}'] = (
            merged_df[f'DC{dc}_AvgSaleQty90D'] / merged_df['Total_AvgSaleQty90D'].replace(0, 1)
        ).replace([np.inf, -np.inf], 0)

    # Calculate Total Remain Stock
    merged_df['Remain_StockQty_AllDC'] = merged_df['DC1_Remain_StockQty'] + merged_df['DC2_Remain_StockQty'] + merged_df['DC4_Remain_StockQty']
    merged_df['Remain_StockValue_AllDC'] = merged_df['DC1_Remain_StockValue'] + merged_df['DC2_Remain_StockValue'] + merged_df['DC4_Remain_StockValue']

    # Calculate SO Qty
    for dc in dc_columns:
        merged_df[f'DC{dc}_SO_Last30D'] = round(merged_df['SO_Qty_last30D'] * merged_df[f'%Ratio_AvgSalesQty90D_DC{dc}'])
        merged_df[f'DC{dc}_SO_Last7D'] = round(merged_df['SO_Qty_last7D'] * merged_df[f'%Ratio_AvgSalesQty90D_DC{dc}'])

        merged_df[f'DC{dc}_AvgSaleQty30D'] = merged_df[f'DC{dc}_SO_Last30D'] / 30
        merged_df[f'DC{dc}_AvgSaleQty7D'] = merged_df[f'DC{dc}_SO_Last7D'] / 7

    merged_df['Total_AvgSaleQty30D'] = merged_df[[f'DC{dc}_AvgSaleQty30D' for dc in dc_columns]].sum(axis=1)
    merged_df['Total_AvgSaleQty7D'] = merged_df[[f'DC{dc}_AvgSaleQty7D' for dc in dc_columns]].sum(axis=1)
    return merged_df


# Simplify DOH calculation
def calculate_DOH(stock_value, avg_cogs):
    return np.where(
        (stock_value != 0) & (avg_cogs == 0), np.inf,
        np.where((stock_value == 0) & (avg_cogs == 0), 0, stock_value / avg_cogs)
    )

# Simplified DOH calculations for various stock locations
def apply_doh_calculations(merged_df):
    merged_df['Current_DOH_All_DC'] = calculate_DOH(merged_df['Remain_StockQty_AllDC'], merged_df['Total_AvgSaleQty90D'])
    merged_df['Current_DC1_DOH'] = calculate_DOH(merged_df['DC1_Remain_StockQty'], merged_df['DC1_AvgSaleQty90D'])
    merged_df['Current_DC2_DOH'] = calculate_DOH(merged_df['DC2_Remain_StockQty'], merged_df['DC2_AvgSaleQty90D'])
    merged_df['Current_DC4_DOH'] = calculate_DOH(merged_df['DC4_Remain_StockQty'], merged_df['DC4_AvgSaleQty90D'])

    # Create new col for calculate
    merged_df['Total-PO_qty_to_DC'] = merged_df['Total-PO_qty_to_DC1'] + merged_df['Total-PO_qty_to_DC2'] + merged_df['Total-PO_qty_to_DC4']
    merged_df['Current_DOH(Stock+PO)_All_DC'] = calculate_DOH(merged_df['Remain_StockQty_AllDC'] + merged_df['Total-PO_qty_to_DC'], merged_df['Total_AvgSaleQty90D'])
    merged_df['DC1_DOH(Stock+PO)'] = calculate_DOH(merged_df['DC1_Remain_StockQty'] + merged_df['Total-PO_qty_to_DC1'], merged_df['DC1_AvgSaleQty90D'])
    merged_df['DC2_DOH(Stock+PO)'] = calculate_DOH(merged_df['DC2_Remain_StockQty'] + merged_df['Total-PO_qty_to_DC2'], merged_df['DC2_AvgSaleQty90D'])
    merged_df['DC4_DOH(Stock+PO)'] = calculate_DOH(merged_df['DC4_Remain_StockQty'] + merged_df['Total-PO_qty_to_DC4'], merged_df['DC4_AvgSaleQty90D'])


    merged_df['Min_delivery_date_to_DC1'] = merged_df[['Min_del_date_to_DC1', 'Min_del_date_to_DC1_from-Import', 'Min_del_date_to_DC1_from-HBA']].min(axis=1)
    merged_df['Min_delivery_date_to_DC2'] = merged_df[['Min_del_date_to_DC2', 'Min_del_date_to_DC2_from-Import', 'Min_del_date_to_DC2_from-HBA']].min(axis=1)
    merged_df['Min_delivery_date_to_DC4'] = merged_df[['Min_del_date_to_DC4','Min_del_date_to_DC4_from-Import','Min_del_date_to_DC4_from-HBA']].min(axis=1)
    merged_df['Min_delivery_date_to_DC'] = merged_df[['Min_delivery_date_to_DC1', 'Min_delivery_date_to_DC2','Min_delivery_date_to_DC4']].min(axis=1)

    # Calculate Stock cover date as a date time format
    current_date = pd.to_datetime(datetime.now().date())
    # Define maximum value for calculation
    max_doh_value = 1825

    # Set Current DOH if exceeding the max DOH, then = infinite
    merged_df['Current_DOH_All_DC'] = np.where(merged_df['Current_DOH_All_DC'] > max_doh_value, np.inf, merged_df['Current_DOH_All_DC'])
    merged_df['Current_DC1_DOH'] = np.where(merged_df['Current_DC1_DOH'] > max_doh_value, np.inf, merged_df['Current_DC1_DOH'])
    merged_df['Current_DC2_DOH'] = np.where(merged_df['Current_DC2_DOH'] > max_doh_value, np.inf, merged_df['Current_DC2_DOH'])
    merged_df['Current_DC4_DOH'] = np.where(merged_df['Current_DC4_DOH'] > max_doh_value, np.inf, merged_df['Current_DC4_DOH'])

    # Also set DOH of Stock+PO to inf
    merged_df['Current_DOH(Stock+PO)_All_DC'] = np.where(merged_df['Current_DOH(Stock+PO)_All_DC'] > max_doh_value, np.inf, merged_df['Current_DOH(Stock+PO)_All_DC'])
    merged_df['DC1_DOH(Stock+PO)'] = np.where(merged_df['DC1_DOH(Stock+PO)'] > max_doh_value, np.inf, merged_df['DC1_DOH(Stock+PO)'])
    merged_df['DC2_DOH(Stock+PO)'] = np.where(merged_df['DC2_DOH(Stock+PO)'] > max_doh_value, np.inf, merged_df['DC2_DOH(Stock+PO)'])
    merged_df['DC4_DOH(Stock+PO)'] = np.where(merged_df['DC4_DOH(Stock+PO)'] > max_doh_value, np.inf, merged_df['DC4_DOH(Stock+PO)'])

    # Create new columns and initialize with NaT
    merged_df['Total_Store_cover_to_date'] = pd.NaT
    merged_df['Stock_All_DC_Cover_to_date'] = pd.NaT
    merged_df['Stock+PO_All_DC_Cover_to_date'] = pd.NaT
    merged_df['Store_DC1_cover_to_date'] = pd.NaT
    merged_df['Stock_DC1_cover_to_date'] = pd.NaT
    merged_df['Stock+PO_DC1_cover_to_date'] = pd.NaT
    merged_df['Store_DC2_cover_to_date'] = pd.NaT
    merged_df['Stock_DC2_cover_to_date'] = pd.NaT
    merged_df['Stock+PO_DC2_cover_to_date'] = pd.NaT
    merged_df['Store_DC4_cover_to_date'] = pd.NaT
    merged_df['Stock_DC4_cover_to_date'] = pd.NaT
    merged_df['Stock+PO_DC4_cover_to_date'] = pd.NaT


    # Loop through each row to handle calculations
    for index, row in merged_df.iterrows():
        # Process Total Stores DOH
        all_store_doh = row['Total_DOHStore']
        if pd.notnull(all_store_doh) and all_store_doh > 0 and not np.isinf(all_store_doh) and all_store_doh <= max_doh_value:
            merged_df.at[index, 'Total_Store_cover_to_date'] = current_date + pd.to_timedelta(all_store_doh, unit='d')

        # Process for Current DOH All DC
        Current_DOH_All_DC = row['Current_DOH_All_DC']
        if pd.notnull(Current_DOH_All_DC) and Current_DOH_All_DC > 0 and not np.isinf(Current_DOH_All_DC) and Current_DOH_All_DC <= max_doh_value:
            merged_df.at[index, 'Stock_All_DC_Cover_to_date'] = current_date + pd.to_timedelta(Current_DOH_All_DC, unit='d')

        # Process for Current Stock + PO
        doh_all_dc_plus_po = row['Current_DOH(Stock+PO)_All_DC']
        if pd.notnull(doh_all_dc_plus_po) and doh_all_dc_plus_po > 0 and not np.isinf(doh_all_dc_plus_po) and doh_all_dc_plus_po <= max_doh_value:
            merged_df.at[index, 'Stock+PO_All_DC_Cover_to_date'] = current_date + pd.to_timedelta(doh_all_dc_plus_po, unit='d')

        # Process Stores DC1 DOH
        DC1_store_doh = row['DC1_DOHStore']
        if pd.notnull(DC1_store_doh) and DC1_store_doh > 0 and not np.isinf(DC1_store_doh) and DC1_store_doh <= max_doh_value:
            merged_df.at[index, 'Store_DC1_cover_to_date'] = current_date + pd.to_timedelta(DC1_store_doh, unit='d')

        # Process for Current DC1 DOH
        current_dc1_doh = row['Current_DC1_DOH']
        if pd.notnull(current_dc1_doh) and current_dc1_doh > 0 and not np.isinf(current_dc1_doh) and current_dc1_doh <= max_doh_value:
            merged_df.at[index, 'Stock_DC1_cover_to_date'] = current_date + pd.to_timedelta(current_dc1_doh, unit='d')

        # Process for Current Stock + PO DC1
        doh_dc1_plus_po = row['DC1_DOH(Stock+PO)']
        if pd.notnull(doh_dc1_plus_po) and doh_dc1_plus_po > 0 and not np.isinf(doh_dc1_plus_po) and doh_dc1_plus_po <= max_doh_value:
            merged_df.at[index, 'Stock+PO_DC1_cover_to_date'] = current_date + pd.to_timedelta(doh_dc1_plus_po, unit='d')

        # Process Stores DC2 DOH
        DC2_store_doh = row['DC2_DOHStore']
        if pd.notnull(DC2_store_doh) and DC2_store_doh > 0 and not np.isinf(DC2_store_doh) and DC2_store_doh <= max_doh_value:
            merged_df.at[index, 'Store_DC2_cover_to_date'] = current_date + pd.to_timedelta(DC2_store_doh, unit='d')

        # Process for Current DC2 DOH
        current_dc2_doh = row['Current_DC2_DOH']
        if pd.notnull(current_dc2_doh) and current_dc2_doh > 0 and not np.isinf(current_dc2_doh) and current_dc2_doh <= max_doh_value:
            merged_df.at[index, 'Stock_DC2_cover_to_date'] = current_date + pd.to_timedelta(current_dc2_doh, unit='d')

        # Process for Current Stock + PO DC2
        doh_dc2_plus_po = row['DC2_DOH(Stock+PO)']
        if pd.notnull(doh_dc2_plus_po) and doh_dc2_plus_po > 0 and not np.isinf(doh_dc2_plus_po) and doh_dc2_plus_po <= max_doh_value:
            merged_df.at[index, 'Stock+PO_DC2_cover_to_date'] = current_date + pd.to_timedelta(doh_dc2_plus_po, unit='d')

        # Process Stores DC4 DOH
        DC4_store_doh = row['DC4_DOHStore']
        if pd.notnull(DC4_store_doh) and DC4_store_doh > 0 and not np.isinf(DC4_store_doh) and DC4_store_doh <= max_doh_value:
            merged_df.at[index, 'Store_DC4_cover_to_date'] = current_date + pd.to_timedelta(DC4_store_doh, unit='d')            

        # Process for Current DC4 DOH
        current_dc4_doh = row['Current_DC4_DOH']
        if pd.notnull(current_dc4_doh) and current_dc4_doh > 0 and not np.isinf(current_dc4_doh) and current_dc4_doh <= max_doh_value:
            merged_df.at[index, 'Stock_DC4_cover_to_date'] = current_date + pd.to_timedelta(current_dc4_doh, unit='d')

        # Process for Current Stock + PO DC4
        doh_dc4_plus_po = row['DC4_DOH(Stock+PO)']
        if pd.notnull(doh_dc4_plus_po) and doh_dc4_plus_po > 0 and not np.isinf(doh_dc4_plus_po) and doh_dc4_plus_po <= max_doh_value:
            merged_df.at[index, 'Stock+PO_DC4_cover_to_date'] = current_date + pd.to_timedelta(doh_dc4_plus_po, unit='d')
    
    return merged_df

# Main function
def main():
    dataframes = load_data(file_paths)
    access_df, access_df2 = load_access_data()
    access_df, access_df2 = convert_cj_item_to_string(dataframes, access_df, access_df2)
    
    merged_df = merge_dataframes(dataframes, access_df)
    merged_df = create_shm_item_column(merged_df)

    # Merge with access_df again to get the correct supplier name
    merged_df = pd.merge(merged_df, access_df2, on=['CJ_Item', 'SHM_Item'], how='left')
    # Merge with master Owner and lead time to get owner name and lead time
    merged_df = merged_df.merge(dataframes['Master_Owner&LT'],on=['SHM_Item','CJ_Item'],how='left')
    merged_df = merged_df.fillna({'OwnerSCM': 'No data', 'LeadTime': 'No data'})

    merged_df = fill_na_with_zero(merged_df)
    merged_df = rearrange_columns(merged_df)
    merged_df = calculate_totals(merged_df)
    merged_df = apply_doh_calculations(merged_df)

   # Execute SQL query
    query = """
    SELECT Division_SHM,
        OwnerSCM,
        [Supplier Name],
        SHM_Item,
        CJ_Item,
        Name,
        Category,
        Brand,
        LeadTime,
        Status,
        [Group],
        Unit,
        PC_Cartons,
        First_SO_Date,
        NPD_Status,
        Total_ScmAssort,
        Total_OOSAssort,
        Total_CountOKROOS,
        Total_PercOOS,
        Total_StoreStockQty,
        Total_DOHStore,
        Total_Store_cover_to_date,
        Total_AvgSaleQty90D,
        Total_AvgSaleQty30D,
        Total_AvgSaleQty7D,
        SO_Qty_last7D,
        Remain_StockQty_AllDC,
        Current_DOH_All_DC,
        Stock_All_DC_Cover_to_date,
        [Total-PO_qty_to_DC],
        Min_delivery_date_to_DC,
        [Current_DOH(Stock+PO)_All_DC],
        [Stock+PO_All_DC_Cover_to_date],
        DC1_ScmAssort,
        DC1_OOSAssort,
        DC1_CountOKROOS,
        DC1_PercOOS,
        DC1_StoreStockQty,
        DC1_DOHStore,
        Store_DC1_cover_to_date,
        DC1_AvgSaleQty90D,
        DC1_AvgSaleQty30D,
        DC1_AvgSaleQty7D,
        [%Ratio_AvgSalesQty90D_DC1],
        DC1_Remain_StockQty,
        Current_DC1_DOH,
        Stock_DC1_cover_to_date,
        [Total-PO_qty_to_DC1],
        Min_delivery_date_to_DC1,
        [DC1_DOH(Stock+PO)],
        [Stock+PO_DC1_cover_to_date],
        DC2_ScmAssort,
        DC2_OOSAssort,
        DC2_CountOKROOS,
        DC2_PercOOS,
        DC2_StoreStockQty,
        DC2_DOHStore,
        Store_DC2_cover_to_date,
        DC2_AvgSaleQty90D,
        DC2_AvgSaleQty30D,
        DC2_AvgSaleQty7D,
        [%Ratio_AvgSalesQty90D_DC2],
        DC2_Remain_StockQty,
        Current_DC2_DOH,
        Stock_DC2_cover_to_date,
        [Total-PO_qty_to_DC2],
        Min_delivery_date_to_DC2,
        [DC2_DOH(Stock+PO)],
        [Stock+PO_DC2_cover_to_date],
        DC4_ScmAssort,
        DC4_OOSAssort,
        DC4_CountOKROOS,
        DC4_PercOOS,
        DC4_StoreStockQty,
        DC4_DOHStore,
        Store_DC4_cover_to_date,
        DC4_AvgSaleQty90D,
        DC4_AvgSaleQty30D,
        DC4_AvgSaleQty7D,
        [%Ratio_AvgSalesQty90D_DC4],
        DC4_Remain_StockQty,
        Current_DC4_DOH,
        Stock_DC4_cover_to_date,
        [Total-PO_qty_to_DC4],
        Min_delivery_date_to_DC4,
        [DC4_DOH(Stock+PO)],
        [Stock+PO_DC4_cover_to_date]
    FROM merged_df
    WHERE ([Group] != 'Discontinuous' or [Group] IS NULL)
    """

    result_df = ps.sqldf(query, locals())
    

    # Rename All column for store to display a data this column is freezed
    columns_to_rename = {
        'Status': 'CJ_Status',
        'Group': 'SHM_Status',
        'Unit': 'Unit_of_Purchase',
        'LeadTime':'LeadTime(Days)',
        'Total_OOSAssort':'Total_ActiveAssort',
        'Total_CountOKROOS':'Total_StoreOOS',
        'Total_PercOOS':'Total_%StoreOOS'
        }
    
    # Rename the columns
    result_df.rename(columns=columns_to_rename, inplace=True)
    renamed_columns = [columns_to_rename[old_name] for old_name in columns_to_rename if old_name in result_df.columns]
    print(f"The following columns have been renamed: {columns_to_rename}")


    # Filter to show the data only all numeric column > 0
    numeric_cols = result_df.select_dtypes(include=[np.number]).columns
    result_df = result_df[(result_df[numeric_cols] != 0).any(axis=1)]

    # Format the date columns to display in Excel
    date_columns = [
        'First_SO_Date',
        'Min_delivery_date_to_DC',
        'Min_delivery_date_to_DC1',
        'Min_delivery_date_to_DC2',
        'Min_delivery_date_to_DC4',
        'Total_Store_cover_to_date',
        'Store_DC1_cover_to_date',
        'Store_DC2_cover_to_date',
        'Store_DC4_cover_to_date',
        'Stock_All_DC_Cover_to_date',
        'Stock_DC1_cover_to_date',
        'Stock_DC2_cover_to_date',
        'Stock_DC4_cover_to_date',
        'Stock+PO_All_DC_Cover_to_date',
        'Stock+PO_DC1_cover_to_date',
        'Stock+PO_DC2_cover_to_date',
        'Stock+PO_DC4_cover_to_date'
    ]

    for col in date_columns:
        if col in result_df.columns:
            result_df[col] = pd.to_datetime(result_df[col], errors='coerce')  # Ensure it's in datetime format
    

    # Drop Duplicate rows when all data in each columns is duplicated except in the []
    column_to_check = result_df.columns.difference(['SHM_Item','SHM_Status'])  # get all column except in []
    deduplicated_df = result_df.drop_duplicates(subset=column_to_check)


    # Drop SHM_Item = No shm item in access and Save deduplicated_df to excel
    #deduplicated_df = deduplicated_df[deduplicated_df['SHM_Item'] != 'NO SHM Item in Access']
    #deduplicated_df.to_excel(r'D:\Data for Stock Report\Data_for_OOS_Log\3.Mar_10-03-2025.xlsx', index=False)

    # Handle duplicates 'CJ_Item' by using groupby
    def replace_cj_duplicate(group):
        # For each numeric check for duplicates
        for col in numeric_cols:
            if col == 'PC_Cartons':
                continue  # Skip when column = pc_carton
            if group[col].nunique() == 1:  # All values are the same in this group
                group.iloc[1:, group.columns.get_loc(col)] = 0  # Replace all but keep the first row
        
        # Filter these 2 columns if both columns are 0 then drop the row, keep the first row
        group = group[(group[['Remain_StockQty_AllDC', 'Total-PO_qty_to_DC']].ne(0).any(axis=1)) | (group.index == group.index.min())]

        return group
    
    # Apply this function to each group of 'CJ_Item'
    deduplicated_df = deduplicated_df.groupby('CJ_Item').apply(replace_cj_duplicate).reset_index(drop=True)

    # Rename column for converting QTY to BOX QTY
    column_rename_mapping = {
        'Total_AvgSaleQty90D': 'Total_AvgSaleCTN_Last90D',
        'Total_AvgSaleQty30D': 'Total_AvgSaleCTN_Last30D',
        'Total_AvgSaleQty7D': 'Total_AvgSaleCTN_Last7D',
        'DC1_AvgSaleQty90D': 'DC1_AvgSaleCTN_Last90Days',
        'DC1_AvgSaleQty30D': 'DC1_AvgSaleCTN_Last30Days',
        'DC1_AvgSaleQty7D': 'DC1_AvgSaleCTN_Last7Days',
        'DC2_AvgSaleQty90D': 'DC2_AvgSaleCTN_Last90Days',
        'DC2_AvgSaleQty30D': 'DC2_AvgSaleCTN_Last30Days',
        'DC2_AvgSaleQty7D': 'DC2_AvgSaleCTN_Last7Days',
        'DC4_AvgSaleQty90D': 'DC4_AvgSaleCTN_Last90Days',
        'DC4_AvgSaleQty30D': 'DC4_AvgSaleCTN_Last30Days',
        'DC4_AvgSaleQty7D': 'DC4_AvgSaleCTN_Last7Days',
        'Total_StoreStockQty': 'Total_StoreStockCTN',
        'DC1_StoreStockQty': 'DC1_StoreStockCTN',
        'DC2_StoreStockQty': 'DC2_StoreStockCTN',
        'DC4_StoreStockQty': 'DC4_StoreStockCTN',
        'SO_Qty_last7D': 'SO_CTN_last7D',
        'Remain_StockQty_AllDC': 'Remain_CTN_AllDC',
        'DC1_Remain_StockQty': 'DC1_Remain_CTN',
        'DC2_Remain_StockQty': 'DC2_Remain_CTN',
        'DC4_Remain_StockQty': 'DC4_Remain_CTN',
        'Total-PO_qty_to_DC': 'Total-CTN_to_DC',
        'Total-PO_qty_to_DC1': 'Total-CTN_to_DC1',
        'Total-PO_qty_to_DC2': 'Total-CTN_to_DC2',
        'Total-PO_qty_to_DC4': 'Total-CTN_to_DC4'
    }

    # Process the rename
    modified_df = deduplicated_df.rename(columns=column_rename_mapping)

    # List of new columns that were just renamed
    renamed_columns = list(column_rename_mapping.values())
    
    # Loop through renamed column to perform division by PC_Cartons
    for col in renamed_columns:
        modified_df[col] = np.where(
            modified_df['PC_Cartons'] == 0,0,
            modified_df[col] / modified_df['PC_Cartons']
        )

    # Define current date and save path 
    current_date = datetime.now().strftime('%d-%m-%Y')
    save_directory = r'D:\Data for Stock Report\Completed Daily Stock Report'
    file_name = f"Sahamit Daily Stock Report_{current_date}.xlsx"
    save_path = os.path.join(save_directory, file_name)

    if not os.path.exists(save_directory):
        os.makedirs(save_directory)

    # Save the merged DataFrame
    try: 
        with pd.ExcelWriter(save_path, mode='w') as writer:
            deduplicated_df.to_excel(writer, sheet_name='Data by Qty', index=False)
            modified_df.to_excel(writer, sheet_name='Data by Cartons',index=False)

            # For checking Raw data
            #merged_df.to_excel(writer, sheet_name='Raw Data', index=False)
            #result_df.to_excel(writer, sheet_name='after query', index=False)

            # Load All sheet from Raw_PO_pending and save them together
            existing_file_path = r'D:\Data for Stock Report\Raw_PO_pending.xlsx'
            if os.path.exists(existing_file_path):
                workbook = load_workbook(existing_file_path)
                for sheet_name in workbook.sheetnames:
                    existing_df = pd.read_excel(existing_file_path,sheet_name=sheet_name)
                    existing_df.to_excel(writer,sheet_name=sheet_name,index=False)
            else:
                print(f"File not found: {existing_file_path}")

        print(f"Data merged and saved successfully at {save_path}")
    except Exception as e:
        print(f"An error occurred while saving the file: {e}")

if __name__ == "__main__":
    main()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  owner_scm_df_selected.rename(columns={'Base Lead Time (Days)': 'LeadTime'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  owner_scm_df_selected['CJ_Item'] = owner_scm_df_selected['CJ_Item'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  owner_scm_df_selected['CJ_Item'] = owner_scm_df_selected['CJ_Item'].fillna('')
A value is tr

Connection successful


  access_df = pd.read_sql(query, conn)
  query2_df = pd.read_sql(query2, conn)
  query3_df = pd.read_sql(query3, conn)


Connection to Access database is closed.


  merged_df['Current_DOH(Stock+PO)_All_DC'] = calculate_DOH(merged_df['Remain_StockQty_AllDC'] + merged_df['Total-PO_qty_to_DC'], merged_df['Total_AvgSaleQty90D'])
  merged_df['DC1_DOH(Stock+PO)'] = calculate_DOH(merged_df['DC1_Remain_StockQty'] + merged_df['Total-PO_qty_to_DC1'], merged_df['DC1_AvgSaleQty90D'])
  merged_df['DC2_DOH(Stock+PO)'] = calculate_DOH(merged_df['DC2_Remain_StockQty'] + merged_df['Total-PO_qty_to_DC2'], merged_df['DC2_AvgSaleQty90D'])
  merged_df['DC4_DOH(Stock+PO)'] = calculate_DOH(merged_df['DC4_Remain_StockQty'] + merged_df['Total-PO_qty_to_DC4'], merged_df['DC4_AvgSaleQty90D'])
  merged_df['Min_delivery_date_to_DC1'] = merged_df[['Min_del_date_to_DC1', 'Min_del_date_to_DC1_from-Import', 'Min_del_date_to_DC1_from-HBA']].min(axis=1)
  merged_df['Min_delivery_date_to_DC2'] = merged_df[['Min_del_date_to_DC2', 'Min_del_date_to_DC2_from-Import', 'Min_del_date_to_DC2_from-HBA']].min(axis=1)
  merged_df['Min_delivery_date_to_DC4'] = merged_df[['Min_del_date_to_DC4'

The following columns have been renamed: {'Status': 'CJ_Status', 'Group': 'SHM_Status', 'Unit': 'Unit_of_Purchase', 'LeadTime': 'LeadTime(Days)', 'Total_OOSAssort': 'Total_ActiveAssort', 'Total_CountOKROOS': 'Total_StoreOOS', 'Total_PercOOS': 'Total_%StoreOOS'}


  deduplicated_df = deduplicated_df.groupby('CJ_Item').apply(replace_cj_duplicate).reset_index(drop=True)


Data merged and saved successfully at D:\Data for Stock Report\Completed Daily Stock Report\Sahamit Daily Stock Report_10-03-2025.xlsx
