In [1]:
import pandas as pd
import os
import time
import pyodbc
from datetime import datetime

def ensure_file_available(file_path):
    if not os.path.exists(file_path):
        print("File is not available. Please check your file in OneDrive")
        return False
    print("File is available. Proceeding with the processes")
    return True

   
def clean_and_groupby_file_PO_pending():
    file_path = r'C:\Users\Thanawit C\OneDrive - Sahamit Product Co.,Ltd\Data for Stock Report\COPY_Summary Forecast.xlsx'
    save_path = r'D:\Data for Stock Report\cleaned_PO_pending_import_party.xlsx'
    
    ensure_file_available(file_path)

    try:
        # Attempt to read the Excel file
        df2 = pd.read_excel(file_path, sheet_name='Summary Forecast', header=3)
    except PermissionError:
        print(f"Permission denied: {file_path}. Please check if the file is open or if you have access.")
        return
    except FileNotFoundError:
        print(f"File not found: {file_path}. Please check the file path.")
        return
    except Exception as e:
        print(f"An error occurred while reading the file: {e}")
        return

    # Convert the specific columns to datetime
    df2['แผนส่งเข้าคลังโพธาราม'] = pd.to_datetime(df2['แผนส่งเข้าคลังโพธาราม'], errors='coerce')
    df2['แผนส่งเข้าคลังบางปะกง'] = pd.to_datetime(df2['แผนส่งเข้าคลังบางปะกง'], errors='coerce')
    df2['แผนส่งเข้าคลังขอนแก่น'] = pd.to_datetime(df2['แผนส่งเข้าคลังขอนแก่น'], errors ='coerce')
    df2['CJ_Item'] = df2['CJ_Item'].astype(str)

    today = pd.to_datetime(datetime.now().date())
    
    # Filter Delviery date >= Today
    df2_filtered = df2[(df2['แผนส่งเข้าคลังโพธาราม'] >= today) | (df2['แผนส่งเข้าคลังบางปะกง'] >= today) | (df2['แผนส่งเข้าคลังขอนแก่น'] >= today)]
    
    # Select needed columns
    selected_columns = df2_filtered[['SHM_Item',
                                      'CJ_Item',
                                      'Name',
                                      'PO CJ No.',
                                      'เรือ ETA',
                                      'แผนส่งเข้าคลังโพธาราม',
                                      'ยอดสั่งโพธาราม(ลัง)',
                                      'แผนส่งเข้าคลังบางปะกง',
                                      'ยอดสั่งบางปะกง(ลัง)',
                                      'แผนส่งเข้าคลังขอนแก่น',
                                      'ยอดสั่งขอนแก่น(ลัง)']]
    
    # Rename columns
    selected_columns.rename(columns={'Name' : 'Product Name',
                                     'แผนส่งเข้าคลังโพธาราม': 'Date_to_DC1',
                                     'ยอดสั่งโพธาราม(ลัง)': 'cartons_to_DC1',
                                     'แผนส่งเข้าคลังบางปะกง': 'Date_to_DC2',
                                     'ยอดสั่งบางปะกง(ลัง)': 'cartons_to_DC2',
                                     'แผนส่งเข้าคลังขอนแก่น': 'Date_to_DC4',
                                     'ยอดสั่งขอนแก่น(ลัง)': 'cartons_to_DC4'}, inplace=True)
    
    
    # Step to connect to Access database and load data
    access_db_path = r'D:\DataBase Access\SHM_TMS_001_Master_Copy.accdb'  # Updated Access DB path
    connection_string = f"Driver={{Microsoft Access Driver (*.mdb, *.accdb)}};DBQ={access_db_path};"

    conn = None
    
    try:
        # Connect to Access database
        conn = pyodbc.connect(connection_string)
        
        # Load data from Access
        query = """
        SELECT SHM_Item,
               CJ_Item,
               PC_Cartons
        FROM qry_Product_List
        """

        access_data = pd.read_sql(query, conn)

        query2 = """
        SELECT [PO Date],
               [PO Num],
               [PO Ref],
               [Supplier Name],
               Item as SHM_Item,
               CJ_Item,
               CJ_Description as [Product Name],
               Devision,
               PC_Cartons,
               Rec_Date,
               [Order Qty],
               Unit,
               Delivery_Status, 
               Customer, 
               DC_Name
        FROM qry_Output_For_Excel_New 
        WHERE Delivery_Status IS NULL
        AND Customer = 'CJ'
        AND Devision in ('Import-Foods', 'Import-NF')
        """

        access_data2 = pd.read_sql(query2, conn)
        access_data2.rename(columns={'PO Ref': 'PO CJ No.'}, inplace=True)

        dc_name_mapping = {
            'CJ DC1 ราชบุรี': 'DC1',
            'CJ DC2 บางปะกง': 'DC2',
            'DC โพธาราม': 'DC1',
            'DC บางวัว 1': 'DC2',
            'DC ขอนแก่น': 'DC4',
            'DC บางวัว 2': 'TD09'
        }
        access_data2['DC_Name'] = access_data2['DC_Name'].replace(dc_name_mapping)
        # Create new column for PO_Qty with condition on Unit
        access_data2['PO_Qty'] = access_data2.apply(lambda row: row['Order Qty'] * row['PC_Cartons'] if row['Unit'] == 'ลัง' else row['Order Qty'], axis=1)
        # Create new column for PO_Ctn with condition on Unit
        access_data2['PO_CTN'] = access_data2.apply(lambda row: row['Order Qty'] if row['Unit'] == 'ลัง' else row['Order Qty'] / row['PC_Cartons'], axis=1)
        
    
        pivot_df = access_data2.pivot_table(
            index=['SHM_Item', 'CJ_Item', 'Product Name','PO CJ No.'],
            columns = 'DC_Name',
            values= 'PO_Qty',
            aggfunc='sum'
        ).reset_index()
        pivot_df.columns = ['SHM_Item', 'CJ_Item', 'Product Name','PO CJ No.'] + [f'PO_Qty_to_{col}' for col in pivot_df.columns[4:]]

        pivot_df2 = access_data2.pivot_table(
            index=['SHM_Item', 'CJ_Item', 'Product Name','PO CJ No.'],
            columns = 'DC_Name',
            values= 'PO_CTN',
            aggfunc='sum'
        ).reset_index()
        pivot_df2.columns = ['SHM_Item', 'CJ_Item', 'Product Name','PO CJ No.'] + [f'cartons_to_{col}' for col in pivot_df2.columns[4:]]

        pivot_df3 = access_data2.pivot_table(
            index=['SHM_Item', 'CJ_Item', 'Product Name','PO CJ No.'],
            columns='DC_Name',
            values='Rec_Date',
            aggfunc='min'
        ).reset_index()
        pivot_df3.columns = ['SHM_Item', 'CJ_Item', 'Product Name','PO CJ No.'] + [f'Date_to_{col}' for col in pivot_df3.columns[4:]]

        # MERGE 2 pivot tables
        merged_df = pd.merge(pivot_df, pivot_df2, on=['SHM_Item', 'CJ_Item', 'Product Name','PO CJ No.'], how='left')
        merged_df2 = pd.merge(merged_df, pivot_df3, on=['SHM_Item', 'CJ_Item', 'Product Name','PO CJ No.'], how='left')
        
        # Merge Access data with the selected columns
        cleaned_data = pd.merge(selected_columns, access_data[['CJ_Item', 'SHM_Item', 'PC_Cartons']], on=['CJ_Item', 'SHM_Item'], how='left')


        # Calculate new columns
        cleaned_data['PO_Qty_to_DC1'] = cleaned_data['cartons_to_DC1'] * cleaned_data['PC_Cartons']
        cleaned_data['PO_Qty_to_DC2'] = cleaned_data['cartons_to_DC2'] * cleaned_data['PC_Cartons']
        cleaned_data['PO_Qty_to_DC4'] = cleaned_data['cartons_to_DC4'] * cleaned_data['PC_Cartons']

        cleaned_data['PO CJ No.'] = cleaned_data['PO CJ No.'].astype(str) 

        # Identify the PO CJ No. values that are already present in cleaned_data
        existing_po_cj_no = cleaned_data['PO CJ No.'].unique()

        # Filter out rows from merged_df2 where PO CJ No. is already in cleaned_data
        merged_df2_filtered = merged_df2[~merged_df2['PO CJ No.'].isin(existing_po_cj_no)]

        # Combine the cleaned data with the filtered pivot data
        combined_data = pd.concat([cleaned_data, merged_df2_filtered], ignore_index=True)


        # Group the cleaned data for the pivot table
        pivot_data = combined_data.groupby(['SHM_Item', 'CJ_Item']).agg({
            'PO_Qty_to_DC1': 'sum',
            'Date_to_DC1' : 'min',
            'PO_Qty_to_DC2': 'sum',
            'Date_to_DC2' : 'min',
            'PO_Qty_to_DC4': 'sum',
            'Date_to_DC4' : 'min'
        }).reset_index()  # Reset index to keep grouping columns as regular columns

        # Rename the columns to match the desired output
        pivot_data.rename(columns={
            'Date_to_DC1': 'Min_del_date_to_DC1',
            'Date_to_DC2': 'Min_del_date_to_DC2',
            'Date_to_DC4': 'Min_del_date_to_DC4'
        }, inplace=True)

        print(pivot_data.dtypes)
    
    except Exception as e:
        print(f"An error occurred while connecting to Access: {e}")
        return
    finally:
        # Ensure the connection is closed after processing
        if conn is not None:
            conn.close()
            print("Connection to Access database closed.")


    # Write the cleaned data with new calculations to the Excel file
    try:
        with pd.ExcelWriter(save_path, mode='w') as writer:  # Use mode 'w' to write new data without overwriting
            pivot_data.to_excel(writer, sheet_name='Pivot Import', index=False)
            combined_data.to_excel(writer, sheet_name='cleaned data', index=False)
            merged_df2_filtered.to_excel(writer, sheet_name='cleaned data_fromAccess', index=False)
        print(f"PO pending Import report has been saved to: {save_path}")
    except Exception as e:
        print(f"An error occurred while saving the file: {e}")

# Run the function
clean_and_groupby_file_PO_pending()


File is available. Proceeding with the processes


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_columns.rename(columns={'Name' : 'Product Name',
  access_data = pd.read_sql(query, conn)
  access_data2 = pd.read_sql(query2, conn)


SHM_Item                       object
CJ_Item                        object
PO_Qty_to_DC1                 float64
Min_del_date_to_DC1    datetime64[ns]
PO_Qty_to_DC2                 float64
Min_del_date_to_DC2    datetime64[ns]
PO_Qty_to_DC4                 float64
Min_del_date_to_DC4    datetime64[ns]
dtype: object
Connection to Access database closed.
PO pending Import report has been saved to: D:\Data for Stock Report\cleaned_PO_pending_import_party.xlsx
