In [1]:
import pandas as pd
import os
import pyodbc

# Collect PO Pending Raw
po_file_path = {  
    'PO_HBA': r'D:\Data for Stock Report\cleaned_PO_pending_HBA.xlsx',  
    'PO_Import': r'D:\Data for Stock Report\cleaned_PO_pending_import_party.xlsx',  
    'Access_PO': r'D:\Data for Stock Report\cleaned_PO_pending_other.xlsx',  
}

# Load excel to DF from the specified sheet name
def load_data(po_file_path):
    dataframes = {}
    for key, path in po_file_path.items():
        print(f"Loading file for {key}: {path}")
        dataframes[key] = pd.read_excel(path, sheet_name='cleaned data')
    return dataframes

# Clean data for PO_HBA
def clean_po_hba(df):
    # Exclude unnecessary columns
    df = df.drop(columns=['Sold to', 'สถานที่จัดส่งสินค้า'])
    # Rename columns
    df.rename(columns={
        'SHM_Article': 'SHM_Item',
        'CJ_Article': 'CJ_Item',
        'SHM PO Date': 'PO Date',
        'SHM PO NO.': 'SHM PO No.',
        'CJ/TD PO.NO': 'PO CJ No.',
        'DC': 'Ship to DC',
        'วันที่โรงงานคอนเฟิร์มส่งสินค้า': 'Delivery Date',
        'สถานะการจัดส่งสินค้า': 'Delivery_Status',
        'หน่วยบรรจุ (ชิ้น/ลัง)': 'PC_Cartons',
        'จำนวนเปิด PO สหมิตร (ลัง)': 'PO Cartons',
        'จำนวนเปิด PO สหมิตร (ชิ้น)': 'PO_Qty',
        'Supplier (Short name)': 'Supplier Name'
    }, inplace=True)
    return df

# Clean data for PO_Import
def clean_po_import(df):
    # Unpivot the data
    df_unpivoted = df.melt(
        id_vars=['SHM_Item', 'CJ_Item', 'Product Name', 'PO CJ No.', 'เรือ ETA', 'PC_Cartons'],
        value_vars=['Date_to_DC1', 'cartons_to_DC1', 'Date_to_DC2', 'cartons_to_DC2', 'Date_to_DC4', 'cartons_to_DC4', 
                    'PO_Qty_to_DC1', 'PO_Qty_to_DC2', 'PO_Qty_to_DC4'],
        var_name='variable',
        value_name='value'
    )

    # Extract 'Ship to DC', 'PO Cartons', 'PO qty', and 'Date to DC' from the variable column
    df_unpivoted['Ship to DC'] = df_unpivoted['variable'].apply(
        lambda x: 'DC1' if 'DC1' in x else ('DC2' if 'DC2' in x else 'DC4'))
    df_unpivoted['PO Cartons'] = df_unpivoted.apply(lambda row: row['value'] if 'cartons' in row['variable'] else None, axis=1)
    df_unpivoted['PO qty'] = df_unpivoted.apply(lambda row: row['value'] if 'PO_Qty' in row['variable'] else None, axis=1)
    df_unpivoted['Date to DC'] = df_unpivoted.apply(lambda row: row['value'] if 'Date' in row['variable'] else None, axis=1)
    
    # Drop the variable and value columns
    df_unpivoted.drop(columns=['variable', 'value'], inplace=True)
    
    # Drop rows where all three columns ('PO Cartons', 'PO qty', and 'Date to DC') are null
    df_unpivoted.dropna(subset=['PO Cartons', 'PO qty', 'Date to DC'], how='all', inplace=True)

    # Handle missing values in 'เรือ ETA'
    df_unpivoted.fillna({"เรือ ETA": "Unknown"}, inplace=True) # Replace NaN with placeholder value

    # Pivot the data back to combine rows with the same SHM_Item, CJ_Item, PO CJ No., PC_Cartons, and Ship to DC
    final_import = df_unpivoted.groupby(
        ["SHM_Item", "CJ_Item", "Product Name", "PO CJ No.", "เรือ ETA", "PC_Cartons", "Ship to DC"]
    ).agg({
        "PO Cartons": "sum",
        "PO qty": "sum",
        "Date to DC": "first"
    }).reset_index()

    # Restore 'เรือ ETA' to NaN if it was replaced with 'Unknown'
    final_import["เรือ ETA"] = final_import["เรือ ETA"].replace("Unknown", pd.NA)

    # Rename columns
    final_import.rename(columns={
        'Date to DC': 'Delivery Date',
        'PO qty': 'PO_Qty'
    }, inplace=True)

    return final_import

# clean data for Access_PO
def clean_access_po(df):
    # Exclude unnecessary columns
    df = df.drop(columns=['Devision','Unit','Customer'])
    # Rename Column
    df.rename(columns={
        'PO Num': 'SHM PO No.',
        'PO Ref': 'PO CJ No.',
        'DC_Name': 'Ship to DC',
        'Rec_Date': 'Delivery Date',
        'Order Qty': 'PO Cartons',
        'CJ_Description':'Product Name'
    },inplace=True)
    return df


# Connect to Access database and fetch product details  
def load_access_data():  
    access_db_path = r'D:\DataBase Access\SHM_TMS_001_Master_Copy.accdb'  
    conn_str = (  
        r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'  
        f'DBQ={access_db_path};'  
    )  
    try:  
        conn = pyodbc.connect(conn_str)  
        print("Connection successful")

        query = """  
        SELECT CJ_Item,  
               SHM_Item,
               [Supplier Name],
               Devision
          FROM qry_Product_List  
        """

        access_df = pd.read_sql(query, conn)
        conn.close()  
        print("Connection to Access database is closed.")  
        return access_df
    except Exception as e:
        print(f"Error: {e}")
        return None
    
# Load DataFrame
dataframes = load_data(po_file_path)
cleaned_dataframes = {
    'PO_HBA': clean_po_hba(dataframes['PO_HBA']),
    'PO_Import': clean_po_import(dataframes['PO_Import']),
    'Access_PO': clean_access_po(dataframes['Access_PO'])
}
# Append DataFrames together
final_df = pd.concat(cleaned_dataframes.values(), ignore_index=True)
final_df['CJ_Item'] = final_df['CJ_Item'].astype(str)
final_df['SHM_Item'] = final_df['SHM_Item'].astype(str)

# Load Access data
access_df = load_access_data()
if access_df is not None:
    # Convert CJ_Item in access_df to string
    access_df['CJ_Item'] = access_df['CJ_Item'].astype('Int64').astype(str)
    access_df['SHM_Item'] = access_df['SHM_Item'].astype(str)

    # Merge DataFrames
    merged_df = pd.merge(final_df, access_df, on=['CJ_Item', 'SHM_Item'], how='left', suffixes=('', '_access'))

    # Update Supplier Name in final_df where it is null
    final_df['Supplier Name'] = final_df['Supplier Name'].fillna(merged_df['Supplier Name_access'])
    final_df['Division'] = merged_df['Devision']

# Load OwnerSCM
owner_scm_file_path = r'C:\Users\Thanawit C\OneDrive - Sahamit Product Co.,Ltd\Data for Stock Report\COPY_MasterLeadTime.xlsx'
owner_scm_df = pd.read_excel(owner_scm_file_path, sheet_name='All_Product', header=1)

# Cast column CJ_Item to str
owner_scm_df['CJ_Item'] = owner_scm_df['CJ_Item'].astype(str)
# Select some columns
owner_scm_df_selected = owner_scm_df[['SHM_Item','CJ_Item','OwnerSCM','Base Lead Time (Days)']]
owner_scm_df_selected['CJ_Item'] = owner_scm_df_selected['CJ_Item'].fillna('')
owner_scm_df_selected['CJ_Item'] = owner_scm_df_selected['CJ_Item'].apply(lambda x: x.split('.')[0] if '.0' in str(x) else x)

# Merge with Master Leadtime for get OwnerSCM
final_df = pd.merge(final_df,owner_scm_df_selected[['CJ_Item','SHM_Item','OwnerSCM']],on=['CJ_Item','SHM_Item'],how='left')

# Arrange column
desire_order = [
    'Division',
    'OwnerSCM',
    'PO Date',
    'SHM PO No.',
    'Supplier Name',
    'SHM_Item',
    'CJ_Item',
    'Product Name',
    'PO CJ No.',
    'PC_Cartons',
    'Ship to DC',
    'PO Cartons',
    'PO_Qty',
    'เรือ ETA',
    'Delivery Date',
    'Delivery_Status'
]
final_df = final_df.reindex(columns=desire_order)
final_df['เรือ ETA'] = pd.to_datetime(final_df['เรือ ETA'],errors='coerce')

final_df_pivot = final_df.pivot_table(
    index= ['CJ_Item','Ship to DC'],
    values=['เรือ ETA','Delivery Date'],
    aggfunc ='min'
).reset_index().rename(columns={'เรือ ETA':'Min ETA','Delivery Date':'First Delivery Date'})

final_df_pivot['ConcatIndex'] = final_df_pivot['CJ_Item'] + final_df_pivot['Ship to DC']


# Save updated final_df to Excel
save_directory = r'D:\Data for Stock Report'
file_name = "Raw_PO_pending.xlsx"
save_path = os.path.join(save_directory, file_name)


with pd.ExcelWriter(save_path, mode='w') as writer:
    # Save final_df to the "All PO Pending" sheet
    final_df.to_excel(writer, sheet_name='All PO Pending', index=False)
    final_df_pivot.to_excel(writer, sheet_name='MIN ETA', index=False)

print(f"combine all po pending has been saved to: {save_path}")



Loading file for PO_HBA: D:\Data for Stock Report\cleaned_PO_pending_HBA.xlsx
Loading file for PO_Import: D:\Data for Stock Report\cleaned_PO_pending_import_party.xlsx
Loading file for Access_PO: D:\Data for Stock Report\cleaned_PO_pending_other.xlsx
Connection successful


  access_df = pd.read_sql(query, conn)


Connection to Access database is closed.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  owner_scm_df_selected['CJ_Item'] = owner_scm_df_selected['CJ_Item'].fillna('')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  owner_scm_df_selected['CJ_Item'] = owner_scm_df_selected['CJ_Item'].apply(lambda x: x.split('.')[0] if '.0' in str(x) else x)


combine all po pending has been saved to: D:\Data for Stock Report\Raw_PO_pending.xlsx
