In [1]:
import os
import pandas as pd
from datetime import datetime


# Load the CJ Master Stock File
def get_latest_master_file(directory, file_prefix, file_extension):
    # Match the prefix and extension
    masterfiles = [f for f in os.listdir(directory) if f.startswith(file_prefix) and f.endswith(file_extension)]

    # Extract date from filenames
    dates = []
    for file in masterfiles:
        try:
            date_str = file.replace(file_prefix, '').replace(file_extension, '').strip()
            date_obj = datetime.strptime(date_str, '%d-%m-%Y')  # Change to '%d-%m-%Y' to match date format
            dates.append((file, date_obj))  # Corrected to append tuple
        except ValueError:
            # If the filename doesn't match then skip
            continue

    # Sort files by name and return the latest file
    if dates:
        latest_file = max(dates, key=lambda x: x[1])[0]
        return os.path.join(directory, latest_file)
    else:
        return None

# Step 2.1: Clean the latest master file
def clean_master_file(source_directory, destination_directory):
    file_prefix = 'Sahamit Report '
    file_extension = '.xlsx'
    latest_file_path = get_latest_master_file(source_directory, file_prefix, file_extension)  # Step to find the latest file

    if latest_file_path:
        print(f"Loading the latest CJ Stock File: {latest_file_path}")
        master_df = pd.read_excel(latest_file_path, sheet_name='Sahamit Report', header=2)

        # Rename Column name = 'Product' to 'CJ_Item'
        if 'Product' in master_df.columns:
            master_df.rename(columns={'Product' : 'CJ_Item'},inplace=True)
            print("Column 'Product' has been renamed to 'CJ_Item'")
        else:
            print("Column 'Product' not found in the DataFrame")

        # Filter All data except A-Home and UNO in column Divsion
        filter_master_file = ['A-HOME','UNO']
        master_df = master_df[~master_df['Division'].isin(filter_master_file)]
        print(f"Filter Division >> {filter_master_file} out of master file")

        # Exclude CJ_Item that not belong to 100510 and 101317
        exclude_cj_path = r'D:\Data for Stock Report\Exclude CJ_Item.xlsx'
        exclude_df = pd.read_excel(exclude_cj_path, sheet_name='Exclude')
        print("Loading product list from Company Code: 101318 and 401155 to exclude from this file")

        merged_df = master_df.merge(exclude_df,on='CJ_Item',how='left',indicator='_from-merged')
        
        master_df = merged_df[merged_df['_from-merged'] !='both']
        master_df.drop('_from-merged',axis=1,inplace=True)

        # Save the cleaned version
        cleaned_file_path = os.path.join(destination_directory, 'cleaned_CJ_Stock_Report.xlsx')
        master_df.to_excel(cleaned_file_path, sheet_name='CJ Stock', index=False)  # Save without the index column
        print(f"Latest File has been cleaned & loaded to: {cleaned_file_path}")

        return master_df
    else:
        print("Not Found File Stock by CJ.")
        return None

# Main function
def main():
    # Define directories
    source_directory = r'T:\SCM Data\Data For Stock\DC_Store'
    destination_directory = r'D:\Data for Stock Report'

    # Clean the latest master file and save to the destination
    clean_master_file(source_directory, destination_directory)

if __name__ == "__main__":
    main()


Loading the latest CJ Stock File: T:\SCM Data\Data For Stock\DC_Store\Sahamit Report 24-03-2025.xlsx
Column 'Product' has been renamed to 'CJ_Item'
Filter Division >> ['A-HOME', 'UNO'] out of master file
Loading product list from Company Code: 101318 and 401155 to exclude from this file


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  master_df.drop('_from-merged',axis=1,inplace=True)


Latest File has been cleaned & loaded to: D:\Data for Stock Report\cleaned_CJ_Stock_Report.xlsx
