# M&A SAMYANG Report 

In [8]:
import pandas as pd
import numpy as np
from pathlib import Path
import re
from datetime import datetime

# ==========================
# Cell 1 — list the report files and auto-select the two to compare
# (every name in these cells carries the sy_drayage_ / sy_ prefix)
# ==========================

# Folder the report workbooks are read from (relative to the working directory).
sy_drayage_DATA_DIR = Path("data") / "SY" / "drayage"

# Every *.xlsx directly inside that folder (glob is not recursive here).
sy_drayage_files = list(sy_drayage_DATA_DIR.glob("*.xlsx"))

def sy_drayage_extract_date(path: Path):
    """Return the report date encoded at the start of a file name.

    The name must begin with eight digits laid out as MMDDYYYY
    (e.g. ``11232025 - 2.xlsx``). Returns a ``datetime.date``, or ``None``
    when the name has no such prefix or the digits are not a valid date.
    """
    leading_digits = re.match(r"(\d{8})", path.name)
    if leading_digits is None:
        return None
    try:
        parsed = datetime.strptime(leading_digits.group(1), "%m%d%Y")
    except ValueError:
        # Eight digits that do not form a real calendar date.
        return None
    return parsed.date()

# Order the reports oldest -> newest.
#  - Files whose name carries no parseable date sort first (datetime.min).
#  - The file name is the tie-breaker, so two reports that share a date always
#    come out in the same order instead of depending on glob()'s ordering.
sy_drayage_files_sorted = sorted(
    sy_drayage_files,
    key=lambda p: (sy_drayage_extract_date(p) or datetime.min.date(), p.name),
)

# The comparison needs a previous AND a current report; fail with a readable
# message instead of a bare "list index out of range".
if len(sy_drayage_files_sorted) < 2:
    raise IndexError(
        f"Need at least two .xlsx reports in {sy_drayage_DATA_DIR} to compare, "
        f"found {len(sy_drayage_files_sorted)}"
    )

# The two newest files are the ones compared by the following cells.
sy_drayage_prev_file = sy_drayage_files_sorted[-2]
sy_drayage_curr_file = sy_drayage_files_sorted[-1]

sy_drayage_prev_date = sy_drayage_extract_date(sy_drayage_prev_file)
sy_drayage_curr_date = sy_drayage_extract_date(sy_drayage_curr_file)

sy_drayage_today = datetime.today().date()

print(f"- Today        : {sy_drayage_today}")
print(f"- 최신 리포트     : {sy_drayage_curr_date}  ({sy_drayage_curr_file.name})")
print(f"- 이전 리포트     : {sy_drayage_prev_date}  ({sy_drayage_prev_file.name})")


- Today        : 2025-11-23
- 최신 리포트     : 2025-11-23  (11232025 - 2.xlsx)
- 이전 리포트     : 2025-11-21  (11212025 - 2.xlsx)


In [9]:
# ==========================
# Cell 2 — load the previous/current reports and align their columns
# ==========================

# Columns that together form the row key; later cells pass this list to
# set_index() to match rows between the two reports.
sy_drayage_KEY = ["Customer Reference No.", "Container No."]

# Columns kept from each report, in the order they are kept.
sy_drayage_DISPLAY_COLS = [
    "Customer Reference No.",
    "Pick Up Location",
    "Master B/L No.",
    "Container No.",
    "Last Free Date",
    "P/U APPT DATE",
    "P/U APPT TIME",
    "EMPTY NOTICE",
    "Container Remark",
    "DELIVERY DATE",
    "Return Date",
]

# Step 1: read both workbooks.
sy_drayage_df_prev = pd.read_excel(sy_drayage_prev_file)
sy_drayage_df_curr = pd.read_excel(sy_drayage_curr_file)

# Step 2: trim leading/trailing whitespace (including newlines) from headers.
sy_drayage_df_prev.columns = sy_drayage_df_prev.columns.str.strip()
sy_drayage_df_curr.columns = sy_drayage_df_curr.columns.str.strip()

# Step 3: guarantee every display column exists; a missing one is created
# filled with pd.NA.
for sy_drayage_col in sy_drayage_DISPLAY_COLS:
    for sy_drayage_frame in (sy_drayage_df_prev, sy_drayage_df_curr):
        if sy_drayage_col not in sy_drayage_frame.columns:
            sy_drayage_frame[sy_drayage_col] = pd.NA

# Step 4: keep only the display columns, in DISPLAY_COLS order, on both sides.
sy_drayage_df_prev = sy_drayage_df_prev.loc[:, sy_drayage_DISPLAY_COLS]
sy_drayage_df_curr = sy_drayage_df_curr.loc[:, sy_drayage_DISPLAY_COLS]

print("셀2 완료 — 리포트 로드 및 컬럼 정리 완료")


셀2 완료 — 리포트 로드 및 컬럼 정리 완료


---

## Cell 3 - 일자별 요약 리포트

In [10]:
# ==========================
# SY Drayage — 셀3 (완전 재작성 / 최종 버전)
# ==========================

import re
import numpy as np
import pandas as pd
from datetime import datetime

# ==========================
# 1) Columns whose values are compared between the two reports
#    (only those present in both frames are kept)
# ==========================
sy_drayage_compare_cols = [
    c
    for c in (
        "Last Free Date",
        "P/U APPT DATE",
        "P/U APPT TIME",
        "DELIVERY DATE",
        "Return Date",
        "Container Remark",
    )
    if c in sy_drayage_df_prev.columns and c in sy_drayage_df_curr.columns
]

# ==========================
# 2) Index both reports by the composite key
# ==========================
sy_prev_k = sy_drayage_df_prev.set_index(sy_drayage_KEY)
sy_curr_k = sy_drayage_df_curr.set_index(sy_drayage_KEY)

sy_prev_idx, sy_curr_idx = sy_prev_k.index, sy_curr_k.index

# Keys in both reports / keys that appear only in the current report.
sy_common_idx = sy_prev_idx.intersection(sy_curr_idx)
sy_new_idx = sy_curr_idx.difference(sy_prev_idx)

# ==========================
# 3) Compared columns for the rows both reports share
# ==========================
prev_c = sy_prev_k.loc[sy_common_idx, sy_drayage_compare_cols].copy()
curr_c = sy_curr_k.loc[sy_common_idx, sy_drayage_compare_cols].copy()

# ==========================
# 4) normalize 함수들 정의
# ==========================

# 4-1) Text normalizer (Unicode spaces -> plain space, then trim)
def normalize_str(x):
    """Return ``x`` as trimmed text, or NaN when ``x`` is missing.

    NBSP (U+00A0), U+2000–U+200B and the ideographic space (U+3000) are each
    replaced by a regular space before the surrounding whitespace is stripped,
    so values that differ only by such characters at the edges compare equal.
    """
    if not pd.isna(x):
        text = re.sub(r"[\u00A0\u2000-\u200B\u3000]", " ", str(x))
        return text.strip()
    return np.nan

# 4-2) Date normalizer (keeps the calendar date only)
def normalize_date(x):
    """Return ``x`` as a ``datetime.date``, or NaN when it is not a date.

    Args:
        x: A single cell value (timestamp, date-like string, or missing).

    Returns:
        The date part of the parsed value; NaN when the value is missing or
        cannot be parsed.
    """
    try:
        dt = pd.to_datetime(x, errors="coerce")
        if pd.isna(dt):
            return np.nan
        return dt.date()
    except Exception:
        # Anything pandas cannot treat as one timestamp counts as "no date".
        # `Exception` rather than a bare except so Ctrl-C / SystemExit still
        # propagate while a long .apply() is running.
        return np.nan

# 4-3) Time normalizer (HH:MM)
def normalize_time(x):
    """Return ``x`` as an ``"HH:MM"`` string, or NaN when it is not a time.

    Args:
        x: A single cell value. ``datetime.time`` objects, timestamps and
            time/date-time strings are accepted.

    Returns:
        The zero-padded 24-hour ``"HH:MM"`` text, or NaN when the value is
        missing or cannot be parsed.
    """
    # Local import: at module level the name `datetime` is bound to the
    # datetime *class*, so the time class has to be fetched explicitly.
    from datetime import time as _time

    # pd.to_datetime() cannot convert a bare datetime.time, so without this
    # branch such cells would all collapse to NaN and time changes would go
    # unnoticed.
    if isinstance(x, _time):
        return x.strftime("%H:%M")

    try:
        dt = pd.to_datetime(x, errors="coerce")
        if pd.isna(dt):
            return np.nan
        return dt.strftime("%H:%M")
    except Exception:
        # Unparseable input counts as "no time"; `Exception` (not a bare
        # except) keeps Ctrl-C / SystemExit working.
        return np.nan

# ==========================
# 5) Run each compared column through its matching normalizer
# ==========================
prev_n = pd.DataFrame(index=prev_c.index)
curr_n = pd.DataFrame(index=curr_c.index)

for col in sy_drayage_compare_cols:

    # Pick the normalizer by column kind: date, time, otherwise plain text.
    if col in ["Last Free Date", "P/U APPT DATE", "DELIVERY DATE", "Return Date"]:
        normalizer = normalize_date
    elif col == "P/U APPT TIME":
        normalizer = normalize_time
    else:
        normalizer = normalize_str

    prev_n[col] = prev_c[col].apply(normalizer)
    curr_n[col] = curr_c[col].apply(normalizer)

# ==========================
# 6) Decide what changed (two missing values count as equal)
# ==========================
both_missing = prev_n.isna() & curr_n.isna()
equal_mask = both_missing | (prev_n == curr_n)

changed_mask = ~equal_mask

# A row counts as changed when at least one compared column differs.
changed_rows_mask = changed_mask.any(axis=1)
sy_drayage_changed_rows_count = int(changed_rows_mask.sum())

# ==========================
# 7) Per-column change counts
# ==========================
def count_changes(col):
    """Return ``(changed, newly_filled)`` counts for one compared column.

    ``changed`` is the number of common rows flagged in ``changed_mask`` for
    ``col``; ``newly_filled`` is how many of the common rows were missing in
    the previous report and have a value in the current one. A column absent
    from ``changed_mask`` yields ``(0, 0)``.
    """
    if col in changed_mask.columns:
        was_empty = prev_n[col].isna()
        now_filled = curr_n[col].notna()
        return int(changed_mask[col].sum()), int((was_empty & now_filled).sum())
    return 0, 0

# One (changed, newly filled) pair per compared column.
# Note: the deliv_* pair tracks "Container Remark" and the delivered_* pair
# tracks "DELIVERY DATE".
lfd_changed, lfd_new = count_changes("Last Free Date")
pu_date_changed, pu_date_new = count_changes("P/U APPT DATE")
pu_time_changed, pu_time_new = count_changes("P/U APPT TIME")
deliv_changed, deliv_new = count_changes("Container Remark")
delivered_changed, delivered_new = count_changes("DELIVERY DATE")
return_changed, return_new = count_changes("Return Date")

# ==========================
# 8) Print the summary
# ==========================
print("=== SY Drayage 리포트 변경 요약 ===")
print(f"- 신규 컨테이너 추가: {len(sy_new_idx)} 건")
print(f"- 값이 변경된 컨테이너 행: {sy_drayage_changed_rows_count} 건\n")

print(f"- LFD: 변경 {lfd_changed} 건 (New: {lfd_new} 건)")
print(f"- 픽업 날짜: 변경 {pu_date_changed} 건 (New: {pu_date_new} 건)")
print(f"- 픽업 시간: 변경 {pu_time_changed} 건 (New: {pu_time_new} 건)")
print(f"- 배송 예정(Container Remark): 변경 {deliv_changed} 건 (New: {deliv_new} 건)")
print(f"- 배송 완료일(Delivered): 변경 {delivered_changed} 건 (New: {delivered_new} 건)")
print(f"- 리턴 날짜(Return): 변경 {return_changed} 건 (New: {return_new} 건)")


=== SY Drayage 리포트 변경 요약 ===
- 신규 컨테이너 추가: 0 건
- 값이 변경된 컨테이너 행: 27 건

- LFD: 변경 5 건 (New: 4 건)
- 픽업 날짜: 변경 1 건 (New: 1 건)
- 픽업 시간: 변경 1 건 (New: 1 건)
- 배송 예정(Container Remark): 변경 17 건 (New: 2 건)
- 배송 완료일(Delivered): 변경 7 건 (New: 7 건)
- 리턴 날짜(Return): 변경 5 건 (New: 5 건)



## Cell 3 End

---

## Cell 4 - Final Report 

In [11]:
# ==========================
# sy_drayage 셀 4 — 변경된 컨테이너 상세 표
#  - strip() 기반 동일값은 변경 아님
#  - Delivery 파싱 동일 기준
#  - 빈칸 Delivery 는 색칠 안함
#  - 날짜는 yyyy-mm-dd, 시간은 PU Time만 hh:mm 유지
# ==========================

import numpy as np
from datetime import date
import re

# =========================================================
# 1) Align prev/curr on the shared keys and build the change mask
#    (values are compared as stripped text; missing == missing)
# =========================================================

sy_prev_k = sy_drayage_df_prev.set_index(sy_drayage_KEY)
sy_curr_k = sy_drayage_df_curr.set_index(sy_drayage_KEY)

sy_prev_idx = sy_prev_k.index
sy_curr_idx = sy_curr_k.index

sy_common_idx = sy_prev_idx.intersection(sy_curr_idx)

# Rows present in both reports, selected in the same order on both sides.
sy_prev_c = sy_prev_k.loc[sy_common_idx]
sy_curr_c = sy_curr_k.loc[sy_common_idx]

# Columns available on both sides.
sy_shared_cols = [c for c in sy_prev_c.columns if c in sy_curr_c.columns]

# Working copies used only for the comparison.
sy_prev_c2 = sy_prev_c[sy_shared_cols].copy()
sy_curr_c2 = sy_curr_c[sy_shared_cols].copy()

for col in sy_shared_cols:
    for sy_side in (sy_prev_c2, sy_curr_c2):
        # Record which cells are missing BEFORE casting to text: astype(str)
        # turns NaN / NaT / pd.NA into the distinct strings "nan" / "NaT" /
        # "<NA>", which would make two empty cells of different missing
        # kinds look like a change.
        sy_was_missing = sy_side[col].isna()
        sy_text = sy_side[col].astype(str).str.strip()
        # Missing and whitespace-only cells both become NaN so the
        # "both missing" rule below applies to them.
        sy_side[col] = sy_text.mask(sy_was_missing | (sy_text == ""), np.nan)

# Equal when the stripped text matches or both sides are missing.
sy_equal_mask = (sy_prev_c2 == sy_curr_c2) | (sy_prev_c2.isna() & sy_curr_c2.isna())
sy_changed_mask = ~sy_equal_mask

# Only these columns decide whether a container counts as changed
# (the same six columns Cell 3 summarizes).
sy_imp_cols = [
    "Last Free Date",
    "P/U APPT DATE",
    "P/U APPT TIME",
    "DELIVERY DATE",
    "Return Date",
    "Container Remark",
]
sy_imp_cols = [c for c in sy_imp_cols if c in sy_shared_cols]

sy_imp_mask = sy_changed_mask[sy_imp_cols]

# Keep the containers with at least one changed important column.
sy_row_changed = sy_imp_mask.any(axis=1)

sy_changed_prev = sy_prev_c.loc[sy_row_changed]
sy_changed_curr = sy_curr_c.loc[sy_row_changed]

print(f"변경된 컨테이너 수: {len(sy_changed_curr)} 건\n")

# =========================================================
# 2) Base table — current-report values of the changed rows
# =========================================================

sy_table = sy_changed_curr.reset_index()

# =========================================================
# 3) Delivery 파싱 로직
# =========================================================

def sy_parse_delivery(raw, base_year):
    """Interpret one "Container Remark" cell as a planned-delivery entry.

    Args:
        raw: The cell value (text, a date-like value, or a missing marker).
        base_year: Year to assume when the remark gives only month/day.

    Returns:
        A ``(delivery_date, tag, fallback_text)`` tuple:

        * ``(None, None, None)`` – missing or blank remark, or a remark that
          starts with "MT".
        * ``(date, "DEL" | "DEL-D", None)`` – exactly one m/d[/y] date plus
          "DEL"; the tag is "DEL-D" when the remark also contains "DIRECT".
        * ``(date, None, None)`` – the whole remark parses as a date.
        * ``(None, None, text)`` – anything else (several dates, the word OR,
          an impossible date, free text); ``text`` is the stripped remark.
    """
    if raw is None:
        return None, None, None
    if not isinstance(raw, str):
        # Covers float NaN as well as pd.NA / NaT, whose str() forms
        # ("<NA>", "NaT") would otherwise be shown as remark text.
        is_missing = pd.isna(raw)
        if np.ndim(is_missing) == 0 and bool(is_missing):
            return None, None, None

    s0 = str(raw)
    s = s0.strip()
    if s == "":
        return None, None, None

    up = s.upper()

    # Remarks starting with "MT" are excluded from the delivery column.
    if up.startswith("MT"):
        return None, None, None

    # m/d or m/d/yy(yy) occurrences.
    date_matches = re.findall(r"(\d{1,2})/(\d{1,2})(?:/(\d{2,4}))?", up)

    # "OR" must stand alone (not be part of a longer word); a plain substring
    # test also fires on STORE, PORT, FOR, ... and wrongly demotes a valid
    # single-date remark to raw text.
    has_or_word = re.search(r"(?<![A-Z])OR(?![A-Z])", up) is not None

    # Alternatives or several dates cannot be reduced to one date: keep text.
    if len(date_matches) >= 2 or has_or_word:
        return None, None, s

    # Exactly one date together with DEL (or DEL ... DIRECT).
    if len(date_matches) == 1 and "DEL" in up:
        mm, dd, yy = date_matches[0]
        mm, dd = int(mm), int(dd)

        if yy:
            year = int(yy)
            if year < 100:
                year += 2000  # two-digit years are taken as 20yy
        else:
            year = base_year

        try:
            d = date(year, mm, dd)
        except ValueError:
            # e.g. 13/45 — not a real calendar date, show the text instead.
            return None, None, s

        tag = "DEL-D" if "DIRECT" in up else "DEL"
        return d, tag, None

    # Otherwise accept a remark that is itself a date (e.g. yyyy-mm-dd).
    try:
        dt = pd.to_datetime(s0, errors="raise")
    except Exception:
        # Any parse failure simply means "not a plain date"; `Exception`
        # (not a bare except) so Ctrl-C / SystemExit still propagate.
        dt = None
    if dt is not None and np.ndim(dt) == 0 and not pd.isna(dt):
        return dt.date(), None, None

    return None, None, s


# Month/day-only remarks are dated with the current report's year.
sy_base_year = sy_drayage_curr_date.year

# Parallel lists, one entry per row of sy_table:
#   sy_delivery_dates – parsed date (None when there is none), used for sorting
#   sy_delivery_texts – text shown in the "Delivery" column
sy_delivery_dates = []
sy_delivery_texts = []

for v in sy_table["Container Remark"]:
    d, tag, ex = sy_parse_delivery(v, sy_base_year)
    sy_delivery_dates.append(d)

    if d is None:
        # No date: show the fallback text when there is one, else leave blank.
        sy_delivery_texts.append("" if ex is None else ex)
        continue

    base = d.strftime("%Y-%m-%d")
    if tag == "DEL-D":
        label = f"{base} (DEL-D)"
    elif tag == "DEL":
        label = f"{base} (DEL)"
    else:
        label = base
    sy_delivery_texts.append(label)

# =========================================================
# 4) Build the display table
#    (dates as yyyy-mm-dd text; only PU Time is shown as hh:mm)
# =========================================================

# Source columns renamed to short display headers; "Delivery" comes from the
# parsed remark texts built above (same row order as sy_table).
sy_changed_table = pd.DataFrame({
    "PO#":       sy_table["Customer Reference No."],
    "MBL#":      sy_table["Master B/L No."],
    "CNTR#":     sy_table["Container No."],
    "LFD":       sy_table["Last Free Date"],
    "PU Date":   sy_table["P/U APPT DATE"],
    "PU Time":   sy_table["P/U APPT TIME"],
    "Delivery":  sy_delivery_texts,
    "Delivered": sy_table["DELIVERY DATE"],
    "Return":    sy_table["Return Date"],
})

# Date columns -> "yyyy-mm-dd" text; unparseable or missing values become "".
for col in ["LFD", "PU Date", "Delivered", "Return"]:
    dt = pd.to_datetime(sy_changed_table[col], errors="coerce")
    sy_changed_table[col] = dt.dt.strftime("%Y-%m-%d").fillna("")

# Time column (PU Time only) -> "hh:mm" text; unparseable or missing -> "".
# NOTE(review): values pd.to_datetime cannot convert are coerced to NaT and
# shown blank — confirm the source never delivers such values for this column.
sy_changed_table["PU Time"] = (
    pd.to_datetime(sy_changed_table["PU Time"], errors="coerce")
      .dt.strftime("%H:%M")
      .fillna("")
)

# Temporary datetime columns used only as sort keys.
sy_changed_table["Delivery_sort"] = pd.to_datetime(sy_delivery_dates, errors="coerce")
sy_changed_table["LFD_sort"]      = pd.to_datetime(sy_changed_table["LFD"], errors="coerce")

# Sort by LFD, then by parsed delivery date, and drop the helper columns.
sy_changed_table = (
    sy_changed_table
    .sort_values(["LFD_sort", "Delivery_sort"], ascending=[True, True])
    .drop(columns=["LFD_sort", "Delivery_sort"])
    .reset_index(drop=True)
)
# Number the rows from 1 for display.
sy_changed_table.index = range(1, len(sy_changed_table) + 1)

# =========================================================
# 5) Highlight mask (a blank Delivery cell is never highlighted)
# =========================================================

# Same shape/labels as the display table; True marks a cell to highlight.
sy_mask_for_style = pd.DataFrame(False, index=sy_changed_table.index, columns=sy_changed_table.columns)

# Per-column change flags of the changed rows, still indexed by the key.
sy_mask_src = sy_changed_mask[sy_imp_cols].loc[sy_row_changed]

# Source column -> display column.
sy_col_map = {
    "Last Free Date":   "LFD",
    "P/U APPT DATE":    "PU Date",
    "P/U APPT TIME":    "PU Time",
    "Container Remark": "Delivery",
    "DELIVERY DATE":    "Delivered",
    "Return Date":      "Return",
}

for idx, row in sy_changed_table.iterrows():
    # The table was re-sorted, so look the flags up again by (PO#, CNTR#).
    key = (row["PO#"], row["CNTR#"])
    if key in sy_mask_src.index:
        src = sy_mask_src.loc[key]
        delivery_blank = str(row["Delivery"]).strip() == ""

        for src_col, disp_col in sy_col_map.items():
            if src_col not in src.index or disp_col not in sy_mask_for_style.columns:
                continue

            # An empty Delivery cell stays unhighlighted even if the remark changed.
            if src_col == "Container Remark" and delivery_blank:
                continue

            if bool(src[src_col]):
                sy_mask_for_style.at[idx, disp_col] = True

# =========================================================
# 6) Styling
# =========================================================

def sy_highlight_changes(df):
    """Return a CSS frame for ``Styler.apply(..., axis=None)``.

    Cells flagged in ``sy_mask_for_style`` get a yellow background with red
    text; all other cells get an empty style. The result carries the index
    and columns of ``df``.
    """
    highlight_css = "background-color: #fff3b0; color: red;"
    css_grid = np.where(sy_mask_for_style.values, highlight_css, "")
    return pd.DataFrame(css_grid, index=df.index, columns=df.columns)

# Styled view of the table: centered header cells plus the change highlighting
# (axis=None hands the whole frame to sy_highlight_changes in one call).
sy_styled_table = (
    sy_changed_table.style
        .set_table_styles([{"selector": "th", "props": [("text-align", "center")]}])
        .apply(sy_highlight_changes, axis=None)
)

# Last expression of the cell, so the notebook renders the styled table.
sy_styled_table


변경된 컨테이너 수: 27 건



Unnamed: 0,PO#,MBL#,CNTR#,LFD,PU Date,PU Time,Delivery,Delivered,Return
1,25-6828,ZIMUSEL71188158,JXLU4660830,2025-11-18,2025-11-21,11:00,2025-11-24 (DEL),,
2,25-6835,ZIMUSEL71188171,CAAU7789800,2025-11-18,2025-11-18,14:00,,2025-11-20,2025-11-22
3,25-6846,ZIMUSEL71188144,ZCSU7923334,2025-11-18,2025-11-18,15:00,,2025-11-20,2025-11-22
4,25-5984,ONEYSELFG9174903,ONEU5124335,2025-11-19,2025-11-19,14:00,,2025-11-21,2025-11-22
5,25-5985,ONEYSELFG9174904,TCNU6196339,2025-11-19,2025-11-19,15:00,,2025-11-21,2025-11-22
6,25-6829,ZIMUSEL71188186,JXLU6326186,2025-11-19,2025-11-21,10:00,,2025-11-22,
7,25-6833,ZIMUSEL71188178,JXLU6316716,2025-11-19,2025-11-21,10:00,,2025-11-22,
8,25-6096,ONEYSELFG9026514,ONEU6026837,2025-11-20,2025-11-21,13:00,2025-11-24 (DEL),,
9,25-6284,ONEYSELFG9026503,ONEU0848467,2025-11-20,2025-11-21,13:00,2025-11-24 (DEL),,
10,25-6106,COSU6432126330,OOCU8100007,2025-11-20,2025-11-20,06:00,,2025-11-20,2025-11-22


---

## Save

In [12]:
# ==========================
# sy_drayage 셀 5 — 엑셀 파일 저장
#  - 정렬: LFD → Delivery_sort
#  - strip 비교 기반 변경만 색칠
#  - Delivery 빈칸은 색칠 금지
# ==========================

from pathlib import Path
from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Font, Alignment
import re

# ----------------------------------------------------------
# 1) Prepare the output folder and file path
# ----------------------------------------------------------

sy_OUTPUT_DIR = Path("output")
sy_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# The file name carries the current report's date as YYYYMMDD.
sy_date_str = sy_drayage_curr_date.strftime("%Y%m%d")
sy_output_path = sy_OUTPUT_DIR / f"sy_drayage_diff_{sy_date_str}.xlsx"

# ----------------------------------------------------------
# 2) Work on a copy of the Cell 4 table
# ----------------------------------------------------------

sy_df_save = sy_changed_table.copy()

# ----------------------------------------------------------
# 3) Convert LFD to datetime (so sorting is chronological)
# ----------------------------------------------------------
# NOTE(review): the converted column is what gets exported, so LFD is written
# as datetime values (blank -> NaT) while the other date columns remain
# yyyy-mm-dd text — confirm that difference is intended.

sy_df_save["LFD"] = pd.to_datetime(sy_df_save["LFD"], errors="coerce")

# ----------------------------------------------------------
# 4) Delivery_sort (pull the yyyy-mm-dd part out of the Delivery text)
# ----------------------------------------------------------

def sy_extract_delivery_date(val):
    """Return the first ``yyyy-mm-dd`` substring of ``val``.

    Returns ``None`` when ``val`` is not a string or contains no such
    substring.
    """
    if isinstance(val, str):
        found = re.search(r"\d{4}-\d{2}-\d{2}", val)
        if found:
            return found.group(0)
    return None

# Sort key: the date embedded in the Delivery text (NaT when there is none).
sy_df_save["Delivery_sort"] = pd.to_datetime(
    sy_df_save["Delivery"].apply(sy_extract_delivery_date),
    errors="coerce"
)

# ----------------------------------------------------------
# 5) Sort, then drop Delivery_sort
# ----------------------------------------------------------

# LFD first, then delivery date; rows without a value go last.
sy_df_save = (
    sy_df_save
    .sort_values(["LFD", "Delivery_sort"], ascending=[True, True], na_position="last")
)

sy_export_df = sy_df_save.drop(columns=["Delivery_sort"])

# ----------------------------------------------------------
# 6) Write the Excel file
# ----------------------------------------------------------

# index=True writes the table's index as the first worksheet column.
sy_export_df.to_excel(
    sy_output_path,
    index=True,
    sheet_name="SY_Drayage_Diff"
)

# ----------------------------------------------------------
# 7) Apply Excel styling
# ----------------------------------------------------------

# Re-open the file just written so openpyxl can style its cells.
wb = load_workbook(sy_output_path)
ws = wb.active

# Center the header row.
for col in range(1, ws.max_column + 1):
    ws.cell(row=1, column=col).alignment = Alignment(horizontal="center", vertical="center")

# Look of a changed cell: solid FFF3B0 fill with FF0000 text.
fill_changed = PatternFill(fill_type="solid", fgColor="FFF3B0")
font_changed = Font(color="FF0000")

# ----------------------------------------------------------
# 8) Key-indexed prev/curr frames for the per-cell comparison
# ----------------------------------------------------------

sy_prev_k = sy_drayage_df_prev.set_index(sy_drayage_KEY)
sy_curr_k = sy_drayage_df_curr.set_index(sy_drayage_KEY)

# Display column -> source column.
sy_display_to_orig = {
    "LFD":       "Last Free Date",
    "PU Date":   "P/U APPT DATE",
    "PU Time":   "P/U APPT TIME",
    "Delivery":  "Container Remark",
    "Delivered": "DELIVERY DATE",
    "Return":    "Return Date",
}

# ----------------------------------------------------------
# 9) Highlight the changed cells
#     - a cell is "changed" when the stripped text of the previous and the
#       current source value differs (two missing values count as equal)
#     - a blank Delivery cell is never highlighted
# ----------------------------------------------------------

# Worksheet data starts at row 2 (row 1 is the header) and at column 2
# (column 1 holds the exported index).
for excel_row_idx, (df_idx, row) in enumerate(sy_export_df.iterrows(), start=2):

    po, cntr = row["PO#"], row["CNTR#"]
    key = (po, cntr)

    # Both reports must contain the container to compare it.
    if not (key in sy_prev_k.index and key in sy_curr_k.index):
        continue

    prev_row, curr_row = sy_prev_k.loc[key], sy_curr_k.loc[key]

    for col_offset, col_name in enumerate(sy_export_df.columns, start=2):

        # Only the mapped display columns are compared.
        if col_name not in sy_display_to_orig:
            continue

        orig_col = sy_display_to_orig[col_name]
        prev_val = prev_row.get(orig_col, pd.NA)
        curr_val = curr_row.get(orig_col, pd.NA)

        prev_missing = pd.isna(prev_val)
        curr_missing = pd.isna(curr_val)
        prev_s = "" if prev_missing else str(prev_val).strip()
        curr_s = "" if curr_missing else str(curr_val).strip()

        same = prev_s == curr_s or (prev_missing and curr_missing)

        # A blank Delivery cell is left unstyled.
        if col_name == "Delivery" and str(row["Delivery"]).strip() == "":
            continue

        if same:
            continue

        cell = ws.cell(row=excel_row_idx, column=col_offset)
        cell.fill = fill_changed
        cell.font = font_changed

# ----------------------------------------------------------
# 10) Fit each column width to its content
# ----------------------------------------------------------

for col_cells in ws.columns:
    col_letter = col_cells[0].column_letter
    # Longest text form among the non-empty cells of this column (0 if none).
    max_len = max(
        (len(str(cell.value)) for cell in col_cells if cell.value is not None),
        default=0,
    )
    ws.column_dimensions[col_letter].width = max_len + 2

wb.save(sy_output_path)
print(f"저장 완료: {sy_output_path}")

저장 완료: output/sy_drayage_diff_20251123.xlsx
