In [2]:
import os
from pathlib import Path

# プロジェクトルートをカレントにしておく
BASE = Path("/Users/hiraokatatsuru/Library/Mobile Documents/com~apple~CloudDocs/postal-operation-shift-management-system")

os.chdir(BASE)
db_path = BASE / "excel_templates" / "posms_demo.db"
os.environ["DATABASE_URL"] = f"sqlite:///{db_path.as_posix()}"

print("cwd:", os.getcwd())
print("DATABASE_URL:", os.environ["DATABASE_URL"])

from datetime import date, timedelta
import pandas as pd

from posms.features.builder import FeatureBuilder
from posms.models.trainer import ModelTrainer
from posms.models.predictor import ModelPredictor


cwd: /Users/hiraokatatsuru/Library/Mobile Documents/com~apple~CloudDocs/postal-operation-shift-management-system
DATABASE_URL: sqlite:////Users/hiraokatatsuru/Library/Mobile Documents/com~apple~CloudDocs/postal-operation-shift-management-system/excel_templates/posms_demo.db


In [13]:
from datetime import date, timedelta
import pandas as pd

from posms.features.builder import FeatureBuilder

def forecast_28_raw(mail_kind: str, run_id: str, start: date, office_id: int = 1) -> pd.DataFrame:
    """
    指定 mail_kind の 28日分の「生予測」(raw_pred) を返す。
    後処理（丸め・繰越）は一切しない。
    """
    fb = FeatureBuilder(office_id=office_id, mail_kind=mail_kind)

    results = []
    history = fb._load_mail().copy()

    for i in range(28):
        tgt = start + timedelta(days=i)

        pred = fb.predict(
            target_date=tgt,
            run_id=run_id,
            model_name=f"posms_{mail_kind}",
            stage=None,
        )
        results.append((tgt, float(pred)))

        new_row = {
            "date": pd.Timestamp(tgt),
            "office_id": office_id,
            "actual_volume": float(pred),
            "price_increase_flag": 0,
        }
        history = pd.concat([history, pd.DataFrame([new_row])], ignore_index=True)
        history = history.sort_values("date").reset_index(drop=True)

        fb._load_mail = (lambda hist=history: hist)

    df_raw = pd.DataFrame(results, columns=["date", "raw_pred"]).set_index("date")
    return df_raw


In [14]:
def postprocess_normal(df_raw: pd.DataFrame) -> pd.DataFrame:
    return ModelPredictor.apply_delivery_rules(
        df_raw,
        value_col="raw_pred",
        round_to_thousand=True,
        extend_to_next_delivery=True,
    )


In [15]:
def postprocess_daily_1piece(df_raw: pd.DataFrame) -> pd.DataFrame:
    rows = []
    for dt, v in df_raw["raw_pred"].items():
        v_clip = max(0.0, float(v))
        delivered = int(round(v_clip))  # 1通単位で四捨五入
        rows.append((dt.date(), float(v), None, delivered, True))
    return pd.DataFrame(
        rows,
        columns=["date", "raw_pred", "carry_in", "deliver_pred", "is_delivery_day"],
    )


In [16]:
import jpholiday

def is_weekend_or_holiday(dt) -> bool:
    d = dt.date() if hasattr(dt, "date") else dt
    return (d.weekday() >= 5) or jpholiday.is_holiday(d)

def postprocess_weekday_1piece(df_raw: pd.DataFrame) -> pd.DataFrame:
    s = df_raw["raw_pred"].copy()
    s.index = pd.to_datetime(s.index)

    carry = 0.0
    rows = []
    for dt, v in s.items():
        v_clip = max(0.0, float(v))
        if not is_weekend_or_holiday(dt):
            delivered = v_clip + carry
            d_int = int(round(delivered))
            rows.append((dt.date(), float(v), carry if carry > 0 else None, d_int, True))
            carry = 0.0
        else:
            carry += v_clip
            rows.append((dt.date(), float(v), None, 0, False))

    if carry > 0:
        dt = s.index[-1] + pd.Timedelta(days=1)
        while is_weekend_or_holiday(dt):
            dt += pd.Timedelta(days=1)
        d_int = int(round(carry))
        rows.append((dt.date(), 0.0, carry, d_int, True))

    return pd.DataFrame(
        rows,
        columns=["date", "raw_pred", "carry_in", "deliver_pred", "is_delivery_day"],
    )


In [7]:
# すでに学習済みの run_id_normal を使う（なければこのセルで学習）
fb_normal = FeatureBuilder(office_id=1, mail_kind="normal")
X_n, y_n = fb_normal.build()
trainer_normal = ModelTrainer(experiment="posms_normal")
run_id_normal = trainer_normal.train(X_n, y_n, tags={"mail_kind": "normal"})
print("run_id_normal =", run_id_normal)

df_raw_normal = forecast_28_raw("normal", run_id_normal, date(2025,1,1), office_id=1)
df_normal_28 = postprocess_normal(df_raw_normal)
df_normal_28.head(10)




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

run_id_normal = f70d0cc8442b46a999b7cff45555aa1c


AttributeError: type object 'ModelPredictor' has no attribute 'apply_delivery_rules'

In [9]:
fb_kaki = FeatureBuilder(office_id=1, mail_kind="kakitome")
X_k, y_k = fb_kaki.build()
trainer_kaki = ModelTrainer(experiment="posms_kakitome")
run_id_kaki = trainer_kaki.train(X_k, y_k, tags={"mail_kind": "kakitome"})
print("run_id_kaki =", run_id_kaki)

df_raw_kaki = forecast_28_raw("kakitome", run_id_kaki, date(2025,1,1), office_id=1)
df_kaki_28 = postprocess_daily_1piece(df_raw_kaki)
df_kaki_28.head(10)




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

run_id_kaki = 4e1cd13a118f427ca015dd709475a10c


AttributeError: 'datetime.date' object has no attribute 'date'

In [18]:
def simple_postprocess_piece(df_raw: pd.DataFrame) -> pd.DataFrame:
    """
    毎日配達・1通単位前提の簡単な後処理。
    （書留・レタパ・ゆうパケット・ゆうパック・EMSなどの確認用）
    """
    rows = []
    for dt, v in df_raw["raw_pred"].items():
        v_clip = max(0.0, float(v))
        delivered = int(np.round(v_clip))

        # dt が date でも datetime でも OK な変換
        date_obj = pd.to_datetime(dt).date()

        rows.append((date_obj, float(v), delivered))

    return pd.DataFrame(rows, columns=["date", "raw_pred", "deliver_pred"])


In [19]:
from datetime import date

# すでに run_id_kakitome があればそれを使う。なければここで学習
fb_kaki = FeatureBuilder(office_id=1, mail_kind="kakitome")
X_k, y_k = fb_kaki.build()
trainer_kaki = ModelTrainer(experiment="posms_kakitome")
run_id_kakitome = trainer_kaki.train(X_k, y_k, tags={"mail_kind": "kakitome"})
print("run_id_kakitome =", run_id_kakitome)

# 28日分の raw 予測
df_raw_kaki = forecast_28_raw("kakitome", run_id_kakitome, date(2025,1,1), office_id=1)

# まずは簡単な1通単位丸めで見てみる
df_kaki_28 = simple_postprocess_piece(df_raw_kaki)
df_kaki_28.head(10)




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

run_id_kakitome = c62aa608b09a41de947046fadb9f9558


Unnamed: 0,date,raw_pred,deliver_pred
0,2025-01-01,568.307922,568
1,2025-01-02,260.970856,261
2,2025-01-03,326.774384,327
3,2025-01-04,165.442978,165
4,2025-01-05,254.655075,255
5,2025-01-06,246.787918,247
6,2025-01-07,1146.353149,1146
7,2025-01-08,1309.540527,1310
8,2025-01-09,1534.743408,1535
9,2025-01-10,1261.5979,1262


In [20]:
# レターパックプラス
fb_lp = FeatureBuilder(office_id=1, mail_kind="letterpack_plus")
X_lp, y_lp = fb_lp.build()
trainer_lp = ModelTrainer(experiment="posms_letterpack_plus")
run_id_lp = trainer_lp.train(X_lp, y_lp, tags={"mail_kind": "letterpack_plus"})
print("run_id_lp =", run_id_lp)

df_raw_lp = forecast_28_raw("letterpack_plus", run_id_lp, date(2025,1,1), office_id=1)
df_lp_28 = simple_postprocess_piece(df_raw_lp)
df_lp_28.head(10)


2025/12/04 04:05:17 INFO mlflow.tracking.fluent: Experiment with name 'posms_letterpack_plus' does not exist. Creating a new experiment.


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

run_id_lp = 2f16fc340d714a7f8777e7ba741a5605


Unnamed: 0,date,raw_pred,deliver_pred
0,2025-01-01,64.664726,65
1,2025-01-02,131.997147,132
2,2025-01-03,120.951752,121
3,2025-01-04,21.775505,22
4,2025-01-05,27.042637,27
5,2025-01-06,69.335373,69
6,2025-01-07,144.504883,145
7,2025-01-08,147.890579,148
8,2025-01-09,199.797516,200
9,2025-01-10,248.560242,249


In [21]:
# raw_pred 同士を日付ごとに足す（欠けている日があれば fill_value=0 で埋める）
s_combined = df_raw_kaki["raw_pred"].add(df_raw_lp["raw_pred"], fill_value=0.0)

# DataFrame にしておく（あとで扱いやすくするため）
df_raw_combined = s_combined.to_frame(name="raw_pred")
df_raw_combined.head()


Unnamed: 0_level_0,raw_pred
date,Unnamed: 1_level_1
2025-01-01,632.972649
2025-01-02,392.968002
2025-01-03,447.726135
2025-01-04,187.218483
2025-01-05,281.697712


In [29]:
def forecast_range_raw(mail_kind: str, run_id: str, start: date, end: date, office_id: int = 1) -> pd.DataFrame:
    fb = FeatureBuilder(office_id=office_id, mail_kind=mail_kind)

    history = fb._load_mail().copy()
    results = []
    current = start

    while current <= end:
        pred = fb.predict(
            target_date=current,
            run_id=run_id,
            model_name=f"posms_{mail_kind}",
            stage=None,
        )
        results.append((current, float(pred)))

        new_row = {
            "date": pd.Timestamp(current),
            "office_id": office_id,
            "actual_volume": float(pred),
            "price_increase_flag": 0,
        }
        history = pd.concat([history, pd.DataFrame([new_row])], ignore_index=True)
        history = history.sort_values("date").reset_index(drop=True)

        fb._load_mail = (lambda hist=history: hist)
        current += timedelta(days=1)

    return pd.DataFrame(results, columns=["date", "raw_pred"]).set_index("date")


In [24]:
import jpholiday

def postprocess_normal(df_raw: pd.DataFrame) -> pd.DataFrame:
    # ModelPredictor の既存ロジックを使う
    return ModelPredictor.apply_delivery_rules(
        df_raw,
        value_col="raw_pred",
        round_to_thousand=True,
        extend_to_next_delivery=True,
    )


In [25]:
import numpy as np

def postprocess_daily_piece(df_raw: pd.DataFrame) -> pd.DataFrame:
    rows = []
    for dt, v in df_raw["raw_pred"].items():
        v_clip = max(0.0, float(v))
        delivered = int(np.round(v_clip))   # 1通単位
        rows.append((pd.to_datetime(dt).date(), float(v), None, delivered, True))
    return pd.DataFrame(rows, columns=["date","raw_pred","carry_in","deliver_pred","is_delivery_day"])


In [26]:
def is_weekend_or_holiday(dt):
    d = dt.date() if hasattr(dt, "date") else dt
    return (d.weekday() >= 5) or jpholiday.is_holiday(d)

def postprocess_weekday_piece(df_raw: pd.DataFrame) -> pd.DataFrame:
    s = df_raw["raw_pred"].astype(float)
    s.index = pd.to_datetime(s.index)
    carry = 0.0

    rows = []
    for dt, v in s.items():
        d = dt.date()
        v_clip = max(0.0, float(v))
        if not is_weekend_or_holiday(dt):
            delivered = v_clip + carry
            d_int = int(np.round(delivered))     # 1通単位
            rows.append((d, float(v), carry if carry > 0 else None, d_int, True))
            carry = 0.0
        else:
            carry += v_clip
            rows.append((d, float(v), None, 0, False))

    if carry > 0:
        dt = s.index[-1] + pd.Timedelta(days=1)
        while is_weekend_or_holiday(dt):
            dt += pd.Timedelta(days=1)
        d_int = int(np.round(carry))
        rows.append((dt.date(), 0.0, carry, d_int, True))

    return pd.DataFrame(rows, columns=["date","raw_pred","carry_in","deliver_pred","is_delivery_day"])


In [27]:
def forecast_1year(mail_kind: str, run_id: str, year: int, office_id: int = 1):
    start = date(year, 1, 1)
    end   = date(year, 12, 31)

    df_raw = forecast_range_raw(mail_kind, run_id, start, end, office_id)

    # mail_kind のルールに応じて丸め関数を適用
    if mail_kind == "normal":
        return postprocess_normal(df_raw)
    
    elif mail_kind in ("kakitome", "letterpack_plus", "letterpack_light",
                       "yu_packet", "yu_pack", "ems"):
        return postprocess_daily_piece(df_raw)
    
    elif mail_kind in ("tokutei_kiroku", "e_packet"):
        return postprocess_weekday_piece(df_raw)

    else:
        raise ValueError(f"Unsupported mail_kind: {mail_kind}")


In [31]:
import jpholiday
import numpy as np
import pandas as pd

def is_weekday_and_not_holiday(dt) -> bool:
    d = dt.date() if hasattr(dt, "date") else dt
    return (d.weekday() < 5) and (not jpholiday.is_holiday(d))

def round_to_thousand_half_up(x: float) -> int:
    if x <= 0:
        return 0
    return int(np.round(x / 1000.0) * 1000)

def postprocess_normal(df_raw: pd.DataFrame) -> pd.DataFrame:
    """
    通常郵便用ポストプロセス:
    - 平日かつ祝日でない日だけ配達日
    - 土日祝の分は次の配達日に繰り越し
    - 千通単位で四捨五入
    """
    s = df_raw["raw_pred"].astype(float)
    s.index = pd.to_datetime(s.index)

    carry = 0.0
    rows = []

    for dt, v in s.items():
        d = dt.date()
        v_clip = float(v)

        if is_weekday_and_not_holiday(dt):
            delivered = v_clip + carry
            deliver_int = round_to_thousand_half_up(delivered)
            rows.append(
                (d, v_clip, carry if carry > 0 else None, deliver_int, True)
            )
            carry = 0.0
        else:
            carry += v_clip
            rows.append((d, v_clip, None, 0, False))

    # 期間末尾に carry が残っていたら、次の配達日に載せる（見る期間外だけど一応）
    if carry > 0:
        dt = s.index[-1] + pd.Timedelta(days=1)
        while not is_weekday_and_not_holiday(dt):
            dt += pd.Timedelta(days=1)
        d = dt.date()
        deliver_int = round_to_thousand_half_up(carry)
        rows.append((d, 0.0, carry, deliver_int, True))

    return pd.DataFrame(
        rows,
        columns=["date", "raw_pred", "carry_in", "deliver_pred", "is_delivery_day"],
    )


In [32]:
df_norm_2025 = forecast_1year("normal", run_id_normal, 2025)
df_norm_2025.head(), df_norm_2025.tail()


(         date      raw_pred      carry_in  deliver_pred  is_delivery_day
 0  2025-01-01  26478.578125           NaN             0            False
 1  2025-01-02  17067.269531  26478.578125         44000             True
 2  2025-01-03  25568.017578           NaN         26000             True
 3  2025-01-04  -2783.662354           NaN             0            False
 4  2025-01-05  -6531.549316           NaN             0            False,
            date      raw_pred  carry_in  deliver_pred  is_delivery_day
 360  2025-12-27   -528.509033       NaN             0            False
 361  2025-12-28     96.751396       NaN             0            False
 362  2025-12-29  85231.648438       NaN         85000             True
 363  2025-12-30  44169.488281       NaN         44000             True
 364  2025-12-31  47064.453125       NaN         47000             True)

In [33]:
pd.set_option("display.max_rows", None)   # 全行表示
pd.set_option("display.max_columns", None)


In [34]:
df_norm_2025 = forecast_1year("normal", run_id_normal, 2025)
pd.set_option("display.max_rows", None)
df_norm_2025


Unnamed: 0,date,raw_pred,carry_in,deliver_pred,is_delivery_day
0,2025-01-01,26478.578125,,0,False
1,2025-01-02,17067.269531,26478.578125,44000,True
2,2025-01-03,25568.017578,,26000,True
3,2025-01-04,-2783.662354,,0,False
4,2025-01-05,-6531.549316,,0,False
5,2025-01-06,67666.9375,,58000,True
6,2025-01-07,27365.853516,,27000,True
7,2025-01-08,36214.035156,,36000,True
8,2025-01-09,43193.046875,,43000,True
9,2025-01-10,49881.230469,,50000,True


In [35]:
POSTPROCESS_MAP = {
    "normal": postprocess_normal,
    "kakitome": postprocess_daily_piece,
    "letterpack_plus": postprocess_daily_piece,
    "letterpack_light": postprocess_daily_piece,
    "yu_packet": postprocess_daily_piece,
    "yu_pack": postprocess_daily_piece,
    "ems": postprocess_daily_piece,
    "tokutei_kiroku": postprocess_weekday_piece,
    "e_packet": postprocess_weekday_piece,
}


In [36]:
def forecast_all_mail_kinds_1year(run_id_map: dict, year: int, office_id=1):
    results = {}
    for mail_kind, run_id in run_id_map.items():
        print(f"予測中: {mail_kind} ...")

        df_raw = forecast_range_raw(
            mail_kind=mail_kind,
            run_id=run_id,
            start=date(year,1,1),
            end=date(year,12,31),
            office_id=office_id,
        )

        post = POSTPROCESS_MAP[mail_kind]
        df_fixed = post(df_raw)

        results[mail_kind] = df_fixed
    
    return results


In [53]:
# レターパックライト（letterpack_light）の学習
fb_lpl = FeatureBuilder(office_id=1, mail_kind="letterpack_light")
X_lpl, y_lpl = fb_lpl.build()

trainer_lpl = ModelTrainer(experiment="posms_letterpack_light")
run_id_lplight = trainer_lpl.train(X_lpl, y_lpl, tags={"mail_kind": "letterpack_light"})

print("run_id_lplight =", run_id_lplight)




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

run_id_lplight = 8f895336456047939a884349c6acd867


In [48]:
fb_yu_packet = FeatureBuilder(office_id=1, mail_kind="yu_packet")
X_yu_packet, y_yu_packet = fb_yu_packet.build()

trainer_yu_packet = ModelTrainer(experiment="posms_yu_packet")
run_id_yu_packet = trainer_yu_packet.train(X_lpl, y_lpl, tags={"mail_kind": "yu_packet"})

print("yu_packet =", run_id_yu_packet)



Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

yu_packet = c2e372d3ef214375bcf7403201db5063


In [49]:
fb_yu_pack = FeatureBuilder(office_id=1, mail_kind="yu_pack")
X_yu_pack, y_yu_pack = fb_yu_pack.build()

trainer_yu_pack = ModelTrainer(experiment="posms_yu_pack")
run_id_yu_pack = trainer_yu_pack.train(X_yu_pack, y_yu_pack, tags={"mail_kind": "yu_pack"})

print("yu_pack =", run_id_yu_pack)



Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

yu_pack = 7030cd1fcc654f2e99d5e01fce22701d


In [50]:
fb_ems = FeatureBuilder(office_id=1, mail_kind="ems")
X_ems, y_ems = fb_ems.build()

trainer_ems = ModelTrainer(experiment="posms_ems")
run_id_ems = trainer_ems.train(X_ems, y_ems, tags={"mail_kind": "ems"})

print("ems =", run_id_ems)



Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

ems = a78909e287d5407ca6321df27104197a


In [51]:
fb_tokutei_kiroku = FeatureBuilder(office_id=1, mail_kind="tokutei_kiroku")
X_tokutei_kiroku, y_tokutei_kiroku = fb_tokutei_kiroku.build()

trainer_tokutei_kiroku = ModelTrainer(experiment="posms_tokutei_kiroku")
run_id_tokutei_kiroku = trainer_tokutei_kiroku.train(X_tokutei_kiroku, y_tokutei_kiroku, tags={"mail_kind": "tokutei_kiroku"})

print("tokutei_kiroku =", run_id_tokutei_kiroku)



Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

tokutei_kiroku = 73ceead5cb624db78d838b3fa226e381


In [52]:
fb_e_packet = FeatureBuilder(office_id=1, mail_kind="e_packet")
X_e_packet, y_e_packet = fb_e_packet.build()

trainer_e_packet = ModelTrainer(experiment="posms_e_packet")
run_id_e_packet = trainer_e_packet.train(X_e_packet, y_e_packet, tags={"mail_kind": "e_packet"})

print("e_packet =", run_id_e_packet)



Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

e_packet = df00577bac2a4242a4f0232eb23be90f


In [56]:
run_ids = {
    "normal": run_id_normal,
    "kakitome": run_id_kakitome,
    "letterpack_plus": run_id_lp,
    "letterpack_light": run_id_lplight,
    "yu_packet": run_id_yu_packet,
    "yu_pack": run_id_yu_pack,
    "ems": run_id_ems,
    "tokutei_kiroku": run_id_tokutei_kiroku,
    "e_packet": run_id_e_packet,
}

all_2025 = forecast_all_mail_kinds_1year(run_ids, 2025)

予測中: normal ...
予測中: kakitome ...
予測中: letterpack_plus ...
予測中: letterpack_light ...
予測中: yu_packet ...
予測中: yu_pack ...
予測中: ems ...
予測中: tokutei_kiroku ...
予測中: e_packet ...


In [57]:
pd.set_option("display.max_rows", None)
all_2025["kakitome"]

Unnamed: 0,date,raw_pred,carry_in,deliver_pred,is_delivery_day
0,2025-01-01,568.307922,,568,True
1,2025-01-02,260.970856,,261,True
2,2025-01-03,326.774384,,327,True
3,2025-01-04,165.442978,,165,True
4,2025-01-05,254.655075,,255,True
5,2025-01-06,246.787918,,247,True
6,2025-01-07,1146.353149,,1146,True
7,2025-01-08,1309.540527,,1310,True
8,2025-01-09,1534.743408,,1535,True
9,2025-01-10,1261.5979,,1262,True


In [58]:
pd.set_option("display.max_rows", None)
all_2025["yu_packet"]

Unnamed: 0,date,raw_pred,carry_in,deliver_pred,is_delivery_day
0,2025-01-01,95.461128,,95,True
1,2025-01-02,161.062943,,161,True
2,2025-01-03,153.478394,,153,True
3,2025-01-04,-6.220548,,0,True
4,2025-01-05,5.119742,,5,True
5,2025-01-06,179.706284,,180,True
6,2025-01-07,146.129105,,146,True
7,2025-01-08,169.358078,,169,True
8,2025-01-09,205.133499,,205,True
9,2025-01-10,171.599564,,172,True


In [59]:
pd.set_option("display.max_rows", None)
all_2025["letterpack_light"]

Unnamed: 0,date,raw_pred,carry_in,deliver_pred,is_delivery_day
0,2025-01-01,164.792282,,165,True
1,2025-01-02,289.393982,,289,True
2,2025-01-03,280.583618,,281,True
3,2025-01-04,67.066124,,67,True
4,2025-01-05,84.51149,,85,True
5,2025-01-06,143.089706,,143,True
6,2025-01-07,328.754486,,329,True
7,2025-01-08,359.589539,,360,True
8,2025-01-09,442.419128,,442,True
9,2025-01-10,518.772217,,519,True


In [60]:
pd.set_option("display.max_rows", None)
all_2025["letterpack_plus"]

Unnamed: 0,date,raw_pred,carry_in,deliver_pred,is_delivery_day
0,2025-01-01,64.664726,,65,True
1,2025-01-02,131.997147,,132,True
2,2025-01-03,120.951752,,121,True
3,2025-01-04,21.775505,,22,True
4,2025-01-05,27.042637,,27,True
5,2025-01-06,69.335373,,69,True
6,2025-01-07,144.504883,,145,True
7,2025-01-08,147.890579,,148,True
8,2025-01-09,199.797516,,200,True
9,2025-01-10,248.560242,,249,True


In [61]:
pd.set_option("display.max_rows", None)
all_2025["tokutei_kiroku"]

Unnamed: 0,date,raw_pred,carry_in,deliver_pred,is_delivery_day
0,2025-01-01,57.102745,,0,False
1,2025-01-02,107.540421,57.102745,165,True
2,2025-01-03,114.81012,,115,True
3,2025-01-04,8.346893,,0,False
4,2025-01-05,12.378715,,0,False
5,2025-01-06,248.028488,20.725608,269,True
6,2025-01-07,392.115021,,392,True
7,2025-01-08,319.325378,,319,True
8,2025-01-09,486.945374,,487,True
9,2025-01-10,558.212036,,558,True


In [62]:
pd.set_option("display.max_rows", None)
all_2025["e_packet"]

Unnamed: 0,date,raw_pred,carry_in,deliver_pred,is_delivery_day
0,2025-01-01,103.979355,,0,False
1,2025-01-02,110.801353,103.979355,215,True
2,2025-01-03,170.386276,,170,True
3,2025-01-04,-1.238945,,0,False
4,2025-01-05,-4.17001,,0,False
5,2025-01-06,445.944305,,446,True
6,2025-01-07,515.682495,,516,True
7,2025-01-08,162.434433,,162,True
8,2025-01-09,191.789352,,192,True
9,2025-01-10,160.406494,,160,True


In [63]:
pd.set_option("display.max_rows", None)
all_2025["yu_pack"]

Unnamed: 0,date,raw_pred,carry_in,deliver_pred,is_delivery_day
0,2025-01-01,938.823486,,939,True
1,2025-01-02,1156.022705,,1156,True
2,2025-01-03,972.759094,,973,True
3,2025-01-04,848.638062,,849,True
4,2025-01-05,839.551086,,840,True
5,2025-01-06,1119.959717,,1120,True
6,2025-01-07,1049.660522,,1050,True
7,2025-01-08,1261.243774,,1261,True
8,2025-01-09,1275.302246,,1275,True
9,2025-01-10,1937.364746,,1937,True


In [64]:
pd.set_option("display.max_rows", None)
all_2025["ems"]

Unnamed: 0,date,raw_pred,carry_in,deliver_pred,is_delivery_day
0,2025-01-01,79.05899,,79,True
1,2025-01-02,59.225056,,59,True
2,2025-01-03,78.427444,,78,True
3,2025-01-04,45.949993,,46,True
4,2025-01-05,76.833641,,77,True
5,2025-01-06,111.808868,,112,True
6,2025-01-07,97.431252,,97,True
7,2025-01-08,115.311867,,115,True
8,2025-01-09,120.562943,,121,True
9,2025-01-10,166.034271,,166,True


In [65]:
from posms.features.builder import FeatureBuilder

# 書留の履歴
fb_kaki = FeatureBuilder(office_id=1, mail_kind="kakitome")
hist_kaki = fb_kaki._load_mail().copy()   # date, office_id, actual_volume, price_increase_flag

# レターパックプラスの履歴
fb_lp = FeatureBuilder(office_id=1, mail_kind="letterpack_plus")
hist_lp = fb_lp._load_mail().copy()


In [66]:
import pandas as pd

df_kaki = hist_kaki[["date", "office_id", "actual_volume"]].rename(
    columns={"actual_volume": "kaki"}
)
df_lp = hist_lp[["date", "office_id", "actual_volume"]].rename(
    columns={"actual_volume": "lp"}
)

df_merge = (
    pd.merge(df_kaki, df_lp[["date", "lp"]], on="date", how="outer")
    .sort_values("date")
    .reset_index(drop=True)
)

# 欠損は 0 扱いで合計
df_merge["kaki"] = df_merge["kaki"].fillna(0.0)
df_merge["lp"]   = df_merge["lp"].fillna(0.0)

df_merge["actual_volume"] = df_merge["kaki"] + df_merge["lp"]

# office_id が欠けているところは 1 で埋める（前提：同じ局）
df_merge["office_id"] = df_merge["office_id"].fillna(df_merge["office_id"].mode()[0])

# price_increase_flag は当面 0 にしておく
df_total = df_merge[["date", "office_id", "actual_volume"]].copy()
df_total["price_increase_flag"] = 0

df_total.head()


Unnamed: 0,date,office_id,actual_volume,price_increase_flag
0,2021-10-01,1,2047,0
1,2021-10-02,1,2095,0
2,2021-10-03,1,1119,0
3,2021-10-04,1,698,0
4,2021-10-05,1,911,0


In [67]:
from posms.models.trainer import ModelTrainer

# 合成用 FeatureBuilder（mail_kind は仮に "kaki_lp_total" というタグ用）
fb_total = FeatureBuilder(office_id=1, mail_kind="kaki_lp_total")

# _load_mail を差し替え → 以降の build() / predict() は df_total を履歴として使う
df_total_sorted = df_total.sort_values("date").reset_index(drop=True)
fb_total._load_mail = lambda df=df_total_sorted: df

# 特徴量を作って学習
X_tot, y_tot = fb_total.build()

trainer_tot = ModelTrainer(experiment="posms_kaki_lp_total")
run_id_kaki_lp_total = trainer_tot.train(X_tot, y_tot, tags={"mail_kind": "kaki_lp_total"})

print("run_id_kaki_lp_total =", run_id_kaki_lp_total)


2025/12/06 23:24:25 INFO mlflow.tracking.fluent: Experiment with name 'posms_kaki_lp_total' does not exist. Creating a new experiment.


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

run_id_kaki_lp_total = 3b2ba46977f8433abf2d9f5a4a014244


In [68]:
from datetime import date, timedelta

def forecast_range_raw_total(
    fb: FeatureBuilder,
    df_history: pd.DataFrame,
    run_id: str,
    start: date,
    end: date,
) -> pd.DataFrame:
    """
    合成系列用：FeatureBuilder の _load_mail が df_history を返す前提で、
    start〜end を再帰予測する。
    """
    # 履歴をコピーして、予測を足しながら進める
    history = df_history.copy()
    results = []
    current = start

    while current <= end:
        # 予測
        pred = fb.predict(
            target_date=current,
            run_id=run_id,
            model_name="posms_kaki_lp_total",
            stage=None,
        )
        results.append((current, float(pred)))

        # 履歴に追加
        new_row = {
            "date": pd.Timestamp(current),
            "office_id": history["office_id"].iloc[0],
            "actual_volume": float(pred),
            "price_increase_flag": 0,
        }
        history = pd.concat([history, pd.DataFrame([new_row])], ignore_index=True)
        history = history.sort_values("date").reset_index(drop=True)

        # 次ループ用に _load_mail を更新
        fb._load_mail = lambda hist=history: hist

        current += timedelta(days=1)

    return pd.DataFrame(results, columns=["date", "raw_pred"]).set_index("date")


In [70]:
START = date(2025, 1, 1)
END   = date(2025, 12, 31)

df_raw_total_2025 = forecast_range_raw_total(
    fb=fb_total,
    df_history=df_total_sorted,
    run_id=run_id_kaki_lp_total,
    start=START,
    end=END,
)

# 1通単位に丸める（毎日配達・繰越なし）
import numpy as np

rows = []
for dt, v in df_raw_total_2025["raw_pred"].items():
    v_clip = max(0.0, float(v))
    delivered = int(np.round(v_clip))
    rows.append((pd.to_datetime(dt).date(), float(v), delivered))

df_total_2025 = pd.DataFrame(rows, columns=["date","raw_pred","deliver_pred"])
pd.set_option("display.max_rows", None)
df_total_2025

Unnamed: 0,date,raw_pred,deliver_pred
0,2025-01-01,345.565704,346
1,2025-01-02,450.180969,450
2,2025-01-03,490.75061,491
3,2025-01-04,515.254028,515
4,2025-01-05,275.673553,276
5,2025-01-06,345.677155,346
6,2025-01-07,1005.627502,1006
7,2025-01-08,1293.189819,1293
8,2025-01-09,1535.774902,1536
9,2025-01-10,1623.808594,1624
