In [1]:
# === セル1: ライブラリ読み込み ===
import pandas as pd
import numpy as np

from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

import matplotlib.pyplot as plt


In [31]:
# === セル2: データ読み込み ===
DATA_PATH = "/Users/hiraokatatsuru/Library/Mobile Documents/com~apple~CloudDocs/postal-operation-shift-management-system/db/init/csv/postal_datas.csv"

df = pd.read_csv(DATA_PATH, parse_dates=["日付"])

# 日付でソート
df = df.sort_values("日付").reset_index(drop=True)

print(df.head())
print(df.columns)
print("行数:", len(df))


          日付      通常郵便      書留  ゆうパケット  レターパックライト  レターパックプラス   特定記録   ゆうパック  \
0 2021-10-01   63000.0  1783.0   882.0      615.0      264.0  588.0  1126.0   
1 2021-10-02       0.0  1951.0   673.0      337.0      144.0    0.0  1534.0   
2 2021-10-03       0.0  1054.0   898.0      151.0       65.0    0.0  1177.0   
3 2021-10-04  102000.0   540.0   688.0      369.0      158.0  557.0  1595.0   
4 2021-10-05   45000.0   721.0   877.0      443.0      190.0  451.0  1083.0   

   eパケット    EMS  年賀組立  年賀配達  
0  200.0  150.0   0.0   0.0  
1    0.0  158.0   0.0   0.0  
2    0.0  142.0   0.0   0.0  
3  400.0  274.0   0.0   0.0  
4  150.0  179.0   0.0   0.0  
Index(['日付', '通常郵便', '書留', 'ゆうパケット', 'レターパックライト', 'レターパックプラス', '特定記録', 'ゆうパック',
       'eパケット', 'EMS', '年賀組立', '年賀配達'],
      dtype='object')
行数: 1211


In [15]:
TARGET_NENGA_PREP = "年賀組立"
TARGET_NENGA_DELIVERY = "年賀配達"

In [37]:
import pandas as pd
import numpy as np

# 前提: df は以下のカラムを持つ DataFrame
# ['日付', '通常郵便', '書留', 'ゆうパケット', 'レターパックライト',
#  'レターパックプラス', '特定記録', 'ゆうパック', 'eパケット', 'EMS',
#  '年賀組立', '年賀配達']

# ========== 0. 前処理（日付の型とソート） ==========
df = df.copy()
df["date"] = pd.to_datetime(df["日付"])
df = df.sort_values("date").reset_index(drop=True)

# ========== 1. カレンダー系特徴量 ==========
df["year"]      = df["date"].dt.year
df["month"]     = df["date"].dt.month
df["day"]       = df["date"].dt.day
df["weekday"]   = df["date"].dt.weekday      # 0=月, 6=日
df["dayofyear"] = df["date"].dt.dayofyear

# とりあえず「土日=1、それ以外=0」の簡易祝日フラグ（あとで jpholiday に差し替え可）
df["is_weekend"] = df["weekday"].isin([5, 6]).astype(int)
df["holiday"]    = df["is_weekend"]

# ========== 2. 年賀フラグ（期間フラグ） ==========
# 年賀組立: 12/26〜1/15 を 1
def is_nenga_prep(d: pd.Timestamp) -> int:
    if (d.month == 12 and d.day >= 26) or (d.month == 1 and d.day <= 15):
        return 1
    return 0

# 年賀配達: 1/1〜1/15 を 1
def is_nenga_delivery(d: pd.Timestamp) -> int:
    if d.month == 1 and d.day <= 15:
        return 1
    return 0

df["is_nenga_prep"]     = df["date"].map(is_nenga_prep)
df["is_nenga_delivery"] = df["date"].map(is_nenga_delivery)

# ========== 3. 年賀用オフセット（日付位置特徴量） ==========
# ▼ 組立用: 12/26→0, 12/27→1, ..., 12/31→5, 1/1→6, ..., 1/15→20, それ以外→-1
def nenga_prep_offset(d: pd.Timestamp) -> int:
    if d.month == 12 and d.day >= 26:
        return d.day - 26
    if d.month == 1 and d.day <= 15:
        return (31 - 26 + 1) + d.day  # 6 + day
    return -1

df["nenga_prep_offset"] = df["date"].map(nenga_prep_offset)

# ▼ 配達用: 1/1→0, 1/2→1, ..., 1/15→14, 期間外→-1
def nenga_delivery_offset(d: pd.Timestamp) -> int:
    if d.month == 1 and d.day <= 15:
        return d.day - 1
    return -1

df["nenga_delivery_offset"] = df["date"].map(nenga_delivery_offset)

# ========== 4. 年賀組立・年賀配達のラグ・移動平均 ==========
targets = ["年賀組立", "年賀配達"]

for col in targets:
    # ラグ
    df[f"{col}_lag_1"]   = df[col].shift(1)
    df[f"{col}_lag_7"]   = df[col].shift(7)
    df[f"{col}_lag_365"] = df[col].shift(365)  # 2021〜25 が連続なら有効

    # 移動平均（急激な変動を少しならす）
    df[f"{col}_rm3"] = df[col].rolling(3, min_periods=1).mean()
    df[f"{col}_rm7"] = df[col].rolling(7, min_periods=1).mean()

    # 累積和（参考用）
    df[f"{col}_cumsum"] = df[col].cumsum()

# ========== 5. モデルで使う特徴量リスト ==========
# 年賀組立: 26日ピーク→その後減少
FEATURES_NENGA_PREP = [
    "weekday",
    "holiday",
    "year",
    "is_nenga_prep",
    "nenga_prep_offset",   # ← 山の形
    "年賀組立_lag_1",
    "年賀組立_lag_7",
    "年賀組立_lag_365",
    "年賀組立_rm3",
    "年賀組立_rm7",
]

# 年賀配達: 元日が頂点、その後 15日まで階段状に減少
FEATURES_NENGA_DELIVERY = [
    "weekday",
    "holiday",
    "year",
    "is_nenga_delivery",
    "nenga_delivery_offset",   # ← ここが肝
    "年賀配達_lag_365",       # 前年元日など
    "年賀配達_rm3",
    "年賀配達_rm7",
]

print("年賀組立 用特徴量:", FEATURES_NENGA_PREP)
print("年賀配達 用特徴量:", FEATURES_NENGA_DELIVERY)


年賀組立 用特徴量: ['weekday', 'holiday', 'year', 'is_nenga_prep', 'nenga_prep_offset', '年賀組立_lag_1', '年賀組立_lag_7', '年賀組立_lag_365', '年賀組立_rm3', '年賀組立_rm7']
年賀配達 用特徴量: ['weekday', 'holiday', 'year', 'is_nenga_delivery', 'nenga_delivery_offset', '年賀配達_lag_365', '年賀配達_rm3', '年賀配達_rm7']


In [33]:
X_prep = df[FEATURES_NENGA_PREP]
y_prep = df["年賀組立"]

X_deliv = df[FEATURES_NENGA_DELIVERY]
y_deliv = df["年賀配達"]


In [38]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import xgboost as xgb

# ================================
# 年賀組立モデル
# ================================

TARGET_NENGA_PREP = "年賀組立"

# 特徴量 + 目的変数をまとめて取り出して、
# NaN を含む行をまとめて落とす
cols_prep = FEATURES_NENGA_PREP + [TARGET_NENGA_PREP]
df_prep = df[cols_prep].dropna().copy()

X_prep = df_prep[FEATURES_NENGA_PREP]
y_prep = df_prep[TARGET_NENGA_PREP]

X_train_prep, X_valid_prep, y_train_prep, y_valid_prep = train_test_split(
    X_prep, y_prep, test_size=0.2, shuffle=False
)

model_prep = xgb.XGBRegressor(
    n_estimators=400,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
)

model_prep.fit(X_train_prep, y_train_prep)

pred_prep = model_prep.predict(X_valid_prep)
mae_prep = mean_absolute_error(y_valid_prep, pred_prep)

print("年賀組立 MAE:", mae_prep)


# ================================
# 年賀配達モデル
# ================================

TARGET_NENGA_DELIVERY = "年賀配達"

cols_deliv = FEATURES_NENGA_DELIVERY + [TARGET_NENGA_DELIVERY]
df_deliv = df[cols_deliv].dropna().copy()

X_deliv = df_deliv[FEATURES_NENGA_DELIVERY]
y_deliv = df_deliv[TARGET_NENGA_DELIVERY]

X_train_deliv, X_valid_deliv, y_train_deliv, y_valid_deliv = train_test_split(
    X_deliv, y_deliv, test_size=0.2, shuffle=False
)

model_deliv = xgb.XGBRegressor(
    n_estimators=400,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
)

model_deliv.fit(X_train_deliv, y_train_deliv)

pred_deliv = model_deliv.predict(X_valid_deliv)
mae_deliv = mean_absolute_error(y_valid_deliv, pred_deliv)

print("年賀配達 MAE:", mae_deliv)


年賀組立 MAE: 2638.10525155583
年賀配達 MAE: 4298.307237217092


In [39]:
# --- 年賀配達: log1p + weight ---
TARGET_NENGA_DELIVERY = "年賀配達"

df_deliv = df[FEATURES_NENGA_DELIVERY + [TARGET_NENGA_DELIVERY]].dropna().copy()

X_deliv = df_deliv[FEATURES_NENGA_DELIVERY]
y_deliv = np.log1p(df_deliv[TARGET_NENGA_DELIVERY])

X_train_deliv, X_valid_deliv, y_train_deliv, y_valid_deliv = train_test_split(
    X_deliv, y_deliv, test_size=0.2, shuffle=False
)

# 元日だけ重み10、その他1
sample_weight = np.where(
    X_train_deliv["nenga_delivery_offset"] == 0,
    10,
    1
)

model_deliv = xgb.XGBRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
)

model_deliv.fit(X_train_deliv, y_train_deliv, sample_weight=sample_weight)

pred_log = model_deliv.predict(X_valid_deliv)
pred_deliv = np.expm1(pred_log)  # log1p の逆変換

mae_log = mean_absolute_error(df_deliv[TARGET_NENGA_DELIVERY].iloc[len(X_train_deliv):], pred_deliv)
print("年賀配達 MAE (log1p改善):", mae_log)


年賀配達 MAE (log1p改善): 3201.321900574735


In [40]:
import numpy as np
import pandas as pd

df = df.copy()
df["date"] = pd.to_datetime(df["date"] if "date" in df.columns else df["日付"])

# 元日 / 1/2〜1/15 フラグ
df["is_newyear_day"] = ((df["date"].dt.month == 1) & (df["date"].dt.day == 1)).astype(int)
df["is_after_newyear"] = ((df["date"].dt.month == 1) &
                          (df["date"].dt.day >= 2) &
                          (df["date"].dt.day <= 15)).astype(int)

# 1/2〜1/15 用のオフセット（1/2→0, 1/3→1, ... 1/15→13）
def after_newyear_offset(d: pd.Timestamp) -> int:
    if d.month == 1 and 2 <= d.day <= 15:
        return d.day - 2  # 1/2→0
    return -1

df["after_newyear_offset"] = df["date"].map(after_newyear_offset)


In [41]:
# 元日専用モデル用特徴量
FEATURES_DELIVERY_NY = [
    "year",
    "weekday",
    "holiday",
    "年賀配達_lag_365",   # 前年元日の値（あれば）
]

# 1/2〜1/15 専用モデル用特徴量
FEATURES_DELIVERY_AFTER = [
    "year",
    "weekday",
    "holiday",
    "after_newyear_offset",  # 2日以降の位置（0〜13）
    "年賀配達_lag_365",
    "年賀配達_rm3",
    "年賀配達_rm7",
]


In [42]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import xgboost as xgb

TARGET = "年賀配達"

# 元日の行だけ
df_ny = df[df["is_newyear_day"] == 1][FEATURES_DELIVERY_NY + [TARGET]].dropna().copy()

X_ny = df_ny[FEATURES_DELIVERY_NY]
y_ny = df_ny[TARGET]

# データが少ないので test_size は少なめ（0.2〜0.25）で
X_train_ny, X_valid_ny, y_train_ny, y_valid_ny = train_test_split(
    X_ny, y_ny, test_size=0.2, shuffle=False
)

model_ny = xgb.XGBRegressor(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=3,          # データ少ないので浅め
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
)

model_ny.fit(X_train_ny, y_train_ny)

pred_ny = model_ny.predict(X_valid_ny)
mae_ny = mean_absolute_error(y_valid_ny, pred_ny)
print("元日専用モデル MAE:", mae_ny)


元日専用モデル MAE: 163734.5


In [43]:
# 1/2〜1/15 の行だけ
df_after = df[df["is_after_newyear"] == 1][FEATURES_DELIVERY_AFTER + [TARGET]].dropna().copy()

X_after = df_after[FEATURES_DELIVERY_AFTER]
y_after = np.log1p(df_after[TARGET])  # ← log1p でスケール圧縮

X_train_after, X_valid_after, y_train_after, y_valid_after = train_test_split(
    X_after, y_after, test_size=0.2, shuffle=False
)

model_after = xgb.XGBRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
)

model_after.fit(X_train_after, y_train_after)

pred_after_log = model_after.predict(X_valid_after)
pred_after = np.expm1(pred_after_log)  # 逆変換

mae_after = mean_absolute_error(np.expm1(y_valid_after), pred_after)
print("1/2〜1/15 モデル MAE:", mae_after)


1/2〜1/15 モデル MAE: 2396.730083398518


In [44]:
# 評価対象: 年賀配達期間（1/1〜1/15）のみ
mask_eval = (df["date"].dt.month == 1) & (df["date"].dt.day <= 15)
df_eval = df[mask_eval].copy()

# ===== 1) 元日の予測 =====
idx_ny_eval = df_eval["is_newyear_day"] == 1
df_eval_ny = df_eval[idx_ny_eval].dropna(subset=FEATURES_DELIVERY_NY)

pred_eval_ny = model_ny.predict(df_eval_ny[FEATURES_DELIVERY_NY])
s_pred_ny = pd.Series(pred_eval_ny, index=df_eval_ny.index)

# ===== 2) 1/2〜1/15 の予測 =====
idx_after_eval = df_eval["is_after_newyear"] == 1
df_eval_after = df_eval[idx_after_eval].dropna(subset=FEATURES_DELIVERY_AFTER)

pred_eval_after_log = model_after.predict(df_eval_after[FEATURES_DELIVERY_AFTER])
pred_eval_after = np.expm1(pred_eval_after_log)
s_pred_after = pd.Series(pred_eval_after, index=df_eval_after.index)

# ===== 3) 結合してトータル MAE =====
pred_all = pd.concat([s_pred_ny, s_pred_after]).sort_index()
y_true_all = df.loc[pred_all.index, TARGET]

mae_total = mean_absolute_error(y_true_all, pred_all)
print("【元日 + 1/2〜1/15 合算】年賀配達 MAE:", mae_total)


【元日 + 1/2〜1/15 合算】年賀配達 MAE: 4126.721240260639


In [45]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# 元日データだけ抽出
df_ny = df[df["is_newyear_day"] == 1].copy()

# lag_365（前年元日）が欠損する最初の年は除外
df_ny = df_ny.dropna(subset=["年賀配達_lag_365"])

X_ny = df_ny[["year", "年賀配達_lag_365"]]
y_ny = df_ny["年賀配達"]

# 線形回帰
model_ny = LinearRegression()
model_ny.fit(X_ny, y_ny)

# クロスバリデーションではなく、学習データ上で確認（元日データが極端に少ないため）
pred_ny = model_ny.predict(X_ny)
mae_ny = mean_absolute_error(y_ny, pred_ny)

print("【線形回帰】元日モデル MAE:", mae_ny)
print("回帰式: 今年の元日は ≈ a × 昨年の元日 + b × year + c")


【線形回帰】元日モデル MAE: 0.0
回帰式: 今年の元日は ≈ a × 昨年の元日 + b × year + c


In [48]:
from sklearn.linear_model import LinearRegression
import xgboost as xgb
import numpy as np
import pandas as pd

TARGET = "年賀配達"

def train_nenga_delivery_models(df: pd.DataFrame):
    df = df.copy()

    # ---------- 元日モデル（線形回帰） ----------
    mask_ny = df["is_newyear_day"] == 1
    df_ny = df[mask_ny].dropna(subset=["年賀配達_lag_365", TARGET])

    X_ny = df_ny[["year", "年賀配達_lag_365"]]
    y_ny = df_ny[TARGET]

    model_ny = LinearRegression()
    model_ny.fit(X_ny, y_ny)

    # ---------- 1/2〜1/15 モデル（XGBoost + log1p） ----------
    mask_after = df["is_after_newyear"] == 1
    feature_after = [
        "year", "weekday", "holiday",
        "after_newyear_offset",
        "年賀配達_lag_365",
        "年賀配達_rm3",
        "年賀配達_rm7",
    ]

    df_after = df[mask_after].dropna(subset=feature_after + [TARGET])

    X_after = df_after[feature_after]
    y_after = np.log1p(df_after[TARGET])

    model_after = xgb.XGBRegressor(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=6,
        subsample=0.9,
        colsample_bytree=0.9,
        random_state=42,
    )
    model_after.fit(X_after, y_after)

    return model_ny, model_after


In [49]:
from sklearn.metrics import mean_absolute_error

def predict_nenga_delivery(df: pd.DataFrame,
                           model_ny,
                           model_after):
    df = df.copy()
    preds = pd.Series(index=df.index, dtype=float)

    # ---------- 元日 ----------
    feature_ny = ["year", "年賀配達_lag_365"]
    mask_ny = (df["is_newyear_day"] == 1) & df["年賀配達_lag_365"].notna()

    if mask_ny.any():
        X_ny = df.loc[mask_ny, feature_ny]
        preds.loc[mask_ny] = model_ny.predict(X_ny)

    # ---------- 1/2〜1/15 ----------
    feature_after = [
        "year", "weekday", "holiday",
        "after_newyear_offset",
        "年賀配達_lag_365",
        "年賀配達_rm3",
        "年賀配達_rm7",
    ]
    mask_after = (
        (df["is_after_newyear"] == 1)
        & df["after_newyear_offset"].notna()
        & df["年賀配達_lag_365"].notna()
    )

    if mask_after.any():
        X_after = df.loc[mask_after, feature_after]
        pred_log = model_after.predict(X_after)
        preds.loc[mask_after] = np.expm1(pred_log)

    return preds


In [50]:
model_ny, model_after = train_nenga_delivery_models(df)

pred_all = predict_nenga_delivery(df, model_ny, model_after)

# 評価対象: 1/1〜1/15 かつ 予測値が存在する行のみ
mask_eval = (
    (df["date"].dt.month == 1)
    & (df["date"].dt.day <= 15)
    & pred_all.notna()
)

mae_total = mean_absolute_error(df.loc[mask_eval, TARGET], pred_all.loc[mask_eval])
print("最終総合モデル 年賀配達 MAE:", mae_total)


最終総合モデル 年賀配達 MAE: 12.774984583270008
