# In [1]:
# --- ライブラリ読み込み ---
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.preprocessing import LabelEncoder

# --- Load data ---
# Each CSV has a "date" column that is parsed to datetime while reading.
train = pd.read_csv(
    "/kaggle/input/store-sales-time-series-forecasting/train.csv",
    parse_dates=["date"],
)
test = pd.read_csv(
    "/kaggle/input/store-sales-time-series-forecasting/test.csv",
    parse_dates=["date"],
)
holiday = pd.read_csv(
    "/kaggle/input/store-sales-time-series-forecasting/holidays_events.csv",
    parse_dates=["date"],
)

# --- Holiday preprocessing ---
# Keep only national events that were not transferred, then collapse them to
# one row per calendar date (the first non-null value of each column wins).
holiday = holiday.loc[
    holiday["transferred"].eq(False) & holiday["locale"].eq("National")
]
holiday_daily = (
    holiday.groupby("date")
    .first()
    .reset_index()
    .assign(is_holiday=1)
)

# --- Calendar features + holiday flag ---
# Both frames are extended in place with the same derived columns.
for df in (train, test):
    for part in ("dayofweek", "month", "year"):
        df[part] = getattr(df["date"].dt, part)
    # 1 when the row's date appears among the retained holiday dates, else 0.
    df["is_holiday"] = df["date"].isin(holiday_daily["date"]).astype(int)

# --- Label encoding ---
# The encoder is fitted on the training families only; test reuses that
# mapping, so a family unseen in train makes `transform` raise.
le = LabelEncoder()
train["family"] = le.fit_transform(train["family"])
test["family"] = le.transform(test["family"])

# --- Lag features (shared by training and inference) ---
# One time series per (store, family) pair.
GROUP_KEYS = ["store_nbr", "family"]
LAG_FEATURES = ["lag_7", "lag_14", "rolling_mean_7", "rolling_std_7"]


def _add_lag_features(frame):
    """Return a sorted copy of *frame* with per-series lag/rolling sales features.

    The frame is ordered by store, family and date, and the following columns
    are added, each computed strictly inside one (store_nbr, family) series:

    * ``lag_7`` / ``lag_14``: sales 7 and 14 rows earlier in the series.
    * ``rolling_mean_7`` / ``rolling_std_7``: mean and standard deviation of
      the 7 rows preceding the current one (the current row is excluded by
      the ``shift(1)``).

    The rolling statistics go through ``transform`` on purpose:
    ``groupby(...).shift(1)`` returns a plain Series, so chaining
    ``.rolling(7)`` directly onto it would let windows at the start of one
    series include sales from the end of the previous series.

    Rows without enough history (or whose window contains NaN sales) get NaN.
    """
    ordered = frame.sort_values(by=GROUP_KEYS + ["date"])
    sales_by_series = ordered.groupby(GROUP_KEYS)["sales"]
    return ordered.assign(
        lag_7=sales_by_series.shift(7),
        lag_14=sales_by_series.shift(14),
        rolling_mean_7=sales_by_series.transform(
            lambda s: s.shift(1).rolling(7).mean()
        ),
        rolling_std_7=sales_by_series.transform(
            lambda s: s.shift(1).rolling(7).std()
        ),
    )


# --- Training frame ---
df_lag = _add_lag_features(train)

# Drop rows with missing values (this removes the warm-up rows at the start of
# each series, where the lag features are not yet defined).
df_lag = df_lag.dropna()

# --- Feature columns and target ---
features = ['store_nbr', 'family', 'onpromotion', 'dayofweek', 'month', 'year',
            'lag_7', 'lag_14', 'rolling_mean_7', 'rolling_std_7', 'is_holiday']
target = 'sales'

X_train = df_lag[features]
y_train = df_lag[target]

# --- Model training (LightGBM) ---
train_set = lgb.Dataset(X_train, label=y_train)
params = {
    'objective': 'regression',
    'metric': 'rmse',
    'verbosity': -1
}
model = lgb.train(params, train_set, num_boost_round=100)

# --- Lag features + holiday flag for the test period ---
# Train and test are stacked so test rows can look back into the training
# history. `is_test` marks the rows to predict, and `ignore_index=True` avoids
# the duplicate index labels a plain concat of two frames would produce.
full_data = pd.concat(
    [train.assign(is_test=False), test.assign(is_test=True)],
    sort=False,
    ignore_index=True,
)

# Unknown future sales stay NaN (they are NOT replaced by 0): a zero would be
# treated as a real observation and drag every lag/rolling feature inside the
# forecast horizon towards values the model never saw during training.
full_data = _add_lag_features(full_data)

# Inside the horizon, features that would depend on unknown sales are NaN;
# carry the last known value of each series forward instead.
# NOTE(review): this is an approximation - later horizon days reuse stale
# values. Recursive forecasting or lags at least as long as the horizon would
# remove the remaining train/inference mismatch.
full_data[LAG_FEATURES] = full_data.groupby(GROUP_KEYS)[LAG_FEATURES].ffill()
# Recomputed here so the flag is guaranteed to be present on the stacked frame.
full_data["is_holiday"] = full_data["date"].isin(holiday_daily["date"]).astype(int)

test_lag = full_data[full_data["is_test"]].copy()
# Fallback for series whose history is too short for some lags: back-fill
# within the same series only, never across different store/family pairs.
test_lag[LAG_FEATURES] = test_lag.groupby(GROUP_KEYS)[LAG_FEATURES].bfill()

X_test = test_lag[features]
# A plain regression objective can output negative values; sales are assumed
# to be non-negative, so predictions are clipped at zero.
test_lag["sales"] = np.clip(model.predict(X_test), 0, None)

# --- Build the submission file ---
# Rows are sorted by id rather than left in (store, family, date) order.
submission = test_lag[["id", "sales"]].sort_values("id")
submission.to_csv("submission.csv", index=False)
