In [1]:
import pandas as pd
from ydata_profiling import ProfileReport
import lightgbm as lgb
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import numpy as np
import seaborn as sns
import xgboost as xgb


In [9]:
test_df = pd.read_csv("date/test.csv")
train_df = pd.read_csv("date/train.csv")
weather = pd.read_csv("date/weather.csv")
sample = pd.read_csv("date/sample_submission.csv")

In [3]:
train_df = train_df.drop(columns=['amount'])

In [4]:
def Data_cre(df):
    # --- area を分解して複数行に展開 ---
    df["area_list"] = df["area"].str.split("_")

    # explodeで行を増やす
    df = df.explode("area_list")
    df = df.drop(columns=["area"])
    df = df.rename(columns={"area_list": "area"})

    # --- 日付フォーマット揃え ---
    df["date"] = df["date"].astype(str)
    df["date"] = pd.to_datetime(df["date"])

    # --- weather と結合 ---
    df = pd.merge(df, weather, on=["date", "area"], how="left")

    # --- 年月日列を追加 ---
    df["year"] = df["date"].dt.year
    df["month"] = df["date"].dt.month
    df["day"] = df["date"].dt.day

    # --- max_temp_time / min_temp_time 分解 ---
    df["max_temp_time"] = pd.to_datetime(df["max_temp_time"])
    df["maxtemp_year"] = df["max_temp_time"].dt.year
    df["maxtemp_month"] = df["max_temp_time"].dt.month
    df["maxtemp_day"] = df["max_temp_time"].dt.day
    df["maxtemp_hour"] = df["max_temp_time"].dt.hour

    df["min_temp_time"] = pd.to_datetime(df["min_temp_time"])
    df["mintemp_year"] = df["min_temp_time"].dt.year
    df["mintemp_month"] = df["min_temp_time"].dt.month
    df["mintemp_day"] = df["min_temp_time"].dt.day
    df["mintemp_hour"]= df["min_temp_time"].dt.hour

    # --- 不要列削除 ---
    df = df.drop(columns=["date", "max_temp_time", "min_temp_time"])

    # --- category 型に変換 ---
    df["kind"] = df["kind"].astype("category")
    df["area"] = df["area"].astype("category")

    return df
weather["date"] = pd.to_datetime(weather["date"])

train_df = Data_cre(train_df)

test_df = Data_cre(test_df) 

In [5]:
### chatgpt
from sklearn.model_selection import KFold
import numpy as np
import lightgbm as lgb

models = {}  # kind → model list or averaged は辞書に保存

K = 5  # ← Fold数

for kd, group in train_df.groupby("kind", observed=False):

    X = group.drop(columns=["mode_price"],axis=1)
    y = group["mode_price"]

    kf = KFold(n_splits=K, shuffle=True, random_state=42)

    fold_models = []  # foldごとのモデルを保存

    for fold, (train_index, valid_index) in enumerate(kf.split(X)):

        X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
        y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]

        train_data = lgb.Dataset(X_train, y_train)
        valid_data = lgb.Dataset(X_valid, y_valid)

        model = xgb.XGBRegressor(
            n_estimators=10,
            learning_rate=0.05,
            max_depth=6,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=fold,
            early_stopping_rounds = 5,
            enable_categorical=True,
            nthread=2,             
            tree_method='approx',  # 最も安定
        )

        model.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            verbose=True
        )
        
        fold_models.append(model)

    models[kd] = fold_models  # kindごとにfoldモデルを保存


[0]	validation_0-rmse:576.39367
[1]	validation_0-rmse:569.91808
[2]	validation_0-rmse:562.85083
[3]	validation_0-rmse:557.49964
[4]	validation_0-rmse:554.90650
[5]	validation_0-rmse:549.79480
[6]	validation_0-rmse:544.55263
[7]	validation_0-rmse:541.85141
[8]	validation_0-rmse:539.89978
[9]	validation_0-rmse:535.14883
[0]	validation_0-rmse:552.86300
[1]	validation_0-rmse:549.45872
[2]	validation_0-rmse:542.83827
[3]	validation_0-rmse:539.23593
[4]	validation_0-rmse:535.65281
[5]	validation_0-rmse:530.06174
[6]	validation_0-rmse:525.63117
[7]	validation_0-rmse:518.42429
[8]	validation_0-rmse:518.10839
[9]	validation_0-rmse:514.22819
[0]	validation_0-rmse:535.78260
[1]	validation_0-rmse:530.29401
[2]	validation_0-rmse:523.30692
[3]	validation_0-rmse:520.27581
[4]	validation_0-rmse:517.24136
[5]	validation_0-rmse:514.31663
[6]	validation_0-rmse:508.14226
[7]	validation_0-rmse:506.79828
[8]	validation_0-rmse:504.64936
[9]	validation_0-rmse:498.11357
[0]	validation_0-rmse:540.40668
[1]	vali

In [6]:
import numpy as np

pred_list = []

for kd, group in test_df.groupby("kind"):

    fold_models = models[kd]  # ← foldモデルが複数入っている
    X_test = group.copy()

    # 全 fold モデルの予測を行い平均
    fold_pred = np.mean([m.predict(X_test) for m in fold_models], axis=0)

    X_test["mode_price"] = fold_pred
    pred_list.append(X_test)

# まとめ・並び順合わせ
result = pd.concat(pred_list).sort_index()

# 日付整形（year,month,day → YYYYMMDD）
result["date"] = (
    result["year"].astype(str)
    + result["month"].astype(str).str.zfill(2)
    + result["day"].astype(str).str.zfill(2)
)
result["date"] = result["date"].astype(int)

# 提出形式に整形
submit = result[["kind", "date", "mode_price"]]

submit

  for kd, group in test_df.groupby("kind"):


Unnamed: 0,kind,date,mode_price
0,だいこん,20220502,979.188171
1,だいこん,20220502,979.188171
2,だいこん,20220502,979.188171
3,だいこん,20220506,978.912231
4,だいこん,20220506,978.912231
...,...,...,...
815,ミニトマト,20220528,130.996155
816,ミニトマト,20220530,131.394302
817,ミニトマト,20220530,131.394302
818,ミニトマト,20220531,131.822922


In [7]:
submit_mean = (
    submit.groupby(["kind", "date"], as_index=False)["mode_price"]
          .mean()
)
# ① area 側の kind の出現順を取得
kind_order = test_df["kind"].drop_duplicates().tolist()

# ② price 側の kind を Categorical に変換して順序を固定
submit_mean["kind"] = pd.Categorical(submit_mean["kind"], categories=kind_order, ordered=True)

# ③ 並び替え
df_price_sorted = submit_mean.sort_values(["kind", "date"]).reset_index(drop=True)

  submit.groupby(["kind", "date"], as_index=False)["mode_price"]


In [8]:
df_price_sorted.to_csv("submit_xgb.csv", index=False)
df_price_sorted

Unnamed: 0,kind,date,mode_price
0,だいこん,20220502,979.188171
1,だいこん,20220506,978.912292
2,だいこん,20220507,979.473328
3,だいこん,20220509,980.097351
4,だいこん,20220510,980.097351
...,...,...,...
315,ミニトマト,20220526,130.908325
316,ミニトマト,20220527,130.890442
317,ミニトマト,20220528,130.996155
318,ミニトマト,20220530,131.394302
