# データ読込

In [1]:
import pandas as pd
from logic.factory_manage.sql import load_data_from_sqlite
from utils.get_holydays import get_japanese_holidays
from logic.factory_manage.predict_model_v3 import generate_features, train_and_predict

# Load the weighing records through the project's SQLite loader (not a CSV).
# NOTE(review): `path` is assigned but never passed to the loader, so
# load_data_from_sqlite() reads from whatever location it defaults to --
# confirm that this is the same database file.
path = "/work/app/data/factory_manage/weight_data.db"
df = load_data_from_sqlite()

# Date range covered by the records; the same bounds are handed to the
# holiday lookup below.
hol_min = df["伝票日付"].min()
hol_max = df["伝票日付"].max()
print(f"最小日付: {hol_min}, 最大日付: {hol_max}")
holiday = get_japanese_holidays(hol_min, hol_max)
df.head()

最小日付: 2024-05-01 00:00:00, 最大日付: 2025-06-23 00:00:00


Unnamed: 0,伝票日付,品名,正味重量,祝日フラグ
0,2024-05-01,混合廃棄物A,1620.0,0
1,2024-05-01,混合廃棄物A,1840.0,0
2,2024-05-01,混合廃棄物A,1730.0,0
3,2024-05-01,混合廃棄物B,2000.0,0
4,2024-05-01,混合廃棄物A,360.0,0


In [2]:
# Total net weight per item name, heaviest item first.
item_totals = (
    df.groupby("品名")["正味重量"]
    .sum()
    .sort_values(ascending=False)
)
print(item_totals)


品名
混合廃棄物A          14973160.0
混合廃棄物B           3876330.0
GC 軽鉄･ｽﾁｰﾙ類      2463090.0
選別               1524260.0
木くず              1402530.0
                   ...    
混合廃棄物A(廃ﾌﾟﾗ)          30.0
雑誌                    30.0
選別（水銀灯）               30.0
ﾀｲﾔ                   20.0
混合廃棄物(羽毛)             10.0
Name: 正味重量, Length: 156, dtype: float64


In [3]:
# Load the reservation CSV, parse the reservation date, and keep only the rows
# whose date lies within [hol_min, hol_max] (both ends inclusive).
df_reserve = pd.read_csv("/work/app/data/input/yoyaku_data.csv")
df_reserve["予約日"] = pd.to_datetime(df_reserve["予約日"])
in_range = df_reserve["予約日"].between(hol_min, hol_max)
df_reserve = df_reserve[in_range]
df_reserve

Unnamed: 0,予約日,予約得意先名,固定客
0,2025-03-02,泉土木,True
1,2025-03-02,ASMU,False
2,2025-03-02,青木サービス,False
3,2025-03-02,オネストワン,False
4,2025-03-02,おかたずけ田中くん,False
...,...,...,...
4743,2025-05-31,山口興業,True
4744,2025-05-31,谷津商会,True
4745,2025-05-31,吉田機電,False
4746,2025-05-31,ヨシモリ,True


In [4]:
# Show the column labels of the filtered reservation frame.
reserve_columns = df_reserve.columns
print(reserve_columns)

Index(['予約日', '予約得意先名', '固定客'], dtype='object')


### 受入番号用

In [5]:
import os
import glob
import pandas as pd

# Collect every CSV in the intake directory. glob returns paths in an
# arbitrary, filesystem-dependent order, so sort them to keep the row order of
# the concatenated frame reproducible between runs.
csv_dir = "/work/app/data/input/受入_時刻"
csv_files = sorted(glob.glob(os.path.join(csv_dir, "*.csv")))
if not csv_files:
    # Without this guard pd.concat fails further down with the opaque
    # "No objects to concatenate" error.
    raise FileNotFoundError(f"No CSV files found in {csv_dir}")

# Read and normalise each file, then stack everything into one frame.
dfs = []
for f in csv_files:
    df_tmp = pd.read_csv(f)

    # Drop any parenthesised suffix from the slip date (presumably a weekday
    # marker -- confirm against the raw files), then parse as YYYY/MM/DD.
    df_tmp["伝票日付"] = df_tmp["伝票日付"].str.replace(r"\(.*?\)", "", regex=True).str.strip()
    df_tmp["伝票日付"] = pd.to_datetime(df_tmp["伝票日付"], format="%Y/%m/%d")

    # Strip thousands separators from the net weight and convert to float.
    df_tmp["正味重量"] = df_tmp["正味重量"].replace({',': ''}, regex=True).astype(float)

    # Fill missing intake numbers with the sentinel -1 so the column can be
    # cast to int.
    df_tmp["受入番号"] = df_tmp["受入番号"].fillna(-1).astype(int)

    dfs.append(df_tmp)

df_Ukeire = pd.concat(dfs, ignore_index=True)

# Keep only the columns used below: slip date, item name, net weight, intake number.
df_Ukeire = df_Ukeire[["伝票日付", "品名", "正味重量", "受入番号"]].copy()

# Vehicle count = number of distinct intake numbers per (slip date, item name).
# NOTE(review): rows whose intake number was missing all carry -1, so together
# they add one to the count of their group -- confirm this is intended.
df_count = (
    df_Ukeire.groupby(["伝票日付", "品名"])["受入番号"]
    .nunique()
    .reset_index()
    .rename(columns={"受入番号": "台数"})
)

# For reference: attach the per-group vehicle count back onto every row.
df_merged = pd.merge(df_Ukeire, df_count, on=["伝票日付", "品名"], how="left")

# Inspect the result.
df_merged


Unnamed: 0,伝票日付,品名,正味重量,受入番号,台数
0,2025-06-01,混合廃棄物A,1110.0,54050,35
1,2025-06-01,混合廃棄物A,120.0,54094,35
2,2025-06-01,混合廃棄物A,270.0,54060,35
3,2025-06-01,混合廃棄物A,1220.0,54044,35
4,2025-06-01,混合廃棄物A,1650.0,54089,35
...,...,...,...,...,...
64680,2024-11-30,混合廃棄物A,50.0,31824,74
64681,2024-11-30,混合廃棄物A,310.0,31822,74
64682,2024-11-30,混合廃棄物A,1140.0,31805,74
64683,2024-11-30,木くず,870.0,31824,9


In [6]:
# Item names singled out for the analysis; these are the five heaviest entries
# in the item totals printed earlier in the notebook.
target_items = [
    "混合廃棄物A",
    "混合廃棄物B",
    "GC 軽鉄･ｽﾁｰﾙ類",
    "選別",
    "木くず",
]

# 特徴量生成

In [7]:
# 最初のモデル
from logic.factory_manage.predict_model_v4_5 import full_walkforward_pipeline,print_metrics
# Restrict the walk-forward evaluation to records newer than 90 days ago.
# NOTE: the cutoff is relative to the wall clock ("today"), not to the newest
# record in df, so the selected rows change depending on when the cell is run.
cutoff_date = pd.to_datetime("today") - pd.Timedelta(days=90)

# Pass an independent copy rather than a boolean-mask slice of df: the recorded
# output of this cell shows a SettingWithCopyWarning raised on an assignment to
# df_raw["伝票日付"] inside the pipeline, which an explicit copy should avoid.
df_90 = df[df["伝票日付"] > cutoff_date].copy()

all_actual, all_pred = full_walkforward_pipeline(df_90)

print_metrics(all_actual, all_pred)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_raw["伝票日付"] = pd.to_datetime(df_raw["伝票日付"])


KeyboardInterrupt: 

In [8]:
# NOTE(review): in this file's cell order, `pred_df_slim` is only assigned
# further down (from the cross_validation_pipeline call), so on a fresh kernel
# run from top to bottom this cell raises NameError. The recorded output looks
# like a tuple of float lists -- presumably stale kernel state; confirm, then
# move this cell after the assignment or delete it.
pred_df_slim

([np.float64(70230.0),
  np.float64(72260.0),
  np.float64(62360.0),
  np.float64(63400.0),
  np.float64(64430.0),
  np.float64(17530.0),
  np.float64(53000.0),
  np.float64(64370.0),
  np.float64(53400.0),
  np.float64(60180.0),
  np.float64(51850.0),
  np.float64(72260.0),
  np.float64(34490.0),
  np.float64(69510.0),
  np.float64(65670.0),
  np.float64(59890.0),
  np.float64(63510.0),
  np.float64(51390.0),
  np.float64(19380.0),
  np.float64(60470.0),
  np.float64(31970.0),
  np.float64(10550.0),
  np.float64(61320.0),
  np.float64(81600.0),
  np.float64(67790.0),
  np.float64(27040.0),
  np.float64(71270.0),
  np.float64(82090.0),
  np.float64(63920.0),
  np.float64(60510.0),
  np.float64(57870.0),
  np.float64(60170.0),
  np.float64(14520.0),
  np.float64(57260.0),
  np.float64(71360.0),
  np.float64(77470.0),
  np.float64(75750.0),
  np.float64(73130.0),
  np.float64(64540.0),
  np.float64(26020.0),
  np.float64(32150.0),
  np.float64(0.0),
  np.float64(60280.0),
  np.float64(47

In [7]:
# 交差検証パイプライン
from logic.factory_manage.predict_model_v4_5 import cross_validation_pipeline
    
# Run the cross-validation pipeline on the full frame with five splits.
pred_df_slim = cross_validation_pipeline(df, n_splits=5)



===== クロスバリデーション分割: 30日目以降 =====


KeyboardInterrupt: 

In [8]:
from logic.factory_manage.predict_model_v4_5_val import history_window_search
# Evaluate each candidate history window (the log output reports them in days).
# NOTE(review): min_eval_data presumably is the minimum number of evaluation
# records a window needs before it is scored -- confirm against the helper.
pred = history_window_search(
    df, window_list=[60, 120, 180, 240, 300, 360], min_eval_data=30
)



履歴60日で評価中...


  mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100


  R² = -0.174, MAE = 18,134kg, RMSE = 24,175kg, MAPE = inf%, 最大誤差=90,591kg

履歴120日で評価中...


  mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100


  R² = -0.189, MAE = 18,512kg, RMSE = 24,763kg, MAPE = inf%, 最大誤差=84,771kg

履歴180日で評価中...


  mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100


  R² = -0.088, MAE = 18,704kg, RMSE = 25,088kg, MAPE = inf%, 最大誤差=74,810kg

履歴240日で評価中...


  mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100


  R² = -0.074, MAE = 18,744kg, RMSE = 25,230kg, MAPE = inf%, 最大誤差=72,526kg

履歴300日で評価中...


  mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100


  R² = 0.012, MAE = 16,875kg, RMSE = 24,407kg, MAPE = inf%, 最大誤差=76,979kg

履歴360日で評価中...
  -> 評価データが少ないためスキップ (11件)
