In [None]:
import pandas as pd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import japanize_matplotlib

In [None]:
# データ生成
np.random.seed(42)  # 再現性のための乱数シード

# 生理周期の分布
cycle_mean = 20.73
cycle_std = 8.35

# 生理期間の分布
organ_mean = 4.07
organ_std = 1.76

# サンプル数と周期数
num_samples = 100
num_cycles = 4

# 各IDごとにデータ生成
data = []
for id in range(num_samples):
    # IDごとの生理周期の基準値を設定
    base_cycle = np.random.normal(cycle_mean, cycle_std) # 基準値を生成
    
    while True:
        # 生理周期のサンプリング (基準値を中心に標準偏差8.35で生成)
        cycles = np.random.normal(base_cycle, 8.35, num_cycles)
        cycles = np.clip(cycles, 7, 90)  # 生理周期は7以上90以下に制約
        
        # 生理期間のサンプリング
        organs = np.random.normal(organ_mean, organ_std, num_cycles)
        organs = np.clip(organs, 2, np.inf)  # 生理期間は1以上に制約
        
        # 生理周期が生理期間より必ず長いか確認
        if np.all(cycles > organs):
            break  # 条件を満たした場合、ループを抜ける
    
    data.append({
        "ID": id,
        "Base_Cycle": base_cycle,  # 基準値を保存
        "Cycles": cycles,
        "Organs": organs
    })

# データフレーム化
df = pd.DataFrame(data)
df = df.explode(["Cycles", "Organs"]).reset_index(drop=True)
df["Cycle_Index"] = df.groupby("ID").cumcount() + 1
print(df)

     ID  Base_Cycle     Cycles    Organs  Cycle_Index
0     0   24.877563  23.723056  3.657919            1
1     0   24.877563  30.285762  6.849415            2
2     0   24.877563  37.594862  5.420685            3
3     0   24.877563  22.922382  3.243725            4
4     1   25.260376  21.390839       2.0            1
..   ..         ...        ...       ...          ...
395  98   16.094084  19.768205  4.499089            4
396  99   16.019943    15.6393       2.0            1
397  99   16.019943  18.051828  6.611026            2
398  99   16.019943  14.005622  3.925414            3
399  99   16.019943  18.959606  6.036441            4

[400 rows x 5 columns]


In [64]:
from scipy.stats import norm

# 各IDごとにモデルを作成
results = []
for id, group in df.groupby("ID"):
    # 10サイクル目までのデータを使用
    train_data = group[group["Cycle_Index"] < num_cycles]
    
    # 11サイクル目のデータを予測対象
    test_data = group[group["Cycle_Index"] == num_cycles]
    
    
    # 平均値と分散を計算
    cycle_mean = train_data["Cycles"].mean()
    cycle_std = train_data["Cycles"].std()
    
    # 次の生理周期の予測
    predicted_cycle = cycle_mean
    
    # 信頼区間の計算 (95%信頼区間)
    lower_bound = norm.ppf(0.1, loc=cycle_mean, scale=cycle_std)
    upper_bound = norm.ppf(0.9, loc=cycle_mean, scale=cycle_std)
    
    # 実測値
    actual_cycle = test_data["Cycles"].values[0]
    
    results.append({
        "ID": id,
        "Predicted_Cycle": predicted_cycle,
        "Actual_Cycle": actual_cycle,
        "Lower_Bound": lower_bound,
        "Upper_Bound": upper_bound
    })

# 結果の表示
for result in results:
    print(f"ID: {result['ID']}, Predicted: {result['Predicted_Cycle']:.2f}, "
          f"Actual: {result['Actual_Cycle']:.2f}, "
          f"90% CI: [{result['Lower_Bound']:.2f}, {result['Upper_Bound']:.2f}]")

ID: 0, Predicted: 30.53, Actual: 22.92, 90% CI: [21.64, 39.43]
ID: 1, Predicted: 23.35, Actual: 9.28, 90% CI: [18.98, 27.71]
ID: 2, Predicted: 14.55, Actual: 13.71, 90% CI: [2.22, 26.88]
ID: 3, Predicted: 19.71, Actual: 39.33, 90% CI: [17.80, 21.62]
ID: 4, Predicted: 14.17, Actual: 28.64, 90% CI: [2.77, 25.56]
ID: 5, Predicted: 17.34, Actual: 7.00, 90% CI: [9.21, 25.46]
ID: 6, Predicted: 28.73, Actual: 32.10, 90% CI: [19.01, 38.46]
ID: 7, Predicted: 16.58, Actual: 19.12, 90% CI: [8.86, 24.29]
ID: 8, Predicted: 22.60, Actual: 21.16, 90% CI: [4.84, 40.36]
ID: 9, Predicted: 24.13, Actual: 19.52, 90% CI: [10.81, 37.45]
ID: 10, Predicted: 21.37, Actual: 18.27, 90% CI: [11.99, 30.75]
ID: 11, Predicted: 12.72, Actual: 12.07, 90% CI: [6.36, 19.09]
ID: 12, Predicted: 17.31, Actual: 23.38, 90% CI: [5.86, 28.75]
ID: 13, Predicted: 8.60, Actual: 7.00, 90% CI: [5.05, 12.16]
ID: 14, Predicted: 14.45, Actual: 19.85, 90% CI: [1.46, 27.44]
ID: 15, Predicted: 21.73, Actual: 11.25, 90% CI: [10.51, 32.94]

  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
