# 人工データ実験

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import csv

import os
import random
import itertools
from collections import defaultdict

In [2]:
# --- put this at the VERY TOP of your notebook ---
import sys
from pathlib import Path

# Put the project root (the parent of synthetic_data/) at the front of sys.path.
# This assumes the notebook is executed from inside synthetic_data/.
proj_root = Path.cwd().parent
sys.path.insert(0, str(proj_root))

# Sanity check: synthetic_data must be discoverable as a module directly
# under proj_root, otherwise the imports that follow cannot resolve.
import importlib
import pkgutil

assert "synthetic_data" in {
    module.name for module in pkgutil.iter_modules([str(proj_root)])
}, "project root not found"

from src.experiments.trainer import train_model
from src.models.proposed import Model
from src.data.generate import generate_data
from src.utils.common import set_seed

In [3]:
# Device setup

use_cuda = torch.cuda.is_available()
use_mps = torch.backends.mps.is_available()

# MPS availability is detected and reported, but the MPS branch is disabled in
# this notebook, so the choice is only ever between CUDA and CPU.
device = torch.device("cuda" if use_cuda else "cpu")

print(
    f"CUDA available: {use_cuda}",
    f"MPS available: {use_mps}",
    f"Using device: {device}",
    sep="\n",
)

CUDA available: False
MPS available: True
Using device: cpu


### 実験設定

In [4]:
# Load the experiment-wide settings once and echo them so the run is
# self-describing in the notebook output.
from src.utils.common import get_args

args = get_args()
print("args:", args)

In [5]:
from src.utils.common import generate_grid_configs

configs = generate_grid_configs()

# Show the first five grid points as a quick visual check of the sweep.
preview = configs[:5]
for i, c in enumerate(preview):
    summary = (
        f"[{i}] students={c['n_students']}, skills={c['n_skills']}, smoothing={c['smoothing']}, "
        f"alpha={c['alpha']}, beta={c['beta']}, growth_alpha={c['growth_alpha']}, growth_beta={c['growth_beta']}"
    )
    print(summary)


In [6]:
# Registry of the models to evaluate, keyed by name. Every slot starts as None;
# insertion order is kept because the CSV header is derived from this dict.
models_dict = dict.fromkeys(
    (
        "proposed",
        "upper",
        "all",
        "population_base",
        "next_node",
        "uniform",
    )
)

from result import generate_header, ensure_csv_header, append_row, make_row, merge_metrics

header = generate_header(models_dict)

## 実験

In [7]:
import os
import pandas as pd

def _done_iterations(csv_path: str) -> set[int]:
    """Return the iterations (seeds) already recorded in a results CSV.

    The iteration column is matched case-insensitively and ignoring
    surrounding whitespace (``Iteration``, ``iteration``, ...). Cells that
    are not finite whole numbers (blank, text, ``inf``, ``2.5``) are skipped
    individually, so one malformed row neither discards every valid seed nor
    gets truncated into a neighbouring seed.

    Args:
        csv_path: Path to the results CSV of one configuration.

    Returns:
        The recorded iteration numbers. An empty set is returned when the
        file does not exist, cannot be read, or has no iteration column.
    """
    import math  # local import so the cell does not depend on a new top-level import

    if not os.path.exists(csv_path):
        return set()

    try:
        df = pd.read_csv(csv_path)
    except Exception as exc:
        # Best effort: an unreadable CSV is treated as "nothing done yet",
        # but say so, because the caller will then re-run and append rows.
        print(f"[WARN] could not read {csv_path}: {exc!r}")
        return set()

    # Tolerate header variations such as "Iteration" / " iteration ".
    col = next(
        (name for name in df.columns if str(name).strip().lower() == "iteration"),
        None,
    )
    if col is None:
        return set()

    values = pd.to_numeric(df[col], errors="coerce")
    return {
        int(value)
        for value in values
        if math.isfinite(value) and float(value).is_integer()
    }


In [None]:
from synthetic_data.split_data import split_balanced_data
from src.data.generate import dependency_matrix
from src.utils.metrics import evaluate_distributions, evaluate_test

# Run the experiment for every configuration in the parameter grid.
# For each config, args.n_experiments repetitions are run; the repetition
# index doubles as the random seed and as the "iteration" written to the CSV.
for i, config in enumerate(configs):
    print(f"Experiment {i+1}/{len(configs)}: {config}")

    # Decide the CSV path for this config up front (it is needed for the skip check).
    csv_path = (
        f'./results/simulation_results_{config["n_students"]}_{config["n_skills"]}_'
        f'{config["smoothing"]}_{config["alpha"]}_{config["beta"]}_'
        f'{config["growth_alpha"]}_{config["growth_beta"]}.csv'
    )
    # Seeds already present in the CSV; these repetitions are not re-run.
    done = _done_iterations(csv_path)

    for pattern in range(args.n_experiments):
        seed = pattern

        # --- Skip when a result for this seed is already in the CSV ---
        if pattern in done:
            print(f"[SKIP] config={i} seed={pattern} は既存CSVにあり: {csv_path}")
            continue

        set_seed(seed)
        A = dependency_matrix(config["n_skills"])

        # Generate the training data.
        # NOTE(review): the exact meaning of the three returned datasets is
        # defined in src.data.generate, which is not visible here — confirm.
        dataset, dataset_all, dataset2 = generate_data(A, config)

        # Proposed model: trained on `dataset` only.
        # set_seed is called again before each split/training below,
        # presumably so every model starts from the same RNG state — TODO confirm.
        print("Proposed Model")
        set_seed(seed)
        X_train, y_train, X_val, y_val = split_balanced_data(dataset, args.split_ratio, device=device)
        model = Model(config["n_skills"]).to(device)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        t_loss, v_loss = train_model(args, model, X_train, y_train, X_val, y_val, criterion, optimizer, seed, "proposed_model", config, device)
        models_dict["proposed"] = model

        # Upper-bound model: same architecture, trained on `dataset` and
        # `dataset2` concatenated along axis 0.
        print("Upper Model")
        set_seed(seed)
        upper_data = np.concatenate([dataset, dataset2], axis=0)
        upper_X_train_X, upper_X_train_Y, upper_X_val_X, upper_X_val_Y = split_balanced_data(upper_data, args.split_ratio, device=device)
        upper_model = Model(config["n_skills"]).to(device)
        upper_criterion = nn.MSELoss()
        upper_optimizer = optim.Adam(upper_model.parameters(), lr=args.lr)
        upper_t_loss, upper_v_loss = train_model(args, upper_model, upper_X_train_X, upper_X_train_Y, upper_X_val_X, upper_X_val_Y, upper_criterion, upper_optimizer, seed, "upper_model", config, device)
        models_dict["upper"] = upper_model

        # "All" model: same architecture, trained on `dataset_all`.
        print("All Model")
        set_seed(seed)
        all_X_train, all_y_train, all_X_val, all_y_val = split_balanced_data(dataset_all, args.split_ratio, device=device)
        all_model = Model(config["n_skills"]).to(device)
        all_criterion = nn.MSELoss()
        all_optimizer = optim.Adam(all_model.parameters(), lr=args.lr)
        all_t_loss, all_v_loss = train_model(args, all_model, all_X_train, all_y_train, all_X_val, all_y_val, all_criterion, all_optimizer, seed, "all_model", config, device)
        models_dict["all"] = all_model

        # Generate test data and evaluate.
        # Only the third value returned by generate_data is used as the test set.
        # Only the "proposed", "upper" and "all" entries of models_dict are
        # assigned in this loop; how the remaining entries are handled is up
        # to the evaluation helpers — verify against src.utils.metrics.
        _, _, test_dataset = generate_data(A, config, n_test=100)
        test_results = evaluate_test(models_dict, dataset, test_dataset)
        distribution_results = evaluate_distributions(models_dict, A, dataset, config)

        merged = merge_metrics(test_results, distribution_results)
        # NOTE(review): the header is recomputed from models_dict on every repetition.
        header = generate_header(models_dict)

        # csv_path is already defined at this point.
        ensure_csv_header(csv_path, header)
        row = make_row(pattern, header, merged)
        append_row(csv_path, row)

        # Record the seed as done right after the row is appended, keeping the
        # in-memory set in step with the CSV that a re-run after a crash reads.
        done.add(pattern)


Experiment 1/675: {'n_students': 50, 'n_skills': 5, 'smoothing': 0.1, 'alpha': 2.5, 'beta': 2.5, 'growth_alpha': 2.5, 'growth_beta': 2.5}
[SKIP] config=0 seed=0 は既存CSVにあり: ./results/simulation_results_50_5_0.1_2.5_2.5_2.5_2.5.csv
[SKIP] config=0 seed=1 は既存CSVにあり: ./results/simulation_results_50_5_0.1_2.5_2.5_2.5_2.5.csv
[SKIP] config=0 seed=2 は既存CSVにあり: ./results/simulation_results_50_5_0.1_2.5_2.5_2.5_2.5.csv
[SKIP] config=0 seed=3 は既存CSVにあり: ./results/simulation_results_50_5_0.1_2.5_2.5_2.5_2.5.csv
[SKIP] config=0 seed=4 は既存CSVにあり: ./results/simulation_results_50_5_0.1_2.5_2.5_2.5_2.5.csv
[SKIP] config=0 seed=5 は既存CSVにあり: ./results/simulation_results_50_5_0.1_2.5_2.5_2.5_2.5.csv
[SKIP] config=0 seed=6 は既存CSVにあり: ./results/simulation_results_50_5_0.1_2.5_2.5_2.5_2.5.csv
[SKIP] config=0 seed=7 は既存CSVにあり: ./results/simulation_results_50_5_0.1_2.5_2.5_2.5_2.5.csv
[SKIP] config=0 seed=8 は既存CSVにあり: ./results/simulation_results_50_5_0.1_2.5_2.5_2.5_2.5.csv
[SKIP] config=0 seed=9 は既存CSVにあり: 