In [None]:
pip install gurobipy 

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
pip show gurobipy

Name: gurobipy
Version: 12.0.2
Summary: Python interface to Gurobi
Home-page: https://www.gurobi.com
Author: Gurobi Optimization, LLC
Author-email: 
License: Proprietary
Location: c:\Users\sueya\AppData\Local\pypoetry\Cache\virtualenvs\datacollaborationanalysis-jS9KJoip-py3.11\Lib\site-packages
Requires: 
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [3]:
from gurobipy import Model, GRB

# Smoke test: build and solve a tiny linear program with Gurobi.
model = Model("test_model")

# Decision variables x, y >= 0 (created in this order)
x, y = (model.addVar(name=label, lb=0) for label in ("x", "y"))

# maximize x + y  subject to  x + 2y <= 4
model.setObjective(x + y, GRB.MAXIMIZE)
model.addConstr(x + 2 * y <= 4, name="c1")

# Solve
model.optimize()

# Report the outcome
if model.status != GRB.OPTIMAL:
    print("No optimal solution found.")
else:
    print("Optimal solution found!")
    for v in model.getVars():
        print(f"{v.VarName} = {v.X}")
    print(f"Objective value: {model.ObjVal}")


Set parameter Username
Set parameter LicenseID to value 2687427
Academic license - for non-commercial use only - expires 2026-07-11
Gurobi Optimizer version 12.0.2 build v12.0.2rc0 (win64 - Windows 11.0 (26100.2))

CPU model: AMD Ryzen 5 7530U with Radeon Graphics, instruction set [SSE2|AVX|AVX2]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Optimize a model with 1 rows, 2 columns and 2 nonzeros
Model fingerprint: 0x31e8eabe
Coefficient statistics:
  Matrix range     [1e+00, 2e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [4e+00, 4e+00]
Presolve removed 1 rows and 2 columns
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    4.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 0 iterations and 0.01 seconds (0.00 work units)
Optimal objective  4.000000000e+00
Optimal solution found!
x = 4.0
y = 0.0
Objective value: 4

In [4]:
import numpy as np
from numpy.linalg import svd, pinv, eig
from sklearn.decomposition import KernelPCA
from sklearn.metrics.pairwise import rbf_kernel
import pandas as pd

def generate_data_random(m=3, r=50, d=20, seed=None):
    """Draw ``m`` independent random binary matrices.

    Args:
        m: Number of matrices to generate.
        r: Number of rows of each matrix.
        d: Number of columns of each matrix.
        seed: Seed for the ``numpy.random.RandomState`` used for all draws.

    Returns:
        A list of ``m`` float arrays of shape ``(r, d)`` whose entries are
        0.0 or 1.0.
    """
    rng = np.random.RandomState(seed)
    matrices = []
    for _ in range(m):
        bits = rng.randint(0, 2, size=(r, d))
        matrices.append(bits.astype(float))
    return matrices

def generate_data_kpca(m=3, r=50, d=20, seed=None):
    """Generate ``m`` kernel-PCA views of one shared random binary matrix.

    A single ``(r, d)`` matrix with entries 0.0/1.0 is drawn first. Then ``m``
    RBF ``gamma`` values are drawn uniformly from [0.1, 2.0), and the shared
    matrix is embedded with ``KernelPCA`` once per gamma.

    Args:
        m: Number of views (one per sampled gamma).
        r: Number of rows of the shared matrix.
        d: Number of columns of the shared matrix; also passed as
            ``n_components`` to ``KernelPCA``.
        seed: Seed for the ``numpy.random.RandomState`` used for all draws.

    Returns:
        A list with the ``m`` arrays returned by ``KernelPCA.fit_transform``.
    """
    rng = np.random.RandomState(seed)
    # Draw order matters for reproducibility: base matrix first, gammas second.
    base = rng.randint(0, 2, size=(r, d)).astype(float)
    widths = rng.uniform(0.1, 2.0, size=m)
    return [
        KernelPCA(
            n_components=d, kernel='rbf', gamma=width, fit_inverse_transform=False
        ).fit_transform(base)
        for width in widths
    ]

def existing_method(S_list, p):
    """Baseline: build a shared target from the SVD of the stacked matrices.

    The matrices are concatenated column-wise, and the first ``p`` left
    singular vectors of the result are used as the target ``Z``. For every
    matrix ``S`` the least-squares map ``G = pinv(S) @ Z`` is computed and the
    squared Frobenius residual ``||Z - S @ G||_F^2`` is accumulated.

    Args:
        S_list: Non-empty sequence of 2-D arrays sharing the same number of
            rows (required by ``np.hstack``).
        p: Number of leading left singular vectors to keep. If it exceeds the
            number available, all of them are kept (plain slicing).

    Returns:
        Tuple ``(Z, total_error)`` where ``Z`` has one column per kept
        singular vector and ``total_error`` is the sum of the residuals.

    Raises:
        ValueError: If ``S_list`` is empty.
    """
    if len(S_list) == 0:
        raise ValueError("S_list must contain at least one matrix")
    W = np.hstack(S_list)  # r x (sum of column counts)
    U, _, _ = svd(W, full_matrices=False)
    Z = U[:, :p]  # r x p
    errors = []
    for S in S_list:
        # Best least-squares fit of Z within the column space of S
        G = pinv(S) @ Z
        errors.append(np.linalg.norm(Z - S @ G, 'fro') ** 2)
    return Z, np.sum(errors)

def proposed_method(S_list, Z_dim, lam=1e-3):
    """Pick a shared target from the smallest eigenvectors of a residual matrix.

    For every matrix ``S`` an RBF kernel ``K`` (``gamma = 1 / n_columns``) is
    built and turned into the ridge smoother ``P = K (K + lam I)^-1``. The
    matrix ``M = sum_i (P_i - I)^T (P_i - I)`` is then eigendecomposed, and
    the eigenvectors of its ``Z_dim`` smallest eigenvalues form ``Z``.

    Args:
        S_list: Sequence of 2-D arrays; the row count is taken from the first
            one and the kernels are expected to be ``r x r``.
        Z_dim: Number of eigenvectors (columns of ``Z``) to keep.
        lam: Ridge term added to the kernel diagonal.

    Returns:
        Tuple ``(Z, err)``. ``Z`` is rescaled so that the whole matrix has
        Frobenius norm 1 (not each column individually). ``err`` is
        ``sum_i ||P_i Z - Z||_F^2``.

    NOTE(review): ``existing_method`` returns singular vectors without this
    rescaling, so the two error values appear to be on different scales.
    Confirm that this is intended before comparing them.
    """
    r = S_list[0].shape[0]
    I = np.eye(r)
    Ps = []
    for S in S_list:
        K = rbf_kernel(S, S, gamma=1.0 / S.shape[1])
        # K commutes with (K + lam*I)^-1, so solving the linear system gives
        # the same P as K @ inv(K + lam*I) without forming an explicit inverse.
        Ps.append(np.linalg.solve(K + lam * I, K))
    # Build M
    M = np.zeros((r, r))
    for P in Ps:
        diff = P - I
        M += diff.T @ diff
    # M is symmetric positive semi-definite, so use eigh: it returns real
    # eigenvalues in ascending order with orthonormal eigenvectors, whereas
    # the general eig may return complex or non-orthogonal results.
    _, eigvecs = np.linalg.eigh(M)
    Z = eigvecs[:, :Z_dim]
    # Rescale the whole matrix to Frobenius norm 1
    Z = Z / np.linalg.norm(Z, 'fro')
    # compute error
    err = 0.0
    for P in Ps:
        err += np.linalg.norm(P @ Z - Z, 'fro')**2
    return Z, err

def run_experiment(m=3, r=50, d=20, trials=5, seed=0):
    """Compare the proposed and the existing method on two data scenarios.

    In every trial, data is generated first with ``generate_data_random`` and
    then with ``generate_data_kpca``, each with a fresh seed drawn from a
    master ``RandomState``. Both methods are run with target dimension ``d``
    and the ratio ``err_new / err_old`` is recorded.

    Args:
        m: Number of matrices per data set.
        r: Number of rows per matrix.
        d: Number of columns per matrix, also used as the target dimension.
        trials: Number of repetitions per scenario.
        seed: Seed of the master random state that produces per-trial seeds.

    Returns:
        Tuple ``(mean_ratio_random, mean_ratio_kpca)``.
    """
    rng = np.random.RandomState(seed)
    # Scenario order is part of the random stream: random first, then kpca.
    generators = (generate_data_random, generate_data_kpca)
    ratios = [[] for _ in generators]
    for _ in range(trials):
        for bucket, generate in zip(ratios, generators):
            # Integer bound: randint is documented for ints, not floats (1e6).
            S_list = generate(m, r, d, seed=rng.randint(10**6))
            _, err_old = existing_method(S_list, d)
            _, err_new = proposed_method(S_list, d)
            bucket.append(err_new / err_old)
    ratios_rand, ratios_kpca = ratios
    return np.mean(ratios_rand), np.mean(ratios_kpca)

# Run the comparison with the default settings and display the mean error
# ratios (proposed / existing) for the random and the kernel-PCA scenario.
ratio_rand, ratio_kpca = run_experiment()
ratio_rand, ratio_kpca


(5.970394365302546e-06, 0.01090604340432922)

In [1]:
from __future__ import annotations

import argparse
from logging import INFO, FileHandler, getLogger

import yaml

from config.config import Config
from src.data_collaboration import DataCollaborationAnalysis
from src.load_data import load_data # , load_data_breast_cancer, load_data_diabetes, load_data_har
from src.paths import CONFIG_DIR, INPUT_DIR, OUTPUT_DIR
from src.institutional_analysis import centralize_analysis, dca_analysis, individual_analysis

import os
import sys

# Emulate the command line `<script> exp004`: argparse reads sys.argv, which
# inside a notebook would otherwise hold the kernel launcher's arguments.
sys.argv = ["notebook", "exp004"]

# Experiment name. nargs="?" makes the positional optional so that `default`
# is honoured; argparse ignores `default` on a required positional.
parser = argparse.ArgumentParser()
parser.add_argument("name", type=str, nargs="?", default="exp004")
args = parser.parse_args()

# Move the working directory to the parent of the directory the kernel started
# in. Guarded so that re-running this cell does not climb one level further up
# on every execution.
if "child_path" not in globals():
    child_path = os.getcwd()
    parent_path = os.path.dirname(child_path)
    os.chdir(parent_path)

# Path of the YAML config and of the output directory for this experiment
cfg_path = CONFIG_DIR / f"{args.name}.yaml"
output_path = OUTPUT_DIR / args.name

# Read the YAML as UTF-8 and build the Config from it
with cfg_path.open(encoding="utf-8") as f:
    cfg_dict = yaml.safe_load(f)

config = Config(**cfg_dict,
                output_path=output_path,
                input_path=INPUT_DIR)

# Create the output directory
output_path.mkdir(parents=True, exist_ok=True)

# Logging: write to <output>/result.log. An existing handler for the same file
# is reused so that re-running the cell does not duplicate every log line.
logger = getLogger(__name__)
logger.setLevel(INFO)
log_file = os.path.abspath(config.output_path / "result.log")
handler = next(
    (h for h in logger.handlers
     if isinstance(h, FileHandler) and h.baseFilename == log_file),
    None,
)
if handler is None:
    handler = FileHandler(filename=log_file, encoding="utf-8")
    logger.addHandler(handler)

train_df, test_df = load_data(config=config)
train_df

Unnamed: 0,SpMax_L,J_Dz(e),nHM,F01[N-N],F04[C-N],NssssC,nCb-,C%,nCp,nO,...,C-026,F02[C-N],nHDon,SpMax_B(m),Psi_i_A,nN,SM6_B(m),nArCOOR,nX,target
0,5.313,3.4571,1,0,0,0,2,35.3,0,4,...,2,0,2,4.735,3.477,0,9.513,0,0,1
1,4.517,2.6567,1,0,0,0,1,44.4,1,0,...,0,0,0,3.622,2.086,0,8.370,0,1,0
2,4.985,2.5536,0,0,3,0,3,50.0,0,0,...,1,2,2,3.765,2.077,1,8.528,0,0,0
3,3.902,2.6931,2,0,0,0,0,30.8,2,0,...,0,0,0,3.547,2.022,0,8.444,0,2,0
4,4.414,3.4137,0,0,0,0,0,33.3,2,2,...,0,0,0,3.426,3.262,0,7.758,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,4.982,2.8025,0,0,2,0,2,50.0,0,2,...,0,2,1,3.853,2.833,1,8.508,0,0,1
516,4.732,3.2417,0,0,2,0,2,37.5,0,0,...,2,4,4,3.655,2.417,2,7.973,0,0,0
517,4.887,2.2910,2,0,2,0,6,44.8,0,0,...,4,4,4,3.954,2.337,2,9.239,0,2,0
518,4.380,3.3996,0,0,0,0,0,30.8,1,4,...,0,0,1,3.379,3.028,0,8.090,0,0,1


In [2]:
from __future__ import annotations

from typing import Optional, TypeVar

import category_encoders as ce
import numpy as np
import pandas as pd

from config.config import Config
from config.config_logger import record_config_to_cfg, record_value_to_cfg
from src.model import h_models
from src.utils import reduce_dimensions


def h_ml_model(
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_test: np.ndarray,
    y_test: np.ndarray,
    config: Config,
) -> float:
    """Run the machine-learning model and return its evaluation value.

    The evaluation callable is looked up in ``h_models`` under the key
    ``config.h_model`` and called with the train/test features and targets.
    """
    return h_models[config.h_model](X_train, y_train, X_test, y_test)

# Datasets to sweep over, processed in this order.
LOADERS = [
    "statlog",
    "qsar",
    "breast_cancer",
    "har",
    "adult",
    "diabetes130",
    "bank_marketing",  # no change in performance
]

y_name = 'target'
config.F_type = "kernel_pca"  # SVD or kernel_pca
for data in LOADERS:
    config.dataset = data
    train_df, test_df = load_data(config=config)

    # Separate target and features once per dataset; the split does not depend
    # on `param`.
    # NOTE(review): assumes reduce_dimensions / h_ml_model do not modify their
    # input arrays in place - confirm.
    y_train = train_df[y_name].to_numpy()
    y_test = test_df[y_name].to_numpy()
    X_train = train_df.drop(columns=[y_name]).to_numpy()
    X_test = test_df.drop(columns=[y_name]).to_numpy()

    best_score = -float("inf")  # metric is maximised
    best_param = None
    score_dict = {}  # param -> metric, kept for inspection after the sweep
    # alternative narrower grid: [0.0001, 0.0005, 0.005]
    for param in [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1.0, 10.0, 100.0, 1000.0, 10000.0]:
        # Dimensionality reduction with the method selected by config.F_type
        X_tr_svd, X_te_svd = reduce_dimensions(X_train, X_test, F_type=config.F_type, n_components=config.dim_integrate, param=param)
        metric = h_ml_model(
            X_train=X_tr_svd,
            y_train=y_train,
            X_test=X_te_svd,
            y_test=y_test,
            config=config,
        )
        score_dict[param] = metric

        if metric > best_score:  # larger is better
            best_score, best_param = metric, param

    print("\n===== Grid-Search Result =====")
    print(f" dataset   : {config.dataset}")
    print(f" best param : {best_param}")
    print(f" best metric: {best_score:.4f}")


  df = pd.read_csv("input/statlog_german.data", delim_whitespace=True, header=None, names=colnames)



===== Grid-Search Result =====
 dataset   : statlog
 best param : 1e-05
 best metric: 0.7023

===== Grid-Search Result =====
 dataset   : qsar
 best param : 0.0001
 best metric: 0.9124

===== Grid-Search Result =====
 dataset   : breast_cancer
 best param : 1e-05
 best metric: 0.9869
['fBodyAcc-bandsEnergy()-1,8', 'fBodyAcc-bandsEnergy()-9,16', 'fBodyAcc-bandsEnergy()-17,24', 'fBodyAcc-bandsEnergy()-25,32', 'fBodyAcc-bandsEnergy()-33,40', 'fBodyAcc-bandsEnergy()-41,48', 'fBodyAcc-bandsEnergy()-49,56', 'fBodyAcc-bandsEnergy()-57,64', 'fBodyAcc-bandsEnergy()-1,16', 'fBodyAcc-bandsEnergy()-17,32', 'fBodyAcc-bandsEnergy()-33,48', 'fBodyAcc-bandsEnergy()-49,64', 'fBodyAcc-bandsEnergy()-1,24', 'fBodyAcc-bandsEnergy()-25,48', 'fBodyAccJerk-bandsEnergy()-1,8', 'fBodyAccJerk-bandsEnergy()-9,16', 'fBodyAccJerk-bandsEnergy()-17,24', 'fBodyAccJerk-bandsEnergy()-25,32', 'fBodyAccJerk-bandsEnergy()-33,40', 'fBodyAccJerk-bandsEnergy()-41,48', 'fBodyAccJerk-bandsEnergy()-49,56', 'fBodyAccJerk-bandsEn

  X = pd.read_csv(root / split / f"X_{split}.txt", delim_whitespace=True, header=None, names=features)
  X = pd.read_csv(root / split / f"X_{split}.txt", delim_whitespace=True, header=None, names=features)



===== Grid-Search Result =====
 dataset   : har
 best param : 0.0001
 best metric: 0.9918

===== Grid-Search Result =====
 dataset   : adult
 best param : 0.001
 best metric: 0.8792

===== Grid-Search Result =====
 dataset   : diabetes130
 best param : 0.0001
 best metric: 0.6092

===== Grid-Search Result =====
 dataset   : bank_marketing
 best param : 0.01
 best metric: 0.9128


In [None]:
# Note: for the "har" dataset the grid search above selected param = 0.0001.