In [17]:
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns  # import the required libraries (make them available)
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.svm import SVR


In [18]:
# Import scikit-learn's linear regression model (the import itself now lives in the import cell at the top)


In [19]:
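# Import the function that splits the data into training and test sets (the import itself now lives in the import cell at the top)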


In [20]:
# Read the experimental results from the workbook and keep them in `df`.
df = pd.read_excel(io="result_seebeck_2.xlsx")


In [21]:
# The spreadsheet headers are in Japanese; replace them with English names.
# The assignment is positional, so the sheet must have exactly three columns.
df.columns = [
    'halfgold_ratio',
    'doping_concentration',
    'resistance_reduction',
]


In [22]:
# Preprocessing: coerce every column to numeric, then drop rows that are unusable.
NUMERIC_COLUMNS = ['halfgold_ratio', 'doping_concentration', 'resistance_reduction']
BASE_FEATURES = ['halfgold_ratio', 'doping_concentration']
TREE_FEATURES = BASE_FEATURES + ['interaction']

n_rows_before = len(df)
for column in NUMERIC_COLUMNS:
    # errors='coerce' turns non-numeric cells into NaN so dropna() removes them below.
    df[column] = pd.to_numeric(df[column], errors='coerce')
df = df.dropna().reset_index(drop=True)

# Report discarded rows instead of losing them silently.
n_dropped = n_rows_before - len(df)
if n_dropped:
    print(f"dropped {n_dropped} row(s) with missing or non-numeric values")


def _add_interaction(features):
    """Return a copy of `features` with an 'interaction' column appended.

    The new column is the element-wise product of 'halfgold_ratio' and
    'doping_concentration'. The input frame is not modified.
    """
    return features.assign(
        interaction=features['halfgold_ratio'] * features['doping_concentration']
    )


# Explanatory variables and target.
X = df[BASE_FEATURES]
y = df['resistance_reduction']

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Model definitions.
pipe_poly = Pipeline([
    ('poly', PolynomialFeatures(degree=3, include_bias=False)),
    ('scaler', StandardScaler()),
    ('lr', LinearRegression())
])
rf = RandomForestRegressor(n_estimators=300, random_state=42, n_jobs=-1)
gbr = GradientBoostingRegressor(n_estimators=300, learning_rate=0.05, max_depth=3, random_state=42)
svr_pipe = Pipeline([('scaler', StandardScaler()), ('svr', SVR(kernel='rbf', C=10, epsilon=0.01))])

# Training: the random forest and gradient boosting models additionally receive
# the explicit interaction column; the polynomial and SVR pipelines use the two
# base features only.
X_train2 = _add_interaction(X_train)
X_test2 = _add_interaction(X_test)

pipe_poly.fit(X_train, y_train)
rf.fit(X_train2, y_train)
gbr.fit(X_train2, y_train)
svr_pipe.fit(X_train, y_train)

# Evaluation: R^2 on the held-out test set, plus 5-fold CV for the random forest
# over the recombined train + test rows.
print("poly test R2:", pipe_poly.score(X_test, y_test))
print("rf test R2:", rf.score(X_test2, y_test))
print("gbr test R2:", gbr.score(X_test2, y_test))
print("svr test R2:", svr_pipe.score(X_test, y_test))
print("rf CV R2 (5):", cross_val_score(rf, pd.concat([X_train2, X_test2]), pd.concat([y_train, y_test]), cv=5, scoring='r2').mean())

# Prediction grid: every combination of the two input variables.
halfgold_vals = list(range(100, -1, -10))   # 100, 90, ..., 0
doping_vals = list(range(0, 17, 1))         # 0..16 in steps of 1
pred_grid = pd.DataFrame(list(itertools.product(halfgold_vals, doping_vals)),
                         columns=BASE_FEATURES)

# Build the interaction column with the same helper used for training, so the
# tree models see features constructed identically at fit and predict time.
pred_grid = _add_interaction(pred_grid)

# Add one prediction column per model.
pred_grid['predicted_poly'] = pipe_poly.predict(pred_grid[BASE_FEATURES])
pred_grid['predicted_rf']   = rf.predict(pred_grid[TREE_FEATURES])
pred_grid['predicted_gbr']  = gbr.predict(pred_grid[TREE_FEATURES])
pred_grid['predicted_svr']  = svr_pipe.predict(pred_grid[BASE_FEATURES])

# Save the results. Create the target directory first: to_excel does not create
# missing parent directories and would otherwise fail on a fresh checkout.
out_path = "1003/predictions_nonlin_models.xlsx"
out_dir = os.path.dirname(out_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
pred_grid.to_excel(out_path, index=False)
print(f"保存しました: {out_path}")


In [23]:
# Split the data into training and test sets (e.g. 80% training, 20% test)
