In [1]:
# Mount Google Drive into the Colab runtime at /content/drive; every path used
# by the later cells lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# ===================================================================
#  Library
# ===================================================================
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error

import warnings
warnings.simplefilter("ignore")

import numpy as np
from scipy.optimize import minimize

In [3]:
# ===================================================================
#  CFG
# ===================================================================
class CFG:
    """Static settings for this experiment, read as plain class attributes."""

    # Experiment tag; it is embedded in the name of the submission file.
    filename = "exp00058"

    # Google Drive folders: data_dir is the competition root, save_dir is its
    # "exp/" sub-folder where prediction files are read from and written to.
    data_dir = "/content/drive/MyDrive/Colab Notebooks/signate2023/"
    save_dir = data_dir + "exp/"

    # Run parameters; none of the cells visible in this notebook reference them.
    seed, n_seeds, n_trials = 42, 3, 2000

In [4]:
# ===================================================================
#  Utils
# ===================================================================
def get_score(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred`` against ``y_true``.

    The value from ``mean_absolute_percentage_error`` is multiplied by 100, so
    the result is expressed in percent.
    """
    return 100 * mean_absolute_percentage_error(y_true, y_pred)

In [5]:
# Load the training table plus both teammates' out-of-fold (OOF) and test
# predictions. Paths are built from CFG so the Drive location is defined in
# exactly one place (the resulting path strings are unchanged).
df_train = pd.read_csv(CFG.data_dir + "train.csv")
kun_test = pd.read_csv(CFG.save_dir + "kun_exp00052.csv")
kun_oof = pd.read_csv(CFG.save_dir + "kun_exp00052_oof_pred.csv")
yuji_test = pd.read_csv(CFG.save_dir + "exp050.csv")
yuji_oof = pd.read_csv(CFG.save_dir + "oof_df_exp050.csv")

# Join the two OOF tables on "id".
oof_merged = kun_oof.merge(yuji_oof, on='id')

# The target is attached by row index (concat along axis=1), not by id. If the
# merge dropped or duplicated rows, prices would be paired with the wrong
# predictions or padded with NaN, so fail early with a clear message instead.
# NOTE(review): this also assumes the OOF rows are in the same order as
# train.csv -- confirm against the code that wrote the OOF files.
assert len(oof_merged) == len(df_train), (
    f"OOF rows ({len(oof_merged)}) do not match train rows ({len(df_train)})"
)
df = pd.concat([oof_merged, df_train['price']], axis=1)

# Test-set predictions from both teammates, joined on "id".
test = kun_test.merge(yuji_test, on='id')

In [6]:
# Display the "pred_0" column of df (rendered as the cell's output).
df['pred_0']

0         6881.892301
1         3740.634027
2         2954.247573
3         8430.949224
4         3972.418866
             ...     
27527    12650.381284
27528     6494.944663
27529    12231.954239
27530     6832.933932
27531     9925.656545
Name: pred_0, Length: 27532, dtype: float64

In [10]:


# Column-name prefix and number of columns holding your own predictions.
kun_prefix = "kun_pred_"
kun_num = 10

# Column-name prefix and number of columns holding the other teammate's predictions.
other_prefix = "pred_"
other_num = 30

# Exponent applied to every prediction column before weighting. The original
# note quoted 1.5 as an example value; the value in use is 0.9 and is meant to
# be taken from the result of an optimization (not shown in the visible cells).
optimal_p = 0.9  # exponent currently in use

def objective(weights):
    """Score one candidate weight vector on the out-of-fold predictions.

    Every prediction column is raised to the power ``optimal_p``, scaled by its
    weight and accumulated into a single blend. The first ``kun_num`` entries
    of ``weights`` pair with the ``kun_prefix`` columns; the next ``other_num``
    entries pair with the ``other_prefix`` columns.

    Side effect: the blend is written to ``df["pred"]`` on every call.

    Returns:
        The MAPE in percent (via ``get_score``) of the blend against
        ``df["price"]``.
    """
    # Accumulate from 0 in column order (own columns first, then the other
    # teammate's), one weighted term at a time.
    blend = 0
    for i in range(kun_num):
        blend = blend + (df[f"{kun_prefix}{i}"] ** optimal_p) * weights[i]
    for j in range(other_num):
        blend = blend + (df[f"{other_prefix}{j}"] ** optimal_p) * weights[j + kun_num]

    df["pred"] = blend
    return get_score(y_true=df["price"], y_pred=df["pred"])

# Initial weights: every prediction column starts with the same weight,
# 1 / (total number of prediction columns).
n_weights = kun_num + other_num
initial_weights = [1 / n_weights] * n_weights

result = minimize(objective, initial_weights, method="nelder-mead")

# Nelder-Mead returns normally when it runs out of its iteration/evaluation
# budget, so report the termination status instead of silently presenting a
# budget stop as the optimum.
if not result.success:
    print(
        "WARNING: optimizer did not converge:",
        result.message,
        f"(iterations={result.nit}, evaluations={result.nfev})",
    )

best_weights = result.x

print("Optimized weights:", best_weights)
print("Best score with optimized weights:", result.fun)

Optimized weights: [-0.38403291  0.37768509  0.29492386 -0.13258896 -0.23954017  0.33817504
 -0.01381608  0.30340037 -0.09549457 -0.44270213  0.10034243 -0.40417352
 -0.03074066  0.00345154 -0.1674492  -0.14635804  0.23911993 -0.20549198
 -0.25767224  0.05126396  0.19354924  0.47300523  0.27070636  0.42243563
  0.43717631  0.15279361  0.25607169  0.02755712 -0.64737704 -0.01777145
  0.23766927  0.16422162  0.0860717   0.47083383  0.27038021 -0.29623149
 -0.18600805 -0.09940102  0.47798123  0.52640756]
Best score with optimized weights: 43.952820518229835


In [None]:
# Blend the test predictions with the optimized weights. The column prefixes,
# column counts and exponent are the same variables the objective uses, so the
# train-time and test-time blends cannot drift apart when one of them changes.
kun_preds = [(test[f"{kun_prefix}{idx}"]**optimal_p) * best_weights[idx] for idx in range(kun_num)]
other_preds = [(test[f"{other_prefix}{idx}"]**optimal_p) * best_weights[idx+kun_num] for idx in range(other_num)]
test["pred"] = sum(kun_preds + other_preds)

# Save id and blended prediction as a CSV file with no header row and no index
# column (header=False is the documented way to omit the header).
test[["id", "pred"]].to_csv(CFG.save_dir + f"kun_{CFG.filename}.csv", index=False, header=False)