In [1]:
# Mount Google Drive at /content/drive; every path used in the cells below
# (CFG.save_dir, CFG.data_dir and the CSV inputs) lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# ===================================================================
#  Library
# ===================================================================
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error

import warnings
warnings.simplefilter("ignore")

import numpy as np
from scipy.optimize import minimize

In [3]:
# ===================================================================
#  CFG
# ===================================================================
class CFG:
    """Notebook-wide settings.

    ``save_dir`` and ``filename`` are used by the cells that write the blended
    predictions. The remaining attributes are not referenced by any cell
    visible in this notebook.
    """

    # --- output naming -------------------------------------------------
    # Experiment tag embedded in the names of the CSV files written below.
    filename = "exp00055"

    # --- locations on the mounted Drive --------------------------------
    data_dir = "/content/drive/MyDrive/Colab Notebooks/signate2023/"
    save_dir = "/content/drive/MyDrive/Colab Notebooks/signate2023/exp/"

    # --- seed / budget settings (unused in the visible cells) ----------
    seed = 42
    n_seeds = 3
    n_trials = 2000

In [4]:
# ===================================================================
#  Utils
# ===================================================================
def get_score(y_true, y_pred):
    """Return the MAPE of ``y_pred`` against ``y_true``, expressed in percent.

    The value from ``mean_absolute_percentage_error`` is multiplied by 100.
    """
    return 100 * mean_absolute_percentage_error(y_true, y_pred)

In [5]:
# All inputs live under the directories already declared in CFG, so the paths
# are built from CFG instead of repeating the Drive prefix on every line.
df_train = pd.read_csv(CFG.data_dir + "train.csv")
kun_test = pd.read_csv(CFG.save_dir + "kun_exp00052.csv")
kun_oof = pd.read_csv(CFG.save_dir + "kun_exp00052_oof_pred.csv")
yuji_test = pd.read_csv(CFG.save_dir + "exp050.csv")
yuji_oof = pd.read_csv(CFG.save_dir + "oof_df_exp050.csv")

# Both OOF files are keyed by `id`. validate= makes a duplicated id raise
# instead of silently multiplying rows.
oof_merged = kun_oof.merge(yuji_oof, on="id", validate="one_to_one")

if "id" in df_train.columns:
    # Attach the target by key, so a prediction can never be paired with the
    # price of a different row when the OOF files are ordered differently
    # from train.csv.
    df = oof_merged.merge(
        df_train[["id", "price"]], on="id", how="left", validate="one_to_one"
    )
    assert df["price"].notna().all(), "some OOF ids have no matching row in train.csv"
else:
    # No key to join on: keep the positional pairing, but only when the row
    # counts agree (a mismatch would otherwise produce shifted / NaN targets).
    assert len(oof_merged) == len(df_train), "OOF predictions and train.csv differ in length"
    df = pd.concat([oof_merged, df_train["price"]], axis=1)

# Test-set predictions of both models, joined on `id`.
test = kun_test.merge(yuji_test, on="id", validate="one_to_one")

In [6]:
# Inspect one of the `pred_*` columns to confirm the merged frame loaded.
df.loc[:, "pred_0"]

0         6881.892301
1         3740.634027
2         2954.247573
3         8430.949224
4         3972.418866
             ...     
27527    12650.381284
27528     6494.944663
27529    12231.954239
27530     6832.933932
27531     9925.656545
Name: pred_0, Length: 27532, dtype: float64

In [7]:


# Column-name prefix and number of prediction columns for your own model ("kun").
kun_prefix = "kun_pred_"
kun_num = 10

# Column-name prefix and number of prediction columns for the other person's model.
other_prefix = "pred_"
other_num = 30

# Weight i applies to pred_columns[i]: kun columns first, then the other model's.
pred_columns = (
    [f"{kun_prefix}{idx}" for idx in range(kun_num)]
    + [f"{other_prefix}{idx}" for idx in range(other_num)]
)

# Extract the inputs once. The optimizer calls objective() many times and none
# of this changes between calls, so there is no reason to redo the DataFrame
# lookups on every evaluation.
_oof_columns = [df[name].to_numpy() for name in pred_columns]
_oof_target = df["price"].to_numpy()


def objective(weights):
    """Return the OOF MAPE (in percent) of the weighted blend.

    Parameters
    ----------
    weights : sequence of float
        One weight per entry of ``pred_columns``, in the same order.

    Returns
    -------
    float
        ``get_score`` of the ``price`` column against the weighted sum of the
        prediction columns.

    Notes
    -----
    The function only reads the arrays extracted above; it does not write
    anything back into ``df``.
    """
    # Accumulate column by column, in the same order the later blending cells
    # use, so the optimizer's score and the score recomputed there agree.
    blended = sum(column * weight for column, weight in zip(_oof_columns, weights))
    return get_score(y_true=_oof_target, y_pred=blended)


# Initial weights: every prediction column gets the same weight.
initial_weights = [1 / len(pred_columns)] * len(pred_columns)

result = minimize(objective, initial_weights, method="nelder-mead")

best_weights = result.x

# Nelder-Mead may stop on its iteration / function-evaluation cap rather than
# on convergence. Warnings are silenced in this notebook, so surface the
# termination status explicitly.
if not result.success:
    print("Optimizer stopped without converging:", result.message)

print("Optimized weights:", best_weights)
print("Best score with optimized weights:", result.fun)

Optimized weights: [ 0.00591108 -0.06041368  0.1070986  -0.10989071 -0.0290474  -0.03138705
  0.08063373  0.19293613 -0.29987221  0.0224669  -0.01070242 -0.17032975
  0.01637476 -0.09843777  0.07730471  0.20516017 -0.04297591 -0.03618928
 -0.0839398  -0.12534295  0.23359235  0.15296045  0.28814598 -0.01245249
  0.07777378  0.00304637 -0.03007248 -0.02324332  0.01360605 -0.0573152
  0.07899044 -0.0067685   0.06196584  0.13106564  0.11289115  0.01973249
  0.0081551  -0.01470429  0.13408759  0.20914366]
Best score with optimized weights: 43.57204422204847


In [8]:
# Blend the OOF predictions with the optimized weights: the first kun_num
# weights belong to the kun columns, the following other_num weights to the
# other model's columns.
kun_oof_preds = []
for idx in range(kun_num):
    kun_oof_preds.append(df[f"{kun_prefix}{idx}"] * best_weights[idx])

other_oof_preds = []
for idx in range(other_num):
    other_oof_preds.append(df[f"{other_prefix}{idx}"] * best_weights[kun_num + idx])

df["optimized_pred"] = sum(kun_oof_preds + other_oof_preds)

# Write id + blended prediction to a CSV file (no index column).
optimized_oof_filepath = CFG.save_dir + f"kun_{CFG.filename}_oof_pred.csv"
oof_output = df[["id", "optimized_pred"]]
oof_output.to_csv(optimized_oof_filepath, index=False)

In [9]:
# Read the file back to check what was actually written to disk.
saved_oof = pd.read_csv(optimized_oof_filepath)
saved_oof

Unnamed: 0,id,optimized_pred
0,0,8805.219623
1,1,3799.844508
2,2,2959.861854
3,3,7934.654500
4,4,4461.927715
...,...,...
27527,27527,12385.902685
27528,27528,6939.817355
27529,27529,14186.293321
27530,27530,6658.054764


In [13]:
# Score the blended predictions as reloaded from the saved CSV against the
# training target.
reloaded_pred = pd.read_csv(optimized_oof_filepath)["optimized_pred"]
get_score(y_true=df["price"], y_pred=reloaded_pred)

43.57204422204847

In [None]:
# Apply the optimized weights to the test-set predictions.
# Prefixes and counts come from the same kun_prefix / kun_num / other_prefix /
# other_num settings that fixed the weight order during optimization, so this
# cell cannot drift out of sync with them.
assert len(best_weights) == kun_num + other_num, (
    "weight vector length does not match the number of prediction columns"
)
kun_preds = [test[f"{kun_prefix}{idx}"] * best_weights[idx] for idx in range(kun_num)]
other_preds = [
    test[f"{other_prefix}{idx}"] * best_weights[idx + kun_num] for idx in range(other_num)
]
test["pred"] = sum(kun_preds + other_preds)

# Save the result as a CSV file with neither an index column nor a header row.
test[["id", "pred"]].to_csv(
    CFG.save_dir + f"kun_{CFG.filename}.csv", index=False, header=False
)