In [None]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.optimize import minimize

from sklearn.metrics import mean_squared_log_error, mean_squared_error
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the training table (its "y" column is the target used for scoring below)
# and the submission template (its "y" column is filled in at the end).
train_df, sub_df = (
    pd.read_csv(csv_path)
    for csv_path in ("../input/train_data.csv", "../input/submission.csv")
)

In [None]:
def rmsle(y, pred):
    """Return the root mean squared logarithmic error of ``pred`` against ``y``.

    This is the square root of scikit-learn's ``mean_squared_log_error``.
    """
    msle = mean_squared_log_error(y, pred)
    return np.sqrt(msle)

In [None]:
def load_preds(path, col_name="exp010_pred"):
    """Load validation and test predictions from ``path`` and stack them.

    Reads ``oof_val_embedding.pkl`` and ``oof_test_embedding.pkl`` from the
    given directory, flattens each unpickled object into a single column and
    stacks the validation rows on top of the test rows.

    Parameters
    ----------
    path : str or os.PathLike
        Directory containing the two pickle files.
    col_name : str, default "exp010_pred"
        Name of the single column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        One column named ``col_name``; validation rows first, then test rows.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code, so only point this function
    at files you trust.
    """
    # Path() accepts str, Path and any other os.PathLike, so no isinstance check.
    path = Path(path)

    columns = []
    for file_name in ("oof_val_embedding.pkl", "oof_test_embedding.pkl"):
        with open(path / file_name, "rb") as f:
            loaded = pickle.load(f)
        # np.asarray lets plain lists (which have no .reshape) work as well.
        columns.append(np.asarray(loaded).reshape(-1, 1))

    return pd.DataFrame(np.vstack(columns), columns=[col_name])

In [None]:
# Tree-model predictions come from two sources, joined column-wise:
# the exp007 pickles first, then one column read from the takahashi CSVs.
# NOTE: unpickling executes arbitrary code; these files must be trusted.
treemodel_all_val_preds = np.hstack([
    pickle.loads(Path("../output/exp007/oof_val_pred.pkl").read_bytes()),
    pd.read_csv("../output/takahashi/220429_j_012_pred_oof.csv")["FOLD_mean"].values.reshape(-1, 1),
])

treemodel_all_test_preds = np.hstack([
    pickle.loads(Path("../output/exp007/oof_test_pred.pkl").read_bytes()),
    pd.read_csv("../output/takahashi/220429_j_012_submission.csv")["y"].values.reshape(-1, 1),
])

In [None]:
# RMSLE of the unweighted mean of the first two tree-model columns.
rmsle(train_df["y"].values, treemodel_all_val_preds[:, :2].mean(axis=1))

In [None]:
# For every experiment, load its validation ("oof_val") and test ("oof_test")
# prediction pickles, printing the validation RMSLE as each one is read.
all_oof_val_preds = []
all_oof_test_preds = []
exps = [
    "exp012",
    "exp013",
    "exp014",
    "exp015",
    "exp016",
    "exp019",
]
for exp_name in exps:
    # NOTE: pickle.load executes arbitrary code; these files must be trusted.
    with open(f"../output/{exp_name}/oof_val_preds.pkl", "rb") as f:
        preds = pickle.load(f)
    score = rmsle(train_df["y"].values, preds)
    # ".5f" prints five decimals; the former "5f" only set a minimum width
    # of 5 and left the precision at the default six decimals.
    print(f"{exp_name}: score: {score:.5f}")
    all_oof_val_preds.append(preds)

    with open(f"../output/{exp_name}/oof_test_preds.pkl", "rb") as f:
        all_oof_test_preds.append(pickle.load(f))

In [None]:
# Convert each list of per-experiment predictions to an array and transpose it.
# NOTE(review): presumably each entry is a 1-D vector, giving one column per
# experiment — confirm against the pickled shapes.
all_oof_val_preds = np.array(all_oof_val_preds).transpose()
all_oof_test_preds = np.array(all_oof_test_preds).transpose()

In [None]:
# RMSLE of the unweighted row-wise mean of the stacked experiment predictions.
rmsle(train_df["y"].values, np.mean(all_oof_val_preds, axis=1))

In [None]:
# Append the tree-model columns to the right of the experiment columns,
# in the same order for validation and test so the blend weights line up.
all_oof_val_preds = np.hstack((all_oof_val_preds, treemodel_all_val_preds))
all_oof_test_preds = np.hstack((all_oof_test_preds, treemodel_all_test_preds))

In [None]:
# Shape of the stacked validation predictions.
print(np.shape(all_oof_val_preds))

In [None]:
# Shape of the stacked test predictions.
print(np.shape(all_oof_test_preds))

In [None]:
def func(weights):
    """Objective for the blend-weight search: RMSLE of the weighted average.

    The blend is computed with ``np.average`` so the weights are normalised by
    their sum. That is how ``res["x"]`` is applied to the validation and test
    predictions in the later cells of this notebook. A plain weighted sum here
    would let the optimiser tune the overall scale of the weights, which those
    later ``np.average`` calls divide away, so the optimised score would not
    describe the blend that is actually submitted.

    Parameters
    ----------
    weights : array-like
        One weight per column of ``all_oof_val_preds``.

    Returns
    -------
    The value of ``rmsle`` between ``train_df["y"]`` and the blended
    validation predictions.
    """
    pred = np.average(all_oof_val_preds, axis=1, weights=weights)
    return rmsle(train_df["y"].values, pred)


n_models = all_oof_val_preds.shape[1]
# Start the search from the uniform blend.
initial_weights = np.ones(n_models) / n_models
res = minimize(func, initial_weights, method='Nelder-Mead')
# Best objective value found by the optimiser.
res["fun"]

In [None]:
# Optimised blend weights returned by the optimiser.
res.x

In [None]:
# Validation RMSLE of the blend; np.average divides by the sum of the weights.
rmsle(train_df["y"].values, np.average(all_oof_val_preds, weights=res.x, axis=1))

In [None]:
# Blend the test predictions with the optimised weights. The name is rebound
# to the blended result, so rerun the upstream cells before rerunning this one.
all_oof_test_preds = np.average(all_oof_test_preds, weights=res.x, axis=1)

In [None]:
# Write the blended test predictions into the submission's "y" column.
sub_df["y"] = all_oof_test_preds

In [None]:
# Save the submission file without the index column.
sub_df.to_csv(path_or_buf="../output/watanabe_exp_7_12_13_14_15_16_19_tk.csv", index=False)