In [None]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
import gc
import re 
from scipy import sparse
import time
import warnings
warnings.filterwarnings("ignore")
pd.options.display.max_colwidth=300
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

from sklearn.linear_model import Ridge


def print_score(df):
    """Print a quick summary of one prediction frame.

    Two lines are written to stdout: the frame's ``shape``, then the mean of
    the element-wise comparison ``less_oof < more_oof`` (i.e. the share of
    rows where the ``less_oof`` value is strictly smaller).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that has ``less_oof`` and ``more_oof`` columns.
    """
    rows_and_cols = df.shape
    print(rows_and_cols)
    # Strict comparison: rows where both values are equal count as misses.
    correctly_ordered = df['less_oof'] < df['more_oof']
    print(correctly_ordered.mean())

# Loading Data 

In [None]:
# Blended in calc_score with weight w[0].
oof_0874 = pd.read_csv('../input/jigsaw4-ridge-0-874-rm-space/oof.csv') # 0.874, train data of unknown origin, tf-idf ridge
print_score(oof_0874)

In [None]:
# Blended in calc_score with weight w[1].
oof_017 = pd.read_csv('../input/kaerururu-jigsaw4-017/oof.csv') # 0.837, trained with val data, roberta-base
print_score(oof_017)

In [None]:
# Blended in calc_score with weight w[2].
oof_029 = pd.read_csv('../input/kaerururu-jigsaw4-029/oof.csv') # 0.836, trained with Jigsaw1 data, roberta-base
print_score(oof_029)

In [None]:
# Blended in calc_score with weight w[3].
oof_047 = pd.read_csv('../input/kaerururu-jigsaw4-047/oof.csv') # 0.793, trained with Jigsaw2 data, roberta-base
print_score(oof_047)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_X2 = pd.read_csv('../input/jigsaw4-use-ridge/oof.csv')
print_score(oof_X2)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_X3 = pd.read_csv('../input/jigsaw4-use-ridge-jigsaw2/oof.csv')
print_score(oof_X3)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_X4 = pd.read_csv('../input/jigsaw4-use-ridge-jigsaw1/oof.csv')
print_score(oof_X4)

In [None]:
# Blended in calc_score with weight w[4].
oof_049 = pd.read_csv('../input/kaerururu-jigsaw4-049/oof.csv')
print_score(oof_049)

In [None]:
# Blended in calc_score with weight w[5].
oof_050 = pd.read_csv('../input/kaerururu-jigsaw4-050/oof.csv')
print_score(oof_050)

In [None]:
# Blended in calc_score with weight w[6].
oof_051 = pd.read_csv('../input/kaerururu-jigsaw4-051/oof.csv')
print_score(oof_051)

In [None]:
# Blended in calc_score with weight w[7].
oof_052 = pd.read_csv('../input/kaerururu-jigsaw4-052/oof.csv')
print_score(oof_052)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_X5 = pd.read_csv('../input/jigsaw4-ridge-jigsaw1-jigsaw2/jigsaw2-word-tfidf-ridge/oof.csv')
print_score(oof_X5)

In [None]:
# Blended in calc_score with weight w[8].
oof_055 = pd.read_csv('../input/kaerururu-jigsaw4-055/oof.csv')
print_score(oof_055)

In [None]:
# Blended in calc_score with weight w[9].
oof_057 = pd.read_csv('../input/kaerururu-jigsaw4-057/oof.csv')
print_score(oof_057)

In [None]:
# Blended in calc_score with weight w[10].
oof_059 = pd.read_csv('../input/kaerururu-jigsaw4-059/oof.csv')
print_score(oof_059)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_062 = pd.read_csv('../input/kaerururu-jigsaw4-062/oof.csv')
print_score(oof_062)

In [None]:
# Blended in calc_score with weight w[11].
oof_R1 = pd.read_csv('../input/jigsaw4-ridge-0-874-ruddit/oof.csv')
print_score(oof_R1)

In [None]:
# Blended in calc_score with weight w[12].
oof_X6 = pd.read_csv('../input/jigsaw4-ridge-jigsaw1-weighted/oof.csv')
print_score(oof_X6)

In [None]:
# Blended in calc_score with weight w[13].
oof_X7 = pd.read_csv('../input/jigsaw4-ridge-jigsaw1-v2/oof.csv')
print_score(oof_X7)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_X8 = pd.read_csv('../input/jigsaw4-ridge-jigsaw2-v2-raw/oof.csv')
print_score(oof_X8)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_X9 = pd.read_csv('../input/jigsaw4-ridge-jigsaw2-v2-raw-float/oof.csv')
print_score(oof_X9)

In [None]:
# Blended in calc_score with weight w[14].
oof_066 = pd.read_csv('../input/kaerururu-jigsaw4-066/oof.csv')
print_score(oof_066)

In [None]:
# Blended in calc_score with weight w[15].
oof_067 = pd.read_csv('../input/kaerururu-jigsaw4-067/oof.csv')
print_score(oof_067)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_068 = pd.read_csv('../input/kaerururu-jigsaw4-068/oof.csv')
print_score(oof_068)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_069 = pd.read_csv('../input/kaerururu-jigsaw4-069/oof.csv')
print_score(oof_069)

In [None]:
# Blended in calc_score with weight w[16].
oof_076 = pd.read_csv('../input/kaerururu-jigsaw4-076/oof.csv')
print_score(oof_076)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_077 = pd.read_csv('../input/kaerururu-jigsaw4-077/oof.csv')
print_score(oof_077)

In [None]:
# Loaded and scored, but not referenced by calc_score.
oof_079 = pd.read_csv('../input/kaerururu-jigsaw4-079/oof.csv')
print_score(oof_079)

In [None]:
# Blended in calc_score with weight w[17].
# NOTE(review): the directory suffix is four digits ("-0080") while the variable
# uses three ("080") - confirm the path is intended.
oof_080 = pd.read_csv('../input/kaerururu-jigsaw4-0080/oof.csv')
print_score(oof_080)

In [None]:
# Loaded and scored, but not referenced by calc_score.
# NOTE(review): the directory suffix is four digits ("-0081") while the variable
# uses three ("081") - confirm the path is intended.
oof_081 = pd.read_csv('../input/kaerururu-jigsaw4-0081/oof.csv')
print_score(oof_081)

In [None]:
# Blended in calc_score with weight w[18].
# NOTE(review): the directory suffix is four digits ("-0082") while the variable
# uses three ("082") - confirm the path is intended.
oof_082 = pd.read_csv('../input/kaerururu-jigsaw4-0082/oof.csv')
print_score(oof_082)

In [None]:
def _weighted_oof_sum(w, column):
    """Return the weighted sum of one column across the blended frames.

    The position of a frame in ``frames`` is the index of its weight in ``w``,
    so this tuple is the single place that defines the blend and its order.
    Terms are accumulated left to right, in index order.
    """
    frames = (
        oof_0874,  # w[0]
        oof_017,   # w[1]
        oof_029,   # w[2]
        oof_047,   # w[3]
        oof_049,   # w[4]
        oof_050,   # w[5]
        oof_051,   # w[6]
        oof_052,   # w[7]
        oof_055,   # w[8]
        oof_057,   # w[9]
        oof_059,   # w[10]
        oof_R1,    # w[11]
        oof_X6,    # w[12]
        oof_X7,    # w[13]
        oof_066,   # w[14]
        oof_067,   # w[15]
        oof_076,   # w[16]
        oof_080,   # w[17]
        oof_082,   # w[18]
    )
    # Index w explicitly (instead of zip) so a too-short weight vector still
    # raises IndexError rather than silently dropping models.
    return sum(w[i] * frame[column] for i, frame in enumerate(frames))


def calc_score(w):
    """Score a weight vector for the blend of the module-level ``oof_*`` frames.

    Parameters
    ----------
    w : sequence of numbers
        One weight per blended frame, indexed ``w[0]`` .. ``w[18]``. Entries
        beyond index 18 are ignored.

    Returns
    -------
    float
        Share of rows where the weighted sum of ``less_oof`` is strictly
        smaller than the weighted sum of ``more_oof``.

    Raises
    ------
    IndexError
        If ``w`` has fewer than 19 entries.
    """
    blended_less = _weighted_oof_sum(w, 'less_oof')
    blended_more = _weighted_oof_sum(w, 'more_oof')
    return (blended_less < blended_more).mean()

In [None]:
import optuna

def objective(trial):
    """Optuna objective: sample one weight per blended model and score the blend.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to draw the parameters ``w0`` .. ``w18`` from [0, 1].

    Returns
    -------
    float
        The value ``calc_score`` returns for the sampled weights.
    """
    num = 19  # number of weights; calc_score reads w[0] .. w[18]
    # suggest_float is the supported replacement for the deprecated
    # suggest_uniform and samples from the same [low, high] range.
    w = [trial.suggest_float(f'w{i}', 0, 1) for i in range(num)]
    return calc_score(w)

# Seed passed to the TPE sampler so the search is seeded rather than random per run.
SEED = 2022
# Maximise the value returned by objective (the calc_score of the sampled weights).
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=SEED))
# Run 100 trials, i.e. evaluate 100 sampled weight vectors.
study.optimize(objective, n_trials=100)

In [None]:
# Read the best weights by parameter name ('w0', 'w1', ...) so that position i
# always belongs to w[i] in calc_score, instead of relying on the iteration
# order of the best_params dict.
best_params = study.best_params
best_weight = np.array([best_params[f'w{i}'] for i in range(len(best_params))])
# Normalise so the weights sum to 1.
best_weight = best_weight / np.sum(best_weight)
best_score = study.best_value

best_score, best_weight

In [None]:
# Show the raw (un-normalised) best weights as a plain list, in the dict's iteration order.
list(study.best_params.values())