In [1]:
import glob
import os

import numpy as np
import pandas as pd
from scipy.stats import rankdata

In [2]:
# Collect the per-model submission files that will be blended.
# The blends written further down ('submission_ensemble.csv' and
# 'submission_ranking.csv') match the same glob pattern, so they are filtered
# out here; otherwise a second run of the notebook would treat its own outputs
# as model inputs. Sorting makes the order independent of the filesystem.
generated_outputs = {'submission_ensemble.csv', 'submission_ranking.csv'}
paths = sorted(
    x for x in glob.glob('./submission_*.csv')
    if os.path.basename(x) not in generated_outputs
)
paths

['.\\submission_catboost.csv',
 '.\\submission_lightgbm.csv',
 '.\\submission_xgboost.csv']

In [3]:
# Recover each model's tag from its file name: 'submission_<tag>.csv' -> '<tag>'.
# Only the leading 'submission_' prefix is removed, so a tag that itself
# contains an underscore (e.g. 'submission_xgb_v2.csv' -> 'xgb_v2') is kept
# whole instead of being cut down to the part after the last '_'.
prefix = 'submission_'
models = []
for path in paths:
    stem = os.path.splitext(os.path.basename(path))[0]
    model = stem[len(prefix):] if stem.startswith(prefix) else stem
    models.append(model)
models

['catboost', 'lightgbm', 'xgboost']

In [6]:
# Read one blending weight per model: the first line of './score_<model>.txt'
# parsed as a float. The list order follows `models`.
weights = []
for model in models:
    score_path = f'./score_{model}.txt'
    with open(score_path, 'r') as score_file:
        first_line = score_file.readline()
    weights.append(float(first_line))
weights

[0.7929203096948485, 0.785467448595047, 0.7916594139397315]

In [7]:
# Load every submission file and order its rows by customer_ID.
# Note: sort_values keeps each row's original index label, so any later
# index-aligned arithmetic still pairs rows by their position in the CSV.
dfs = [pd.read_csv(path).sort_values(by='customer_ID') for path in paths]

In [8]:
# Clamp every model's predictions to the closed interval [0, 1].
for df in dfs:
    df['prediction'] = df['prediction'].clip(lower=0, upper=1)

In [9]:
# Score-weighted average of the model predictions, written in the row order of
# the sample submission.
submit = pd.read_csv('./data/sample_submission.csv')

# Each model's predictions are looked up by customer_ID rather than added
# Series-to-Series: pandas arithmetic aligns on index labels, and the frames in
# `dfs` still carry their original row labels after sorting, so a plain `+=`
# would pair rows by CSV position and ignore customer_ID entirely.
# Assumes the sample submission has a 'customer_ID' column like the model
# files do -- TODO confirm.
blend = np.zeros(len(submit), dtype=float)
for df, weight in zip(dfs, weights):
    aligned = df.set_index('customer_ID')['prediction'].reindex(submit['customer_ID'])
    if aligned.isna().any():
        raise ValueError('a model submission is missing predictions for some customer_IDs')
    blend += aligned.to_numpy() * weight

# Normalise by the total weight so the result is a weighted mean.
submit['prediction'] = blend / np.sum(weights)

submit.to_csv('submission_ensemble.csv', index=None)

In [18]:
# Rank-weighted average: models are weighted by the rank of their score
# (lowest score -> rank 1; tied scores share the average rank), and the ranks
# are normalised to sum to 1.
submit = pd.read_csv('./data/sample_submission.csv')

ranking_weights = rankdata(weights)
ranking_weights /= np.sum(ranking_weights)

# Predictions are looked up by customer_ID rather than added Series-to-Series:
# pandas arithmetic aligns on index labels, and the frames in `dfs` still carry
# their original row labels after sorting, so a plain `+=` would pair rows by
# CSV position and ignore customer_ID entirely.
# Assumes the sample submission has a 'customer_ID' column like the model
# files do -- TODO confirm.
blend = np.zeros(len(submit), dtype=float)
for df, weight in zip(dfs, ranking_weights):
    aligned = df.set_index('customer_ID')['prediction'].reindex(submit['customer_ID'])
    if aligned.isna().any():
        raise ValueError('a model submission is missing predictions for some customer_IDs')
    blend += aligned.to_numpy() * weight

# The weights already sum to 1, so no further normalisation is needed.
submit['prediction'] = blend

submit.to_csv('submission_ranking.csv', index=None)