### 라이브러리

In [1]:
import os
import glob

import numpy as np
import pandas as pd

### 모델 예측 결과 경로

In [3]:
# Collect every per-model submission file. glob returns matches in arbitrary,
# platform-dependent order, so sort them to keep the model order (and
# everything derived from it) reproducible between runs.
paths = sorted(glob.glob('./ensemble/submission_*.csv'))
paths

['./ensemble\\submission_catboost.csv',
 './ensemble\\submission_lightgbm.csv',
 './ensemble\\submission_xgboost.csv']

In [4]:
# Derive each model name from its file name 'submission_<model>.csv'.
# The glob pattern guarantees the fixed prefix and suffix, so slice them off
# instead of splitting on '_': splitting keeps only the last token and would
# truncate names that themselves contain underscores (e.g. 'light_gbm').
prefix_len = len('submission_')
suffix_len = len('.csv')
models = [os.path.basename(path)[prefix_len:-suffix_len] for path in paths]
models

['catboost', 'lightgbm', 'xgboost']

### 모델 가중치 설정

In [6]:
# One weight per model, in the same order as `models`: the number stored on
# the first line of that model's score file.
weights = []
for model in models:
    with open(f'./ensemble/score_{model}.txt', 'r') as score_file:
        first_line = score_file.readline()
    weights.append(float(first_line))
weights

[0.7929203096948485, 0.7894148507351776, 0.7916594139397315]

### customer_ID로 정렬

In [7]:
# Load every model's submission file and order its rows by customer_ID.
# NOTE: sort_values() keeps the original index labels, so this only changes
# row order; index-aligned pandas arithmetic still matches rows by label.
dfs = []
for path in paths:
    frame = pd.read_csv(path)
    dfs.append(frame.sort_values(by='customer_ID'))

In [8]:
# Force every prediction into the closed interval [0, 1].
for frame in dfs:
    frame['prediction'] = frame['prediction'].clip(lower=0, upper=1)

### Weighted Averaging

In [9]:
# Weighted average of the model predictions: each model contributes with its
# entry in `weights`, and the sum is normalised by the total weight.
submit = pd.read_csv('./data/sample_submission.csv')

# Arithmetic between pandas Series aligns on index labels, not on row
# position, and sort_values() keeps the original labels.  Look predictions up
# by customer_ID instead, so every row of the template receives the
# prediction for its own customer regardless of row order in the model files.
# NOTE(review): assumes sample_submission.csv has a 'customer_ID' column like
# the model files do -- confirm.
blended = pd.Series(0.0, index=submit.index)
for df, weight in zip(dfs, weights):
    prediction_by_id = df.set_index('customer_ID')['prediction']
    # Fail loudly rather than writing NaN rows for customers a model lacks.
    if not submit['customer_ID'].isin(prediction_by_id.index).all():
        raise ValueError('a model file is missing customer_IDs present in '
                         'sample_submission.csv')
    blended += submit['customer_ID'].map(prediction_by_id) * weight

submit['prediction'] = blended / np.sum(weights)

# index=False: do not write the row index as an extra column.
submit.to_csv('submission_ensemble.csv', index=False)

### Rank Averaging

In [10]:
from scipy.stats import rankdata

# Rank-weighted average of the model predictions.  The weights are the ranks
# of the model scores in `weights` (rankdata gives rank 1 to the smallest
# score), normalised to sum to 1; the predictions themselves are not
# rank-transformed.
submit = pd.read_csv('./data/sample_submission.csv')

ranking_weights = rankdata(weights)
ranking_weights /= np.sum(ranking_weights)

# Arithmetic between pandas Series aligns on index labels, not on row
# position, and sort_values() keeps the original labels.  Look predictions up
# by customer_ID instead, so every row of the template receives the
# prediction for its own customer regardless of row order in the model files.
# NOTE(review): assumes sample_submission.csv has a 'customer_ID' column like
# the model files do -- confirm.
blended = pd.Series(0.0, index=submit.index)
for df, weight in zip(dfs, ranking_weights):
    prediction_by_id = df.set_index('customer_ID')['prediction']
    # Fail loudly rather than writing NaN rows for customers a model lacks.
    if not submit['customer_ID'].isin(prediction_by_id.index).all():
        raise ValueError('a model file is missing customer_IDs present in '
                         'sample_submission.csv')
    blended += submit['customer_ID'].map(prediction_by_id) * weight

submit['prediction'] = blended

# index=False: do not write the row index as an extra column.
submit.to_csv('submission_ranking.csv', index=False)