# Blending

### Base imports

In [1]:
import os
import json

import numpy as np

### Local imports

In [2]:
from disease_ml.utils import make_submission

### Constants

In [3]:
# Directory that make_submission is pointed at for the blended result.
SUBMISSIONS_DIR = 'submissions/'
# Directory holding one JSON file of predictions per estimator to blend.
BLENDING_DIR = 'blending/'

# Title passed to make_submission and recorded in the experiment params.
EXPERIMENT_TITLE = 'Blending'

### Loading predictions for blending

In [4]:
# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# blended estimators (and of the names recorded with the submission)
# reproducible across runs and machines.
estimator_filenames = sorted(
    filename
    for filename in os.listdir(BLENDING_DIR)
    if not filename.startswith('.')  # skip hidden entries
)
print('\n'.join(estimator_filenames))

Logreg[score=-0.22291][29-03-2017 16:08:30].json
Logreg[score=-0.22297][29-03-2017 16:03:21].json
Logreg[score=-0.22347][29-03-2017 16:00:19].json
RF[score=-0.21969][29-03-2017 16:13:49].json


In [5]:
# Metadata recorded with the submission: the name of every blended estimator.
estimator_params = {
    'estimators': []
}

# One prediction array per estimator, in the same order as the names above.
estimator_predictions = []
for estimator_filename in estimator_filenames:
    # Only the parsing needs the file handle; release it before further work.
    with open(os.path.join(BLENDING_DIR, estimator_filename)) as fin:
        estimator_json = json.load(fin)

    estimator_predictions.append(np.asarray(estimator_json['predictions']))

    # Strip only a trailing '.json'; str.replace would also remove any
    # '.json' occurring in the middle of the file name.
    estimator_name = estimator_filename
    if estimator_name.endswith('.json'):
        estimator_name = estimator_name[:-len('.json')]
    estimator_params['estimators'].append(estimator_name)

# np.stack raises ValueError if the per-estimator arrays differ in shape or if
# nothing was loaded, instead of silently building a ragged/empty array.
estimator_predictions = np.stack(estimator_predictions)

### Function for blending

In [6]:
def blend(estimator_predictions, weights=None):
    """Blend several estimators' predictions into one weighted average.

    Parameters
    ----------
    estimator_predictions : array-like
        Predictions stacked along the first axis, one entry per estimator.
        Nested lists are accepted and converted to an array.
    weights : array-like, optional
        One weight per estimator (length must equal the number of
        estimators); the weights must sum to one. When omitted, every
        estimator gets the same weight.

    Returns
    -------
    numpy.ndarray
        ``estimator_predictions.T.dot(weights)``.

    Raises
    ------
    ValueError
        If there are no predictions to blend, or the number of weights does
        not match the number of estimators.
    AssertionError
        If the weights do not sum to one.
    """
    # Accept plain (nested) lists as well as arrays; a list has no `.T`.
    estimator_predictions = np.asarray(estimator_predictions)
    if estimator_predictions.ndim == 0 or estimator_predictions.shape[0] == 0:
        raise ValueError('No estimator predictions to blend!')
    n_estimators = estimator_predictions.shape[0]

    if weights is None:
        weights = np.full(n_estimators, 1.0 / n_estimators)
    else:
        weights = np.asarray(weights)
        # Raised explicitly (not via `assert`) so the check survives
        # `python -O`, while keeping the exception type and message.
        if not np.allclose(np.sum(weights), 1):
            raise AssertionError('Weights do not sum to one!')
        if weights.shape[:1] != (n_estimators,):
            raise ValueError(
                'Expected {} weights (one per estimator), got shape {}'.format(
                    n_estimators, weights.shape))

    return estimator_predictions.T.dot(weights)

### Submitting results

In [7]:
# Metadata handed to make_submission together with the predictions: the
# experiment title plus the names of the estimators that were blended.
params = dict(
    experiment=dict(title=EXPERIMENT_TITLE),
    estimator=estimator_params,
)

# No weights are passed, so blend() weights every estimator equally.
predictions = blend(estimator_predictions)
make_submission(
    predictions,
    SUBMISSIONS_DIR,
    EXPERIMENT_TITLE,
    params=params,
)