I use a voting system in which each model votes only when it is sure about its predicted value. A model that is not sure does not vote.

For now, all models vote the same number of times.

The predictions were obtained without taking the test (`te`) and validation (`val`) sets into account.

In [1]:
import numpy as np
import scipy.sparse as sp
from datafile_methods.data_processing import load_data
from datafile_methods.data_io import save_csv
DATA_PATH = '../data/'
PREDICTION_PATH = '../data/predictions/'

### Load data

In [2]:
# Names of the models whose predictions are combined. Each name is also
# embedded in the file names of that model's prediction files.
models = [
    'baseline',
    'co_clustering',
    'knn_baseline_i',
    'knn_baseline_u',
    'mf_als_recommend',
    'mf_als',
    'mf_svd_sci',
    'mf_svd_skl',
    'nmf',
    'slope_one',
    'sur_svd',
]

n_models = len(models)

# Load predictions. For every file suffix (te / val / sub) keep both a list
# ordered like `models` and a dict keyed by model name.
sp_list_te = [
    load_data('{}model_{}_te.csv'.format(PREDICTION_PATH, name))
    for name in models
]
sp_dict_te = {name: pred for name, pred in zip(models, sp_list_te)}

sp_list_val = [
    load_data('{}model_{}_val.csv'.format(PREDICTION_PATH, name))
    for name in models
]
sp_dict_val = {name: pred for name, pred in zip(models, sp_list_val)}

sp_list_sub = [
    load_data('{}model_{}_sub.csv'.format(PREDICTION_PATH, name))
    for name in models
]
sp_dict_sub = {name: pred for name, pred in zip(models, sp_list_sub)}

# Load observed ratings for the test and validation sets
sp_obs_te = load_data('{}{}.csv'.format(DATA_PATH, 'test_set'))
sp_obs_val = load_data('{}{}.csv'.format(DATA_PATH, 'validation_set'))

### Get weights and a *mean* prediction

#### Use some hand-picked (*arbitrary*) weights that I like

In [3]:
# Hand-picked weight of each model in the mean prediction. A weight of 0
# means the model contributes nothing to the mean.
weights = {
    'baseline': 0,
    'co_clustering': 0,
    'knn_baseline_i': 0,
    'knn_baseline_u': 0,
    'mf_als_recommend': 4,
    'mf_als': 4,
    'mf_svd_sci': 1,
    'mf_svd_skl': 1,
    'nmf': 0,
    'slope_one': 0,
    'sur_svd': 0,
}

total_weights = np.sum(list(weights.values()))

# Weighted mean predictions: scale every model's predictions by its share
# of the total weight, then add the scaled predictions together.
sp_mean_te = np.sum([
    (weights[name] / total_weights) * sp_dict_te[name]
    for name in models
])
sp_mean_val = np.sum([
    (weights[name] / total_weights) * sp_dict_val[name]
    for name in models
])
sp_mean_sub = np.sum([
    (weights[name] / total_weights) * sp_dict_sub[name]
    for name in models
])

### Do some processing

#### Create dataframes

In [5]:
import pandas as pd

In [6]:
# For each set of predictions: pull the non-zero values out of every model's
# matrix (third item returned by sp.find), then put them side by side in a
# dataframe with one column per model.
pred_dict_te = {name: sp.find(mat)[2] for name, mat in sp_dict_te.items()}
df_pred_te = pd.DataFrame(pred_dict_te)

pred_dict_val = {name: sp.find(mat)[2] for name, mat in sp_dict_val.items()}
df_pred_val = pd.DataFrame(pred_dict_val)

pred_dict_sub = {name: sp.find(mat)[2] for name, mat in sp_dict_sub.items()}
df_pred_sub = pd.DataFrame(pred_dict_sub)

In [7]:
# Single-column ('Observed') dataframes holding the non-zero observed
# ratings of the test and validation sets.
df_obs_te, df_obs_val = (
    pd.DataFrame({'Observed': sp.find(mat)[2]})
    for mat in (sp_obs_te, sp_obs_val)
)

In [8]:
# Single-column dataframes holding the non-zero weighted-mean predictions
# of the test and validation sets.
# NOTE(review): the column is labelled 'Observed' although it holds mean
# predictions -- looks like a copy/paste from the observed-ratings cell.
# Confirm that nothing relies on this label before renaming it.
df_mean_te, df_mean_val = (
    pd.DataFrame({'Observed': sp.find(mat)[2]})
    for mat in (sp_mean_te, sp_mean_val)
)

#### See if the predictions can be considered as *decisions* or not

In [9]:
def decide_vote_gen(tol):
    """Build a voting rule for a given tolerance.

    Args:
        tol: A prediction counts as a vote only when its distance to the
            nearest integer is strictly smaller than this value.

    Returns:
        A function taking one prediction and returning its vote.
    """
    def decide_vote(pred):
        """Defines if a model gets to vote or not.

        Returns the prediction rounded to the nearest integer when it lies
        strictly within `tol` of that integer, and 0 (no vote) otherwise.
        Predictions that cannot be rounded (NaN, +/-infinity) also yield 0.

        Note that a prediction close enough to 0 also returns 0, which is
        indistinguishable from "no vote".
        """
        try:
            vote = round(pred)
        except (ValueError, OverflowError):
            # round() raises ValueError for NaN and OverflowError for
            # infinities; such a prediction cannot be a confident vote.
            return 0
        if abs(vote - pred) < tol:
            return vote
        return 0
    return decide_vote

In [10]:
# Turn every test-set prediction into a vote (0 means "no vote"), using a
# tolerance of 0.15 around the nearest integer.
df_votes_te = df_pred_te.applymap(func=decide_vote_gen(tol=.15))

combined_pred = majority_vote **if** exist_at_least_x_votes **else** weighted_mean