In [73]:
import pandas as pd
import numpy as np
import math


data_mape = pd.ExcelFile('MAPE_dataset.xlsx').parse('Sheet1')
data_mape = data_mape.set_index(['Function', 'Epochs', 'Neurons'])
data_pred = pd.ExcelFile('Predictions_dataset.xlsx').parse('Sheet1')
data_pred = data_pred.set_index(['Function', 'Epochs', 'Neurons'])
indexes = ['Propin1', 'SBER1', 'NR1', 'AMAWproc', 'AAPENproc', 'AMINCproc',	
           'DJNU', 'REINB', 'Pmproc', 'LPOORproc', 'LPOOR', 'KBIRTS', 'DEATHS', 'KDEATHS', 
           'ND', 'KND', 'MIGRAOUT', 'SMIGRA', 'EAP', 'EMECproc', 'UNEM', 'UNEMproc', 'UNEMREG']
Y_real = [4.8, 10, 3.4, 114.3065426, 105.6,	122.6, 0.406, 14.4, 97.61330346, 19.9, 13, 9.8, 668.6, 14.5, -301.6, -4.7, 135902, 24843, 75184, 93.9, 4581, 6.1, 2773]

N = data_mape.shape[0] # amount of respondents (models)
M = data_mape.shape[1] # amount of tasks (indicators)


# i for model, j for indicator

def birnbaum_three_param(i, j):
  return ((exp(alpha_diff[j]*(theta[i] - delta[j])))/(1 + exp(alpha_diff[j]*(theta[i] - delta[j]))) * (1 - C_guess[j])) + C_guess[j]

def birnbaum_two_param(i, j):
  return ((exp(alpha_diff[j]*(theta[i] - delta[j])))/(1 + exp(alpha_diff[j]*(theta[i] - delta[j]))))

def rasch(i, j):
  return ((math.exp(1.7*(theta[i] - delta[j])))/(1 + math.exp(1.7*(theta[i] - delta[j]))))


def accuracy(x):
    return (1-x)


def delta_norm(x):
    for i in x.columns:
        max_val_delta = x[i].max()
        min_val_delta = x[i].min()
        x[i] = ((x[i]-min_val_delta)/(max_val_delta-min_val_delta))
    return x

def theta_norm(x):
    y = x.transpose()
    for i in y.columns:
        max_val_theta = y[i].max()
        min_val_theta = y[i].min()
        y[i] = ((y[i]-min_val_theta)/(max_val_theta-min_val_theta))
    return y.transpose()  

def delta_calc(x):
    delta = []
    for i in x.columns:
        T = x[i].sum() 
        qqq = np.log((N-T)/T)
        delta.append(qqq)
    return delta

def theta_calc(x):
    theta = []
    y = x.transpose()
    for i in y.columns:
        K = y[i].sum()
        qqq = np.log(K/(M-K))
        theta.append(qqq)
    return theta

In [60]:
data_acc = accuracy(data_mape) # accuracy = 1-MAPE 
data_acc_copy1 = accuracy(data_mape) # otherwise data_acc changes
data_acc_copy2 = accuracy(data_mape) # otherwise data_acc changes


delta = delta_calc(delta_norm(data_acc_copy1))
theta = theta_calc(theta_norm(data_acc_copy2))

In [61]:
prob = pd.DataFrame(index = delta_calc(delta_norm(data_acc_copy2)), columns = theta_calc(theta_norm(data_acc_copy1))) # create dataframe for probabilities


for j in range(0, len(delta)):                 # fill with prob
    for i in range(0, len(theta)):
        prob.iloc[j][i] = rasch(i, j)
prob

Unnamed: 0,0.284465,0.262233,0.166490,0.735901,0.296290,0.517756,0.437928,0.415545,0.151869,0.391786,...,0.393094,0.364171,0.209432,0.313785,0.718335,0.300126,0.930408,0.532115,0.265349,0.548409
-1.063186,0.991279,0.978943,0.978817,0.974509,0.98119,0.969282,0.958919,0.99182,0.980244,0.974556,...,0.945659,0.954129,0.960923,0.983273,0.966964,0.962407,0.946852,0.955403,0.97717,0.978753
-0.98904,0.998711,0.996853,0.996834,0.996176,0.997195,0.995371,0.993752,0.998791,0.997051,0.996183,...,0.991638,0.992994,0.994068,0.99751,0.995011,0.9943,0.99183,0.993196,0.996583,0.996824
-0.43688,0.987109,0.969058,0.968875,0.962622,0.972331,0.95507,0.940208,0.987906,0.970952,0.96269,...,0.921404,0.933389,0.943071,0.97537,0.951733,0.945194,0.923086,0.935199,0.966481,0.968782
-0.913851,0.992674,0.982275,0.982169,0.978527,0.984173,0.974102,0.965307,0.993129,0.983374,0.978567,...,0.954011,0.961232,0.967011,0.98593,0.972138,0.968271,0.955029,0.962316,0.980777,0.982115
-1.633135,0.993196,0.983526,0.983427,0.980037,0.985292,0.975916,0.967716,0.993619,0.984549,0.980074,...,0.957169,0.963914,0.969305,0.986926,0.974085,0.97048,0.95812,0.964925,0.982132,0.983377
-1.018191,0.993193,0.98352,0.983421,0.98003,0.985286,0.975907,0.967705,0.993616,0.984543,0.980067,...,0.957154,0.963901,0.969294,0.986922,0.974076,0.97047,0.958106,0.964913,0.982125,0.983371
-2.221674,0.993218,0.98358,0.983481,0.980102,0.98534,0.975994,0.96782,0.99364,0.984599,0.980139,...,0.957305,0.964029,0.969404,0.986969,0.974169,0.970575,0.958253,0.965037,0.98219,0.983431
-0.327674,0.965047,0.918651,0.918195,0.90278,0.926852,0.884588,0.850073,0.967162,0.923387,0.902946,...,0.808689,0.834779,0.856593,0.934551,0.876692,0.861466,0.812291,0.838806,0.912255,0.917963
-1.289776,0.988716,0.972854,0.972693,0.96718,0.975735,0.960512,0.94735,0.989415,0.974522,0.96724,...,0.930627,0.941294,0.94989,0.978409,0.957561,0.951771,0.932126,0.942903,0.970583,0.97261
-1.091628,0.995298,0.988583,0.988514,0.98615,0.989812,0.983269,0.977515,0.995591,0.989295,0.986175,...,0.97007,0.974836,0.978632,0.990949,0.981986,0.979458,0.970743,0.97555,0.987611,0.988479


In [68]:
def bagging(indicator):
    pred_list = list(data_pred[indicator])
    def weights_calc(indicator):
        row_num = indexes.index(indicator)
        row_sum = prob.iloc[row_num].sum()
        weights = list(prob.iloc[row_num]/row_sum) # calc weights for bagging
        return weights
    return sum([a*b for a,b in zip(weights_calc(indicator),pred_list)])
    

In [104]:
mape_bagging_list = []
for i in indexes:
    a = bagging(i)
    b = Y_real[indexes.index(i)]
    mape_bagging = abs((b-a)/b)
    mape_bagging_list.append(mape_bagging)
avg_bagging_mape = sum(mape_bagging_list)/len(mape_bagging_list) # avg mape of bagging
print('MAPE after bagging:', avg_bagging_mape)

temp = data_mape.mean()
avg_mape = temp.sum()/len(indexes) # avg mape of single configurations
print('MAPE before:', avg_mape)

increase = 100 - (avg_bagging_mape*100/avg_mape)
print('Bagging increased accuracy on: ', round(increase, 2), '%')

    

MAPE after bagging: 0.23970946302368845
MAPE before: 0.3008804730747607
Bagging increased accuracy on:  20.33 %


In [87]:
prob.to_excel('probs.xlsx')