In [1]:
import os,sys
import time,datetime
from math import sqrt

import GPy
import GPyOpt
import numpy as np
import pandas as pd
from scipy.stats import bernoulli, norm
from sklearn.metrics import mean_squared_error

In [2]:
%run ../tools/TrainTest.py
%run ../tools/PG5GM.py

In [3]:
# Number of shuffled row permutations generated in the preprocessing cell.
trial_num = 5
# NOTE(review): not referenced in the visible cells -- presumably the number of
# hyper-parameter tuning iterations; confirm against the tuning code.
tune_trial_num = 100
# Directory that saveGPyOpt() writes its snapshot files into.
snapshot_path = '../result/snapshot/complement'
# Snapshot file-name template: {0} is the fold number, {1} a timeStamp() string.
snapshot_form = 'PG5-hyper5-EI5000w500-split{0}-{1}'
# CSV file read with pd.read_csv as the raw dataset.
peer_review_csv = '../dataset/open_peer_review/peer_review/peer_review_forPG3_suffled.csv'

# functions
def timeStamp():
    """Return the current local date and time as a 14-digit ``YYYYmmddHHMMSS`` string."""
    return datetime.datetime.today().strftime("%Y%m%d%H%M%S")

def saveGPyOpt(myBopt,fold_num):
    """Write the optimizer's ``X`` and ``Y`` arrays to a compressed ``.npz`` snapshot.

    The file name is built from ``snapshot_form`` (fold number + current
    timestamp) and placed under ``snapshot_path``.

    Parameters
    ----------
    myBopt : object
        Must expose ``X`` and ``Y`` attributes. They are passed positionally
        to ``np.savez_compressed``, so they are stored under numpy's default
        keys ``arr_0`` and ``arr_1``.
    fold_num : object
        Value substituted into slot ``{0}`` of ``snapshot_form``.

    Returns
    -------
    None
    """
    # Create the snapshot directory on first use instead of failing on a
    # missing path (isdir/makedirs works on both Python 2 and 3).
    if not os.path.isdir(snapshot_path):
        os.makedirs(snapshot_path)
    # np.savez_compressed appends '.npz' when it is missing; add it explicitly
    # so the path printed below is the file that actually exists on disk.
    filename = snapshot_form.format(fold_num, timeStamp()) + '.npz'
    path = os.path.join(snapshot_path, filename)
    np.savez_compressed(path, myBopt.X, myBopt.Y)
    print("Save: {}".format(path))

In [4]:
# preprocess train&test dataset
# Load the raw dataset from the configured CSV file.
rDF = pd.read_csv(peer_review_csv)
# Fixed seed so the permutations below are identical on every run.
np.random.seed(12345678)
# One row permutation per trial. range() replaces the Python-2-only xrange()
# so this cell runs unchanged on Python 2 and Python 3 with the same result.
shuffled_pattern = [np.random.permutation(len(rDF)) for i in range(trial_num)]
# Only the first permutation is applied here; the index is reset so rows are
# numbered 0..n-1 in shuffled order.
df = rDF.take(shuffled_pattern[0]).reset_index(drop=True)
# NOTE(review): splitDataset523 is not defined in this notebook -- presumably
# provided by one of the scripts loaded with %run; confirm its split ratios.
train, val, test = splitDataset523(df)

In [5]:
# Build the PG5GM object from the train / validation / test splits.
pg5gm = PG5GM(train,val,test)
# Score one fixed five-element hyper-parameter vector with rmseForVal; the
# returned value is shown as the cell output.
# NOTE(review): the provenance of these five values is not shown here --
# presumably the best point from an earlier tuning run; confirm.
pg5gm.rmseForVal([3.71383869, 1.72362926, 56.91952985, 47.07077222, 8.10820333])

1.1064682289628933

In [6]:
# Same hyper-parameter vector as the rmseForVal call, scored with rmseForTest.
pg5gm.rmseForTest([3.71383869, 1.72362926, 56.91952985, 47.07077222, 8.10820333])

1.0943193506688267

Work in progress: implementing `aucForVal(self, hyper_list)`.

In [6]:
# Fit with the same hyper-parameter vector used in the RMSE cells and keep the
# returned fit object for inspection in the following cells.
stan_fit = pg5gm.fit([3.71383869, 1.72362926, 56.91952985, 47.07077222, 8.10820333])

In [7]:
# Display the fit object; its repr is the per-parameter posterior summary table.
stan_fit

Inference for Stan model: anon_model_949d6f11d22ac6b9a03b43624cbbf326.
4 chains, each with iter=5000; warmup=500; thin=1; 
post-warmup draws per chain=4500, total post-warmup draws=18000.

                   mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
ability[0]         3.62  3.0e-3   0.28   2.95   3.46   3.68   3.84   3.98 8696.0    1.0
ability[1]         3.46  3.5e-3   0.38   2.58   3.22   3.52   3.76   3.97  12003    1.0
ability[2]         3.45  3.7e-3   0.39   2.55   3.21   3.52   3.76   3.98  10645    1.0
ability[3]         3.74  1.9e-3    0.2   3.27   3.62   3.78   3.89   3.99  10485    1.0
ability[4]         3.54  3.3e-3   0.33   2.76   3.34   3.61   3.81   3.98  10249    1.0
ability[5]         3.91  1.0e-3   0.08   3.69   3.88   3.94   3.97    4.0 6496.0    1.0
ability[6]         3.49  3.3e-3   0.35   2.68   3.27   3.55   3.77   3.98  11374    1.0
ability[7]         3.42  3.9e-3    0.4   2.51   3.18   3.48   3.73   3.97  10554    1.0
ability[8]         

In [8]:
# Take column 0 of the fit's summary matrix for every parameter row.
# The printed fit lists "mean" as its first column, so these are presumably the
# posterior means (EAP estimates) -- TODO confirm the column order against
# summary()['summary_colnames'].
eap_value = stan_fit.summary()['summary'][:,0]

In [9]:
# Carve the leading 3 * userNum entries of eap_value into three consecutive,
# equally sized chunks.
# NOTE(review): assumes the summary rows are ordered ability, reliability,
# bias with userNum entries each -- confirm against the Stan model.
split = pg5gm.userNum
ability, reliability, bias = (
    eap_value[k * split:(k + 1) * split] for k in range(3)
)

In [22]:
# Display the ability chunk sliced out of eap_value.
ability

array([ 3.62184102,  3.45621567,  3.44949251,  3.73736962,  3.54376429,
        3.91343254,  3.48986919,  3.41898763,  3.18104855,  3.47346423,
        3.45054402,  3.38197486,  2.98079551,  3.37411407,  3.87133937,
        3.44031434,  3.34278649,  3.42054047,  3.37226197,  3.06486193,
        3.39475962,  3.54867246,  3.40032532,  3.49484047,  3.45204906,
        3.50938226,  3.45988857,  3.41238876,  3.41073976,  3.46307588,
        3.07342895,  2.9543905 ,  3.5563674 ,  3.4096872 ,  3.94131566,
        3.3945811 ,  3.45975449,  3.461859  ,  3.28553227,  3.62224335,
        3.41171481,  3.35152065,  3.38942583,  3.27311444,  2.86331082,
        3.67443364,  3.30626198,  3.78198215,  3.48471329,  3.41493155,
        3.46835796,  3.66666177,  3.8603967 ,  3.32509119,  3.46013013,
        3.60837156,  3.41235252,  3.93594396,  2.67413483,  3.52399762,
        3.8817132 ,  3.57271476,  3.84051618,  3.52270147,  3.45578277,
        3.36720528,  3.23854573,  3.6331258 ,  3.48032247,  3.46

In [11]:
# Grab the validation-split sender and receiver arrays from the model object
# in a single tuple assignment.
sender, receiver = pg5gm.senderVal, pg5gm.receiverVal

In [12]:
# Show the first sender/receiver pair as a quick check against the val frame.
sender[0], receiver[0]

(96, 12)

In [13]:
# Display the validation DataFrame (sender_id, receiver_id, corrected, value).
val

Unnamed: 0,sender_id,receiver_id,corrected,value
0,96,12,1,3
1,247,151,0,3
2,30,192,0,3
3,21,67,0,4
4,3,227,1,3
5,57,34,0,4
6,165,200,1,1
7,170,140,1,3
8,247,376,1,3
9,34,269,1,1


In [14]:
# Display the validation sender array.
sender

array([ 96, 247,  30, ..., 123,  34, 101])

In [15]:
# Estimated value per validation row: the receiver's ability estimate plus the
# sender's bias estimate, gathered with integer-array indexing.
valueEst = ability[receiver] + bias[sender]

In [16]:
# Root-mean-squared error between the observed validation values and the
# hand-computed estimates. sqrt and mean_squared_error come from the
# notebook's top import cell rather than being imported mid-notebook.
rmse = sqrt(mean_squared_error(pg5gm.valueVal, valueEst))

In [20]:
# Display the hand-computed validation RMSE for comparison with rmseForVal.
rmse

1.1063027304840651

In [19]:
# Sanity check: observed values and estimates must have the same length.
len(pg5gm.valueVal), len(valueEst)

(1050, 1050)