In [1]:
import os,sys
import time,datetime
import GPy
import GPyOpt
import numpy as np
from scipy.stats import bernoulli, norm
import pandas as pd

In [2]:
# Execute the project helper scripts so the names they define become available in this kernel.
# NOTE(review): presumably these provide splitDataset523 and PG4GM, which are used below
# without any import — confirm against the scripts.
%run ../tools/TrainTest.py
%run ../tools/PG4GM.py

In [3]:
# Number of row permutations generated in the preprocessing cell.
trial_num = 5
# NOTE(review): not referenced in the visible cells; presumably a tuning budget — confirm.
tune_trial_num = 100
# Output directory and filename template used by saveGPyOpt();
# {0} receives the fold number and {1} the timestamp string.
snapshot_path = '../result/snapshot/complement'
snapshot_form = 'PG4-hyper4-EI5000w500-split{0}-{1}'
# CSV file loaded into rDF by the preprocessing cell.
peer_review_csv = '../dataset/open_peer_review/peer_review/peer_review_forPG3_suffled.csv'

# functions
def timeStamp():
    """Return the current local date and time as a 14-digit 'YYYYMMDDHHMMSS' string."""
    return datetime.datetime.today().strftime("%Y%m%d%H%M%S")

def saveGPyOpt(myBopt,fold_num):
    """Save the ``X`` and ``Y`` arrays of an optimizer run as a compressed ``.npz`` snapshot.

    The arrays are passed positionally, so they load back under numpy's
    default keys ``arr_0`` (``myBopt.X``) and ``arr_1`` (``myBopt.Y``).

    Parameters
    ----------
    myBopt : object exposing ``X`` and ``Y`` attributes.
        NOTE(review): presumably a GPyOpt optimizer instance — confirm.
    fold_num : value formatted into the ``{0}`` slot of ``snapshot_form``.
    """
    # Create the snapshot directory on first use, so a missing folder cannot
    # make the save fail after the optimization has already run.
    if not os.path.isdir(snapshot_path):
        os.makedirs(snapshot_path)
    # Spell out the suffix numpy would otherwise append silently, so that the
    # path reported below is the file that actually exists on disk.
    filename = snapshot_form.format(fold_num,timeStamp()) + '.npz'
    path = os.path.join(snapshot_path, filename)
    np.savez_compressed(path, myBopt.X, myBopt.Y)
    print("Save: {}".format(path))

In [4]:
# Preprocess: load the peer-review CSV, shuffle its rows and split it into train / val / test.
rDF = pd.read_csv(peer_review_csv)
# Fixed seed so every run draws the same permutations.
np.random.seed(12345678)
# `range` replaces the Python-2-only `xrange` so the cell also runs on Python 3;
# only trial_num permutations are built, so the eager list is harmless on Python 2.
shuffled_pattern = [np.random.permutation(len(rDF)) for i in range(trial_num)]
# Only the first permutation is used here to reorder the rows.
df = rDF.take(shuffled_pattern[0]).reset_index(drop=True)
# splitDataset523 is not defined in the visible cells; it returns the three splits unpacked below.
train, val, test = splitDataset523(df)

In [5]:
# Build the model wrapper from the three splits, then evaluate one hyper-parameter
# setting; the returned score is shown as the cell output.
# NOTE(review): going by its name, rmseForVal presumably returns RMSE on the validation
# split, and the four numbers are model hyper-parameters defined by PG4GM — confirm.
pg4gm = PG4GM(train,val,test)
pg4gm.rmseForVal([2.30348081, 4.4241827, 57.07851846, 88.98764654])

0.9463707444018544

In [6]:
# Same hyper-parameter list as the rmseForVal call, passed to rmseForTest.
# NOTE(review): presumably the RMSE on the test split, going by the method name — confirm.
pg4gm.rmseForTest([2.30348081, 4.4241827, 57.07851846, 88.98764654])

0.929336080216673

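Work in progress: prototyping `aucForVal(self, hyper_list)`. The cells below fit the model once and recompute the validation score by hand from the fitted parameters.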

In [7]:
# Fit the model with the same hyper-parameter list used for the RMSE calls and keep the
# returned fit object for inspection in the following cells.
stan_fit = pg4gm.fit([2.30348081, 4.4241827, 57.07851846, 88.98764654])

In [8]:
# Display the fit summary: per-parameter mean, se_mean, sd, quantiles, n_eff and Rhat.
stan_fit

Inference for Stan model: anon_model_2e3cad05ee880fcb1201e10952e4871a.
4 chains, each with iter=5000; warmup=500; thin=1; 
post-warmup draws per chain=4500, total post-warmup draws=18000.

                   mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
ability[0]         2.46  1.5e-3   0.21   2.05   2.32   2.46   2.59   2.86  18000    1.0
ability[1]         2.33  1.7e-3   0.23   1.88   2.17   2.32   2.48   2.77  18000    1.0
ability[2]         2.32  1.7e-3   0.22   1.88   2.17   2.32   2.47   2.76  18000    1.0
ability[3]         2.54  1.2e-3   0.17   2.21   2.43   2.54   2.65   2.87  18000    1.0
ability[4]         2.36  1.7e-3   0.23   1.92   2.21   2.36   2.51   2.81  18000    1.0
ability[5]         2.53  1.5e-3    0.2   2.13   2.39   2.53   2.67   2.92  18000    1.0
ability[6]         2.39  1.6e-3   0.21   1.96   2.24   2.39   2.53   2.81  18000    1.0
ability[7]          2.3  1.7e-3   0.22   1.86   2.16    2.3   2.46   2.74  18000    1.0
ability[8]         

In [9]:
# Take column 0 of the summary table for every parameter row.
# NOTE(review): column 0 is presumably the posterior mean (the printed summary lists
# `mean` first), i.e. the EAP estimate — confirm.
eap_value = stan_fit.summary()['summary'][:,0]

In [11]:
# Cut the flat estimate vector into three consecutive chunks of pg4gm.userNum entries each.
# NOTE(review): assumes the summary rows are ordered ability, reliability, bias — confirm
# against the Stan model's parameter declaration order.
split = pg4gm.userNum
ability, reliability, bias = (
    eap_value[start:start + split] for start in (0, split, split * 2)
)

In [12]:
# Inspect the second chunk of the estimate vector (one value per user).
reliability

array([ 0.18891036,  0.05844985,  0.04031271,  0.4531506 ,  0.10993906,
        0.60083213,  0.07674902,  0.04010513,  0.04714963,  0.04275963,
        0.05839596,  0.04033478,  0.05920047,  0.0401176 ,  0.60214493,
        0.05871203,  0.0455751 ,  0.04028224,  0.04037629,  0.21034326,
        0.04196154,  0.12739254,  0.04035267,  0.05952966,  0.0591084 ,
        0.14591395,  0.05796801,  0.04056443,  0.04096329,  0.05794902,
        0.40285495,  0.04439724,  0.07720661,  0.04039263,  0.80141208,
        0.04070345,  0.04039649,  0.07538459,  0.05682247,  0.29700886,
        0.04033762,  0.04155872,  0.04038425,  0.03959435,  0.09038882,
        0.21282246,  0.04064043,  0.31552868,  0.07613843,  0.04042161,
        0.05840797,  0.34737582,  0.49462157,  0.09532646,  0.05838473,
        0.14580775,  0.04071741,  0.95175126,  0.06351004,  0.11063385,
        0.44848083,  0.23500023,  0.58353709,  0.14513725,  0.04121808,
        0.03999504,  0.15370019,  0.08003271,  0.05820537,  0.04

In [13]:
# Integer id arrays taken from the model wrapper; they are used below to index the
# per-user estimate arrays.
# NOTE(review): presumably the sender_id / receiver_id columns of the validation split — confirm.
sender = pg4gm.senderVal
receiver = pg4gm.receiverVal

In [14]:
# Sanity check: first (sender, receiver) pair, to compare with row 0 of `val`.
sender[0], receiver[0]

(96, 12)

In [15]:
# Preview the first rows of the validation frame instead of dumping the whole DataFrame;
# ten rows are enough to cross-check the ids and values against the arrays above.
val.head(10)

Unnamed: 0,sender_id,receiver_id,corrected,value
0,96,12,1,3
1,247,151,0,3
2,30,192,0,3
3,21,67,0,4
4,3,227,1,3
5,57,34,0,4
6,165,200,1,1
7,170,140,1,3
8,247,376,1,3
9,34,269,1,1


In [16]:
# Display the sender id array (numpy abbreviates long arrays with "...").
sender

array([ 96, 247,  30, ..., 123,  34, 101])

In [17]:
# Predicted value per row: the receiver's ability estimate plus the sender's bias estimate,
# looked up by indexing the per-user arrays with the id arrays.
valueEst = ability[receiver] + bias[sender]

In [19]:
from math import sqrt
from sklearn.metrics import mean_squared_error

# Root-mean-squared error between pg4gm.valueVal and the hand-built predictions.
rmse = sqrt(mean_squared_error(y_true=pg4gm.valueVal, y_pred=valueEst))

In [20]:
# Display the hand-computed RMSE for comparison with the rmseForVal output above.
rmse

0.9463947133981274

In [21]:
# Sanity check: the target array and the prediction array should have the same length.
len(pg4gm.valueVal), len(valueEst)

(1050, 1050)