In [1]:
import os,sys
import time,datetime
from math import sqrt

import GPy
import GPyOpt
import numpy as np
import pandas as pd
from scipy.stats import bernoulli, norm
from sklearn.metrics import mean_squared_error

In [2]:
# Execute the project helper scripts so their definitions are available in
# this notebook's namespace.
# NOTE(review): presumably these provide splitDataset523 and PG3GM, which are
# used below but not defined in this notebook -- confirm.
%run ../tools/TrainTest.py
%run ../tools/PG3GM.py

In [3]:
# Number of shuffled row permutations generated in the preprocessing cell.
trial_num = 5
# NOTE(review): not referenced in the visible cells; presumably the number of
# hyperparameter-tuning iterations -- confirm before changing.
tune_trial_num = 100
# Directory that saveGPyOpt writes its snapshot files into.
snapshot_path = '../result/snapshot/complement'
# Snapshot file-name template: {0} is the fold number, {1} the timestamp.
snapshot_form = 'PG3-hyper5-EI5000w500-split{0}-{1}'
# Input CSV that the preprocessing cell loads into rDF.
peer_review_csv = '../dataset/open_peer_review/peer_review/peer_review_forPG3_suffled.csv'

# functions
def timeStamp():
    """Return the current local date-time as a 14-digit 'YYYYmmddHHMMSS' string."""
    return datetime.datetime.today().strftime("%Y%m%d%H%M%S")

def saveGPyOpt(myBopt,fold_num):
    """Snapshot the evaluated points of an optimizer run to a compressed .npz file.

    Parameters
    ----------
    myBopt : object exposing ``X`` and ``Y`` attributes
        Both attributes are stored positionally, so they are read back from
        the archive as ``arr_0`` (X) and ``arr_1`` (Y).
    fold_num : value formatted into slot {0} of ``snapshot_form``
        Identifies the split/fold in the file name.

    The file is written under ``snapshot_path`` and named from
    ``snapshot_form`` plus the current ``timeStamp()``.
    """
    # np.savez_compressed appends ".npz" when the name lacks it. Add the suffix
    # explicitly so the path reported below is the file that really exists.
    filename = snapshot_form.format(fold_num,timeStamp()) + '.npz'
    # Create the snapshot directory on first use instead of failing on save.
    if not os.path.isdir(snapshot_path):
        os.makedirs(snapshot_path)
    path = os.path.join(snapshot_path, filename)
    np.savez_compressed(path, myBopt.X, myBopt.Y)
    print("Save: {}".format(path))

In [4]:
# preprocess train&test dataset
rDF = pd.read_csv(peer_review_csv)
# Fixed seed so the permutations below (and therefore the split) are reproducible.
np.random.seed(12345678)
# One row permutation per trial. `range` replaces the Python-2-only `xrange`
# so the cell also runs on Python 3; the sequence of random draws is unchanged.
shuffled_pattern = [np.random.permutation(len(rDF)) for i in range(trial_num)]
# Only the first permutation is used for this notebook's split.
df = rDF.take(shuffled_pattern[0]).reset_index(drop=True)
# NOTE(review): splitDataset523 is defined outside this notebook; presumably a
# 5:2:3 train/val/test split -- confirm.
train, val, test = splitDataset523(df)

In [7]:
# Build the model wrapper from the three splits, then evaluate one fixed
# five-element hyperparameter vector with rmseForVal.
# NOTE(review): PG3GM is defined outside this notebook; rmseForVal presumably
# returns RMSE on the validation split -- confirm.
pg3gm = PG3GM(train,val,test)
pg3gm.rmseForVal([2.9518053, 42.05718364, 60.99846285, 3.22677753, 32.73395754])

0.7544428402007262

In [8]:
# Same hyperparameter vector as the rmseForVal call, evaluated with rmseForTest
# (presumably RMSE on the held-out test split -- confirm).
pg3gm.rmseForTest([2.9518053, 42.05718364, 60.99846285, 3.22677753, 32.73395754])

0.7799690515012381

Work in progress: implementing `aucForVal(self, hyper_list)`.

In [8]:
# Fit with the same hyperparameter vector used for the RMSE calls above and
# keep the result; when displayed it prints as a Stan model inference summary.
stan_fit = pg3gm.fit([2.9518053, 42.05718364, 60.99846285, 3.22677753, 32.73395754])

In [9]:
stan_fit

Inference for Stan model: anon_model_7913f7a02dc7f40d418d13943b0ae329.
4 chains, each with iter=5000; warmup=500; thin=1; 
post-warmup draws per chain=4500, total post-warmup draws=18000.

                   mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
ability[0]         3.12  5.2e-5 7.0e-3    3.1   3.11   3.12   3.12   3.13  18000    1.0
ability[1]         2.95  1.8e-4   0.02   2.91   2.94   2.95   2.97    3.0  18000    1.0
ability[2]         3.49  9.3e-5   0.01   3.46   3.48   3.49   3.49   3.51  18000    1.0
ability[3]         2.62  2.8e-5 3.7e-3   2.61   2.62   2.62   2.62   2.63  18000    1.0
ability[4]         2.57  1.5e-4   0.02   2.53   2.55   2.57   2.58   2.61  18000    1.0
ability[5]         2.84  5.4e-5 7.2e-3   2.82   2.83   2.84   2.84   2.85  18000    1.0
ability[6]         3.31  7.2e-5 9.6e-3   3.29    3.3   3.31   3.31   3.33  18000    1.0
ability[7]         2.95  1.8e-4   0.02    2.9   2.94   2.95   2.97    3.0  18000    1.0
ability[8]         

In [10]:
# Column 0 of the summary matrix is the "mean" column of the printed Stan
# table, i.e. one posterior mean per parameter, used here as the point estimate.
eap_value = stan_fit.summary()['summary'][:,0]

In [11]:
# Cut the posterior-mean vector into three consecutive segments of length
# userNum, taken in order as ability, bias and reliability.
# NOTE(review): assumes the Stan summary lists the parameters in exactly this
# order -- confirm against the model definition.
split = pg3gm.userNum
ability, bias, reliability = (
    eap_value[split * k:split * (k + 1)] for k in range(3)
)

In [15]:
reliability

array([ 71.05235573,  70.52266305,  72.24612662,  69.45765506,
        69.28456591,  70.15937659,  71.67176847,  70.52297272,
        70.45815295,  72.3076766 ,  69.32715482,  68.76152077,
        68.73964145,  67.07797843,  70.86642001,  71.75649705,
        70.56120168,  70.52362411,  67.84497894,  69.60305027,
        70.27325165,  70.97798717,  69.13409159,  72.11292681,
        70.54208209,  71.14640439,  70.52283914,  68.37092019,
        71.11664738,  70.52237191,  68.70762823,  70.27135142,
        71.50726263,  68.30089609,  70.53621008,  71.59655042,
        71.57898151,  69.04095973,  68.20310122,  71.70859738,
        70.57460205,  70.32103287,  70.13456033,  67.94297481,
        68.06793624,  69.04258753,  69.8377489 ,  71.08486423,
        71.4301897 ,  70.52274507,  69.2593073 ,  69.43977419,
        66.78239394,  70.02952595,  73.60127623,  69.49074489,
        70.52369043,  68.1455299 ,  69.30477242,  69.00736727,
        70.05075168,  70.13891244,  65.7341886 ,  70.20

In [16]:
# Sender / receiver arrays held by the model object for the validation rows;
# their first entries (96, 12) match sender_id / receiver_id of row 0 in `val`.
sender = pg3gm.senderVal
receiver = pg3gm.receiverVal

In [17]:
sender[0], receiver[0]

(96, 12)

In [18]:
val

Unnamed: 0,sender_id,receiver_id,corrected,value
0,96,12,1,3
1,247,151,0,3
2,30,192,0,3
3,21,67,0,4
4,3,227,1,3
5,57,34,0,4
6,165,200,1,1
7,170,140,1,3
8,247,376,1,3
9,34,269,1,1


In [19]:
sender

array([ 96, 247,  30, ..., 123,  34, 101])

In [20]:
# Estimated score per validation row: the receiver's ability plus the
# sender's bias, looked up by id. `reliability` is not used in this estimate.
valueEst = ability[receiver] + bias[sender]

In [21]:
# RMSE between the validation scores held by the model object and the
# hand-computed estimates. `sqrt` and `mean_squared_error` are imported in the
# notebook's first (imports) cell rather than mid-notebook.
rmse = sqrt(mean_squared_error(pg3gm.valueVal, valueEst))

In [22]:
rmse

0.7544427007358172

In [23]:
# Sanity check: there should be one estimate per observed validation value.
len(pg3gm.valueVal), len(valueEst)

(1050, 1050)