In [34]:
from sklearn import metrics
import numpy as np
import pandas as pd
from scipy import stats as st
import torch
# Module-level alias for torch.float. No cell visible here reads it --
# presumably it is consumed by the model code; verify before removing.
dtype=torch.float

In [108]:
# Rebuild the ground-truth parameters used to simulate the train/test sets.
# Everything below is deterministic given the seed, so the order of the two
# uniform draws (bu first, then bi) must not change.
num_features = 50
num_users = 40
num_observations = 40
flag = "bcd"

alpha_b = 1
np.random.seed(1)  # fixed seed: the draws must match the stored data set
ind = 10           # number of non-zero scale parameters kept per side
intersect = 5      # leading zeros placed in front of bi's non-zero entries

# NOTE(review): uniform(1, 0) has low > high, which numpy documents as
# officially undefined. It is kept as-is because swapping the arguments would
# change the drawn values and break agreement with the generated data.
bu = np.append(np.random.uniform(1, 0, size=ind), np.zeros(num_features - ind))

# bi = [zeros(intersect), ind random draws], then padded with zeros or cut so
# that it ends up with num_features entries.
bi = np.append(np.zeros(intersect), np.random.uniform(1, 0, size=ind))
if num_features > intersect + ind:
    bi = np.append(bi, np.zeros(num_features - intersect - ind))
else:
    bi = bi[:num_features]

# Force the sparsity pattern explicitly: bu is non-zero only on [0, ind),
# bi only on [halfInd, halfInd + ind).
halfInd = int(ind / 2)
bu[ind:] = 0
bi[:halfInd] = 0
bi[(halfInd + ind):] = 0

# Fixed effects: a handful of non-zero coefficients followed by zeros.
_beta_head = [1, 2, 3, -1, -2, -3, 7, 10]
fixedEff = np.append(_beta_head, np.zeros([num_features - len(_beta_head)]))

# Draw the random effects (theta). Each column with a strictly positive scale
# gets i.i.d. Laplace(0, scale) draws; all other columns stay exactly zero.
def _laplace_effects(scales, n_rows, n_cols):
    """Return an (n_rows, n_cols) array of zero-centred Laplace draws.

    Column ``c`` is filled only when ``scales[c] > 0``; columns are visited
    in increasing order so the random stream is consumed deterministically.
    """
    effects = np.zeros([n_rows, n_cols])
    for col in range(n_cols):
        scale = scales[col]
        if scale > 0:
            effects[:, col] = st.laplace.rvs(loc=0, scale=scale, size=n_rows)
    return effects


# Order matters for reproducibility: user effects first, then item effects.
thetau = _laplace_effects(bu, num_users, num_features)
thetai = _laplace_effects(bi, num_observations, num_features)
    
def getDataset(dataFr, n_features=None, user_offset=None):
    """Reshape a raw simulation frame into the (feature, uid, iid, rating) layout.

    Parameters
    ----------
    dataFr : pandas.DataFrame
        Frame whose first ``n_features`` columns (by position) are the
        covariates and which also has ``uid``, ``iid`` and ``label`` columns.
    n_features : int, optional
        Number of leading covariate columns. Defaults to the notebook-level
        ``num_features``.
    user_offset : int, optional
        Value added to every ``iid``. Defaults to the notebook-level
        ``num_users``.

    Returns
    -------
    pandas.DataFrame
        One row per input row, indexed 0..n-1, with columns ``feature`` (a 1-D
        array of the covariates for that row), ``uid``, ``iid`` (shifted by
        ``user_offset``) and ``rating`` (copied from ``label``).
    """
    if n_features is None:
        n_features = num_features
    if user_offset is None:
        user_offset = num_users

    features = list(dataFr.iloc[:, np.arange(n_features)].values)
    data = pd.DataFrame()
    data['feature'] = features
    # Assign by position (.values), not by index label: the new frame has a
    # fresh 0..n-1 index, so assigning the Series directly would align on
    # labels and produce NaN/misplaced rows whenever dataFr carries a
    # non-default index (e.g. after filtering or shuffling).
    data['uid'] = dataFr['uid'].values
    data['iid'] = dataFr['iid'].values + user_offset
    data['rating'] = dataFr['label'].values
    return data

In [107]:
%run "MixFM.ipynb"
filePath = ''
trainFname = filePath + 'train.csv'
testFname = filePath + 'test.csv'
dataFr = pd.read_csv(trainFname)
testFr = pd.read_csv(testFname)
data = getDataset(dataFr)
test = getDataset(testFr)

mixfmModel = MixFM(num_users, num_observations, num_features,verbose = True, iterations = 50,alpha = 18 / data.rating.var(),epsilon = 1e-2)
w = mixfmModel.fit(data,test)
_,loss, r = mixfmModel.evaluate(test)

initalization ...
Iter    TrLL    TeLL   TrRMSE   TeRMSE   TrR2   TeR2
0    -7.9133    -3.0767    14.4877    14.1848    -0.0003    -0.0038
1    -3.5479    -4.0033    2.1777    12.7375    0.9774    0.1906
2    -3.2802    -3.6361    1.4905    11.7777    0.9894    0.3079
3    -3.1437    -3.3969    1.4762    11.1430    0.9896    0.3805
4    -3.0563    -3.2182    1.4654    10.5473    0.9898    0.4450
5    -2.9999    -3.0962    1.4438    10.0896    0.9901    0.4921
finish initialization... 0.4793263470486332
Iter    TrLL    TeLL   TrRMSE   TeRMSE   TrR2   TeR2
0    -0.8830    -4.4570    1.4438    10.0896    0.9901    0.4921
1    -0.3173    -3.6972    1.3743    9.3439    0.9910    0.5644
2    0.6535    -1.6858    1.5007    8.7767    0.9893    0.6157
3    3.2402    1.9258    1.7028    8.0936    0.9862    0.6732
4    6.5577    5.9573    1.9060    7.2932    0.9827    0.7346
5    9.7622    9.6751    2.1266    6.4105    0.9784    0.7950
6    12.2294    12.5078    2.3353    5.3350    0.9740    0.85

In [110]:
def _report(label, value):
    """Print a heading and its value on two consecutive lines."""
    print(label)
    print(value)


# R2 as returned by mixfmModel.evaluate(test)
_report('r2', r)

# Scale estimates for bu: MSE over the first `ind` entries only
estbu = np.round(mixfmModel.b_u, 5)
msebu = np.mean(np.square(estbu[:ind] - bu[:ind]))
_report('mse of bu', msebu)

# Scale estimates for bi: MSE over entries [halfInd, halfInd + ind) only
_bi_support = slice(halfInd, halfInd + ind)
estbi = np.round(mixfmModel.b_i, 5)
msebi = np.mean(np.square(estbi[_bi_support] - bi[_bi_support]))
_report('mse of bi', msebi)

# Fixed-effect estimates: MSE over every coefficient
estbeta = np.round(mixfmModel.fixedEffects(), 5)
msebeta = np.mean(np.square(estbeta - fixedEff))
_report('mse of beta', msebeta)

# Support recovery on the fixed effects. Estimates were rounded to 5 decimals
# above, so "non-zero" here means non-zero after that rounding.
fp = []
fn = []
_n_coef = len(estbeta)
# false positive: estimated non-zero where the true coefficient is zero
hitfp = sum(1 for j in range(_n_coef) if estbeta[j] != 0 and fixedEff[j] == 0)
# false negative: estimated zero where the true coefficient is non-zero
hitfn = sum(1 for j in range(_n_coef) if estbeta[j] == 0 and fixedEff[j] != 0)

_report('fp', hitfp)
_report('fn', hitfn)

r2
0.9296130742140418
mse of bu
0.054655607742709646
mse of bi
0.24406359056097907
mse of beta
0.040809518968
fp
2
fn
0
