forked from giorgiop/almostnolabel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo.R
102 lines (81 loc) · 3.4 KB
/
demo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
library(doMC)
registerDoMC(cores=2)
library(ggplot2)
library(data.table)
library(digest)
library(pROC)
# Directory that holds the helper scripts and the demo data set.
# Replace this placeholder with the real location before running.
current_path <- "path/to/files"

# Load the helper scripts. file.path() inserts the path separator, so
# current_path does not need to end with a slash.
for (helper_script in c("auc.R",
                        "mean.map.R",
                        "laplacian.mean.map.R",
                        "alternating.mean.map.R",
                        "logistic.regression.R")) {
  source(file = file.path(current_path, helper_script))
}

# Hyperparameters ----
lambda <- 10  # handed to every learner below
gamma <- .01  # handed to the Laplacian Mean Map learners
sigma <- 1    # handed to laplacian() when building the similarity matrix
set.seed(123456)  # makes the shuffle below reproducible

# Data ----
# The dataset is provided in the form (label, bag, features) and still
# carries the binary label of every row, which is what makes it possible
# to test the models afterwards.
load(file = file.path(current_path, "demo.heart.R"))  # load heart.data
if (nrow(heart.data) < 5) {
  # With fewer than 5 rows floor(n / 5) is 0 and no test fold can be formed.
  stop("heart.data needs at least 5 rows to build a test fold", call. = FALSE)
}
heart.data <- heart.data[sample(nrow(heart.data)), ]  # shuffle the rows
test.fold <- seq_len(floor(nrow(heart.data) / 5))     # first fifth -> test
testset <- heart.data[test.fold, ]
trainset <- heart.data[-test.fold, ]
N <- length(unique(trainset$bag))  # number of distinct bags in the trainset
# Logistic regression - Oracle
# Trained on the binary labels themselves, not on bag proportions.
trainset <- heart.data[-test.fold, ]
w.lr <- logistic.regression(trainset, lambda)
# Columns 1 and 2 are dropped so that only the features remain
# (the data are laid out as (label, bag, features)).
test.X <- as.matrix(testset[, -c(1, 2)])
test.pred <- 1 / (1 + exp(-2 * (test.X %*% w.lr)))
# presumably labels are coded -1/+1, so (label + 1) / 2 rescales them to 0/1
test.auc <- auc((testset$label + 1) / 2, test.pred)
print(test.auc)
# Cast labels to proportions: every row of a bag gets the mean of the
# rescaled labels, (label + 1) / 2, taken over that bag.
bag.ids <- unique(trainset$bag)
for (k in seq_along(bag.ids)) {
  members <- which(trainset$bag == bag.ids[k])
  bag.proportion <- mean((trainset$label[members] + 1) / 2)
  trainset$label[members] <- bag.proportion  # recycled over the whole bag
}
# Mean Map
w.mm <- mean.map(trainset, lambda)
# Feature matrix of the test set (label and bag columns removed)
test.X <- as.matrix(testset[, -c(1, 2)])
mm.margin <- test.X %*% w.mm
test.pred <- 1 / (1 + exp(-2 * mm.margin))
test.y <- (testset$label + 1) / 2
test.auc <- auc(test.y, test.pred)
print(test.auc)
# Laplacian Mean Map with similarity v^G
# The functions that build the Laplacian matrix are in laplacian.mean.map.R
# NOTE(review): "G" is assumed to be the key laplacian() uses for v^G --
# confirm against laplacian.mean.map.R. Requesting "G,s" here would only
# repeat the v^{G,s} experiment that follows.
L <- laplacian(similarity = "G", trainset, N, sigma = sigma)
w.lmm <- laplacian.mean.map(trainset, lambda, gamma, L = L)
# Feature matrix of the test set (label and bag columns removed)
test.X <- as.matrix(testset[, -c(1, 2)])
test.pred <- 1 / (1 + exp(-2 * (test.X %*% w.lmm)))
test.auc <- auc((testset$label + 1) / 2, test.pred)
print(test.auc)
# Laplacian Mean Map with similarity v^{G,s}
# The Laplacian is built first and then handed to the learner.
L <- laplacian(similarity = "G,s", trainset, N, sigma = sigma)
w.lmm <- laplacian.mean.map(trainset, lambda, gamma, L = L)
# Feature matrix of the test set (label and bag columns removed)
test.X <- as.matrix(testset[, -c(1, 2)])
lmm.margin <- test.X %*% w.lmm
test.pred <- 1 / (1 + exp(-2 * lmm.margin))
test.y <- (testset$label + 1) / 2
test.auc <- auc(test.y, test.pred)
print(test.auc)
# Laplacian Mean Map with similarity v^NC
# No sigma is passed for this similarity.
L <- laplacian(similarity = "NC", trainset, N)
w.lmm <- laplacian.mean.map(trainset, lambda, gamma, L = L)
# Feature matrix of the test set (label and bag columns removed)
test.X <- as.matrix(testset[, -c(1, 2)])
lmm.margin <- test.X %*% w.lmm
test.pred <- 1 / (1 + exp(-2 * lmm.margin))
test.y <- (testset$label + 1) / 2
test.auc <- auc(test.y, test.pred)
print(test.auc)
# Alternating Mean Map started with MM
# The algorithm returns a structure that also contains the number of steps
# until termination; only its theta component is kept.
w.amm <- alternating.mean.map(trainset, lambda = lambda, init = "MM")$theta
# Feature matrix of the test set (label and bag columns removed)
test.X <- as.matrix(testset[, -c(1, 2)])
amm.margin <- test.X %*% w.amm
test.pred <- 1 / (1 + exp(-2 * amm.margin))
test.y <- (testset$label + 1) / 2
test.auc <- auc(test.y, test.pred)
print(test.auc)
# Alternating Mean Map started with LMM with similarity v^{G,s}
# NOTE(review): sigma is the literal 10 here rather than the `sigma`
# hyperparameter -- confirm this is intentional.
L <- laplacian(similarity = "G,s", trainset, N, sigma = 10)
# The algorithm returns a structure that also contains the number of steps
# until termination; only its theta component is kept.
w.amm <- alternating.mean.map(trainset, lambda = lambda, init = "LMM",
                              L = L, gamma = gamma)$theta
# Feature matrix of the test set (label and bag columns removed)
test.X <- as.matrix(testset[, -c(1, 2)])
amm.margin <- test.X %*% w.amm
test.pred <- 1 / (1 + exp(-2 * amm.margin))
test.y <- (testset$label + 1) / 2
test.auc <- auc(test.y, test.pred)
print(test.auc)