In [1]:
library(devtools)
library(glmSLOPE)
library(MLmetrics)


Attaching package: ‘MLmetrics’

The following object is masked from ‘package:base’:

    Recall



In [2]:
X_train <- as.matrix(read.csv(file="./data/microarray_train.csv", header=TRUE, sep=","))
y_train <- as.vector(read.csv(file="./data/labels_train.csv", header=TRUE, sep=","))
X_test <- as.matrix(read.csv(file="./data/microarray_test.csv", header=TRUE, sep=","))
y_test <- as.vector(read.csv(file="./data/labels_test.csv", header=TRUE, sep=","))

In [3]:
n <- dim(X_train)[1]
p <- dim(X_train)[2]
print(paste(n, p))

[1] "726 12179"


In [7]:
lambda <- 0.5*qnorm(1-seq(1:p)*0.1/p)*sqrt(n);
y_train_new = rep(y_train, times=1)
y_train_new[y_train == 0] = -1
obj <- solve_slope(X_train, y_train_new, lambda=lambda, model='logistic');

In [8]:
print(Sys.time())
# around one hour of calculations - not very efficient - also quite hard to tune lambdas 

[1] "2019-05-16 14:38:38 CEST"


In [9]:
weights <- obj$w

#logistic regression
predict_proba <- function(weights, X) {
    as.vector(plogis(X %*% matrix(weights, nrow=length(weights), ncol=1)))
}

predict <- function(weights, X) {
    probs <- predict_proba(weights, X)
    round(probs)
}

In [10]:
predicted_probs_train <- predict(weights, X_train)
predicted_probs_test <- predict(weights, X_test)

predicted_train <- round(predicted_probs_train)
predicted_test <- round(predicted_probs_test)

auc_train = AUC(predicted_probs_train, y_train)
auc_test = AUC(predicted_probs_test, y_test)

acc_train = Accuracy(predicted_train, y_train)
acc_test = Accuracy(predicted_test, y_test)

print(paste("AUC on train set", auc_train, "Accuracy on train set", acc_train))
print(paste("AUC on test set", auc_test, "Accuracy on test set", acc_test))

[1] "AUC on train set 0.640664961636829 Accuracy on train set 0.68732782369146"
[1] "AUC on test set 0.634788323187604 Accuracy on test set 0.666666666666667"
