## Demo for Strata Conf: Running Logistic Regression with Morpheus on the MovieLens1M 3-table dataset

### Load requisite libraries, source files, and base table data files.

In [2]:
library('Matrix');
library('matrixStats');
library(data.table);
source('src/NormalMatrix.r');
source('src/MLalgorithms/LogisticRegression.r');

# Load the base tables for the demo: the target vector, two foreign-key
# columns, and the two attribute tables they point into.

# Empty 0 x 0 matrix; it is later passed to NormalMatrix as the entity table.
S <- matrix(0, 0, 0)

# Input file locations (relative to the working directory).
Ydir <- 'Data/MovieLens1M/MLY.csv'

FK1dir <- 'Data/MovieLens1M/MLFK1.csv'
R1dir <- 'Data/MovieLens1M/MLR1Sparse.txt'

FK2dir <- 'Data/MovieLens1M/MLFK2.csv'
R2dir <- 'Data/MovieLens1M/MLR2Sparse.txt'

Ytest <- as.matrix(fread(Ydir))
# Convert 7-class target into a binary target: entries above 2.5 become 1,
# all others 0 (the "+ 0" turns the logical matrix into a numeric one).
Y <- (Ytest > 2.5) + 0

# First foreign key: one row per fact-table row, holding a column index
# into R1S. FK1 is the nS x nR1 indicator matrix with a single 1 per row.
JSet1 <- as.matrix(read.table(FK1dir, header = TRUE))
nS <- nrow(JSet1)
nR1 <- max(JSet1)
# seq_len() instead of 1:nS so an empty file cannot yield the sequence c(1, 0).
FK1 <- sparseMatrix(i = seq_len(nS), j = JSet1, x = 1, dims = c(nS, nR1))
# NOTE(review): the "+ 0" presumably forces a numeric sparse matrix when the
# Matrix Market file is stored as a pattern matrix -- confirm.
R1S <- readMM(file = R1dir) + 0

# Second foreign key, built the same way against R2S.
JSet2 <- as.matrix(read.table(FK2dir, header = TRUE))
# Both foreign-key files must describe the same fact rows; fail early here
# rather than with an obscure dimension error when the matrices are combined.
stopifnot(nrow(JSet2) == nS)
nR2 <- max(JSet2)
FK2 <- sparseMatrix(i = seq_len(nS), j = JSet2, x = 1, dims = c(nS, nR2))
R2S <- readMM(file = R2dir) + 0

print("Loaded data")

[1] "Loaded data"


### Create the regular single-table (Materialized) matrix.

In [3]:
# Build the materialized (joined) feature matrix and time the construction.
# Each FK %*% R product expands an attribute table to one row per fact row;
# the two expanded tables are then placed side by side.
# NOTE: the name `T` masks R's shorthand for TRUE; it is kept because later
# cells read `T` directly.
t0 <- Sys.time()
T <- cbind(FK1 %*% R1S, FK2 %*% R2S)
timetaken <- Sys.time() - t0
print("Created Materialized matrix")
print(timetaken)

[1] "Created Materialized matrix"
Time difference of 2.490081 secs


### Create Normalized Matrix for Morpheus.

In [4]:
# Build the normalized-matrix representation from the base tables (no join
# is computed here) and time the construction.
t0 <- Sys.time()
TNM <- NormalMatrix(
  EntTable = list(S),
  AttTables = list(R1S, R2S),
  KFKDs = list(FK1, FK2),
  Sparse = TRUE
)
timetaken <- Sys.time() - t0
print("Created Normalized matrix")
print(timetaken)

[1] "Created Normalized matrix"
Time difference of 0.003711939 secs


### Initialize Logistic Regression weights and hyper-parameters.

In [5]:
# Initial weights: one value per column of T, drawn from a normal
# distribution with mean 2 (rnorm's default sd of 1), stored as a
# single-column sparse Matrix.
winit <- Matrix(
  rnorm(n = ncol(T), mean = 2),
  nrow = ncol(T),
  ncol = 1,
  sparse = TRUE
)
gamma0 <- 0.01  # L2 regularizer
Max_Iter <- 5   # maximum number of iterations/epochs

### Run Logistic Regression with regular Materialized matrix.

In [6]:
# Train on the materialized matrix T and record the wall-clock time in
# `timemat` for the later speedup comparison.
print("Materialized execution started")
t0 <- Sys.time()
weightsMat <- LogisticRegression(T, Max_Iter, winit, gamma0, Y)
timemat <- Sys.time() - t0
print("Materialized execution done")
print(timemat)

[1] "Materialized execution started"
[1] "Materialized execution done"
Time difference of 10.4957 secs


### Run Logistic Regression on Morpheus with Normalized matrix.

In [7]:
# Train on the normalized matrix TNM with the same initial weights and
# hyper-parameters as the materialized run, then report the speedup.
print("Morpheus execution started")
t0 <- Sys.time()
weightsMorph <- LogisticRegression(TNM, Max_Iter, winit, gamma0, Y)
timemorph <- Sys.time() - t0
print("Morpheus execution done")
print(timemorph)
print("Runtime Speedup of Morpheus over Materialized:")
# Subtracting two Sys.time() values yields a difftime whose unit (secs,
# mins, ...) is picked automatically from its magnitude. Convert both
# timings to seconds explicitly so the ratio stays correct even when the
# two runs land in different units.
print(
  as.numeric(timemat, units = "secs") /
    as.numeric(timemorph, units = "secs")
)

[1] "Morpheus execution started"
[1] "Morpheus execution done"
Time difference of 0.9491551 secs
[1] "Runtime Speedup of Morpheus over Materialized:"
[1] 11.05794


### Check if both approaches return the same weights.

In [8]:
# Show the first six entries of the first element of each result for a quick
# visual comparison.
# NOTE(review): assumes element [[1]] of each result holds the fitted
# weights -- confirm against LogisticRegression.
print(weightsMat[[1]][seq_len(6)])
print(weightsMorph[[1]][seq_len(6)])

[1] 3.0090147 2.5521603 1.4173721 2.6236962 1.2025118 0.9236351
[1] 3.0090147 2.5521603 1.4173721 2.6236962 1.2025118 0.9236351


In [9]:
# Relative agreement test: the Frobenius norm of the weight difference must
# be below Result_eps scaled by (1 + Frobenius norm of the materialized
# weights).
Result_eps <- 1e-6  # weight difference tolerance
print(
  if (norm(as.matrix(weightsMat[[1]] - weightsMorph[[1]]), 'F') <
        Result_eps * (1 + norm(as.matrix(weightsMat[[1]]), 'F'))) {
    "Success: Both approches return same weights."
  } else {
    "Failed: The approches return different weights."
  }
)

[1] "Success: Both approches return same weights."
