# config.properties

# -------------- GENERAL SETTINGS -------------- #
# ---------------------------------------------- #

# recommendation algorithm to run:
# 1 -> sprank
# 2 -> graph kernel model-based
# 3 -> graph kernel heuristic
recommendationAlgorithm = 1

# extract metadata from a knowledge graph by means of SPARQL queries
# if true, it enables the DATA EXTRACTION phase
dataExtraction=true

# SPrank: itemPathExtraction and userPathExtraction produce the input files for the SPrank algorithm
itemPathExtraction=true
userPathExtraction=true

# the itemGraphEmbedding phase produces the input files for the graph kernel algorithms
itemGraphEmbedding=false

# enable computation of recommendations
computeRecommendation=true

# evaluate accuracy of recommendations
evaluation=false

# evaluate all the recommendation files in a specified directory
# (presumably the directory given by recDirToEval in the EVALUATION section -- confirm)
evaluationDir=false

# main working directory where intermediate files will be located
workingDir=temp/

# number of threads for multi-threading
nThreads=4

# if true, ratings are implicit (0 and 1)
implicit=false

# training file (required format: tsv)
inputTrainRatingFile=movielens/TrainSetML_80_20

# validation file for learning (required format: tsv)
inputValidationRatingFile=movielens/ValidationSetML_80_20

# test file for evaluation (required format: tsv)
inputTestRatingFile=movielens/TestSetML_80_20_reduced_min50


# --------------- DATA EXTRACTION -------------- #
# ---------------------------------------------- #

# set jenatdb=false to query a remote endpoint
# set jenatdb=true to query a local dataset
jenatdb=false

# if jenatdb=true, set the local dataset parameters
localDatasetFile=dump.nt
# NOTE(review): this value contains a space ("TDBgraph kernel"), which looks like
# a search-and-replace artifact -- confirm the intended directory name
tdbDirectory=target/TDBgraph kernel

# if jenatdb=false, set the endpoint parameters
endpoint=http://live.dbpedia.org/sparql
graphURI=http://dbpedia.org

# input/output
inputItemURIsFile=movielens/MovielensURIs_DBpedia39_reduced
propertiesFile=movielens/props.xml

# set directed=true to treat properties as directed
directed=true
# output file of the data extraction phase and input file of the
# path extraction and user path extraction phases
itemsMetadataFile=metadata
outputTextFormat=true
outputBinaryFormat=true
# set append=true to continue a previous extraction
append=false
# set caching=true to enable caching (requires a lot of memory)
caching=false

# ------------- ITEM GRAPH EMBEDDING ----------- #
# ---------------------------------------------- #

# Graph Kernel algorithms (value of embeddingOption below).
# 1 -> entity from text
# 2 -> branch from text
# 3 -> entity from binary
# 4 -> branch from binary

embeddingOption=1
maxBranchLength=1
addCollabFeatures=true
listAlphaVals=

minMaxNorm=false
minFreq=0
maxFreq=100000
idf=false
lengthNorm=false

onlyEntityBranches=true

entityBasedEmbFile=_entity_mapping
branchBasedEmbFile=_branch_mapping


# --------------- PATH EXTRACTION -------------- #
# ---------------------------------------------- #

# number of items that can be loaded in memory
itemsInMemory=1000
# output file
outputPathsTextFormat=true
outputPathsBinaryFormat=true
pathsFile=path
# set to true to consider both i1-i2 and i2-i1
computeInversePaths=false
# set to true to consider only the most popular paths
computeTopPaths=false
# number of top paths to consider in paths extraction
nTopPaths=100
# number of items to consider for top paths computation
nItemTopPaths=100


# ------------- USER PATH EXTRACTION ----------- #
# ---------------------------------------------- #

# percentage of paths loaded in memory
pathsInMemory=50
# percentage of the items rated by a user to consider in user path extraction
userItemsSampling=50
# user rating threshold (>)
ratesThreshold=3
# normalize the user-item frequencies
normalize=true


# ------------------- LEARNING ----------------- #
# ---------------------------------------------- #

# learning library to use (the parameters of each one are listed below)
libLinear=false
rankLib=true
silentLearning=true

# liblinear parameters
# for multi-class classification
#     0 -- L2-regularized logistic regression (primal)
#     1 -- L2-regularized L2-loss support vector classification (dual)
#     2 -- L2-regularized L2-loss support vector classification (primal)
#     3 -- L2-regularized L1-loss support vector classification (dual)
#     4 -- support vector classification by Crammer and Singer
#     5 -- L1-regularized L2-loss support vector classification
#     6 -- L1-regularized logistic regression
#     7 -- L2-regularized logistic regression (dual)
# for regression
#    11 -- L2-regularized L2-loss support vector regression (primal)
#    12 -- L2-regularized L2-loss support vector regression (dual)
#    13 -- L2-regularized L1-loss support vector regression (dual)
listSolverType=0,1,2,3,4,5,6,7,11,12,13

# cost of constraints violation
listC=1,10
#1,10,100,1000

# tolerance of termination criterion
listEps=0.1,0.01
#0.1,0.01,0.005

# epsilon in loss function of epsilon-SVR (default 0.1)
listP=0.1,0.01
#0.1,0.01,0.001

# NOTE(review): the four keys below are undocumented; presumably they control
# how negative examples are sampled for training/validation -- confirm
timesRealFb=5
nValidNegEx=1000
minTrainEx=100
addNegValidationEx=true

# ranklib parameters

# 1 -> RANKER_TYPE.RANKBOOST
# 2 -> RANKER_TYPE.ADARANK
# 3 -> RANKER_TYPE.COOR_ASCENT
# 4 -> RANKER_TYPE.LAMBDAMART
# 5 -> RANKER_TYPE.RANDOM_FOREST

rankerType=5

# NOTE: several commented-out keys below (nIteration, nThreshold, tolerance,
# nTrees, nTreeLeaves, learningRate, minLeafSupport) are repeated under more
# than one ranker; uncomment only one ranker's group to avoid duplicate keys.

# RANKBOOST parameters
# The number of rounds to train (default=300)
#nIteration=300
# Number of threshold candidates to search. -1 to use all feature values (default=10)
#nThreshold = 10

# ADARANK parameters
# The number of rounds to train (default=500)
#nIteration = 500
# Tolerance between two consecutive rounds of learning (default=0.002)
#tolerance = 0.002
# The maximum number of times a feature can be consecutively selected without
# changing performance (default=5)
#maxSelCount = 5

# COOR_ASCENT parameters
# The number of iterations to search in each dimension (default=25)
#nMaxIteration = 25
# Performance tolerance between two solutions (default=0.001)
#tolerance = 0.001
# The number of random restarts (default=5)
#nRestart = 5
# Regularization parameter (default=no-regularization)
#regularized = false

# LAMBDAMART parameters
# Number of trees (default=1000)
#nTrees = 1000
# Number of leaves for each tree (default=10)
#nTreeLeaves=10
# Shrinkage, or learning rate (default=0.1)
#learningRate=0.1
# Stop early when no improvement is observed on validation data
# in e consecutive rounds (default=100)
#nRoundToStopEarly=100
# Number of threshold candidates for tree splitting.
# -1 to use all feature values (default=256)
#nThreshold=256
# Min leaf support -- minimum #samples each leaf has to contain (default=1)
#minLeafSupport=1

# RANDOM_FOREST parameters
# Number of bags (default=300)
#nBag=300
# Number of trees in each bag (default=1)
#nTrees=1
# Number of leaves for each tree (default=100)
#nTreeLeaves=100
# Shrinkage, or learning rate (default=0.1)
#learningRate=0.1
# Number of threshold candidates for tree splitting.
# -1 to use all feature values (default=256)
#nThreshold=256
# Min leaf support -- minimum #samples each leaf has to contain (default=1)
#minLeafSupport=1
# Feature sampling rate (default=0.3)
#featureSamplingRate=0.3
# Sub-sampling rate (default=1.0)
#subSamplingRate=1.0

# ------------------ EVALUATION ---------------- #
# ---------------------------------------------- #

# supported metrics:
# LibLinear: P@k, NDCG@k
# RankLib: MAP, NDCG@k, DCG@k, P@k, RR@k, ERR@k
evalMetric=P@10
# relevant item threshold (>=)
evalRatingThresh=4
relUnknownItems=3
negRatingThresh=2
itemsMetadataEvalFile=metadata_eval
outputEvaluationFile=evaluation
recDirToEval=


# ---------------- RECOMMENDATION -------------- #
# ---------------------------------------------- #

# output file for the recommendations
recommendationsFile=rec
# number of top predicted items (top-N)
topN=2