In [1]:
// Switch the notebook kernel into silent mode.
// NOTE(review): presumably this suppresses the automatic echo of cell results — confirm against the kernel docs.
kernel.silent(true)

In [2]:
import coursierapi.MavenRepository
// Append the JitPack Maven repository to the interpreter's repository list.
// NOTE(review): presumably required so the `com.github.propi:rdfrules` artifact imported in the next cell can be resolved.
interp.repositories() ++= Seq(MavenRepository.of("https://jitpack.io"))

In [3]:
import $ivy.`com.github.propi:rdfrules:1.5.0`
import collection._
import org.apache.jena.riot.Lang

import com.github.propi.rdfrules.data._
import com.github.propi.rdfrules.algorithm.amie._
import com.github.propi.rdfrules.algorithm.dbscan._
import com.github.propi.rdfrules.utils._
import com.github.propi.rdfrules.index._
import com.github.propi.rdfrules.rule._
import com.github.propi.rdfrules.ruleset._

In [4]:
// Load the two input datasets from previously stored cache files.
// Paths are relative to the notebook's working directory.
val expensesDataset = Dataset.fromCache("../cache/expensesCube.cache")
val wikidataDataset = Dataset.fromCache("../cache/wikidata.cache")

In [5]:
// Load the Turtle file with the reference-period linking triples.
// NOTE(review): judging by the predicate name `appliesToRefPeriod` used below, these triples connect Wikidata
// entities to the cube's reference periods — confirm against the file contents.
val refPeriodLinking = Dataset("../data/linking/refPeriodLinking.ttl")

In [6]:
// Combine the expenses cube, the Wikidata extract and the linking triples into one dataset
// and print its size as a quick sanity check.
val dataset = expensesDataset + wikidataDataset + refPeriodLinking
println(dataset.size)

25194


In [7]:
// Build the index over the combined dataset; every mining task below is run through `index.mine(...)`.
val index = dataset.index()

In [8]:
// --- Plain-string namespaces / IRIs --------------------------------------
val qbDataSet = "http://purl.org/linked-data/cube#dataSet"
val cssaDimension = "https://data.cssz.cz/ontology/dimension/"

// --- Helper: wrap a string into a URI triple item -------------------------
val uri = TripleItem.Uri(_: String)

// --- Helper: URI of a Wikidata property given its numeric id --------------
val wdProperty = (propertyId: Int) => uri("http://www.wikidata.org/prop/P" + propertyId)

// --- Concrete URIs used by the rule patterns ------------------------------
// Wikidata entity Q213 (named `czURI` here).
val czURI = uri("http://www.wikidata.org/entity/Q213")
// Measure predicate of the expenses cube.
val expenses = uri("https://data.cssz.cz/ontology/measure/vydaje-na-duchody-opravene-o-zalohy-v-tis-kc")
// Reference-period dimension of the cube.
val cssaRefPeriod = uri(cssaDimension + "refPeriod")
// qb:dataSet predicate as a URI item.
val qbdPredicate = uri(qbDataSet)
// Linking predicate between an entity and a reference period.
val appliesTo = uri("http://kizi.vse.cz/novp19/diploma-thesis/appliesToRefPeriod")

# alignment -> expenses

In [9]:
// Rule pattern for the "alignment -> expenses" task.
// The atom patterns chained with `&:` before `=>:` form the rule body; the atom pattern after `=>:` is the head
// (the logged task settings show patterns as a Vector of body atoms plus Some(head)).
// Single-character literals ('a', 'b', ...) act as pattern variables; `AnyConstant` requires a constant at that
// position; an omitted `object` leaves that position unconstrained.
// NOTE(review): P6 / P102 / P1387 are presumably Wikidata "head of government" / "member of political party" /
// "political alignment" — confirm against Wikidata.
val alignmentExpenses: RulePattern = (
    AtomPattern(subject = 'f', predicate = appliesTo, `object` = 'b') &:
    AtomPattern(subject = czURI, predicate = wdProperty(6), `object` = 'f') &:
    AtomPattern(subject = 'f', predicate = wdProperty(6), `object` = 'e') &:
    AtomPattern(subject = 'e', predicate = wdProperty(102), `object` = 'c') &:  
    AtomPattern(subject = 'd', predicate = wdProperty(1387)) &:
    AtomPattern(subject = 'c', predicate = wdProperty(102), `object` = 'd') &:
    AtomPattern(subject = 'c', predicate = appliesTo, `object` = 'b') &:
    AtomPattern(subject = 'a', predicate = cssaRefPeriod, `object` = 'b') &:
    AtomPattern(subject = 'a', predicate = qbdPredicate, `object` = AnyConstant)
    =>:
    // Head: the expenses measure of observation 'a', object left open.
    AtomPattern(subject = 'a', predicate = expenses)
)

In [10]:
// AMIE mining task for the "alignment -> expenses" pattern.
// The rule length limit of 10 covers the nine body atoms plus the head of `alignmentExpenses`.
// Timeout(1) is reported in the logged task settings as Timeout=60000.
val alignmentExpensesTask = {
  val withThresholds = Amie()
    .addThreshold(Threshold.MaxRuleLength(10))
    .addThreshold(Threshold.Timeout(1))
  withThresholds.addPattern(alignmentExpenses)
}

In [11]:
// Run the "alignment -> expenses" mining task and report rule count and elapsed wall-clock time.
val startTimeMillis = System.currentTimeMillis()
val alignmentExpensesTaskRuleset = index.mine(alignmentExpensesTask)
println(s"rules: ${alignmentExpensesTaskRuleset.size}")
val endTimeMillis = System.currentTimeMillis()
// Integer division: the duration is truncated to whole seconds.
val durationSeconds = (endTimeMillis - startTimeMillis) / 1000
println(s"duration: ${durationSeconds}s")

2021-09-14 08:41:08:076 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Predicates trimming.
2021-09-14 08:41:08:111 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Subjects indexing.
2021-09-14 08:41:08:168 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Subjects trimming.
2021-09-14 08:41:08:184 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Objects indexing.
2021-09-14 08:41:08:222 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Objects trimming.
2021-09-14 08:41:08:272 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Amie task settings:
MinHeadSize=100,
MinHeadCoverage=0.0,
MinSupport=1,
MaxThreads=4,
MinAtomSize=0,
MaxRuleLength=10,
WithConstants=true,
ConstantsPosition=All,
Timeout=60000,
WithDuplicitPredicates=true,
Patterns=List(Mapped(Vector(Mapped(Variable(?f),Constant(Constant(217712947)),Variable(?b),Any), Ma

rules: 96
duration: 3s


In [12]:
// Write the unfiltered "alignment -> expenses" ruleset to a text file.
alignmentExpensesTaskRuleset.export("../rulesets/expenses-wikidata/alignmentExpensesTaskRuleset.txt")

In [13]:
// Keep only rules whose support exceeds 1, compute PCA confidence, confidence and lift for them,
// and order the result by PCA confidence, then support.
val alignmentExpensesTaskRulesetFiltered = alignmentExpensesTaskRuleset
  .filter(_.measures(Measure.Support).value > 1)
  .computePcaConfidence(0.0)
  .computeConfidence(0.0)
  .computeLift()
  .sortBy(Measure.PcaConfidence, Measure.Support)

// Report how many rules survived and persist them.
println(s"rules: ${alignmentExpensesTaskRulesetFiltered.size}")
alignmentExpensesTaskRulesetFiltered.export("../rulesets/expenses-wikidata/alignmentExpensesTaskRulesetFiltered.txt")

rules: 58


# party -> expenses

In [14]:
// Rule pattern for the "party -> expenses" task.
// The atom patterns chained with `&:` before `=>:` form the rule body; the atom pattern after `=>:` is the head
// (the logged task settings show them as a Vector of eight body atoms plus Some(head)).
// Single-character literals are logged as variables (?a, ?b, ...); `AnyConstant` requires a constant at that
// position; the omitted `object` in the head is logged as `Any`.
// NOTE(review): P6 / P102 are presumably Wikidata "head of government" / "member of political party" — confirm.
val partyExpenses: RulePattern = (
    AtomPattern(subject = 'e', predicate = appliesTo, `object` = 'b') &:
    AtomPattern(subject = czURI, predicate = wdProperty(6), `object` = 'e') &:
    AtomPattern(subject = 'e', predicate = wdProperty(6), `object` = 'd') &:
    AtomPattern(subject = 'd', predicate = wdProperty(102), `object` = 'c') &:  
    AtomPattern(subject = 'c', predicate = wdProperty(102), `object` = AnyConstant) &:
    AtomPattern(subject = 'c', predicate = appliesTo, `object` = 'b') &:
    AtomPattern(subject = 'a', predicate = cssaRefPeriod, `object` = 'b') &:
    AtomPattern(subject = 'a', predicate = qbdPredicate, `object` = AnyConstant)
    =>:
    // Head: the expenses measure of observation 'a', object left open.
    AtomPattern(subject = 'a', predicate = expenses)
)

In [15]:
// AMIE mining task for the "party -> expenses" pattern.
// The rule length limit of 9 covers the eight body atoms plus the head of `partyExpenses`.
// Timeout(1) is reported in the logged task settings as Timeout=60000.
val partyExpensesTask = {
  val withThresholds = Amie()
    .addThreshold(Threshold.MaxRuleLength(9))
    .addThreshold(Threshold.Timeout(1))
  withThresholds.addPattern(partyExpenses)
}

In [16]:
// Run the "party -> expenses" mining task and report rule count and elapsed wall-clock time.
val startTimeMillis = System.currentTimeMillis()
val partyExpensesTaskRuleset = index.mine(partyExpensesTask)
println(s"rules: ${partyExpensesTaskRuleset.size}")
val endTimeMillis = System.currentTimeMillis()
// Integer division: the duration is truncated to whole seconds.
val durationSeconds = (endTimeMillis - startTimeMillis) / 1000
println(s"duration: ${durationSeconds}s")

2021-06-19 16:11:41:695 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Amie task settings:
MinHeadSize=100,
MinHeadCoverage=0.0,
MinSupport=1,
MaxThreads=4,
MinAtomSize=0,
MaxRuleLength=9,
WithConstants=true,
ConstantsPosition=All,
Timeout=60000,
WithDuplicitPredicates=true,
Patterns=List(Mapped(Vector(Mapped(Variable(?e),Constant(Constant(217712947)),Variable(?b),Any), Mapped(Constant(Constant(-1927475503)),Constant(Constant(774364698)),Variable(?e),Any), Mapped(Variable(?e),Constant(Constant(774364698)),Variable(?d),Any), Mapped(Variable(?d),Constant(Constant(52500897)),Variable(?c),Any), Mapped(Variable(?c),Constant(Constant(52500897)),AnyConstant,Any), Mapped(Variable(?c),Constant(Constant(217712947)),Variable(?b),Any), Mapped(Variable(?a),Constant(Constant(1169043154)),Variable(?b),Any), Mapped(Variable(?a),Constant(Constant(624690160)),AnyConstant,Any)),Some(Mapped(Variable(?a),Constant(Constant(2137166855)),Any,Any)),false,false)),
OnlyPredicates=Non

rules: 48
duration: 1s


In [17]:
// Write the unfiltered "party -> expenses" ruleset to a text file.
partyExpensesTaskRuleset.export("../rulesets/expenses-wikidata/partyExpensesTaskRuleset.txt")

In [18]:
// Keep only rules whose support exceeds 1, compute PCA confidence, confidence and lift for them,
// and order the result by PCA confidence, then support.
val partyExpensesTaskRulesetFiltered = partyExpensesTaskRuleset
  .filter(_.measures(Measure.Support).value > 1)
  .computePcaConfidence(0.0)
  .computeConfidence(0.0)
  .computeLift()
  .sortBy(Measure.PcaConfidence, Measure.Support)

// Report how many rules survived and persist them.
println(s"rules: ${partyExpensesTaskRulesetFiltered.size}")
partyExpensesTaskRulesetFiltered.export("../rulesets/expenses-wikidata/partyExpensesTaskRulesetFiltered.txt")

rules: 29


# expenses -> party

In [19]:
// Rule pattern for the "expenses -> party" task (the reverse direction of `partyExpenses`).
// The atom patterns chained with `&:` before `=>:` form the rule body; the atom pattern after `=>:` is the head
// (the logged task settings show them as a Vector of seven body atoms plus Some(head)).
// Single-character literals are logged as variables; `AnyConstant` requires a constant at that position;
// the omitted `object` of the expenses atom is logged as `Any`.
// NOTE(review): P6 / P102 are presumably Wikidata "head of government" / "member of political party" — confirm.
val expensesParty: RulePattern = (
    AtomPattern(subject = czURI, predicate = wdProperty(6), `object` = 'e') &:
    AtomPattern(subject = 'e', predicate = wdProperty(6), `object` = 'd') &:
    AtomPattern(subject = 'd', predicate = wdProperty(102), `object` = 'a') &:
    AtomPattern(subject = 'c', predicate = expenses) &:
    AtomPattern(subject = 'c', predicate = qbdPredicate, `object` = AnyConstant) &:
    AtomPattern(subject = 'c', predicate = cssaRefPeriod, `object` = 'b') &:
    AtomPattern(subject = 'a', predicate = appliesTo, `object` = 'b')
    =>:
    // Head: a P102 statement of 'a' with a constant object.
    AtomPattern(subject = 'a', predicate = wdProperty(102), `object` = AnyConstant)
)

In [20]:
// AMIE mining task for the "expenses -> party" pattern.
// The rule length limit of 8 covers the seven body atoms plus the head of `expensesParty`.
// In the logged task settings MinHeadSize(0) shows up as MinHeadSize=1 and Timeout(1) as Timeout=60000.
val expensesPartyTask = {
  val withThresholds = Amie()
    .addThreshold(Threshold.MinSupport(1))
    .addThreshold(Threshold.MaxRuleLength(8))
    .addThreshold(Threshold.MinHeadSize(0))
    .addThreshold(Threshold.Timeout(1))
  withThresholds.addPattern(expensesParty)
}

In [21]:
// Run the "expenses -> party" mining task and report rule count and elapsed wall-clock time.
val startTimeMillis = System.currentTimeMillis()
val expensesPartyTaskRuleset = index.mine(expensesPartyTask)
println(s"rules: ${expensesPartyTaskRuleset.size}")
val endTimeMillis = System.currentTimeMillis()
// Integer division: the duration is truncated to whole seconds.
val durationSeconds = (endTimeMillis - startTimeMillis) / 1000
println(s"duration: ${durationSeconds}s")

2021-06-19 16:11:44:223 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Amie task settings:
MinHeadSize=1,
MinHeadCoverage=0.0,
MinSupport=1,
MaxThreads=4,
MinAtomSize=0,
MaxRuleLength=8,
WithConstants=true,
ConstantsPosition=All,
Timeout=60000,
WithDuplicitPredicates=true,
Patterns=List(Mapped(Vector(Mapped(Constant(Constant(-1927475503)),Constant(Constant(774364698)),Variable(?e),Any), Mapped(Variable(?e),Constant(Constant(774364698)),Variable(?d),Any), Mapped(Variable(?d),Constant(Constant(52500897)),Variable(?a),Any), Mapped(Variable(?c),Constant(Constant(2137166855)),Any,Any), Mapped(Variable(?c),Constant(Constant(624690160)),AnyConstant,Any), Mapped(Variable(?c),Constant(Constant(1169043154)),Variable(?b),Any), Mapped(Variable(?a),Constant(Constant(217712947)),Variable(?b),Any)),Some(Mapped(Variable(?a),Constant(Constant(52500897)),AnyConstant,Any)),false,false)),
OnlyPredicates=None,
WithoutPredicates=None


rules: 48
duration: 0s


In [22]:
// Write the unfiltered "expenses -> party" ruleset to a text file.
// NOTE(review): unlike the two earlier tasks, this path is not under `expenses-wikidata/` — confirm this is intended.
expensesPartyTaskRuleset.export("../rulesets/expensesPartyTaskRuleset.txt")

In [23]:
// Keep only "expenses -> party" rules whose support exceeds 1, compute their PCA confidence,
// order by PCA confidence then support, and write the result straight to a file.
expensesPartyTaskRuleset
  .filter(_.measures(Measure.Support).value > 1)
  .computePcaConfidence(0.0)
  .sortBy(Measure.PcaConfidence, Measure.Support)
  .export("../rulesets/expensesPartyTaskRulesetFiltered.txt")

# expenses -> alignment

In [24]:
// Rule pattern for the "expenses -> alignment" task (the reverse direction of `alignmentExpenses`).
// The atom patterns chained with `&:` before `=>:` form the rule body; the atom pattern after `=>:` is the head
// (the logged task settings show them as a Vector of eight body atoms plus Some(head)).
// Single-character literals are logged as variables; `AnyConstant` requires a constant at that position;
// an omitted `object` is logged as `Any`.
// NOTE(review): P6 / P102 / P1387 are presumably Wikidata "head of government" / "member of political party" /
// "political alignment" — confirm against Wikidata.
val expensesAlignment: RulePattern = (
    AtomPattern(subject = czURI, predicate = wdProperty(6), `object` = 'f') &:
    AtomPattern(subject = 'f', predicate = wdProperty(6), `object` = 'e') &:
    AtomPattern(subject = 'e', predicate = wdProperty(102), `object` = 'b') &:  
    AtomPattern(subject = 'd', predicate = expenses) &: 
    AtomPattern(subject = 'd', predicate = qbdPredicate, `object` = AnyConstant) &:
    AtomPattern(subject = 'd', predicate = cssaRefPeriod, `object` = 'c') &:
    AtomPattern(subject = 'b', predicate = appliesTo, `object` = 'c') &:
    AtomPattern(subject = 'b', predicate = wdProperty(102), `object` = 'a')
    =>:
    // Head: a P1387 statement of 'a', object left open.
    AtomPattern(subject = 'a', predicate = wdProperty(1387))
)

In [25]:
// AMIE mining task for the "expenses -> alignment" pattern.
// The rule length limit of 9 covers the eight body atoms plus the head of `expensesAlignment`.
// In the logged task settings MinHeadSize(0) shows up as MinHeadSize=1 and Timeout(1) as Timeout=60000.
val expensesAlignmentTask = {
  val withThresholds = Amie()
    .addThreshold(Threshold.MinSupport(1))
    .addThreshold(Threshold.MaxRuleLength(9))
    .addThreshold(Threshold.MinHeadSize(0))
    .addThreshold(Threshold.Timeout(1))
  withThresholds.addPattern(expensesAlignment)
}

In [26]:
// Run the "expenses -> alignment" mining task and report rule count and elapsed wall-clock time.
val startTimeMillis = System.currentTimeMillis()
val expensesAlignmentTaskRuleset = index.mine(expensesAlignmentTask)
println(s"rules: ${expensesAlignmentTaskRuleset.size}")
val endTimeMillis = System.currentTimeMillis()
// Integer division: the duration is truncated to whole seconds.
val durationSeconds = (endTimeMillis - startTimeMillis) / 1000
println(s"duration: ${durationSeconds}s")

2021-06-19 16:11:45:989 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Amie task settings:
MinHeadSize=1,
MinHeadCoverage=0.0,
MinSupport=1,
MaxThreads=4,
MinAtomSize=0,
MaxRuleLength=9,
WithConstants=true,
ConstantsPosition=All,
Timeout=60000,
WithDuplicitPredicates=true,
Patterns=List(Mapped(Vector(Mapped(Constant(Constant(-1927475503)),Constant(Constant(774364698)),Variable(?f),Any), Mapped(Variable(?f),Constant(Constant(774364698)),Variable(?e),Any), Mapped(Variable(?e),Constant(Constant(52500897)),Variable(?b),Any), Mapped(Variable(?d),Constant(Constant(2137166855)),Any,Any), Mapped(Variable(?d),Constant(Constant(624690160)),AnyConstant,Any), Mapped(Variable(?d),Constant(Constant(1169043154)),Variable(?c),Any), Mapped(Variable(?b),Constant(Constant(217712947)),Variable(?c),Any), Mapped(Variable(?b),Constant(Constant(52500897)),Variable(?a),Any)),Some(Mapped(Variable(?a),Constant(Constant(1496784743)),Any,Any)),false,false)),
OnlyPredicates=None,
Withou

rules: 96
duration: 0s


In [27]:
// Write the unfiltered "expenses -> alignment" ruleset to a text file.
// Fix: this cell previously exported `alignmentExpensesTaskRuleset` (the result of the opposite-direction
// "alignment -> expenses" task) under this file name, so the rules mined just above were never saved.
expensesAlignmentTaskRuleset.export("../rulesets/expensesAlignmentTaskRuleset.txt")

In [28]:
// Keep only "expenses -> alignment" rules whose support exceeds 1, compute their PCA confidence,
// order by PCA confidence then support, and write the result straight to a file.
expensesAlignmentTaskRuleset
  .filter(_.measures(Measure.Support).value > 1)
  .computePcaConfidence(0.0)
  .sortBy(Measure.PcaConfidence, Measure.Support)
  .export("../rulesets/expensesAlignmentTaskRulesetFiltered.txt")