In [1]:
// Switch the Almond kernel to silent mode while dependencies are set up
// (switched back with kernel.silent(false) after the imports below).
kernel.silent(true)

In [2]:
import coursierapi.MavenRepository
// Register JitPack as an additional Maven repository for dependency resolution;
// the com.github.propi:rdfrules artifact is imported via $ivy in the next cell.
interp.repositories() ++= Seq(MavenRepository.of("https://jitpack.io"))

In [3]:
import $ivy.`com.github.propi:rdfrules:1.5.0`
import collection._
import org.apache.jena.riot.Lang

import scala.util.control.Breaks._
import scala.collection.immutable.ListMap

import com.github.propi.rdfrules.data._
import com.github.propi.rdfrules.algorithm.amie._
import com.github.propi.rdfrules.algorithm.dbscan._
import com.github.propi.rdfrules.utils._
import com.github.propi.rdfrules.index._
import com.github.propi.rdfrules.rule._
import com.github.propi.rdfrules.ruleset._
// Imports are done: turn silent mode off again so the following cells echo their results.
kernel.silent(false)

In [None]:
// Load the dataset previously cached at ../cache/jaurCube.cache and evaluate its size.
val jaurDataset = Dataset.fromCache("../cache/jaurCube.cache")
jaurDataset.size

In [None]:
// Load the cached YAGO "hop 0" dataset and evaluate its size.
// NOTE(review): presumably the YAGO entities directly linked from the cube data — confirm.
val yagoHop0 = Dataset.fromCache("../cache/yago-hop0.cache")
yagoHop0.size

In [None]:
// Load the cached YAGO "hop 1" dataset and evaluate its size.
val yagoHop1 = Dataset.fromCache("../cache/yago-hop1.cache")
yagoHop1.size

In [None]:
// Load the cached YAGO "hop 2" dataset and evaluate its size.
val yagoHop2 = Dataset.fromCache("../cache/yago-hop2.cache")
yagoHop2.size

In [None]:
// Combine the three YAGO hop datasets into a single dataset and evaluate its size.
val yagoDataset = yagoHop0 + yagoHop1 + yagoHop2
yagoDataset.size

In [4]:
// Predicate IRIs that the YAGO filtering cell excludes from the dataset.
val rdfsLabel = "http://www.w3.org/2000/01/rdf-schema#label"
val rdfsComment = "http://www.w3.org/2000/01/rdf-schema#comment"
val alternateName = "http://schema.org/alternateName"
val image = "http://schema.org/image"
val rdfType = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

// Rounds `value` to `scale` decimal places using HALF_UP rounding and returns it as a Double.
val rounded = (value: Double, scale: Integer) => {
  val decimal = BigDecimal(value)
  val scaled = decimal.setScale(scale, BigDecimal.RoundingMode.HALF_UP)
  scaled.toDouble
}

[36mrdfsLabel[39m: [32mString[39m = [32m"http://www.w3.org/2000/01/rdf-schema#label"[39m
[36mrdfsComment[39m: [32mString[39m = [32m"http://www.w3.org/2000/01/rdf-schema#comment"[39m
[36malternateName[39m: [32mString[39m = [32m"http://schema.org/alternateName"[39m
[36mimage[39m: [32mString[39m = [32m"http://schema.org/image"[39m
[36mrdfType[39m: [32mString[39m = [32m"http://www.w3.org/1999/02/22-rdf-syntax-ns#type"[39m
[36mrounded[39m: ([32mDouble[39m, [32mInteger[39m) => [32mDouble[39m = ammonite.$sess.cmd3$Helper$$Lambda$2842/0x0000000840c83040@3673d5bf

In [None]:
// Drop every YAGO quad whose predicate is one of the excluded IRIs
// (rdfs:label, rdfs:comment, schema:alternateName, rdf:type, schema:image).
val yagoDatasetFiltered = yagoDataset.filter { q =>
  val predicate = q.triple.predicate
  !predicate.hasSameUriAs(rdfsLabel) &&
    !predicate.hasSameUriAs(rdfsComment) &&
    !predicate.hasSameUriAs(alternateName) &&
    !predicate.hasSameUriAs(rdfType) &&
    !predicate.hasSameUriAs(image)
}

// Share of quads that survive the filter, reported as a whole-number percentage.
val ratio: Double = yagoDatasetFiltered.size.toDouble / yagoDataset.size.toDouble
// Scale to percent BEFORE rounding: rounding first and multiplying afterwards
// re-introduces floating-point noise (e.g. 0.57 * 100 == 56.99999999999999).
s"${yagoDatasetFiltered.size} / ${yagoDataset.size} = ${rounded(ratio * 100, 0)}%"

In [None]:
// Load the Turtle file with the reference-area links between YAGO and the CZSO data
// and evaluate its size.
val refAreaLinking = Dataset("../data/linking/yagoCZSOLinking.ttl")
refAreaLinking.size

In [None]:
// Merge the cube data, the filtered YAGO data and the linking data into the dataset to be indexed.
val dataset = jaurDataset + yagoDatasetFiltered + refAreaLinking
dataset.size

In [None]:
// Build an index over the merged dataset and cache it to disk; the next cell reloads
// the same file with Index.fromCache.
val index = dataset.index()
index.cache("../cache/yaurYagoIndex.cache")
// Switch the kernel back to silent mode for the following cells.
kernel.silent(true)

In [5]:
// Reload the index cached at ../cache/yaurYagoIndex.cache.
// NOTE(review): the meaning of the positional `false` is not visible here; the echoed
// result type (FromCacheFullyPreservedIndex) suggests it requests a fully loaded index —
// confirm against the Index.fromCache signature.
val index = Index.fromCache("../cache/yaurYagoIndex.cache", false)
kernel.silent(true)

[36mindex[39m: [32mIndex[39m = com.github.propi.rdfrules.index.Index$FromCacheFullyPreservedIndex@7c211463

In [6]:
// Shorthand for wrapping a string in a TripleItem.Uri.
val uri = (value: String) => TripleItem.Uri(value)
// Data Cube predicate whose objects are restricted to specific cubes in the patterns below.
val qbDataSet = uri("http://purl.org/linked-data/cube#dataSet")
// in all cubes
val czsoUri = "http://data.czso.cz/ontology/"
val unemploymentRate = uri(czsoUri+"podilNezamestnanych")
val reachableApplicants = uri(czsoUri+"dosazitelniNeumisteniUchazeciOZamestnani")
// only in total cubes
val unplacedApplicants = uri(czsoUri+"neumisteniUchazeciOZamestnani")
val vacaniesCount = uri(czsoUri+"pocetVolnychMist")
// All measure predicates; used later to discard rules that have a measure in the body.
val measures = Array(unemploymentRate,reachableApplicants,unplacedApplicants,vacaniesCount)

// The same four measures as a pattern item: the rule head predicate must be one of them.
val oneOfMeasures = OneOf(unemploymentRate,reachableApplicants,unplacedApplicants,vacaniesCount)
val refArea = uri("http://data.czso.cz/ontology/refArea")
// Rule constraint: constants are allowed at the object position only.
val constantsAtObject = RuleConstraint.ConstantsAtPosition.ConstantsPosition.Object
val constantsOnlyAtObject = RuleConstraint.ConstantsAtPosition(constantsAtObject)

// Cube identifiers for the region-level and district-level data sets.
// NOTE(review): these are bare names rather than absolute IRIs — presumably they match
// how the cubes are identified in the cached data; confirm.
val oneOfRegionCubes = OneOf(uri("jaur-regions-by-sex"),uri("jaur-regions-total"))
val oneOfDistrictCubes = OneOf(uri("jaur-districts-by-sex"),uri("jaur-districts-total"))

In [11]:
// Rule pattern for region-level cubes. Body atoms are chained with &: and the head
// follows =>: (the logged task settings show three body atoms and one head atom):
//   body: some atom with subject ?b in graph "yago",
//         (?a refArea ?b) in graph "czso",
//         (?a qb:dataSet <one of the region cubes>) in graph "czso"
//   head: (?a <one of the measures> _) in graph "czso"
val regionPattern = (
    AtomPattern(subject = 'b', graph = uri("yago")) &:
    AtomPattern(subject = 'a', predicate = refArea, `object` = 'b', graph = uri("czso")) &:
    AtomPattern(subject = 'a', predicate = qbDataSet, `object` = oneOfRegionCubes, graph = uri("czso"))
    =>: 
    AtomPattern(subject = 'a', predicate = oneOfMeasures, graph = uri("czso"))
)

// Same pattern as regionPattern, but restricted to the district-level cubes.
val districtPattern = (
    AtomPattern(subject = 'b', graph = uri("yago")) &:
    AtomPattern(subject = 'a', predicate = refArea, `object` = 'b', graph = uri("czso")) &:
    AtomPattern(subject = 'a', predicate = qbDataSet, `object` = oneOfDistrictCubes, graph = uri("czso"))
    =>: 
    AtomPattern(subject = 'a', predicate = oneOfMeasures, graph = uri("czso"))
)

In [13]:
// AMIE mining task for the region cubes: minimum support 18, rules of at most 6 atoms,
// constants only at the object position, restricted to regionPattern.
// NOTE(review): the recorded log of the region run shows Timeout=60000, so that run was
// presumably made with the Timeout threshold below still enabled — confirm before
// comparing results.
val regionTask = Amie()
    .addThreshold(Threshold.MinSupport(18))
    .addThreshold(Threshold.MaxRuleLength(6))
    .addConstraint(constantsOnlyAtObject)
    .addPattern(regionPattern)
    //.addPattern(regionPattern2)
    //.addThreshold(Threshold.Timeout(1))

// AMIE mining task for the district cubes: same settings as regionTask except for
// minimum support 36 and districtPattern.
val districtTask = Amie()
    .addThreshold(Threshold.MinSupport(36))
    .addThreshold(Threshold.MaxRuleLength(6))
    .addConstraint(constantsOnlyAtObject)
    .addPattern(districtPattern)

In [14]:
// Mine the region task on the index, then print the number of rules and the elapsed
// wall-clock time in whole seconds.
val startTimeMillis = System.currentTimeMillis()
val regionTaskRuleset = index.mine(regionTask)
println(s"rules: ${regionTaskRuleset.size}")
println(s"duration: ${(System.currentTimeMillis() - startTimeMillis) / 1000}s")

2021-05-13 21:53:41:886 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Amie task settings:
MinHeadSize=100,
MinHeadCoverage=0.0,
MinSupport=18,
MaxThreads=4,
MinAtomSize=0,
MaxRuleLength=6,
WithConstants=true,
ConstantsPosition=Object,
Timeout=60000,
WithDuplicitPredicates=true,
Patterns=List(Mapped(Vector(Mapped(Variable(?b),Any,Any,Constant(Constant(-1596161386))), Mapped(Variable(?a),Constant(Constant(1481837794)),Variable(?b),Constant(Constant(1015601977))), Mapped(Variable(?a),Constant(Constant(624690160)),OneOf(ArrayBuffer(Constant(Constant(1171323585)), Constant(Constant(1687262354)))),Constant(Constant(1015601977)))),Some(Mapped(Variable(?a),OneOf(ArrayBuffer(Constant(Constant(-2070273298)), Constant(Constant(1659106226)), Constant(Constant(1797717682)), Constant(Constant(1142069620)))),Any,Constant(Constant(1015601977)))),false,false)),
OnlyPredicates=None,
WithoutPredicates=None


rules: 18677
duration: 11s


In [17]:
// Export the unfiltered region ruleset to a text file.
regionTaskRuleset.export("../rulesets/jaurYagoRegionTaskRuleset.txt")

In [15]:
// Keep only region rules whose body contains no measure predicate, compute PCA
// confidence (argument 0.5 — presumably the minimum PCA confidence; confirm), order by
// PCA confidence and then support, and cache the result.
val regionTaskRulesetFiltered = regionTaskRuleset
  .filterResolved(rule => !rule.body.exists(atom => measures.contains(atom.predicate)))
  .computePcaConfidence(0.5)
  .sortBy(Measure.PcaConfidence, Measure.Support)
  .cache
// Write the filtered rules to a text file and report how many remain.
regionTaskRulesetFiltered.export("../rulesets/jaurYagoRegionTaskRulesetFiltered.txt")
println(s"rules: ${regionTaskRulesetFiltered.size}")

rules: 3


In [16]:
// Mine the district task on the index, then print the number of rules and the elapsed
// wall-clock time in whole seconds.
val startTimeMillis = System.currentTimeMillis()
val districtTaskRuleset = index.mine(districtTask)
println(s"rules: ${districtTaskRuleset.size}")
println(s"duration: ${(System.currentTimeMillis() - startTimeMillis) / 1000}s")

2021-05-13 21:54:47:489 +0200 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Amie task settings:
MinHeadSize=100,
MinHeadCoverage=0.0,
MinSupport=36,
MaxThreads=4,
MinAtomSize=0,
MaxRuleLength=6,
WithConstants=true,
ConstantsPosition=Object,
Timeout=-1,
WithDuplicitPredicates=true,
Patterns=List(Mapped(Vector(Mapped(Variable(?b),Any,Any,Constant(Constant(-1596161386))), Mapped(Variable(?a),Constant(Constant(1481837794)),Variable(?b),Constant(Constant(1015601977))), Mapped(Variable(?a),Constant(Constant(624690160)),OneOf(ArrayBuffer(Constant(Constant(345337843)), Constant(Constant(2112950736)))),Constant(Constant(1015601977)))),Some(Mapped(Variable(?a),OneOf(ArrayBuffer(Constant(Constant(-2070273298)), Constant(Constant(1659106226)), Constant(Constant(1797717682)), Constant(Constant(1142069620)))),Any,Constant(Constant(1015601977)))),false,false)),
OnlyPredicates=None,
WithoutPredicates=None


rules: 193770
duration: 174s


In [19]:
// Export the unfiltered district ruleset to a text file.
districtTaskRuleset.export("../rulesets/jaurYagoDistrictTaskRuleset.txt")

In [None]:
// Keep only district rules whose body contains no measure predicate, compute PCA
// confidence (argument 0.6 — presumably the minimum PCA confidence; confirm), order by
// PCA confidence and then support, and cache the result.
val districtTaskRulesetFiltered = districtTaskRuleset
  .filterResolved(rule => !rule.body.exists(atom => measures.contains(atom.predicate)))
  .computePcaConfidence(0.6)
  .sortBy(Measure.PcaConfidence, Measure.Support)
  .cache
// Write the filtered rules to a text file and report how many remain.
districtTaskRulesetFiltered.export("../rulesets/jaurYagoDistrictTaskRulesetFiltered.txt")
println(s"rules: ${districtTaskRulesetFiltered.size}")

In [None]:
// Combine the filtered region and district rulesets into one ruleset.
val ruleset = regionTaskRulesetFiltered + districtTaskRulesetFiltered

In [None]:
// Export the combined filtered ruleset to a text file.
ruleset.export("../rulesets/jaurYagoRulesetFiltered.txt")

In [None]:
// Cluster the combined ruleset with DBSCAN and cache the clustered result.
val rulesetClustered = ruleset
.makeClusters {
// Rule similarity is computed from the rules' atoms. Declared implicit so that it is —
// presumably — resolved as the implicit similarity argument of DbScan; confirm.
implicit val ruleSimilarityCounting: SimilarityCounting[Rule.Simple] = SimilarityCounting.AtomsSimilarityCounting
// DBSCAN parameters: at least 3 neighbours with similarity of at least 0.85.
DbScan(minNeighbours = 3, minSimilarity = 0.85)
}
.cache

In [None]:
// Persist the clustered ruleset to a cache file and also export it as text.
rulesetClustered
.cache("../cache/jaurYagoOneMeasureTaskRulesetFilteredClustered.cache")
.export("../rulesets/jaurYagoOneMeasureTaskRulesetFilteredClustered.txt")

In [None]:
// Reload the clustered ruleset from the cache file written by the preceding cell
// (jaurYagoOneMeasureTaskRulesetFilteredClustered.cache), bound to the current index.
// The path must match the one used when caching; the previous value pointed at a
// differently named file that this notebook never writes.
val rulesetClustered = Ruleset.fromCache(index, "../cache/jaurYagoOneMeasureTaskRulesetFilteredClustered.cache")

In [None]:
// True when the rule carries a Cluster measure equal to `cluster`.
// Option.contains is used instead of Option.get so that a rule without any cluster
// measure is simply not matched rather than throwing NoSuchElementException.
val filterByCluster = (r: ResolvedRule, cluster: Int) =>
  r.measures.get(Measure.Cluster).contains(Measure.Cluster(cluster))

// Counts the rules in each cluster and returns cluster number -> rule count,
// ordered by descending count.
// Cluster numbers are probed upwards from 0 and probing stops at the first number
// with no rules, so cluster numbers are assumed to be contiguous from 0.
// Each probe is a separate filter pass over the ruleset.
val groupRulesByCluster = (r: Ruleset) => {
  val sizesByCluster = Iterator
    .from(0)
    .map(cluster => cluster -> r.filterResolved(rule => filterByCluster(rule, cluster)).size)
    .takeWhile { case (_, size) => size > 0 }
    .toVector
  ListMap(sizesByCluster.sortWith(_._2 > _._2): _*)
}

In [None]:
// Show the number of rules per cluster for the clustered ruleset.
groupRulesByCluster(rulesetClustered)

In [None]:
// True when the rule's body has exactly `length` atoms (the head is not counted).
val filterByLength = (r: ResolvedRule, length: Int) => length == r.body.size

// Counts the rules for every body size from 1 to `maxLength` (inclusive) and returns
// body size -> rule count, ordered by descending count. Sizes with no rules are kept
// with a count of 0. Each size is counted with a separate filter pass over the ruleset.
val groupRulesByLength = (r: Ruleset, maxLength: Int) => {
  val countsByLength = (1 to maxLength).map { bodySize =>
    bodySize -> r.filterResolved(rule => filterByLength(rule, bodySize)).size
  }
  ListMap(countsByLength.sortWith(_._2 > _._2): _*)
}

In [None]:
// Distribution of body sizes (1 to 6) in the clustered ruleset.
groupRulesByLength(rulesetClustered, 6)

In [None]:
// Distribution of body sizes (1 to 6) in the unfiltered district ruleset.
groupRulesByLength(districtTaskRuleset, 6)

In [None]:
// Distribution of body sizes (1 to 6) in the unfiltered region ruleset.
groupRulesByLength(regionTaskRuleset, 6)