In [55]:
kernel.silent(true)

val intervalsCount = 5
val minSupport = 50
val minConfidence = 0.65

In [56]:
import coursierapi.MavenRepository
interp.repositories() ++= Seq(MavenRepository.of("https://jitpack.io"))

In [57]:
import $ivy.`com.github.propi:rdfrules:1.5.0`
import collection._
import org.apache.jena.riot.Lang

import com.github.propi.rdfrules.data._
import com.github.propi.rdfrules.algorithm.amie._
import com.github.propi.rdfrules.algorithm.dbscan._
import com.github.propi.rdfrules.utils._
import com.github.propi.rdfrules.index._
import com.github.propi.rdfrules.rule._
import com.github.propi.rdfrules.ruleset._

In [58]:
val jaurDistrictsTotal = Graph("data/czso-jaur-districts-total.ttl")
val jaurDistrictsBySex = Graph("data/czso-jaur-districts-by-sex.ttl")
val jaurRegionsTotal = Graph("data/czso-jaur-regions-total.ttl")
val jaurRegionsBySex = Graph("data/czso-jaur-regions-by-sex.ttl")

In [59]:
// in all cubes
val unemploymentRate = "http://data.czso.cz/ontology/podilNezamestnanych"
val reachableApplicants = "http://data.czso.cz/ontology/dosazitelniNeumisteniUchazeciOZamestnani"

// only in total cubes
val unplacedApplicants = "http://data.czso.cz/ontology/neumisteniUchazeciOZamestnani"
val vacaniesCount = "http://data.czso.cz/ontology/pocetVolnychMist"

In [60]:
val refArea = "http://data.czso.cz/ontology/refArea"
val sex = "http://data.czso.cz/ontology/sex"
val refPeriod = "http://data.czso.cz/ontology/refPeriod"

In [61]:
val equiFrequent = DiscretizationTask.Equifrequency(intervalsCount)
val hasPredicate = (quad: Quad, uri: String) => quad.triple.predicate.hasSameUriAs(uri)

val jaurDistrictsTotalDiscretized = jaurDistrictsTotal
    .discretize(equiFrequent)(quad => hasPredicate(quad, unemploymentRate))
    .discretize(equiFrequent)(quad => hasPredicate(quad, reachableApplicants))
    .discretize(equiFrequent)(quad => hasPredicate(quad, unplacedApplicants))
    .discretize(equiFrequent)(quad => hasPredicate(quad, vacaniesCount))

val jaurRegionsTotalDiscretized = jaurRegionsTotal
    .discretize(equiFrequent)(quad => hasPredicate(quad, unemploymentRate))
    .discretize(equiFrequent)(quad => hasPredicate(quad, reachableApplicants))
    .discretize(equiFrequent)(quad => hasPredicate(quad, unplacedApplicants))
    .discretize(equiFrequent)(quad => hasPredicate(quad, vacaniesCount))

val jaurDistrictsBySexDiscretized = jaurDistrictsBySex
    .discretize(equiFrequent)(quad => hasPredicate(quad, unemploymentRate))
    .discretize(equiFrequent)(quad => hasPredicate(quad, reachableApplicants))

val jaurRegionsBySexDiscretized = jaurRegionsBySex
    .discretize(equiFrequent)(quad => hasPredicate(quad, unemploymentRate))
    .discretize(equiFrequent)(quad => hasPredicate(quad, reachableApplicants))

In [62]:
// TODO pro každý dataset jiný qb:dataSet
val qbDataSet = "http://purl.org/linked-data/cube#dataSet"
val uri = (value: String) => TripleItem.Uri(value)

val jaurDistrictsTotalNamed = jaurDistrictsTotalDiscretized
.map(t => if (t.predicate.hasSameUriAs(qbDataSet)) t.copy(`object` = uri("jaurDistrictsTotal")) else t)

val jaurRegionsTotalNamed = jaurRegionsTotalDiscretized
.map(t => if (t.predicate.hasSameUriAs(qbDataSet)) t.copy(`object` = uri("jaurRegionsTotal")) else t)

val jaurDistrictsBySexNamed = jaurDistrictsBySexDiscretized
.map(t => if (t.predicate.hasSameUriAs(qbDataSet)) t.copy(`object` = uri("jaurDistrictsBySex")) else t)

val jaurRegionsBySexNamed = jaurRegionsBySexDiscretized
.map(t => if (t.predicate.hasSameUriAs(qbDataSet)) t.copy(`object` = uri("jaurRegionsBySex")) else t)


In [63]:
// TODO u total datasetů odstranit dimenzi pohlaví
val jaurDistrictsTotalNoSexDimension = jaurDistrictsTotalNamed.filter(t => !t.predicate.hasSameUriAs(sex))
val jaurRegionsTotalNoSexDimension = jaurRegionsTotalNamed.filter(t => !t.predicate.hasSameUriAs(sex))

In [64]:
val dataset = Dataset() + 
    jaurDistrictsTotalNoSexDimension + 
    jaurRegionsTotalNoSexDimension + 
    jaurDistrictsBySexNamed + 
    jaurRegionsBySexNamed

In [65]:
val index = dataset.index()
index.cache("index.cache")

2021-03-06 23:31:18:935 +0100 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Predicates trimming.
2021-03-06 23:31:18:944 +0100 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Subjects indexing.
2021-03-06 23:31:18:981 +0100 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Subjects trimming.
2021-03-06 23:31:18:988 +0100 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Objects indexing.
2021-03-06 23:31:19:000 +0100 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Objects trimming.


In [66]:
val constantsAtObject = RuleConstraint.ConstantsAtPosition.ConstantsPosition.Object
val onlyObjectConstants = RuleConstraint.ConstantsAtPosition(constantsAtObject)
val oneOfMeasures = OneOf(
    uri(unemploymentRate), 
    uri(unemploymentRate), 
    uri(reachableApplicants), 
    uri(vacaniesCount)
)
val qbdPredicate = uri(qbDataSet)
val oneOfDimensions = OneOf(
    uri(refArea), 
    uri(sex), 
    uri(refPeriod)
)

In [67]:
val oneCubeTwoMeasures: RulePattern = (
    AtomPattern(subject = 'a', predicate = qbdPredicate) &: 
    AtomPattern(subject = 'a', predicate = oneOfMeasures) 
    =>: 
    AtomPattern(subject = 'a', predicate = oneOfMeasures)
)

val oneCubeTwoMeasuresOneDimension: RulePattern = (
    AtomPattern(subject = 'a', predicate = qbdPredicate) &: 
    AtomPattern(subject = 'a', predicate = oneOfMeasures) &: 
    AtomPattern(subject = 'a', predicate = oneOfDimensions)
    =>: 
    AtomPattern(subject = 'a', predicate = oneOfMeasures)
)

In [68]:
val oneCubeTwoMeasuresTask = Amie()
    .addThreshold(Threshold.MinSupport(minSupport))
    .addThreshold(Threshold.MaxRuleLength(3))
    .addThreshold(Threshold.MinHeadSize(0))
    .addConstraint(onlyObjectConstants)
    .addPattern(oneCubeTwoMeasures)
    .addPattern(oneCubeTwoMeasuresOneDimension)

val oneCubeTwoMeasuresOneDimensionTask = Amie()
    .addThreshold(Threshold.MinSupport(minSupport))
    .addThreshold(Threshold.MaxRuleLength(4))
    .addThreshold(Threshold.MinHeadSize(0))
    .addConstraint(onlyObjectConstants)
    .addPattern(oneCubeTwoMeasures)
    .addPattern(oneCubeTwoMeasuresOneDimension)

In [69]:
val oneCubeTwoMeasuresRuleset = index.mine(oneCubeTwoMeasuresTask)
val oneCubeTwoMeasuresOneDimensionRuleset = index.mine(oneCubeTwoMeasuresOneDimensionTask)

2021-03-06 23:31:19:602 +0100 [scala-interpreter-1] INFO com.github.propi.rdfrules.utils.Debugger - Amie task settings:
MinHeadSize=1,
MinHeadCoverage=0.0,
MinSupport=50,
MaxThreads=4,
MinAtomSize=0,
MaxRuleLength=3,
WithConstants=true,
ConstantsPosition=Object,
Timeout=-1,
WithDuplicitPredicates=true,
Patterns=List(Mapped(Vector(Mapped(Variable(?a),Constant(Constant(624690160)),Any,Any), Mapped(Variable(?a),OneOf(ArrayBuffer(Constant(Constant(-2070273298)), Constant(Constant(-2070273298)), Constant(Constant(1659106226)), Constant(Constant(1142069620)))),Any,Any), Mapped(Variable(?a),OneOf(ArrayBuffer(Constant(Constant(1481837794)), Constant(Constant(-1485615047)), Constant(Constant(-901057765)))),Any,Any)),Some(Mapped(Variable(?a),OneOf(ArrayBuffer(Constant(Constant(-2070273298)), Constant(Constant(-2070273298)), Constant(Constant(1659106226)), Constant(Constant(1142069620)))),Any,Any)),false,false), Mapped(Vector(Mapped(Variable(?a),Constant(Constant(624690160)),Any,Any), Mapped(Vari

In [77]:
val ruleset = (oneCubeTwoMeasuresRuleset + oneCubeTwoMeasuresOneDimensionRuleset)
    .computeConfidence(minConfidence)
    .sortBy(Measure.Confidence, Measure.HeadCoverage)
ruleset.export("jaurRules.txt")
ruleset.foreach(rule => println("\n" + rule + "\n"))


(?a czso:neumisteniUchazeciOZamestnani [ 1025.0 ; 3045.0 )) ^ (?a qb:dataSet <jaurDistrictsTotal>) ^ (?a czso:podilNezamestnanych [ 1.2 ; 4.555 )) -> (?a czso:dosazitelniNeumisteniUchazeciOZamestnani [ 901.0 ; 2928.0 )) | support: 80, headCoverage: 0.033291718684977115, confidence: 0.9876543209876543, headSize: 2403, bodySize: 81


(?a czso:neumisteniUchazeciOZamestnani [ 8353.5 ; 26549.0 ]) ^ (?a qb:dataSet <jaurDistrictsTotal>) ^ (?a czso:podilNezamestnanych [ 8.765 ; 16.2 ]) -> (?a czso:dosazitelniNeumisteniUchazeciOZamestnani [ 8110.5 ; 25767.0 ]) | support: 79, headCoverage: 0.0328755722014149, confidence: 0.9753086419753086, headSize: 2403, bodySize: 81


(?a qb:dataSet <jaurDistrictsBySex>) ^ (?a czso:dosazitelniNeumisteniUchazeciOZamestnani [ 468.0 ; 1470.5 )) ^ (?a czso:sex sdmx-code:sex-M) -> (?a czso:podilNezamestnanych [ 1.13 ; 4.585 )) | support: 107, headCoverage: 0.04452767374115689, confidence: 0.7181208053691275, headSize: 2403, bodySize: 149

