Skip to content

Commit

Permalink
Work through example
Browse files Browse the repository at this point in the history
  • Loading branch information
rikhuijzer committed Jun 23, 2023
1 parent 027dcbd commit 6bdbd04
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 24 deletions.
11 changes: 1 addition & 10 deletions src/dependent.jl
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,7 @@ function _linearly_dependent(rules::Vector{Rule})::BitVector
for (A, B) in P
indexes = filter(i -> _related_rule(rules[i], A, B), 1:length(rules))
subset = view(rules, indexes)
# TODO return the index of the dependent rule with the lowest gap here.
dependent_subset = _linearly_dependent(subset, A, B)
# TODO assert that gap is always correct
# TODO maybe just create a large feature matrix on the whole ruleset and increase it step by step.
# Once the rank doesn't increase, pause and figure out which rules are linearly dependent.
# Then note which rule can be removed and filter those in the next step.
for i in 1:length(dependent_subset)
if dependent_subset[i]
Expand All @@ -172,17 +168,12 @@ function _linearly_dependent(rules::Vector{Rule})::BitVector
return dependent
end

"""
    _filter_linearly_dependent2(rules::Vector{Rule})

Unimplemented stub: the body is empty, so every call returns `nothing` and `rules` is
not inspected.
"""
function _filter_linearly_dependent2(rules::Vector{Rule})
    return nothing
end

"""
Return the subset of `rules` which are not linearly dependent.
This is based on a complex heuristic involving calculating the rank of the matrix, see above StackExchange link for more information.
Also note that this method assumes that the rules are ordered by frequency of occurrence in the trees.
This assumption is used to filter less common rules when finding linearly dependent rules.
"""
function _filter_linearly_dependent(rules::Vector{Rule})::Vector{Rule}
sorted = _tmp_sort_by_gap_size(rules)
dependent = _linearly_dependent(rules)
out = Rule[]
for i in 1:length(dependent)
Expand Down
58 changes: 47 additions & 11 deletions src/tmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,21 @@ And also adding a column for each conjunction (&).
For example, for the rule set
Rule 1: A < 3, then ...
Rule 2: A ≥ 3, then ...
Rule 3: A < 3 & B < 2, then ...
Rule 2: B < 2, then ...
Rule 3: A ≥ 3 & B ≥ 2, then ...
Rule 4: A ≥ 3 & B < 2, then ...
returns the following matrix (without the headers):
returns the following matrix (with the headers as the `conditions` vector):
| ---- | A < 3 | A ≥ 3 | B < 2 | B ≥ 2 | A < 3 & B < 2 |
| ---- | ----- | ----- | ----- | ----- | ------------- |
| R1 | 1 | 0 | 0 | 0 | 1 |
| R2 | 0 | 1 | 0 | 0 | 0 |
| R3 | 0 | 0 | 0 | 0 | 1 |
| ---- | A < 3 | A ≥ 3 | B < 2 | B ≥ 2 | A ≥ 3 & B ≥ 2 | A ≥ 3 & B < 2 |
| ---- | ----- | ----- | ----- | ----- | ------------- | ------------- |
| R1 | 1 | 0 | 0 | 0 | 0 | 0 |
| R2 | 0 | 0 | 1 | 0 | 0 | 0 |
| R3 | 0 | 1 | 0 | 1 | 1 | 0 |
| R4 | 0 | 1 | 1 | 0 | 0 | 1 |
Note that the unknown cases (A < 3 => B < 2?) are set to 0.
In other words, the matrix represents which synthetic datapoints (constraints in columns)
are implied by each rule (rows).
Gaussian elimination needs to know only implications (=>).
"""
function _tmp_rule_space(rules::Vector{Rule})
Expand All @@ -126,12 +129,45 @@ function _tmp_rule_space(rules::Vector{Rule})
rule = rules[i]
for j in eachindex(conditions)
condition = conditions[j]
space[i, j] = _tmp_implies(condition, rule.path)
space[i, j] = _tmp_implies(rule.path, condition)
end
end
return (conditions, space)
end

"Return the indexes of the linearly dependent rules."
function _tmp_linearly_dependent(rules::Vector{Rule})
    # An empty rule set has no dependent rules; returning early also avoids indexing
    # `rules[1]` / `rules[end]` on an empty vector below.
    isempty(rules) && return Int[]

    # Sanity check on the expected ordering (decreasing gap size): the last rule must
    # not have a larger gap than the first one. `<=` rather than `<` so that a single
    # rule, or rules with equal gaps, pass the check.
    @assert _tmp_gap_size(rules[end]) <= _tmp_gap_size(rules[1])

    # `space` has one row per rule and one column per entry of `conditions`.
    conditions, space = _tmp_rule_space(rules)
    @assert size(space, 2) == length(conditions)

    # Rows that are entirely zero after reduction are reported as linearly dependent;
    # the returned values are row (i.e. rule) indexes into `rules`.
    reduced_form = _reduced_echelon_form(space)
    return findall(x -> all(iszero, x), eachrow(reduced_form))
end

"""
    _tmp_gap_size(rule::Rule)

Return the gap size of `rule`: the sum of the element-wise absolute differences between
`rule.then` and `rule.otherwise`. Both fields must have the same length.
"""
function _tmp_gap_size(rule::Rule)
    then_vals, otherwise_vals = rule.then, rule.otherwise
    @assert length(then_vals) == length(otherwise_vals)
    return sum(abs.(then_vals .- otherwise_vals))
end

"""
Return the vector of rules sorted by decreasing gap size.
This allows the linearly dependent filter to remove the rules further down the list since
they have a smaller gap.
"""
function _tmp_sort_by_gap_size(rules::Vector{Rule})
    # Sort a copy so the caller's vector is left untouched; largest gap comes first.
    ordered = copy(rules)
    sort!(ordered; by=_tmp_gap_size, rev=true)
    return ordered
end

"""
Return `rules` but with linearly dependent rules removed.
Note that this does not remove the rules with one constraint which are identical to a
previous rule with the constraint sign reversed.
"""
function _tmp_filter_linearly_dependent(rules::Vector{Rule})::Vector{Rule}
    # Order by decreasing gap size first, then drop the positions reported as dependent.
    by_gap = _tmp_sort_by_gap_size(rules)
    dependent = _tmp_linearly_dependent(by_gap)
    keep = setdiff(eachindex(by_gap), dependent)
    return by_gap[keep]
end
33 changes: 30 additions & 3 deletions test/tmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,35 @@ function _condition_implies_rules(condition::S.TreePath, conditions, space::BitM
end

conditions, space = S._tmp_rule_space([r1, r2, r3])
@test _condition_implies_rules(S.TreePath(" X[i, 1] < 3 "), conditions, space) == Bool[1, 0, 0]
@test _condition_implies_rules(S.TreePath(" X[i, 1] < 3 "), conditions, space) == Bool[1, 0, 1]
@test _condition_implies_rules(S.TreePath(" X[i, 1] ≥ 3 "), conditions, space) == Bool[0, 1, 0]
@test _condition_implies_rules(S.TreePath(" X[i, 2] < 2 "), conditions, space) == Bool[0, 0, 0]
@test _condition_implies_rules(S.TreePath(" X[i, 2] < 2 "), conditions, space) == Bool[0, 0, 1]
@test _condition_implies_rules(S.TreePath(" X[i, 2] ≥ 2 "), conditions, space) == Bool[0, 0, 0]
@test _condition_implies_rules(r3.path, conditions, space) == Bool[1, 0, 1]
@test _condition_implies_rules(r3.path, conditions, space) == Bool[0, 0, 1]

###
# TMP COPY FROM TEST/DEPENDENT
###
# Fixture rules. Each `S.Rule` is built from a `S.TreePath` condition string followed by
# two one-element vectors (presumably the `then` and `otherwise` outputs; confirm against
# the `Rule` definition).
# r1/r2, r3/r4, r5/r6, r8/r9 and r10/r11 are complementary pairs: same feature and
# threshold with the comparison reversed and the two output vectors swapped.
r1 = S.Rule(S.TreePath(" X[i, 1] < 32000 "), [0.061], [0.408])
r2 = S.Rule(S.TreePath(" X[i, 1] ≥ 32000 "), [0.408], [0.061])

r3 = S.Rule(S.TreePath(" X[i, 2] < 8000 "), [0.062], [0.386])
r4 = S.Rule(S.TreePath(" X[i, 2] ≥ 8000 "), [0.386], [0.062])
r5 = S.Rule(S.TreePath(" X[i, 3] < 64 "), [0.056], [0.334])
r6 = S.Rule(S.TreePath(" X[i, 3] ≥ 64 "), [0.334], [0.056])
# r7 and r12 are two-constraint rules that share the `X[i, 1] ≥ 32000` constraint and
# differ only in the sign of the `X[i, 3]` constraint.
r7 = S.Rule(S.TreePath(" X[i, 1] ≥ 32000 & X[i, 3] ≥ 64 "), [0.517], [0.067])
r8 = S.Rule(S.TreePath(" X[i, 4] < 8 "), [0.050], [0.312])
r9 = S.Rule(S.TreePath(" X[i, 4] ≥ 8 "), [0.312], [0.050])
r10 = S.Rule(S.TreePath(" X[i, 5] < 50 "), [0.335], [0.058])
r11 = S.Rule(S.TreePath(" X[i, 5] ≥ 50 "), [0.058], [0.335])
r12 = S.Rule(S.TreePath(" X[i, 1] ≥ 32000 & X[i, 3] < 64 "), [0.192], [0.102])
r13 = S.Rule(S.TreePath(" X[i, 1] < 32000 & X[i, 4] ≥ 8 "), [0.157], [0.100])
# First constraint is updated based on a comment from Clément via email.
r14 = S.Rule(S.TreePath(" X[i, 1] ≥ 32000 & X[i, 4] ≥ 12 "), [0.554], [0.073])
r15 = S.Rule(S.TreePath(" X[i, 1] ≥ 32000 & X[i, 4] < 12 "), [0.192], [0.096])
# NOTE(review): the second vectors of r16 and r17 (0.76, 0.94) are an order of magnitude
# larger than every other second value in this fixture; possibly typos for 0.076 / 0.094.
# Confirm against the source of these numbers.
r16 = S.Rule(S.TreePath(" X[i, 2] ≥ 8000 & X[i, 4] ≥ 12 "), [0.586], [0.76])
r17 = S.Rule(S.TreePath(" X[i, 2] ≥ 8000 & X[i, 4] < 12 "), [0.236], [0.94])

# Disabled expectations for the new filter, kept for reference while it is worked out.
# NOTE(review): the first one compares against a `Bool` vector; check whether
# `_tmp_linearly_dependent` returns a mask or a list of indexes before enabling it.
# @test S._tmp_linearly_dependent([r1, r5, r7, r12]) == Bool[0, 0, 0, 1]
# @test S._tmp_filter_linearly_dependent([r1, r5, r7, r12]) == [r1, r5, r7]
# @test S._tmp_filter_linearly_dependent([r1, r5, r12, r7]) == [r1, r5, r7]

0 comments on commit 6bdbd04

Please sign in to comment.