# Result Status Differences

This script looks for differences between test results to find interesting changes. When it finds something which may be relevant, it can notify any interested parties. It is built on the [JDP framework](https://rpalethorpe.io.suse.de/jdp/).

First we need to build up our data structures to create the test matrix. There are some stats here which may be useful, but otherwise you can safely skip this part most of the time.

In [15]:
# Monitors library source files and recompiles them after most changes
import Revise

# Run the init script which will setup the JDP project if necessary
include("../src/init.jl")

# Bring DataFrame's _members_ into our namespace, so we can call them directly
using DataFrames
import DataStructures: SortedDict, SortedSet, SDSemiToken
import Dates: Day

# import the markdown string literal/macro
import Markdown: @md_str, MD

# Import some libraries from the JDP project
using JDP.Conf
using JDP.Trackers.OpenQA    # Contains functions for dealing with the OpenQA web API
using JDP.Trackers.Bugzilla  # Functions for accessing the Bugzilla API(s)
using JDP.Repository
using JDP.Spammer

In [None]:
# Fetch the OpenQA test results for "osd" from the repository as a Vector.
# NOTE(review): OpenQA.RecentOrInterestingJobsDef presumably selects which jobs are
# included; confirm against JDP.Trackers.OpenQA.
allres = Repository.fetch(OpenQA.TestResult, Vector, "osd", OpenQA.RecentOrInterestingJobsDef)

# Summary line for the report
md"We have **$(length(allres))** results in total"

We only show results for a single product, which can be set here.

In [None]:
# The product to report on, and the product prefix used to select the cloud results
product = "sle-15-SP1-Installer-DVD"
cloudproduct = "sle-15-SP1"

# Results whose product is exactly `product`
prodres = filter(res -> res.product == product, allres)

# Results whose product starts with `cloudproduct` and which carry the "Public Cloud" flag
cloudprodres = filter(allres) do res
    startswith(res.product, cloudproduct) || return false
    return "Public Cloud" in res.flags
end

md"""
We have **$(length(prodres))** test results for $(product) and $(length(cloudprodres)) for $(cloudproduct) cloud
"""

In [None]:
# Map the numeric value of every build seen in the product results to its original
# string, so that builds are ordered numerically rather than lexically
builds = SortedDict(parse(Float64, res.build) => res.build for res in prodres)

totalbuilds = length(builds)
# Number of most recent builds to compare, capped at 9.
# NOTE(review): this is `totalbuilds - 9` clamped to 0..9, so the window is empty when
# there are 9 or fewer builds and only reaches 9 once there are 18. Confirm this is
# intended rather than `min(9, totalbuilds)`.
recentnum = max(0, min(9, totalbuilds - 9))
# Keep only the last `recentnum` entries of `builds`
recentbuilds = SortedDict(k => v for (k, v) in collect(pairs(builds))[end - recentnum + 1:end])
# Bold each build name and join them as "a, b and c"
rbs = join(map(b -> "**$b**", values(recentbuilds)), ", ", " and ")

md"We have **$totalbuilds** builds in total. The last $recentnum are $rbs"

In [None]:
# Map the numeric value of every build seen in the cloud results to its original
# string, so that builds are ordered numerically rather than lexically
cloudbuilds = SortedDict(parse(Float64, res.build) => res.build for res in cloudprodres)

cloudtotalbuilds = length(cloudbuilds)
# Number of most recent cloud builds to compare, capped at 9.
# NOTE(review): this is `cloudtotalbuilds - 9` clamped to 0..9, so the window is empty
# when there are 9 or fewer builds. Confirm this is intended rather than
# `min(9, cloudtotalbuilds)`.
cloudrecentnum = max(0, min(9, cloudtotalbuilds - 9))
# Keep only the last `cloudrecentnum` entries of `cloudbuilds`
cloudrecentbuilds = SortedDict(k => v for (k, v) in collect(pairs(cloudbuilds))[end - cloudrecentnum + 1:end])
# Bold each build name and join them as "a, b and c"
cloudrbs = join(map(b -> "**$b**", values(cloudrecentbuilds)), ", ", " and ")

# Report the cloud build count here, not the count for the main product
md"We have **$cloudtotalbuilds** builds in total. The last $cloudrecentnum are $cloudrbs"

In [None]:
# The distinct (suit, name, arch, flags) identities found in the product results, held
# in a sorted set
prodtestnames = SortedSet(unique(map(prodres) do res
    (suit = res.suit, name = res.name, arch = res.arch, flags = res.flags)
end));

In [None]:
# The distinct (suit, name, arch, flags) identities found in the cloud results, held in
# a sorted set
cloudprodtestnames = SortedSet(unique(map(cloudprodres) do res
    (suit = res.suit, name = res.name, arch = res.arch, flags = res.flags)
end));

In [None]:
# Build a matrix of test statuses (tests × builds) containing only the tests whose
# status is "interesting": it varies across the builds, the test has results in enough
# of the builds, and the result in the last build is not "passed".
#
# Arguments:
#   testnames    - collection of test identities; named tuples with the fields `suit`,
#                  `name`, `arch` and `flags`
#   recentbuilds - dictionary whose keys are the numeric builds to include
#   prodres      - test results; each needs `build` (a string parseable as Float64),
#                  `suit`, `name`, `arch`, `flags` and `result`
#
# Keywords:
#   bad_build_limit - a build is dropped entirely when more than this fraction of
#                     `testnames` has no result in it
#   bad_arch_limit  - a build is ignored for one architecture when more than this
#                     fraction of that architecture's tests has no result in it
#   bad_test_limit  - a test is dropped when it has no result in more than this
#                     fraction of the remaining builds
#   no_limits       - when true, all three limits are set to 1
#
# Returns a named tuple with
#   failed      - the identities of the interesting tests
#   builds      - one column per remaining build with the status of each failed test
#                 ("none" where there is no result)
#   headers     - column names: Suit, Test, Arch and then one per build
#   final_tests - for each failed test, its result in the last build or `nothing`
function build_diff_matrix(testnames, recentbuilds, prodres;
        bad_build_limit=0.25, bad_arch_limit=0.25, bad_test_limit=0.25, no_limits=false)

    if no_limits
        bad_build_limit = bad_arch_limit = bad_test_limit = 1
    end

    # First build a dictionary with build names for keys and test dictionaries for values
    buildsres = SortedDict(
        build => Dict{NamedTuple, Any}(name => nothing for name in testnames) for build in keys(recentbuilds)
    )

    # Find the best result for each build-test pair: once a "passed" result has been
    # stored it is kept, otherwise the most recently seen result wins
    for res in prodres
        build = parse(Float64, res.build)
        haskey(buildsres, build) || continue

        name = (suit = res.suit, name = res.name, arch = res.arch, flags = res.flags)
        bres = buildsres[build]
        if haskey(bres, name) && (bres[name] === nothing || bres[name].result != "passed")
            bres[name] = res
        end
    end

    bad_builds = Set{Float64}()
    bad_builds_for_arch = Dict{String, Set{Float64}}()

    # Remove builds where many of the tests were not run as this usually means there was
    # an obvious problem with the testing infrastructure. Also ignore all tests for a
    # particular architecture in a build, if many tests on that arch returned no result.
    for (build, tests) in buildsres
        none_count = 0
        arch_none_count = Dict{String, Int}()
        arch_count = Dict{String, Int}()

        for (id, res) in tests
            if res === nothing || res.result == "none"
                none_count += 1
                arch_none_count[id.arch] = get(arch_none_count, id.arch, 0) + 1
            end
            arch_count[id.arch] = get(arch_count, id.arch, 0) + 1
        end

        if none_count / length(testnames) > bad_build_limit
            push!(bad_builds, build)
        else
            for (arch, count) in arch_none_count
                if count / arch_count[arch] > bad_arch_limit
                    # `get!` (not `get`) so that the set is stored in the dictionary;
                    # with `get` the build was pushed into a temporary set and lost
                    push!(get!(Set{Float64}, bad_builds_for_arch, arch), build)
                end
            end
        end
    end

    for build in bad_builds
        delete!(buildsres, build)
    end

    failed_testnames = []
    final_tests = []
    no_builds = Set{Float64}()
    for name in testnames
        # Builds which are to be ignored for this test's architecture
        skipped_builds = get(bad_builds_for_arch, name.arch, no_builds)

        # Count how often each status occurs for this test
        statuses = Dict{String, Int}()
        for (build, results) in buildsres
            build in skipped_builds && continue

            res = results[name]
            status = res === nothing ? "none" : res.result
            statuses[status] = get(statuses, status, 0) + 1
        end

        # The result in the last remaining build
        final = length(buildsres) > 0 ? buildsres[end][name] : nothing

        varied = length(statuses) > 1
        too_sparse = get(statuses, "none", 0) / length(buildsres) > bad_test_limit
        final_passed = final !== nothing && final.result == "passed"
        boring = !varied || too_sparse || final_passed

        if boring
            for k in keys(buildsres)
                delete!(buildsres[k], name)
            end
        else
            push!(failed_testnames, name)
            push!(final_tests, final)
        end
    end

    # Put the results into columns for display in a table
    buildcols = [[let res = buildsres[build][name]
        res === nothing ? "none" : res.result
    end for name in failed_testnames] for build in keys(buildsres)]
    headers = [Symbol("Suit"), Symbol("Test"), Symbol("Arch"), Symbol.(keys(buildsres))...]

    return (failed = failed_testnames, builds = buildcols, headers = headers, final_tests = final_tests)
end

In [None]:
# Diff matrix for the tests whose first suit component is "LTP"
ltp = let names = filter(tn -> tn.suit[1] == "LTP", collect(prodtestnames))
    build_diff_matrix(names, recentbuilds, prodres)
end;

In [None]:
# Diff matrix for the tests whose first suit component is "fstests"
fstests = let names = filter(tn -> tn.suit[1] == "fstests", collect(prodtestnames))
    build_diff_matrix(names, recentbuilds, prodres)
end;

In [None]:
# Diff matrix for every test found in the cloud results
ipa = build_diff_matrix(cloudprodtestnames, cloudrecentbuilds, cloudprodres);

In [None]:
# Tests whose suit path starts with OpenQA, HPC
hpcres = filter(
    tn -> length(tn.suit) > 1 && tn.suit[1:2] == ["OpenQA", "HPC"],
    collect(prodtestnames))
hpc = build_diff_matrix(hpcres, recentbuilds, prodres);

In [None]:
# Every test not covered by the LTP, fstests, IPA or HPC selections
otherres = filter(collect(prodtestnames)) do tn
    top = tn.suit[1]
    top ∉ ("LTP", "fstests", "IPA") && get(tn.suit, 2, nothing) ≠ "HPC"
end
others = build_diff_matrix(otherres, recentbuilds, prodres);

## Results

Below are the diff matrices for various test suites

In [None]:
# Display a diff matrix `m` (as returned by `build_diff_matrix`) as a DataFrame with
# one row per failed test: suit, test name with flags, arch, then one column per build.
# LINES is set to 200 while displaying.
function show_matrix(m)
    suits = map(t -> join(t.suit, ":"), m.failed)
    tests = map(t -> t.name * " " * join(t.flags, " "), m.failed)
    archs = map(t -> t.arch, m.failed)
    columns = [suits, tests, archs, m.builds...]

    withenv("LINES" => 200) do
        display(DataFrame(columns, m.headers))
    end
end

In [None]:
# Display the LTP diff matrix
show_matrix(ltp)

In [None]:
# Display the fstests diff matrix
show_matrix(fstests)

In [None]:
# Display the diff matrix built from the cloud results
show_matrix(ipa)

In [None]:
# Display the HPC diff matrix
show_matrix(hpc)

In [None]:
# Display the diff matrix for all remaining tests
show_matrix(others)

## Notifications

Next we notify interested persons of the changes in test results. To limit the amount of noise, each test can only be included in a notification to the specified set of users once a month.

In [None]:
# Post a message to the given users listing the tests in `mat` whose latest status
# differs from the status stored when these users were last notified about them.
#
# Arguments:
#   mat      - a diff matrix as returned by `build_diff_matrix`; only its `failed` and
#              `final_tests` fields are read
#   settings - the user names to notify; they are also made part of the flag key, so
#              each distinct set of users is tracked separately
#
# Returns the `(test_id, test)` tuples which were included in the message. It is empty
# when nothing changed, in which case no message is posted.
function maybe_notify(mat, settings)
    users = settings
    users_key = join(users, ",")
    changed_tests = Tuple{String, Any}[]

    for (name, test) in zip(mat.failed, mat.final_tests)
        test_name = join(name.suit, ":") * ":$(name.name)"
        test_id = "$test_name@$(name.arch)[" * join(name.flags, ",") * "]"
        flag_key = "diff-notified-$test_id$users_key"
        oldres = Repository.get_temp_flag(flag_key)
        newres = test ≠ nothing ? test.result : "none"
        @debug test_id repr(oldres) newres
        if oldres ≠ newres
            push!(changed_tests, (test_id, test))
            # NOTE(review): the notebook text says a test is only reported once a month,
            # but the flag is stored with Day(1) - confirm which is intended
            Repository.set_temp_flag(flag_key, newres, Day(1))
        end
    end

    isempty(changed_tests) && return changed_tests

    io = IOBuffer()
    println(io, "The following tests appear to have changed status recently:\n")

    for (test_id, test) in changed_tests
        if test ≠ nothing
            show(io, MIME("text/markdown"), test)
            println(io)
        else
            # There is no result to render, so print the test id without its flags
            println(io, split(test_id, "[")[1])
        end
    end

    print(io, "\nSee the [Status Difference Report](https://rpalethorpe.io.suse.de/jdp/reports/Report-Status-Diff.html) for details")

    Spammer.post_message(Spammer.Message(String(take!(io)), users))

    return changed_tests
end

In [6]:
# Log in to the "osd" tracker; the resulting session is passed to the OpenQA API call
# which fetches the job group
using JDP.Tracker
osd = Tracker.login("osd")

JDP.Trackers.OpenQA.Session("openqa.suse.de", `openqa-client --json-output --host`, "<redacted API key>", "<redacted API secret>")

In [9]:
# Fetch job group 116 using the session; its description carries the
# [JDP.notify.on-status-diff] settings which are parsed further down
jgroup = OpenQA.get_job_group(osd, OpenQA.JobGroup(116))

JDP.Trackers.OpenQA.JobGroup(116, 15, "Kernel", "Linux Test Project (LTP), fstests, nvmftests, Trinity and other misc kernel tests.\r\n\r\nAutogenerated reports:\r\n\r\n* [General](https://rpalethorpe.io.suse.de/jdp/reports/Report-DataFrames.html)\r\n* [Milestone](https://rpalethorpe.io.suse.de/jdp/reports/#Rendered-Inline-1) (generated for the latest build, not necessarily the last milestone build)\r\n* [Bug Tag Propagation](https://rpalethorpe.io.suse.de/jdp/reports/Propagate%20Bug%20Tags.html)\r\n* [Status Diff](https://rpalethorpe.io.suse.de/jdp/reports/Report-Status-Diff.html)\r\n\r\n<code>\r\n[JDP.notify.on-status-diff] <br>\r\nrpalethorp = ['LTP', 'OpenQA'] <br>\r\nmetan = 'LTP' <br>\r\npvorel = 'LTP' <br>\r\nmmoese = ['nvmftests', 'LTP'] <br>\r\nlansuse = 'fstests' <br>\r\nyosun = 'fstests' <br>\r\ncfconrad = 'udev.no-partlabel-links' <br>\r\n</code>")

In [21]:
import TOML

In [23]:
# Take the text between the <code> tags of the job group description, strip the HTML
# line breaks and parse what is left as TOML
tdict = TOML.parse(replace(match(r"<code>(.*)</code>"s, jgroup.description)[1], "<br>" => ""))

Dict{AbstractString,Any} with 1 entry:
  "JDP" => Dict{AbstractString,Any}("notify"=>Dict{AbstractString,Any}("on-stat…

In [27]:
# List each user with the suits they subscribed to, wrapping a single suit given as a
# plain value in a one element array
let subscriptions = tdict["JDP"]["notify"]["on-status-diff"]
    [user => (suits isa Vector ? suits : [suits]) for (user, suits) in subscriptions]
end

7-element Array{Pair{String,Array{String,1}},1}:
     "mmoese" => ["nvmftests", "LTP"]       
      "metan" => ["LTP"]                    
   "cfconrad" => ["udev.no-partlabel-links"]
      "yosun" => ["fstests"]                
    "lansuse" => ["fstests"]                
     "pvorel" => ["LTP"]                    
 "rpalethorp" => ["LTP", "OpenQA"]          

In [4]:
# Fetch job group 116 for "osd" through the repository instead of the API session.
# NOTE(review): the recorded output below shows this call failing with a MethodError
# (cannot convert Nothing to JobGroup) - confirm whether it works now.
jgroup = Repository.fetch(OpenQA.JobGroup, "osd", 116)

└ @ Revise /home/rich/.julia/packages/Revise/UtBAC/src/lowered.jl:169
┌ Error: evaluation error
│   mod = JDP.Trackers.OpenQA
│   ex = no3 = NativeSession("https://openqa.opensuse.org")
│   exception = (ErrorException("expected return statement, got no3 = JuliaInterpreter.SSAValue(1)"), Union{Ptr{Nothing}, InterpreterIP}[Ptr{Nothing} @0x00007f24238f95eb, Ptr{Nothing} @0x00007f24238ca204, Ptr{Nothing} @0x00007f23dbf16060, Ptr{Nothing} @0x00007f24238ae5d6, Ptr{Nothing} @0x00007f23dbf2803f, Ptr{Nothing} @0x00007f23dbf1fd06, Ptr{Nothing} @0x00007f23dbf205e4, Ptr{Nothing} @0x00007f24238ae5d6, Ptr{Nothing} @0x00007f23dbf12075, Ptr{Nothing} @0x00007f24238ae5d6, Ptr{Nothing} @0x00007f23dbf10d40, Ptr{Nothing} @0x00007f23dbf10d94, Ptr{Nothing} @0x00007f23dbf10513, Ptr{Nothing} @0x00007f23dbf10b3f, Ptr{Nothing} @0x00007f24238af08c, Ptr{Nothing} @0x00007f24238ae5d6, Ptr{Nothing} @0x00007f23fb8f7807, Ptr{Nothing} @0x00007f23fb8f5490, Ptr{Nothing} @0x00007f24238ae5d6, Ptr{Nothing} @0x00007f24238bdc6

MethodError: MethodError: Cannot `convert` an object of type Nothing to an object of type JDP.Trackers.OpenQA.JobGroup
Closest candidates are:
  convert(::Type{S}, !Matched::T<:(Union{CategoricalString{R}, CategoricalValue{T,R} where T} where R)) where {S, T<:(Union{CategoricalString{R}, CategoricalValue{T,R} where T} where R)} at /home/rich/.julia/packages/CategoricalArrays/ucKV2/src/value.jl:91
  convert(::Type{T}, !Matched::T) where T at essentials.jl:154
  JDP.Trackers.OpenQA.JobGroup(::Any, !Matched::Any, !Matched::Any, !Matched::Any) at /home/rich/julia/jdp/src/trackers/OpenQA.jl:180

In [None]:
# Notify the listed users about LTP status changes.
# NOTE(review): the job group description recorded above spells the first user
# "rpalethorp" - confirm which spelling is correct.
changes = maybe_notify(ltp, ["rpalethorpe", "metan", "pvorel", "sebchlad", "mmoese"])
md"Sent **$(length(changes))** change notifications"

In [None]:
# Notify the listed users about fstests status changes
changes = maybe_notify(fstests, ["lansuse", "yosun"])
md"Sent **$(length(changes))** change notifications"

In [None]:
# Notify the listed users about status changes in the cloud results
changes = maybe_notify(ipa, ["cfconrad", "jlausuch"])
md"Sent **$(length(changes))** change notifications"

In [None]:
# Notify the listed users about HPC status changes
changes = maybe_notify(hpc, ["sebchlad"])
md"Sent **$(length(changes))** change notifications"

In [None]:
# Notify the listed users about status changes in all remaining tests
changes = maybe_notify(others, ["rpalethorpe"])
md"Sent **$(length(changes))** change notifications"