timholy · timholy · Aug 24, 2020 · Aug 24, 2020
diff --git a/demos/abstract.jl b/demos/abstract.jl
@@ -0,0 +1,155 @@
+using MethodAnalysis
+
+
+"""
+    atrisktype(typ)
+
+Given a Tuple-type signature (e.g., `Tuple{typeof(sum),Vector{Int}}`), determine whether this signature
+is "at risk" for invalidation. Essentially it returns `true` if one or more arguments are of abstract type,
+although there are prominent exceptions:
+
+- `Function` is allowed
+- any constructor call is allowed
+- `convert(X, x)` where `isa(x, X)` is true
+- `setindex!` and `push!` on a concrete container where the stored value's type is a subtype of the
+  eltype (for `AbstractDict`s: value type `<: valtype` and key type `<: keytype`)
+- `getindex`, `length`, `isempty`, and `iterate` on any tuple
+- `show`, `print`, and `println`: the second signature parameter (normally the `io`) is not examined,
+  only the parameters after it
+
+Inputs that are not types (`TypeVar`s, isbits type parameters), `Union{}`, and `Type{T}` return `false`.
+A signature with fewer parameters than one of the special cases expects is not treated specially;
+it falls through to the generic checks.
+"""
+function atrisktype(@nospecialize(typ))
+    # signatures like `convert(Vector, a)`, `foo(::Vararg{Symbol,N}) where N` do not seem to pose a problem
+    isa(typ, TypeVar) && return false
+    # isbits parameters are not a problem
+    isa(typ, Type) || return false
+    if isa(typ, UnionAll)
+        typ = Base.unwrap_unionall(typ)
+    end
+    # Exclude signatures with Union{}
+    typ === Union{} && return false
+    # A Union is at risk as soon as one of its branches is
+    if isa(typ, Union)
+        return atrisktype(typ.a) || atrisktype(typ.b)
+    end
+    # Type{T}: signatures like `convert(::Type{AbstractString}, ::String)` are not problematic
+    typ <: Type && return false
+    if typ <: Tuple && length(typ.parameters) >= 1
+        params = typ.parameters
+        nparams = length(params)
+        p1 = params[1]
+        # Constructor calls are not themselves a problem (any `convert`s they trigger might be, but those are covered)
+        isa(p1, Type) && p1 <: Type && return false
+        # convert(::Type{T}, ::S) where S<:T is not problematic
+        # (the arity guard keeps short signatures from throwing a BoundsError)
+        if (p1 === typeof(Base.convert) || p1 === typeof(Core.convert)) && nparams >= 3
+            p2, p3 = params[2], params[3]
+            if isa(p2, Type)
+                p2 = Base.unwrap_unionall(p2)
+                if isa(p2, DataType) && length(p2.parameters) === 1
+                    T = p2.parameters[1]
+                    isa(p3, Type) && isa(T, Type) && p3 <: T && return false
+                end
+            end
+        # `getindex`, `length`, etc are OK for various Tuple{T1,T2,...}
+        elseif (p1 === typeof(Base.getindex) ||
+                p1 === typeof(Base.length)  ||
+                p1 === typeof(Base.isempty) ||
+                p1 === typeof(Base.iterate) || p1 === typeof(Core.iterate)) && nparams >= 2
+            p2 = params[2]
+            if isa(p2, Type)
+                p2 = Base.unwrap_unionall(p2)
+                p2 <: Tuple && return false
+            end
+        # show(io::IO, x) is OK as long as typeof(x) is safe
+        elseif p1 === typeof(Base.show) || p1 === typeof(Base.print) || p1 === typeof(Base.println)
+            # Parameter 2 is deliberately skipped; only the later parameters decide the outcome
+            for i = 3:nparams
+                atrisktype(params[i]) && return true
+            end
+            return false
+        # setindex!(a, x, idx) and push!(a, x) are safe if typeof(x) <: eltype(a)
+        elseif (p1 === typeof(Base.setindex!) || p1 === typeof(Base.push!)) && nparams >= 3
+            p2, p3 = params[2], params[3]
+            if isconcretetype(p2)
+                if p2 <: AbstractDict && nparams >= 4
+                    # setindex!(dict, value, key): parameter 3 is the value, parameter 4 the key
+                    p4 = params[4]
+                    p3 <: valtype(p2) && p4 <: keytype(p2) && return false
+                else
+                    p3 <: eltype(p2) && return false
+                end
+            end
+        end
+    end
+    # Standard DataTypes
+    isconcretetype(typ) && return false
+    # ::Function args are excluded
+    typ === Function && return false
+    # A parametric abstract type is at risk only if one of its parameters is
+    if !isempty(typ.parameters)
+        return any(atrisktype, typ.parameters)
+    end
+    return true
+end
+
+# Sanity checks for `atrisktype`, evaluated at top level when this file is included.
+# A leading `!` marks a signature that is expected NOT to be at risk.
+@assert  atrisktype(Tuple{typeof(==),Any,Any})
+@assert  atrisktype(Tuple{typeof(==),Symbol,Any})
+@assert  atrisktype(Tuple{typeof(==),Any,Symbol})
+@assert !atrisktype(Tuple{typeof(==),Symbol,Symbol})
+@assert !atrisktype(Tuple{typeof(convert),Type{Any},Any})
+@assert !atrisktype(Tuple{typeof(convert),Type{AbstractString},AbstractString})
+@assert !atrisktype(Tuple{typeof(convert),Type{AbstractString},String})
+@assert  atrisktype(Tuple{typeof(convert),Type{String},AbstractString})
+@assert !atrisktype(Tuple{typeof(map),Function,Vector{Any}})
+@assert !atrisktype(Tuple{typeof(getindex),Dict{Union{String,Int},Any},Union{String,Int}})
+@assert  atrisktype(Tuple{typeof(getindex),Dict{Union{String,Int},Any},Any})
+@assert !atrisktype(Tuple{Type{BoundsError},Any,Any})
+@assert  atrisktype(Tuple{typeof(sin),Any})
+@assert !atrisktype(Tuple{typeof(length),Tuple{Any,Any}})
+@assert  atrisktype(Tuple{typeof(setindex!),Vector{Int},Any,Int})
+@assert !atrisktype(Tuple{typeof(setindex!),Vector{Any},Any,Int})
+@assert  atrisktype(Tuple{typeof(push!),Vector{Int},Any})
+@assert !atrisktype(Tuple{typeof(push!),Vector{Any},Any})
+
+# `true` when a binding with the method's name is defined in `Main`
+function isexported(mi::Core.MethodInstance)
+    return isdefined(Main, mi.def.name)
+end
+# Fetch the function object that a Method (or the Method behind a MethodInstance) belongs to
+getfunc(m::Method) = getfield(m.module, m.name)
+getfunc(mi::Core.MethodInstance) = getfunc(mi.def)
+# Number of methods of the function that `mi` is an instance of
+function nmethods(mi::Core.MethodInstance)
+    f = getfunc(mi)
+    return length(methods(f))
+end
+
+# Report whether `mod` is Core.Compiler itself or a module nested inside it.
+# Methods there are protected from invalidation by other means
+function fromcc(mod::Module)
+    path = fullname(mod)
+    # A module nested under Core.Compiler has a path that starts with (:Core, :Compiler)
+    length(path) < 2 && return false
+    return path[1] === :Core && path[2] === :Compiler
+end
+
+# Map every visited Method that lives outside Core.Compiler to the result of `methodinstances(m)`.
+# NOTE(review): the Bool returned from the `visit` callback presumably tells MethodAnalysis whether to
+# descend into the item's children (`false` = stop here) — confirm against the MethodAnalysis docs.
+const mis = Dict{Method,Vector{Core.MethodInstance}}()
+visit() do item
+    if item isa Method && !fromcc(item.module)
+        m = item
+        mis[m] = methodinstances(m)
+        return false
+    end
+    return true
+end
+
+# Count # of backedges for MethodInstances with abstract types
+# Only MethodInstances outside Core.Compiler whose `specTypes` pass `atrisktype` get an entry;
+# the stored value is `length(all_backedges(item))`.
+# NOTE(review): as above, the callback's Bool return value presumably controls whether `visit`
+# descends further — confirm against the MethodAnalysis docs.
+const becounter = Dict{Core.MethodInstance,Int}()
+visit() do item
+    if item isa Core.MethodInstance && !fromcc(item.def.module)
+        if atrisktype(item.specTypes)
+            becounter[item] = length(all_backedges(item))
+        end
+        return false
+    end
+    return true
+end
+
+# Order the (MethodInstance => backedge count) pairs by ascending count, then write one
+# `signature => count` line per entry that has at least one backedge.
+prs = sort!(collect(becounter); by=last)
+open("/tmp/methdata_$VERSION.log", "w") do io
+    for (mi, c) in prs
+        if c != 0
+            println(io, mi.specTypes=>c)
+        end
+    end
+end
+
+# Split into exported & private functions
+# `mtup` only serves as a template for the NamedTuple type stored in both vectors.
+mtup = (nmethods = 0, nbackedges = 0)
+miexp = Pair{Core.MethodInstance,typeof(mtup)}[]
+mipriv = similar(miexp)
+for (mi, c) in prs
+    stats = (nmethods=nmethods(mi), nbackedges=c)
+    # Pick the destination vector first, then append the annotated pair to it
+    dest = isexported(mi) ? miexp : mipriv
+    push!(dest, mi=>stats)
+end
diff --git a/demos/abstract_analyze_versions.jl b/demos/abstract_analyze_versions.jl
@@ -1,3 +1,7 @@
+# Do `include("abstract.jl")` on two Julia versions and then run this script.
+# You will likely have to change the path for `sigstable` below.
+# Typically this should be run using the in-development version of Julia (or whatever "latest" is in your comparison)
+
 using PyPlot
 
 function parseline(line)
@@ -41,10 +45,23 @@ function split_comparable(sigc1, sigc2)
     return sigs, c1, c2
 end
 
-sigc16 = parsedata("/tmp/methdata_$VERSION.log")
-sigc14 = parsedata("/tmp/methdata_1.4.3-pre.0.log")
+# Walk `c1` and `c2` in lockstep and count the positions where exactly one of the two
+# entries is zero. Returns `(n1, n2)`: `n1` counts positions where only the `c1` entry is
+# zero, `n2` those where only the `c2` entry is zero. Positions where both are zero are ignored.
+function tally0(c1, c2)
+    onlyfirst = 0
+    onlysecond = 0
+    for (x, y) in zip(c1, c2)
+        xzero, yzero = x == 0, y == 0
+        if xzero != yzero
+            if xzero
+                onlyfirst += 1
+            else
+                onlysecond += 1
+            end
+        end
+    end
+    return onlyfirst, onlysecond
+end
+
+sigmaster = parsedata("/tmp/methdata_$VERSION.log")
+sigstable, stablever = parsedata("/tmp/methdata_1.5.1-pre.29.log"), "1.5"
+# sigstable, stablever = parsedata("/tmp/methdata_1.4.2.log"), "1.4"
 
-sigs, c1, c2 = split_comparable(sigc14, sigc16)
+sigs, c1, c2 = split_comparable(sigstable, sigmaster)
+nz1, nz2 = tally0(c1, c2)
+println("$stablever has $nz1 with no backedges, master has $nz2")
 mx1, mx2 = maximum(c1), maximum(c2)
 isexported(sig) = (ft = Base.unwrap_unionall(sig).parameters[1]; isdefined(Main, ft.name.mt.name))
 colors = [isexported(sig) ? "magenta" : "green" for sig in sigs]
@@ -56,52 +73,26 @@ function on_click(event)
     println(sigs[idx])
 end
 begin
+    hfig, axs = plt.subplots(2, 1)
+    plt.subplots_adjust(hspace=0.3)
+    logedges = LinRange(0, log10(max(mx1, mx2)+2), 30)
+    ax = axs[1]
+    ax.hist(log10.(c1 .+ 1), bins=logedges)
+    ax.set_xlabel("log₁₀(# backedges + 1), $stablever")
+    ax.set_ylabel("# 'at risk' signatures")
+    ax = axs[2]
+    ax.hist(log10.(c2 .+ 1), bins=logedges)
+    ax.set_xlabel("log₁₀(# backedges + 1), 1.6")
+    ax.set_ylabel("# 'at risk' signatures")
+
+    display(hfig)
     fig, ax = plt.subplots()
     ax.scatter(c1 .+ 1, c2 .+ 1, c=colors)  # + 1 for the log-scaling
-    ax.set_xlabel("# backedges + 1, 1.4")
+    ax.set_xlabel("# backedges + 1, $stablever")
     ax.set_ylabel("# backedges + 1, 1.6")
     ax.set_xscale("log")
     ax.set_yscale("log")
+    ax.set_aspect("equal")
     fig.canvas.callbacks.connect("button_press_event", on_click)
     fig
 end
-
-# Ones we've made progress on:
-# ==(::Any, Symbol)
-# ==(::Symbol, ::Any)
-# ==(::Any, ::Nothing)
-# ==(::UUID, ::Any)
-# ==(::AbstractString, ::String)
-# isequal(::Symbol, ::Any)
-# isequal(::Any, ::Symbol)
-# isequal(::Any, ::Nothing)
-# isequal(::UUID, ::Any)
-# cmp(::AbstractString, ::String)
-# convert(::Type{Int}, ::Integer)
-# convert(::Type{UInt}, ::Integer)
-# convert(::Type{Union{Nothing,Module}}, ::Any)
-# Base.to_index(::Integer)
-# iterate(::Base.OneTo, ::Any)
-# repr(::Any)
-# thisind(::AbstractString, ::Int)
-# getindex(::String, ::Any)
-# string(::String, ::Integer, ::String)
-# ^(::String, ::Integer)
-# repeat(::String, ::Integer)
-# Base.isidentifier(::AbstractString)
-# +(::Ptr{UInt8}, ::Integer)
-# Base._show_default(::Base.GenericIOBuffer{Array{UInt8,1}}, ::Any)
-
-# Ones that are better but I don't remember helping with
-# isconcretetype(::Any)
-# pointer(::String, ::Integer)
-
-# Regressions:
-# basename(::AbstractString)
-# splitdir(::AbstractString)
-# isfile(::Any)
-# joinpath(::AbstractString, ::String)
-# sizeof(::Unsigned)
-# +(::Int, ::Any, ::Any)
-# Base.split_sign(::Integer)
-# in(::Any, ::Tuple{Symbol})