Skip to content

Commit

Permalink
cmd/compile/internal/ssa: tighten non-faulting loads
Browse files Browse the repository at this point in the history
This change introduces alias analysis into the ssa
backend, and uses it to tighten some loads with their uses.

Since stack loads are non-faulting, tighten is now often able
to postpone loading function return values until their uses.
Given code like

    res, ok := fn()
    if !ok {
        return
    }
    // use res

The 'res' return value won't be loaded until after examining 'ok'.

Fixes golang#19195

Benchmarks on linux/arm:
name                     old time/op    new time/op     delta
BinaryTree17-4              31.1s ± 0%      30.9s ± 0%   -0.39%  (p=0.001 n=9+8)
Fannkuch11-4                13.9s ± 0%      14.1s ± 0%   +0.90%  (p=0.000 n=8+9)
FmtFprintfEmpty-4           666ns ± 5%      674ns ± 4%     ~     (p=0.424 n=10+10)
FmtFprintfString-4         1.09µs ± 4%     1.12µs ± 4%   +2.62%  (p=0.027 n=10+10)
FmtFprintfInt-4            1.22µs ± 3%     1.16µs ± 3%   -4.37%  (p=0.000 n=10+10)
FmtFprintfIntInt-4         1.74µs ± 2%     1.64µs ± 3%   -6.00%  (p=0.000 n=10+10)
FmtFprintfPrefixedInt-4    1.75µs ± 1%     1.69µs ± 3%   -3.80%  (p=0.000 n=9+10)
FmtFprintfFloat-4          3.27µs ± 1%     3.41µs ± 1%   +4.33%  (p=0.000 n=10+9)
FmtManyArgs-4              6.49µs ± 1%     6.23µs ± 2%   -4.10%  (p=0.000 n=10+10)
GobDecode-4                87.1ms ± 1%     75.5ms ± 1%  -13.37%  (p=0.000 n=10+10)
GobEncode-4                69.4ms ± 1%     69.7ms ± 1%     ~     (p=0.190 n=10+10)
Gzip-4                      3.56s ± 1%      3.57s ± 1%     ~     (p=0.053 n=10+9)
Gunzip-4                    446ms ± 2%      442ms ± 2%     ~     (p=0.123 n=10+10)
HTTPClientServer-4         1.51ms ± 1%     1.55ms ± 3%   +2.26%  (p=0.001 n=8+9)
JSONEncode-4                191ms ± 1%      175ms ± 2%   -8.33%  (p=0.000 n=10+10)
JSONDecode-4                798ms ± 1%      835ms ± 1%   +4.65%  (p=0.000 n=10+10)
Mandelbrot200-4            33.6ms ± 0%     33.6ms ± 0%     ~     (p=0.068 n=8+10)
GoParse-4                  42.4ms ± 1%     42.5ms ± 1%     ~     (p=0.190 n=10+10)
RegexpMatchEasy0_32-4       829ns ± 1%      853ns ± 1%   +2.98%  (p=0.000 n=9+8)
RegexpMatchEasy0_1K-4      4.04µs ± 1%     4.03µs ± 1%     ~     (p=0.986 n=10+10)
RegexpMatchEasy1_32-4       889ns ± 2%      900ns ± 5%     ~     (p=0.566 n=10+10)
RegexpMatchEasy1_1K-4      6.01µs ± 2%     6.15µs ± 2%   +2.29%  (p=0.000 n=9+9)
RegexpMatchMedium_32-4     1.35µs ± 3%     1.39µs ± 4%   +2.26%  (p=0.018 n=9+10)
RegexpMatchMedium_1K-4      357µs ± 9%      352µs ± 2%     ~     (p=0.968 n=10+9)
RegexpMatchHard_32-4       22.2µs ± 6%     22.6µs ± 6%     ~     (p=0.161 n=9+9)
RegexpMatchHard_1K-4        652µs ± 4%      664µs ± 4%   +1.91%  (p=0.028 n=9+10)
Revcomp-4                  51.4ms ± 1%     51.3ms ± 2%     ~     (p=0.353 n=10+10)
Template-4                  1.17s ± 2%      1.06s ± 2%   -9.39%  (p=0.000 n=10+10)
TimeParse-4                4.44µs ± 1%     4.46µs ± 1%   +0.50%  (p=0.003 n=9+10)
TimeFormat-4               9.30µs ± 1%     9.33µs ± 1%     ~     (p=0.197 n=10+10)
[Geo mean]                  557µs           553µs        -0.82%

Change-Id: Ibe35216e1bc3b0ff937cee9b3bac64f40f087b61
  • Loading branch information
philhofer committed Mar 20, 2017
1 parent 2805d20 commit e456900
Show file tree
Hide file tree
Showing 3 changed files with 529 additions and 2 deletions.
295 changes: 295 additions & 0 deletions src/cmd/compile/internal/ssa/alias.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,295 @@
package ssa

import "fmt"

// aliasAnalysis tries to map addresses to memory partitions.
// A partition is a region of memory that is known to be
// distinct from other memory partitions; pointers based on
// two different partitions do not alias. A pointer based on
// a partition and a pointer with an unknown partition may alias.
type aliasAnalysis struct {
	idinfo     []int32   // map value.ID to 1-based index in info (0 means "no partition")
	info       []ptrinfo // partition info, one entry per known pointer
	partitions int32     // total number of partitions
}

// pointerFlags records attributes of a pointer
// discovered during alias analysis.
type pointerFlags int32

const (
	flagAlloc pointerFlags = 1 << iota // this pointer is from an allocation
)

// ptrinfo describes the memory partition a pointer
// belongs to, along with its pointerFlags.
type ptrinfo struct {
	partition int32
	flags     pointerFlags
}

// isAlloc reports whether this pointer came from
// a known allocation function (flagAlloc is set).
func (p *ptrinfo) isAlloc() bool {
	return p.flags&flagAlloc != 0
}

// partition returns the memory partition assigned to v,
// or -1 if v has no known partition.
func (a *aliasAnalysis) partition(v *Value) int32 {
	// idinfo holds 1-based indices; 0 means "unknown".
	if idx := a.idinfo[v.ID]; idx > 0 {
		return a.info[idx-1].partition
	}
	return -1
}

// isAlloc reports whether v is known to be the result
// of an allocation function.
func (a *aliasAnalysis) isAlloc(v *Value) bool {
	// idinfo holds 1-based indices; 0 means "unknown".
	if idx := a.idinfo[v.ID]; idx > 0 {
		return a.info[idx-1].isAlloc()
	}
	return false
}

// addPointer assigns a fresh memory partition to the value
// with the given ID, recording the supplied flags.
func (a *aliasAnalysis) addPointer(id ID, flags pointerFlags) {
	// a.partitions is read before the increment, so each
	// call hands out the next unused partition number.
	a.info = append(a.info, ptrinfo{a.partitions, flags})
	a.partitions++
	// Store a 1-based index so the zero value means "unknown".
	a.idinfo[id] = int32(len(a.info))
}

// setEquivalent records that ptr belongs to the same memory
// partition as old (used when two values name the same region,
// e.g. repeated OpSP values or two OpAddrs of one symbol).
func (a *aliasAnalysis) setEquivalent(old ID, ptr ID) {
	a.idinfo[ptr] = a.idinfo[old]
}

// Table of functions known to produce unique pointers.
// The return value is at offset
// byteoff+(ptroff * Frontend().TypeBytePtr().Size())
// from the stack pointer at the call site.
// NOTE(review): the offsets encode each callee's argument
// layout (e.g. newobject takes one pointer argument, so its
// result lives one pointer width up the frame) — verify these
// stay in sync with the runtime signatures listed alongside.
var knownAllocs = []struct {
	byteoff int64  // bytes to add to frame address
	ptroff  int64  // pointer widths to add to frame address
	name    string // symbol name
}{
	{ptroff: 1, byteoff: 0, name: "runtime.newobject"},    // newobject(*_type) unsafe.Pointer
	{ptroff: 3, byteoff: 0, name: "runtime.makeslice"},    // makeslice(*_type, int, int) slice
	{ptroff: 1, byteoff: 16, name: "runtime.makeslice64"}, // makeslice64(*_type, int64, int64) slice
	{ptroff: 2, byteoff: 0, name: "runtime.newarray"},     // newarray(*_type, int) unsafe.Pointer
	{ptroff: 5, byteoff: 0, name: "runtime.growslice"},    // growslice(*_type, slice, int) slice
}

// isunique reports whether v is the return value of a function
// known to produce unique pointers. If so, it also returns
// the value ID of the call site.
func isunique(v *Value, ptrsize int64) (ID, bool) {
	// match (Load (OffPtr [off] (SP)) (StaticCall {sym}))
	if v.Op != OpLoad {
		return 0, false
	}
	addr, call := v.Args[0], v.Args[1]
	if addr.Op != OpOffPtr || call.Op != OpStaticCall || addr.Args[0].Op != OpSP {
		return 0, false
	}
	for _, known := range knownAllocs {
		// Offset of the callee's return value within its frame.
		argoff := known.ptroff*ptrsize + known.byteoff
		if addr.AuxInt == argoff && isSameSym(call.Aux, known.name) {
			return call.ID, true
		}
	}
	return 0, false
}

// init runs the analysis over f, assigning memory partitions
// to pointer-producing values.
//
// For now, do a coarse single-pass analysis.
// Partitions are:
//   - Each allocation
//   - The stack at and below SP
//   - Each symbol (auto symbols, arg symbols, and globals)
//
// TODO: see which pointers in which partitions are
// stored into other data structures or passed into other
// functions. It may be very profitable to be able to
// prove that some values (e.g. locals that never have
// their address taken) are not call-clobbered.
func (aa *aliasAnalysis) init(f *Func) {
	aa.idinfo = make([]int32, f.NumValues())
	aa.info = make([]ptrinfo, 0, 20)

	ptrsize := f.Config.Types.BytePtr.Size()
	// guard against symbols being matched more than once
	sympart := make(map[string]ID)
	var lastsp ID

	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpLoad:
				// The result of a known allocator gets its own
				// partition, keyed by the call site's value ID.
				if vid, ok := isunique(v, ptrsize); ok {
					aa.addPointer(vid, flagAlloc)
				}
			case OpSP:
				// Every OpSP names the same stack region, so all
				// of them share one partition.
				if lastsp != 0 {
					aa.setEquivalent(lastsp, v.ID)
				} else {
					aa.addPointer(v.ID, 0)
				}
				lastsp = v.ID
			case OpAddr:
				// All addresses of one symbol share a partition.
				name := symname(v.Aux)
				if first, seen := sympart[name]; seen {
					aa.setEquivalent(first, v.ID)
				} else {
					sympart[name] = v.ID
					aa.addPointer(v.ID, 0)
				}
			}
		}
	}
}

// offsplit peels away OffPtr and Copy ops, returning the ID of
// the underlying pointer along with the accumulated constant
// byte offset.
func offsplit(a *Value) (ID, int64) {
	var off int64
	for {
		switch a.Op {
		case OpOffPtr:
			off += a.AuxInt
			a = a.Args[0]
		case OpCopy:
			a = a.Args[0]
		default:
			return a.ID, off
		}
	}
}

// Possible results of aliasAnalysis.alias.
const (
	mustNotAlias = -1 // pointers must be distinct
	mayAlias     = 0  // pointers may or may not be distinct
	mustAlias    = 1  // pointers are identical
)

// overlap reports whether the half-open byte ranges
// [off0, off0+width0) and [off1, off1+width1) intersect.
func overlap(off0, width0, off1, width1 int64) bool {
	// Normalize so the range starting first comes first.
	if off1 < off0 {
		off0, width0, off1, width1 = off1, width1, off0, width0
	}
	// They intersect iff the earlier range extends past
	// the start of the later one.
	return off0+width0 > off1
}

// alias returns the relationship between two pointer values and their
// load/store widths. One of mustNotAlias, mayAlias, and mustAlias will
// be returned. The null hypothesis is that two pointers may alias.
func (a *aliasAnalysis) alias(b *Value, bwidth int64, c *Value, cwidth int64) int {
	// The same value with the same width is trivially the same access.
	if b == c {
		if bwidth != cwidth {
			return mayAlias
		}
		return mustAlias
	}

	bbase, cbase := ptrbase(b), ptrbase(c)
	if bbase == cbase {
		// two pointers derived from the same
		// base pointer can be proven distinct
		// (or identical) if they have constant offsets
		// from a shared base
		bid, boff := offsplit(b)
		cid, coff := offsplit(c)
		// offsplit only peels OffPtr/Copy; bid != cid means some
		// other op (e.g. a variable index) sits between the value
		// and the shared base, so the offsets aren't comparable.
		if bid == cid {
			if boff == coff && bwidth == cwidth {
				// identical addresses and widths
				return mustAlias
			}
			if overlap(boff, bwidth, coff, cwidth) {
				return mayAlias
			}
			return mustNotAlias
		}
		return mayAlias
	}

	// At this point, we know that the pointers
	// come from distinct base pointers.
	// Try to prove that the base pointers point
	// to regions of memory that cannot alias.
	bpart, cpart := a.partition(bbase), a.partition(cbase)
	// Distinct known partitions never alias; -1 means "unknown".
	if bpart != cpart && bpart != -1 && cpart != -1 {
		return mustNotAlias
	}
	if bpart == cpart {
		// Equal partitions (including both unknown) tell us nothing
		// once the base pointers differ.
		return mayAlias
	}

	// Allocations cannot alias any pointer that
	// the allocation itself does not dominate.
	// No allocation dominates arguments.
	sdom := b.Block.Func.sdom()
	if a.isAlloc(bbase) && a.isAlloc(cbase) {
		// We should have already handled this case.
		b.Fatalf("new allocations should have different partitions")
	}
	if a.isAlloc(bbase) &&
		(cbase.Op == OpArg || !sdom.isAncestorEq(bbase.Block, cbase.Block)) {
		return mustNotAlias
	}
	if a.isAlloc(cbase) &&
		(bbase.Op == OpArg || !sdom.isAncestorEq(cbase.Block, bbase.Block)) {
		return mustNotAlias
	}

	return mayAlias
}

// ptrwidth returns the width, in bytes, accessed by the given
// load or store operation. For loads the width is the size of
// the loaded type; for stores it is the size of the stored
// type, which is carried in Aux.
func ptrwidth(v *Value) int64 {
	if v.Op == OpLoad {
		return v.Type.Size()
	}
	// Anything else passed here must be a memory-producing store.
	if !v.Type.IsMemory() {
		v.Fatalf("expected memory, got %s", v.LongString())
	}
	t, ok := v.Aux.(Type)
	if !ok {
		v.Fatalf("aux for %s is not a Type", v.LongString())
	}
	return t.Size()
}

// ptrbase returns the base pointer of the given
// address calculation, stripping any chain of
// OffPtr/AddPtr/PtrIndex/Copy ops.
func ptrbase(v *Value) *Value {
	for {
		switch v.Op {
		case OpOffPtr, OpAddPtr, OpPtrIndex, OpCopy:
			v = v.Args[0]
		default:
			return v
		}
	}
}

// symname returns the string form of a symbol stored in
// an Aux field. It panics if sym is not a fmt.Stringer.
func symname(sym interface{}) string {
	s := sym.(fmt.Stringer)
	return s.String()
}

// clobbers returns whether or not a memory-producing
// value must be ordered with respect to the given load.
func (a *aliasAnalysis) clobbers(mem, load *Value) bool {
	if mem.Op == OpPhi {
		mem.Fatalf("unexpected Phi")
	}
	// A Select1 projects the memory out of a tuple-producing op;
	// examine the op that produced the tuple instead.
	if mem.Op == OpSelect1 {
		mem = mem.Args[0]
	}
	switch mem.Op {
	case OpInitMem:
		// Nothing can be moved above the initial memory state.
		return true
	case OpVarDef, OpVarKill, OpVarLive:
		// VarDef/VarLive/VarKill clobber autotmp symbols.
		// Figure out if the load references the same one.
		base := ptrbase(load.Args[0])
		return base.Op == OpAddr && base.Args[0].Op == OpSP && symname(base.Aux) == symname(mem.Aux)
	case OpKeepAlive:
		// KeepAlive only constrains the value it keeps alive.
		return mem.Args[0] == load
	case OpCopy, OpConvert:
		return false
	}
	if mem.MemoryArg() == nil {
		mem.Fatalf("expected a memory op; got %s", mem.LongString())
	}
	// calls and atomic operations clobber everything
	if opcodeTable[mem.Op].call || opcodeTable[mem.Op].hasSideEffects || mem.Type.IsTuple() {
		return true
	}
	// at this point, mem must be a store operation;
	// it clobbers the load unless alias analysis proves
	// the two addresses must not alias
	return a.alias(mem.Args[0], ptrwidth(mem), load.Args[0], ptrwidth(load)) != mustNotAlias
}
2 changes: 1 addition & 1 deletion src/cmd/compile/internal/ssa/loop_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func TestLoopConditionS390X(t *testing.T) {
Bloc("entry",
Valu("mem", OpInitMem, TypeMem, 0, nil),
Valu("SP", OpSP, TypeUInt64, 0, nil),
Valu("ret", OpAddr, TypeInt64Ptr, 0, nil, "SP"),
Valu("ret", OpAddr, TypeInt64Ptr, 0, c.Frontend().Auto(TypeInt64), "SP"),
Valu("N", OpArg, TypeInt64, 0, c.Frontend().Auto(TypeInt64)),
Valu("starti", OpConst64, TypeInt64, 0, nil),
Valu("startsum", OpConst64, TypeInt64, 0, nil),
Expand Down
Loading

0 comments on commit e456900

Please sign in to comment.