Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CombineFeatures for faster lookups. #120

Merged
merged 1 commit into from
Oct 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ Package home: https://github.com/klauspost/cpuid

## installing

`go get -u github.com/klauspost/cpuid/v2` using modules.

`go get -u github.com/klauspost/cpuid/v2` using modules.
Drop `v2` for others.

## example
Expand Down Expand Up @@ -77,10 +76,14 @@ We have Streaming SIMD 2 Extensions
The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.

To test a larger number of features, they can be combined using `f := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)`, etc.
This can be used with `cpuid.CPU.HasAll(f)` to quickly test if all features are supported.

Note that for some cpu/os combinations some features will not be detected.
`amd64` has rather good support and should work reliably on all platforms.

Note that hypervisors may not pass through all CPU features.
Note that hypervisors may not pass all CPU features through to the guest OS,
so even if your host supports a feature it may not be visible on guests.

## arm64 feature detection

Expand Down
51 changes: 39 additions & 12 deletions cpuid.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ func (c CPUInfo) Supports(ids ...FeatureID) bool {

// Has allows for checking a single feature.
// Should be inlined by the compiler.
func (c CPUInfo) Has(id FeatureID) bool {
func (c *CPUInfo) Has(id FeatureID) bool {
return c.featureSet.inSet(id)
}

Expand All @@ -381,26 +381,43 @@ func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
return false
}

// Features holds several feature IDs combined into a single flag set, for a
// fast check using CPUInfo.HasAll. Values are built with CombineFeatures.
type Features *flagSet

// CombineFeatures merges the given feature IDs into a single Features value,
// allowing a close to constant time lookup of all of them at once via
// CPUInfo.HasAll. Calling it without any IDs yields an empty set.
func CombineFeatures(ids ...FeatureID) Features {
	combined := new(flagSet)
	for i := range ids {
		combined.set(ids[i])
	}
	return combined
}

// HasAll reports whether all features combined in f are present in the
// feature set of c. The check is delegated to flagSet.hasSetP.
// NOTE(review): f is dereferenced by hasSetP, so it is presumably expected to
// be a non-nil value obtained from CombineFeatures — confirm for nil input.
func (c *CPUInfo) HasAll(f Features) bool {
return c.featureSet.hasSetP(f)
}

// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)

// X64Level returns the microarchitecture level detected on the CPU.
// If features are lacking or non x64 mode, 0 is returned.
// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
func (c CPUInfo) X64Level() int {
if c.featureSet.hasSet(level4Features) {
if c.featureSet.hasSetP(level4Features) {
return 4
}
if c.featureSet.hasSet(level3Features) {
if c.featureSet.hasSetP(level3Features) {
return 3
}
if c.featureSet.hasSet(level2Features) {
if c.featureSet.hasSetP(level2Features) {
return 2
}
if c.featureSet.hasSet(level1Features) {
if c.featureSet.hasSetP(level1Features) {
return 1
}
return 0
Expand Down Expand Up @@ -564,7 +581,7 @@ const flagMask = flagBits - 1
// flagSet contains detected cpu features and characteristics in an array of flags
type flagSet [(lastID + flagMask) / flagBits]flags

func (s flagSet) inSet(feat FeatureID) bool {
func (s *flagSet) inSet(feat FeatureID) bool {
return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
}

Expand Down Expand Up @@ -594,7 +611,17 @@ func (s *flagSet) or(other flagSet) {
}

// hasSet returns whether all features are present.
func (s flagSet) hasSet(other flagSet) bool {
func (s *flagSet) hasSet(other flagSet) bool {
for i, v := range other[:] {
if s[i]&v != v {
return false
}
}
return true
}

// hasSetP returns whether all features in other (passed by pointer) are present.
func (s *flagSet) hasSetP(other *flagSet) bool {
for i, v := range other[:] {
if s[i]&v != v {
return false
Expand All @@ -604,7 +631,7 @@ func (s flagSet) hasSet(other flagSet) bool {
}

// nEnabled will return the number of enabled flags.
func (s flagSet) nEnabled() (n int) {
func (s *flagSet) nEnabled() (n int) {
for _, v := range s[:] {
n += bits.OnesCount64(uint64(v))
}
Expand Down
40 changes: 40 additions & 0 deletions cpuid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,43 @@ func ExampleCPUInfo_Ia32TscAux() {
core := ecx & 0xFFF
fmt.Println("Chip, Core:", chip, core)
}

// TestCombineFeatures checks, for every feature ID below lastID, that HasAll
// on a single-feature combined set agrees with Has for that same ID.
func TestCombineFeatures(t *testing.T) {
	cpu := CPU
	for id := FeatureID(0); id < lastID; id++ {
		single := cpu.Has(id)
		combined := cpu.HasAll(CombineFeatures(id))
		if single == combined {
			continue
		}
		t.Errorf("id %d:%s mismatch", id, id.String())
	}
}

// BenchmarkFlags compares checking features by ID (Supports / Has) against
// checking a precombined set (HasAll), both for a long feature list and for a
// single feature. Results are folded into sink so the calls are not discarded.
func BenchmarkFlags(b *testing.B) {
	var sink bool
	cpu := CPU

	// Long feature list, passed as individual IDs on every call.
	b.Run("ids", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			sink = cpu.Supports(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) || sink
		}
		_ = sink
	})

	// Same feature list, combined once outside the timed loop.
	b.Run("features", func(b *testing.B) {
		combined := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
		for n := 0; n < b.N; n++ {
			sink = cpu.HasAll(combined) || sink
		}
		_ = sink
	})

	// Single feature looked up by ID.
	b.Run("id", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			sink = cpu.Has(CMOV) || sink
		}
		_ = sink
	})

	// Single feature looked up through a combined set.
	b.Run("feature", func(b *testing.B) {
		combined := CombineFeatures(CMOV)
		for n := 0; n < b.N; n++ {
			sink = cpu.HasAll(combined) || sink
		}
		_ = sink
	})
}