Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions crates/simd/src/bit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ mod reduce_sum_of_and {
}
}

#[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")]
pub fn reduce_sum_of_and(lhs: &[u64], rhs: &[u64]) -> u32 {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand Down Expand Up @@ -366,7 +366,7 @@ mod reduce_sum_of_or {
}
}

#[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")]
pub fn reduce_sum_of_or(lhs: &[u64], rhs: &[u64]) -> u32 {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand Down Expand Up @@ -549,7 +549,7 @@ mod reduce_sum_of_xor {
}
}

#[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")]
pub fn reduce_sum_of_xor(lhs: &[u64], rhs: &[u64]) -> u32 {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand Down Expand Up @@ -772,7 +772,7 @@ mod reduce_sum_of_and_or {
}
}

#[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")]
pub fn reduce_sum_of_and_or(lhs: &[u64], rhs: &[u64]) -> (u32, u32) {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand Down Expand Up @@ -933,7 +933,7 @@ mod reduce_sum_of_x {
}
}

#[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(@"v4:avx512vpopcntdq", @"v4", @"v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")]
pub fn reduce_sum_of_x(this: &[u64]) -> u32 {
let n = this.len();
let mut sum = 0;
Expand All @@ -950,7 +950,9 @@ pub fn vector_and(lhs: &[u64], rhs: &[u64]) -> Vec<u64> {
}

mod vector_and {
#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_and(lhs: &[u64], rhs: &[u64]) -> Vec<u64> {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand All @@ -973,7 +975,9 @@ pub fn vector_or(lhs: &[u64], rhs: &[u64]) -> Vec<u64> {
}

mod vector_or {
#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_or(lhs: &[u64], rhs: &[u64]) -> Vec<u64> {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand All @@ -996,7 +1000,9 @@ pub fn vector_xor(lhs: &[u64], rhs: &[u64]) -> Vec<u64> {
}

mod vector_xor {
#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_xor(lhs: &[u64], rhs: &[u64]) -> Vec<u64> {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand Down
68 changes: 50 additions & 18 deletions crates/simd/src/f16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ impl Floating for f16 {
mod reduce_or_of_is_zero_x {
use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn reduce_or_of_is_zero_x(this: &[f16]) -> bool {
for &x in this {
if x == f16::ZERO {
Expand All @@ -177,7 +179,9 @@ mod reduce_sum_of_x {

use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn reduce_sum_of_x(this: &[f16]) -> f32 {
let n = this.len();
let mut x = 0.0f32;
Expand All @@ -193,7 +197,9 @@ mod reduce_sum_of_abs_x {

use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn reduce_sum_of_abs_x(this: &[f16]) -> f32 {
let n = this.len();
let mut x = 0.0f32;
Expand All @@ -209,7 +215,9 @@ mod reduce_sum_of_x2 {

use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn reduce_sum_of_x2(this: &[f16]) -> f32 {
let n = this.len();
let mut x2 = 0.0f32;
Expand All @@ -225,7 +233,9 @@ mod reduce_min_max_of_x {

use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn reduce_min_max_of_x(this: &[f16]) -> (f32, f32) {
let mut min = f32::INFINITY;
let mut max = f32::NEG_INFINITY;
Expand Down Expand Up @@ -501,7 +511,7 @@ mod reduce_sum_of_xy {
}
}

#[crate::multiversion(@"v4:avx512fp16", @"v4", @"v3", #[cfg(target_endian = "little")] @"a3.512", @"a2:fp16", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(@"v4:avx512fp16", @"v4", @"v3", #[cfg(target_endian = "little")] @"a3.512", @"a2:fp16", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")]
pub fn reduce_sum_of_xy(lhs: &[f16], rhs: &[f16]) -> f32 {
assert!(lhs.len() == rhs.len());
let n = lhs.len();
Expand Down Expand Up @@ -784,7 +794,7 @@ mod reduce_sum_of_d2 {
}
}

#[crate::multiversion(@"v4:avx512fp16", @"v4", @"v3", #[cfg(target_endian = "little")] @"a3.512", @"a2:fp16", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(@"v4:avx512fp16", @"v4", @"v3", #[cfg(target_endian = "little")] @"a3.512", @"a2:fp16", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7")]
pub fn reduce_sum_of_d2(lhs: &[f16], rhs: &[f16]) -> f32 {
assert!(lhs.len() == rhs.len());
let n = lhs.len();
Expand All @@ -803,7 +813,9 @@ mod reduce_sum_of_xy_sparse {

use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn reduce_sum_of_xy_sparse(lidx: &[u32], lval: &[f16], ridx: &[u32], rval: &[f16]) -> f32 {
use std::cmp::Ordering;
assert_eq!(lidx.len(), lval.len());
Expand Down Expand Up @@ -836,7 +848,9 @@ mod reduce_sum_of_d2_sparse {

use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn reduce_sum_of_d2_sparse(lidx: &[u32], lval: &[f16], ridx: &[u32], rval: &[f16]) -> f32 {
use std::cmp::Ordering;
assert_eq!(lidx.len(), lval.len());
Expand Down Expand Up @@ -875,7 +889,9 @@ mod reduce_sum_of_d2_sparse {
mod vector_add {
use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_add(lhs: &[f16], rhs: &[f16]) -> Vec<f16> {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand All @@ -895,7 +911,9 @@ mod vector_add {
mod vector_add_inplace {
use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_add_inplace(lhs: &mut [f16], rhs: &[f16]) {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand All @@ -908,7 +926,9 @@ mod vector_add_inplace {
mod vector_sub {
use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_sub(lhs: &[f16], rhs: &[f16]) -> Vec<f16> {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand All @@ -928,7 +948,9 @@ mod vector_sub {
mod vector_mul {
use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_mul(lhs: &[f16], rhs: &[f16]) -> Vec<f16> {
assert_eq!(lhs.len(), rhs.len());
let n = lhs.len();
Expand All @@ -948,7 +970,9 @@ mod vector_mul {
mod vector_mul_scalar {
use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_mul_scalar(lhs: &[f16], rhs: f32) -> Vec<f16> {
let rhs = f16::from_f32(rhs);
let n = lhs.len();
Expand All @@ -968,7 +992,9 @@ mod vector_mul_scalar {
mod vector_mul_scalar_inplace {
use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_mul_scalar_inplace(lhs: &mut [f16], rhs: f32) {
let rhs = f16::from_f32(rhs);
let n = lhs.len();
Expand All @@ -981,7 +1007,9 @@ mod vector_mul_scalar_inplace {
mod vector_abs_inplace {
use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_abs_inplace(this: &mut [f16]) {
let n = this.len();
for i in 0..n {
Expand All @@ -993,7 +1021,9 @@ mod vector_abs_inplace {
mod vector_from_f32 {
use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_from_f32(this: &[f32]) -> Vec<f16> {
let n = this.len();
let mut r = Vec::<f16>::with_capacity(n);
Expand All @@ -1012,7 +1042,9 @@ mod vector_from_f32 {
mod vector_to_f32 {
use super::*;

#[crate::multiversion("v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13")]
#[crate::multiversion(
"v4", "v3", "v2", "a2", "z17", "z16", "z15", "z14", "z13", "p9", "p8", "p7"
)]
pub fn vector_to_f32(this: &[f16]) -> Vec<f32> {
let n = this.len();
let mut r = Vec::<f32>::with_capacity(n);
Expand Down
Loading
Loading