diff --git a/crates/prover/benches/eval_at_point.rs b/crates/prover/benches/eval_at_point.rs index 1b148edbc..4bda3228c 100644 --- a/crates/prover/benches/eval_at_point.rs +++ b/crates/prover/benches/eval_at_point.rs @@ -37,7 +37,10 @@ pub fn cpu_eval_at_secure_point(c: &mut criterion::Criterion) { let point = CirclePoint { x, y }; c.bench_function("cpu eval_at_secure_field_point 2^20", |b| { b.iter(|| { - black_box(::eval_at_point(&poly, point)); + black_box(::eval_at_points( + &[&poly], + &[vec![point]], + )); }) }); } @@ -79,7 +82,10 @@ pub fn avx512_eval_at_secure_point(c: &mut criterion::Criterion) { let point = CirclePoint { x, y }; c.bench_function("avx eval_at_secure_field_point 2^20", |b| { b.iter(|| { - black_box(::eval_at_point(&poly, point)); + black_box(::eval_at_points( + &[&poly], + &[vec![point]], + )); }) }); } diff --git a/crates/prover/src/core/backend/avx512/circle.rs b/crates/prover/src/core/backend/avx512/circle.rs index 922fc46d1..efaa93801 100644 --- a/crates/prover/src/core/backend/avx512/circle.rs +++ b/crates/prover/src/core/backend/avx512/circle.rs @@ -1,3 +1,5 @@ +use std::iter::zip; + use bytemuck::{cast_slice, Zeroable}; use num_traits::One; @@ -115,57 +117,11 @@ impl AVX512Backend { fn advance_twiddle(twiddle: F, steps: &[F], curr_idx: usize) -> F { twiddle * steps[curr_idx.trailing_ones() as usize] } -} - -// TODO(spapini): Everything is returned in redundant representation, where values can also be P. -// Decide if and when it's ok and what to do if it's not. -impl PolyOps for AVX512Backend { - // The twiddles type is i32, and not BaseField. This is because the fast AVX mul implementation - // requries one of the numbers to be shifted left by 1 bit. This is not a reduced - // representation of the field. - type Twiddles = Vec; - - fn new_canonical_ordered( - coset: CanonicCoset, - values: Col, - ) -> CircleEvaluation { - // TODO(spapini): Optimize. - let eval = CPUBackend::new_canonical_ordered(coset, as_cpu_vec(values)); - CircleEvaluation::new( - eval.domain, - Col::::from_iter(eval.values), - ) - } - - fn interpolate( - eval: CircleEvaluation, - twiddles: &TwiddleTree, - ) -> CirclePoly { - let mut values = eval.values; - let log_size = values.length.ilog2(); - - let twiddles = domain_line_twiddles_from_tree(eval.domain, &twiddles.itwiddles); - - // Safe because [PackedBaseField] is aligned on 64 bytes. - unsafe { - ifft::ifft( - std::mem::transmute(values.data.as_mut_ptr()), - &twiddles, - log_size as usize, - ); - } - - // TODO(spapini): Fuse this multiplication / rotation. - let inv = BaseField::from_u32_unchecked(eval.domain.size() as u32).inverse(); - let inv = PackedBaseField::from_array([inv; 16]); - for x in values.data.iter_mut() { - *x *= inv; - } - - CirclePoly::new(values) - } - fn eval_at_point(poly: &CirclePoly, point: CirclePoint) -> SecureField { + fn eval_at_point( + poly: &CirclePoly, + point: CirclePoint, + ) -> SecureField { // If the polynomial is small, fallback to evaluate directly. // TODO(Ohad): it's possible to avoid falling back. Consider fixing. if poly.log_size() <= 8 { @@ -212,6 +168,74 @@ impl PolyOps for AVX512Backend { (sum * twiddle_lows).pointwise_sum() } +} + +// TODO(spapini): Everything is returned in redundant representation, where values can also be P. +// Decide if and when it's ok and what to do if it's not. +impl PolyOps for AVX512Backend { + // The twiddles type is i32, and not BaseField. This is because the fast AVX mul implementation + // requries one of the numbers to be shifted left by 1 bit. This is not a reduced + // representation of the field. + type Twiddles = Vec; + + fn new_canonical_ordered( + coset: CanonicCoset, + values: Col, + ) -> CircleEvaluation { + // TODO(spapini): Optimize. + let eval = CPUBackend::new_canonical_ordered(coset, as_cpu_vec(values)); + CircleEvaluation::new( + eval.domain, + Col::::from_iter(eval.values), + ) + } + + fn interpolate_batch( + evals: Vec>, + twiddles: &TwiddleTree, + ) -> Vec> { + evals + .into_iter() + .map(|eval| { + let mut values = eval.values; + let log_size = values.length.ilog2(); + + let twiddles = domain_line_twiddles_from_tree(eval.domain, &twiddles.itwiddles); + + // Safe because [PackedBaseField] is aligned on 64 bytes. + unsafe { + ifft::ifft( + std::mem::transmute(values.data.as_mut_ptr()), + &twiddles, + log_size as usize, + ); + } + + // TODO(spapini): Fuse this multiplication / rotation. + let inv = BaseField::from_u32_unchecked(eval.domain.size() as u32).inverse(); + let inv = PackedBaseField::from_array([inv; 16]); + for x in values.data.iter_mut() { + *x *= inv; + } + + CirclePoly::new(values) + }) + .collect() + } + + fn eval_at_points( + poly: &[&CirclePoly], + points: &[Vec>], + ) -> Vec> { + zip(poly, points) + .map(|(poly, points)| { + points + .iter() + .map(|point| Self::eval_at_point(poly, *point)) + .collect() + }) + .collect() + } fn extend(poly: &CirclePoly, log_size: u32) -> CirclePoly { // TODO(spapini): Optimize or get rid of extend. @@ -219,63 +243,67 @@ impl PolyOps for AVX512Backend { .interpolate() } - fn evaluate( - poly: &CirclePoly, - domain: CircleDomain, + fn evaluate_batch( + polys: &[&CirclePoly], + domains: &[CircleDomain], twiddles: &TwiddleTree, - ) -> CircleEvaluation { - // TODO(spapini): Precompute twiddles. - // TODO(spapini): Handle small cases. - let log_size = domain.log_size() as usize; - let fft_log_size = poly.log_size() as usize; - assert!( - log_size >= fft_log_size, - "Can only evaluate on larger domains" - ); - - let twiddles = domain_line_twiddles_from_tree(domain, &twiddles.twiddles); - - // Evaluate on a big domains by evaluating on several subdomains. - let log_subdomains = log_size - fft_log_size; - - // Alllocate the destination buffer without initializing. - let mut values = Vec::with_capacity(domain.size() >> VECS_LOG_SIZE); - #[allow(clippy::uninit_vec)] - unsafe { - values.set_len(domain.size() >> VECS_LOG_SIZE) - }; - - for i in 0..(1 << log_subdomains) { - // The subdomain twiddles are a slice of the large domain twiddles. - let subdomain_twiddles = (0..(fft_log_size - 1)) - .map(|layer_i| { - &twiddles[layer_i] - [i << (fft_log_size - 2 - layer_i)..(i + 1) << (fft_log_size - 2 - layer_i)] - }) - .collect::>(); - - // FFT from the coefficients buffer to the values chunk. - unsafe { - rfft::fft( - std::mem::transmute(poly.coeffs.data.as_ptr()), - std::mem::transmute( - values[i << (fft_log_size - VECS_LOG_SIZE) - ..(i + 1) << (fft_log_size - VECS_LOG_SIZE)] - .as_mut_ptr(), - ), - &subdomain_twiddles, - fft_log_size, + ) -> Vec> { + zip(polys, domains) + .map(|(poly, domain)| { + // TODO(spapini): Precompute twiddles. + // TODO(spapini): Handle small cases. + let log_size = domain.log_size() as usize; + let fft_log_size = poly.log_size() as usize; + assert!( + log_size >= fft_log_size, + "Can only evaluate on larger domains" ); - } - } - CircleEvaluation::new( - domain, - BaseFieldVec { - data: values, - length: domain.size(), - }, - ) + let twiddles = domain_line_twiddles_from_tree(*domain, &twiddles.twiddles); + + // Evaluate on a big domains by evaluating on several subdomains. + let log_subdomains = log_size - fft_log_size; + + // Alllocate the destination buffer without initializing. + let mut values = Vec::with_capacity(domain.size() >> VECS_LOG_SIZE); + #[allow(clippy::uninit_vec)] + unsafe { + values.set_len(domain.size() >> VECS_LOG_SIZE) + }; + + for i in 0..(1 << log_subdomains) { + // The subdomain twiddles are a slice of the large domain twiddles. + let subdomain_twiddles = (0..(fft_log_size - 1)) + .map(|layer_i| { + &twiddles[layer_i][i << (fft_log_size - 2 - layer_i) + ..(i + 1) << (fft_log_size - 2 - layer_i)] + }) + .collect::>(); + + // FFT from the coefficients buffer to the values chunk. + unsafe { + rfft::fft( + std::mem::transmute(poly.coeffs.data.as_ptr()), + std::mem::transmute( + values[i << (fft_log_size - VECS_LOG_SIZE) + ..(i + 1) << (fft_log_size - VECS_LOG_SIZE)] + .as_mut_ptr(), + ), + &subdomain_twiddles, + fft_log_size, + ); + } + } + + CircleEvaluation::new( + *domain, + BaseFieldVec { + data: values, + length: domain.size(), + }, + ) + }) + .collect() } fn precompute_twiddles(coset: Coset) -> TwiddleTree { @@ -454,14 +482,14 @@ mod tests { let p = CirclePoint { x, y }; assert_eq!( - ::eval_at_point(&poly, p), + ::eval_at_points(&[&poly], &[vec![p]])[0][0], slow_eval_at_point(&poly, p), "log_size = {log_size}" ); println!( "log_size = {log_size} passed, eval{}", - ::eval_at_point(&poly, p) + ::eval_at_points(&[&poly], &[vec![p]])[0][0] ); } } diff --git a/crates/prover/src/core/backend/cpu/circle.rs b/crates/prover/src/core/backend/cpu/circle.rs index 0c1ee7fa8..02be7b1b4 100644 --- a/crates/prover/src/core/backend/cpu/circle.rs +++ b/crates/prover/src/core/backend/cpu/circle.rs @@ -1,3 +1,5 @@ +use std::iter::zip; + use num_traits::Zero; use super::CPUBackend; @@ -36,57 +38,75 @@ impl PolyOps for CPUBackend { CircleEvaluation::new(domain, new_values) } - fn interpolate( - eval: CircleEvaluation, + fn interpolate_batch( + evals: Vec>, twiddles: &TwiddleTree, - ) -> CirclePoly { - let mut values = eval.values; - - assert!(eval.domain.half_coset.is_doubling_of(twiddles.root_coset)); - let line_twiddles = domain_line_twiddles_from_tree(eval.domain, &twiddles.itwiddles); - - if eval.domain.log_size() == 1 { - let (mut val0, mut val1) = (values[0], values[1]); - ibutterfly( - &mut val0, - &mut val1, - eval.domain.half_coset.initial.y.inverse(), - ); - let inv = BaseField::from_u32_unchecked(2).inverse(); - (values[0], values[1]) = (val0 * inv, val1 * inv); - return CirclePoly::new(values); - }; - - let circle_twiddles = circle_twiddles_from_line_twiddles(line_twiddles[0]); - - for (h, t) in circle_twiddles.enumerate() { - fft_layer_loop(&mut values, 0, h, t, ibutterfly); - } - for (layer, layer_twiddles) in line_twiddles.into_iter().enumerate() { - for (h, &t) in layer_twiddles.iter().enumerate() { - fft_layer_loop(&mut values, layer + 1, h, t, ibutterfly); - } - } - - // Divide all values by 2^log_size. - let inv = BaseField::from_u32_unchecked(eval.domain.size() as u32).inverse(); - for val in &mut values { - *val *= inv; - } - - CirclePoly::new(values) + ) -> Vec> { + evals + .into_iter() + .map(|eval| { + let mut values = eval.values; + + assert!(eval.domain.half_coset.is_doubling_of(twiddles.root_coset)); + let line_twiddles = + domain_line_twiddles_from_tree(eval.domain, &twiddles.itwiddles); + + if eval.domain.log_size() == 1 { + let (mut val0, mut val1) = (values[0], values[1]); + ibutterfly( + &mut val0, + &mut val1, + eval.domain.half_coset.initial.y.inverse(), + ); + let inv = BaseField::from_u32_unchecked(2).inverse(); + (values[0], values[1]) = (val0 * inv, val1 * inv); + return CirclePoly::new(values); + }; + + let circle_twiddles = circle_twiddles_from_line_twiddles(line_twiddles[0]); + + for (h, t) in circle_twiddles.enumerate() { + fft_layer_loop(&mut values, 0, h, t, ibutterfly); + } + for (layer, layer_twiddles) in line_twiddles.into_iter().enumerate() { + for (h, &t) in layer_twiddles.iter().enumerate() { + fft_layer_loop(&mut values, layer + 1, h, t, ibutterfly); + } + } + + // Divide all values by 2^log_size. + let inv = BaseField::from_u32_unchecked(eval.domain.size() as u32).inverse(); + for val in &mut values { + *val *= inv; + } + + CirclePoly::new(values) + }) + .collect() } - fn eval_at_point(poly: &CirclePoly, point: CirclePoint) -> SecureField { - // TODO(Andrew): Allocation here expensive for small polynomials. - let mut mappings = vec![point.y, point.x]; - let mut x = point.x; - for _ in 2..poly.log_size() { - x = CirclePoint::double_x(x); - mappings.push(x); - } - mappings.reverse(); - fold(&poly.coeffs, &mappings) + fn eval_at_points( + poly: &[&CirclePoly], + points: &[Vec>], + ) -> Vec> { + zip(poly, points) + .map(|(poly, points)| { + points + .iter() + .map(|point| { + // TODO(Andrew): Allocation here expensive for small polynomials. + let mut mappings = vec![point.y, point.x]; + let mut x = point.x; + for _ in 2..poly.log_size() { + x = CirclePoint::double_x(x); + mappings.push(x); + } + mappings.reverse(); + fold(&poly.coeffs, &mappings) + }) + .collect() + }) + .collect() } fn extend(poly: &CirclePoly, log_size: u32) -> CirclePoly { @@ -97,34 +117,38 @@ impl PolyOps for CPUBackend { CirclePoly::new(coeffs) } - fn evaluate( - poly: &CirclePoly, - domain: CircleDomain, + fn evaluate_batch( + polys: &[&CirclePoly], + domains: &[CircleDomain], twiddles: &TwiddleTree, - ) -> CircleEvaluation { - let mut values = poly.extend(domain.log_size()).coeffs; - - assert!(domain.half_coset.is_doubling_of(twiddles.root_coset)); - let line_twiddles = domain_line_twiddles_from_tree(domain, &twiddles.twiddles); - - if domain.log_size() == 1 { - let (mut val0, mut val1) = (values[0], values[1]); - butterfly(&mut val0, &mut val1, domain.half_coset.initial.y.inverse()); - return CircleEvaluation::new(domain, values); - }; - - let circle_twiddles = circle_twiddles_from_line_twiddles(line_twiddles[0]); - - for (layer, layer_twiddles) in line_twiddles.iter().enumerate().rev() { - for (h, &t) in layer_twiddles.iter().enumerate() { - fft_layer_loop(&mut values, layer + 1, h, t, butterfly); - } - } - for (h, t) in circle_twiddles.enumerate() { - fft_layer_loop(&mut values, 0, h, t, butterfly); - } - - CircleEvaluation::new(domain, values) + ) -> Vec> { + zip(polys, domains) + .map(|(poly, domain)| { + let mut values = poly.extend(domain.log_size()).coeffs; + + assert!(domain.half_coset.is_doubling_of(twiddles.root_coset)); + let line_twiddles = domain_line_twiddles_from_tree(*domain, &twiddles.twiddles); + + if domain.log_size() == 1 { + let (mut val0, mut val1) = (values[0], values[1]); + butterfly(&mut val0, &mut val1, domain.half_coset.initial.y.inverse()); + return CircleEvaluation::new(*domain, values); + }; + + let circle_twiddles = circle_twiddles_from_line_twiddles(line_twiddles[0]); + + for (layer, layer_twiddles) in line_twiddles.iter().enumerate().rev() { + for (h, &t) in layer_twiddles.iter().enumerate() { + fft_layer_loop(&mut values, layer + 1, h, t, butterfly); + } + } + for (h, t) in circle_twiddles.enumerate() { + fft_layer_loop(&mut values, 0, h, t, butterfly); + } + + CircleEvaluation::new(*domain, values) + }) + .collect() } fn precompute_twiddles(mut coset: Coset) -> TwiddleTree { diff --git a/crates/prover/src/core/poly/circle/evaluation.rs b/crates/prover/src/core/poly/circle/evaluation.rs index 75e241137..b75c23a66 100644 --- a/crates/prover/src/core/poly/circle/evaluation.rs +++ b/crates/prover/src/core/poly/circle/evaluation.rs @@ -80,13 +80,15 @@ impl CircleEvaluation { /// Computes a minimal [CirclePoly] that evaluates to the same values as this evaluation. pub fn interpolate(self) -> CirclePoly { let coset = self.domain.half_coset; - B::interpolate(self, &B::precompute_twiddles(coset)) + B::interpolate_batch(vec![self], &B::precompute_twiddles(coset)) + .pop() + .unwrap() } /// Computes a minimal [CirclePoly] that evaluates to the same values as this evaluation, using /// precomputed twiddles. pub fn interpolate_with_twiddles(self, twiddles: &TwiddleTree) -> CirclePoly { - B::interpolate(self, twiddles) + B::interpolate_batch(vec![self], twiddles).pop().unwrap() } } diff --git a/crates/prover/src/core/poly/circle/ops.rs b/crates/prover/src/core/poly/circle/ops.rs index 40b86cb68..635be8bef 100644 --- a/crates/prover/src/core/poly/circle/ops.rs +++ b/crates/prover/src/core/poly/circle/ops.rs @@ -20,28 +20,32 @@ pub trait PolyOps: FieldOps + Sized { values: Col, ) -> CircleEvaluation; - /// Computes a minimal [CirclePoly] that evaluates to the same values as this evaluation. + /// Computes a minimal [CirclePoly] for each evaluation, that evaluates to the same values as + /// that evaluation. /// Used by the [`CircleEvaluation::interpolate()`] function. - fn interpolate( - eval: CircleEvaluation, + fn interpolate_batch( + evals: Vec>, itwiddles: &TwiddleTree, - ) -> CirclePoly; + ) -> Vec>; - /// Evaluates the polynomial at a single point. + /// Evaluates each polynomial at a set of points. /// Used by the [`CirclePoly::eval_at_point()`] function. - fn eval_at_point(poly: &CirclePoly, point: CirclePoint) -> SecureField; + fn eval_at_points( + poly: &[&CirclePoly], + points: &[Vec>], + ) -> Vec>; /// Extends the polynomial to a larger degree bound. /// Used by the [`CirclePoly::extend()`] function. fn extend(poly: &CirclePoly, log_size: u32) -> CirclePoly; - /// Evaluates the polynomial at all points in the domain. + /// Evaluates each polynomial at a domain. /// Used by the [`CirclePoly::evaluate()`] function. - fn evaluate( - poly: &CirclePoly, - domain: CircleDomain, + fn evaluate_batch( + poly: &[&CirclePoly], + domains: &[CircleDomain], twiddles: &TwiddleTree, - ) -> CircleEvaluation; + ) -> Vec>; /// Precomputes twiddles for a given coset. fn precompute_twiddles(coset: Coset) -> TwiddleTree; diff --git a/crates/prover/src/core/poly/circle/poly.rs b/crates/prover/src/core/poly/circle/poly.rs index 3bbbf523b..c897c802f 100644 --- a/crates/prover/src/core/poly/circle/poly.rs +++ b/crates/prover/src/core/poly/circle/poly.rs @@ -40,7 +40,7 @@ impl CirclePoly { /// Evaluates the polynomial at a single point. pub fn eval_at_point(&self, point: CirclePoint) -> SecureField { - B::eval_at_point(self, point) + B::eval_at_points(&[self], &[vec![point]])[0][0] } /// Extends the polynomial to a larger degree bound. @@ -53,7 +53,13 @@ impl CirclePoly { &self, domain: CircleDomain, ) -> CircleEvaluation { - B::evaluate(self, domain, &B::precompute_twiddles(domain.half_coset)) + B::evaluate_batch( + &[self], + &[domain], + &B::precompute_twiddles(domain.half_coset), + ) + .pop() + .unwrap() } /// Evaluates the polynomial at all points in the domain, using precomputed twiddles. @@ -62,7 +68,9 @@ impl CirclePoly { domain: CircleDomain, twiddles: &TwiddleTree, ) -> CircleEvaluation { - B::evaluate(self, domain, twiddles) + B::evaluate_batch(&[self], &[domain], twiddles) + .pop() + .unwrap() } }