From b6f41e242c8e02b71ae59fa01970eaa7735f6c79 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Mon, 8 Jul 2024 17:20:39 +0200 Subject: [PATCH] Add `PoseidonGLMemory` machine (#1525) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements #1055 for the Poseidon machines. Pulled out of #1508. Specifically, this PR adds a new `PoseidonGLMemory` machine which receives 2 memory points and then: - Reads 24 32-Bit words and packs them into 12 field elements - Computes the Poseidon permutation (just like `PoseidonGL`) - For each of the 4 output field elements, it: - Invokes the `SplitGL` machine to get the canonical `u64` representation - Writes the 8 32-Bit words to memory at the provided memory pointer The read and write memory regions can even overlap! 🎉 This should simplify our RISC-V machine, as the syscall already expects two memory pointers. We can simply pass it to the machine directly. I started doing that in #1533, but I think it makes sense to wait until #1443 is merged. To test: ``` cargo run -r pil test_data/std/poseidon_gl_test.asm -o output -f --export-csv --prove-with estark-starky ``` I recommend reviewing the diff between `std/machines/hash/poseidon_gl.asm` and `std/machines/hash/poseidon_gl_memory.asm` ### Discussion The overhead of the memory read / write is quite high (18 extra witness columns, see [this comment](https://github.com/powdr-labs/powdr/blob/40bdca4368c3accccb753aa35ac1027ccb8def0e/std/machines/hash/poseidon_gl_memory.asm#L13-L23), mostly because we now need to have the input available in all rows (which previously was only the case for the outputs). If we had offsets other than 0 and 1, this could be avoided. Doing 24 parallel memory reads in the first row would *not* help, because we'd have to add 24 witness columns (instead of 2 now) to store the result of the memory operation. A few more notes: - With Vadcop, 18 extra witness columns in a secondary machine is *a lot* better than introducing more registers (either "regular" registers or assignment registers) in the main machine - As mentioned [here](https://github.com/powdr-labs/powdr/blob/40bdca4368c3accccb753aa35ac1027ccb8def0e/std/machines/hash/poseidon_gl_memory.asm#L111-L113), we could get rid of two permutations if either: - We were able to express explicitly that we want to call at most one operation in the current row, or - We had an optimizer that would be smart enough to batch the memory reads and writes. - We could also have just 1 read or write at a time (instead of 2), but we'd have to increase the block size from 31 to 32 and the implementation would be more complicated. - We could also store the full final state of the Poseidon permutation, instead of just the first 4 elements. This would need 8 more witness columns to make the entire output available in all rows. Then, one could use the machine to implement a Poseidon sponge, instead of. - Looking at the bootloader, maybe it makes sense to pass 3 input pointers instead of 1: One for the first 4 elements, one for the next 4, and one for the capacity (often just a constant). For example, when computing a Merkle root, you'd pass pointers for the two children hashes and a pointer to the capacity constant. --- pipeline/tests/powdr_std.rs | 7 + std/machines/hash/mod.asm | 1 + std/machines/hash/poseidon_bn254.asm | 3 - std/machines/hash/poseidon_gl.asm | 1 - std/machines/hash/poseidon_gl_memory.asm | 198 ++++++++++++++++++++++ test_data/std/poseidon_gl_memory_test.asm | 156 +++++++++++++++++ 6 files changed, 362 insertions(+), 4 deletions(-) create mode 100644 std/machines/hash/poseidon_gl_memory.asm create mode 100644 test_data/std/poseidon_gl_memory_test.asm diff --git a/pipeline/tests/powdr_std.rs b/pipeline/tests/powdr_std.rs index 96bbc47c2..a227dd015 100644 --- a/pipeline/tests/powdr_std.rs +++ b/pipeline/tests/powdr_std.rs @@ -32,6 +32,13 @@ fn poseidon_gl_test() { gen_estark_proof(f, Default::default()); } +#[test] +fn poseidon_gl_memory_test() { + let f = "std/poseidon_gl_memory_test.asm"; + verify_test_file(f, Default::default(), vec![]).unwrap(); + gen_estark_proof(f, Default::default()); +} + #[test] fn split_bn254_test() { let f = "std/split_bn254_test.asm"; diff --git a/std/machines/hash/mod.asm b/std/machines/hash/mod.asm index b8837dd7f..f173cc68a 100644 --- a/std/machines/hash/mod.asm +++ b/std/machines/hash/mod.asm @@ -1,2 +1,3 @@ mod poseidon_bn254; mod poseidon_gl; +mod poseidon_gl_memory; diff --git a/std/machines/hash/poseidon_bn254.asm b/std/machines/hash/poseidon_bn254.asm index 0f654b04f..21e83c583 100644 --- a/std/machines/hash/poseidon_bn254.asm +++ b/std/machines/hash/poseidon_bn254.asm @@ -2,9 +2,6 @@ use std::array; use std::utils::unchanged_until; // Implements the Poseidon permutation for the BN254 curve. -// Note that this relies on the trace table being non-wrapping, so it will -// only work with the Halo2 backend (which is the only backend that supports -// the BN254 curve). machine PoseidonBN254 with latch: FIRSTBLOCK, operation_id: operation_id, diff --git a/std/machines/hash/poseidon_gl.asm b/std/machines/hash/poseidon_gl.asm index 89983d3a1..be7b8bc57 100644 --- a/std/machines/hash/poseidon_gl.asm +++ b/std/machines/hash/poseidon_gl.asm @@ -32,7 +32,6 @@ machine PoseidonGL with let PARTIAL_ROUNDS: int = 22; let ROWS_PER_HASH = FULL_ROUNDS + PARTIAL_ROUNDS + 1; - pol constant L0 = [1] + [0]*; pol constant FIRSTBLOCK(i) { if i % ROWS_PER_HASH == 0 { 1 } else { 0 } }; pol constant LASTBLOCK(i) { if i % ROWS_PER_HASH == ROWS_PER_HASH - 1 { 1 } else { 0 } }; // Like LASTBLOCK, but also 1 in the last row of the table diff --git a/std/machines/hash/poseidon_gl_memory.asm b/std/machines/hash/poseidon_gl_memory.asm new file mode 100644 index 000000000..14a3ec490 --- /dev/null +++ b/std/machines/hash/poseidon_gl_memory.asm @@ -0,0 +1,198 @@ +use std::array; +use std::check::assert; +use std::utils::unchanged_until; +use std::utils::force_bool; +use std::utils::sum; +use std::convert::expr; +use std::machines::memory::Memory; +use std::machines::split::split_gl::SplitGL; + +// Implements the Poseidon permutation for the Goldilocks field. +// This version of the Poseidon machine receives memory pointers and interacts +// with memory directly to fetch its inputs and write its outputs. +// In comparison to std::machines::hash::poseidon_gl::PoseidonGL, this machine has: +// - 18 extra witness columns: +// - 12 to make the input state available in all rows +// - 3 to make the time step, input address, and output address available in all rows +// - 2 to store the low and high words of the memory read +// - 1 to store whether a memory read should be done (could be removed if we use an intermediate polynomial, see below) +// - 16 extra fixed columns to store a one-hot encoding of the row number (for the first 12 + 4 rows) +// - 5 extra permutations: +// - 2 to read the low and high words from the memory +// - 2 to write the low and high words to the memory +// - 1 to split the current output into low and high words +machine PoseidonGLMemory(mem: Memory, split_gl: SplitGL) with + latch: CLK_0, + operation_id: operation_id, + // Allow this machine to be connected via a permutation + call_selectors: sel, +{ + + // Hashes 8 "rate" elements and 4 "capacity" elements to 4 field elements + // by applying the Poseidon permutation and returning the first 4 rate elements. + // When the hash function is used only once, the capacity elements should be + // set to constants, where different constants can be used to define different + // hash functions. + // The input data is passed via a memory pointer: The machine will read 24 + // 32-Bit machine words, interpreted as 12 field elements stored in little-endian + // format. + // Similarly, the output data is written to memory at the provided pointer as + // 8 32-Bit machine words representing 4 field elements in little-endian format + // (in canonical form). + // Reads happen at the provided time step; writes happen at the next time step. + operation poseidon_permutation<0> input_addr, output_addr, time_step ->; + + col witness operation_id; + + // Ported from: + // - https://github.com/0xPolygonHermez/zkevm-proverjs/blob/main/pil/poseidong.pil + // - https://github.com/0xPolygonHermez/zkevm-proverjs/blob/main/src/sm/sm_poseidong.js + + // Number of field elements in the state + let STATE_SIZE: int = 12; + // Number of output elements + let OUTPUT_SIZE: int = 4; + // Number of full rounds + let FULL_ROUNDS: int = 8; + // Number of partial rounds (half of them before and half of them after the full rounds) + let PARTIAL_ROUNDS: int = 22; + let ROWS_PER_HASH = FULL_ROUNDS + PARTIAL_ROUNDS + 1; + + + // ------------- Begin memory read / write --------------- + + // Get an intermediate column that indicates that we're in an + // actual block, not a default block. Its value is constant + // within the block. + let used = array::sum(sel); + array::map(sel, |s| unchanged_until(s, LAST)); + std::utils::force_bool(used); + + // Repeat the input state in the whole block + col witness input[STATE_SIZE]; + array::map(input, |c| unchanged_until(c, LAST)); + array::zip(input, state, |i, s| CLK[0] * (i - s) = 0); + + // Repeat the time step and input / output address in the whole block + col witness time_step; + col witness input_addr; + col witness output_addr; + unchanged_until(time_step, LAST); + unchanged_until(input_addr, LAST); + unchanged_until(output_addr, LAST); + + // One-hot encoding of the row number (for the first rows) + assert(STATE_SIZE + OUTPUT_SIZE < ROWS_PER_HASH, || "Not enough rows to do memory read / write"); + let CLK: col[STATE_SIZE + OUTPUT_SIZE] = array::new(STATE_SIZE + OUTPUT_SIZE, |i| |row| if row % ROWS_PER_HASH == i { 1 } else { 0 }); + let CLK_0 = CLK[0]; + + col witness word_low, word_high; + + // Do *two* memory reads in each of the first STATE_SIZE rows + // For input i, we expect the low word at address input_addr + 8 * i and + // the high word at address input_addr + 8 * i + 4 + // TODO: This could be an intermediate polynomial, but for some reason estark-starky + // fails then, so we keep it as a witness for now + let do_mload; + do_mload = used * sum(STATE_SIZE, |i| CLK[i]); + let input_index = sum(STATE_SIZE, |i| expr(i) * CLK[i]); + link if do_mload ~> word_low = mem.mload(input_addr + 8 * input_index, time_step); + link if do_mload ~> word_high = mem.mload(input_addr + 8 * input_index + 4, time_step); + + // Combine the low and high words and write it into `input` + let current_input = array::sum(array::new(STATE_SIZE, |i| CLK[i] * input[i])); + do_mload * (word_low + word_high * 2**32 - current_input) = 0; + + // Do *two* memory writes in each of the next OUTPUT_SIZE rows + // For output i, we write the low word at address output_addr + 8 * i and + // the high word at address output_addr + 8 * i + 4 + let do_mstore = used * sum(OUTPUT_SIZE, |i| CLK[i + STATE_SIZE]); + let output_index = sum(OUTPUT_SIZE, |i| expr(i) * CLK[i + STATE_SIZE]); + // TODO: This translates to two additional permutations. But because they go to the same machine + // as the mloads above *and* never happen at the same time, they could actually be combined with + // the mload permutations. But there is currently no way to express this. + link if do_mstore ~> mem.mstore(output_addr + 8 * output_index, time_step + 1, word_low); + link if do_mstore ~> mem.mstore(output_addr + 8 * output_index + 4, time_step + 1, word_high); + + // Make sure that in row i + STATE_SIZE, word_low and word_high correspond to output i + let current_output = array::sum(array::new(OUTPUT_SIZE, |i| CLK[i + STATE_SIZE] * output[i])); + link if do_mstore ~> (word_low, word_high) = split_gl.split(current_output); + + + // ------------- End memory read / write --------------- + + pol constant LASTBLOCK(i) { if i % ROWS_PER_HASH == ROWS_PER_HASH - 1 { 1 } else { 0 } }; + // Like LASTBLOCK, but also 1 in the last row of the table + // Specified this way because we can't access the degree in the match statement + pol constant LAST = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]* + [1]; + + // Whether the current round is a partial round + pol constant PARTIAL = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]*; + + // The round constants + pol constant C_0 = [0xb585f766f2144405, 0x86287821f722c881, 0xe9fa634a21de0082, 0x92a756e67e2b9413, 0x3cc3f892184df408, 0x7131aa45268d7d8c, 0x99ad1aab0814283b, 0xeb84f608da56ef48, 0x7159cd30c3ac118e, 0xdcef0797c2b69ec7, 0xd0762cbc8ca6570c, 0x30a4680593258387, 0x15a16a8a8322d458, 0x5a3f1bb1c53a9645, 0x775005982d74d7f7, 0xf9cc95c22b4c1fcc, 0xc49366bb25e8513, 0xdd611f1000c17442, 0x2ff876fa5ef97c4, 0x3d06c8bd1514e2d9, 0xe89cd854d5d01d33, 0xece5a71e0cfedc75, 0x90004c1371b893c5, 0xde122bebe9a39368, 0x4d61e56a525d225a, 0x1478d361dbbf9fac, 0x475cd3205a3bdcde, 0xe70201e960cb78b8, 0x7be5b9ffda905e1c, 0xf3c12fe54d5c653b, 0x0]*; + pol constant C_1 = [0x7746a55f43921ad7, 0x59cd1a8a41c18e55, 0xf56f6d48959a600d, 0x70e741ebfee96586, 0x2e479dc157bf31bb, 0x9351036095630f9f, 0x438a7c91d416ca4d, 0xda608834c40e603d, 0x839b4e8fafead540, 0x3d639263da827b13, 0x34c6efb812b04bf5, 0x337dc00c61bd9ce1, 0x388a128b7fd9a609, 0xdb7f023869fb8d38, 0x86ab99b4dde6c8b0, 0x8d37f755f4ae9f6, 0x784d3d2f1698309, 0xd8185f9adfea4fd0, 0xc5cb72a2a51159b0, 0x9c9c98207cb10767, 0x5cd377dc8bb882a2, 0x5ff98fd5d51fe610, 0xb932b7cf752e5545, 0x4d001fd58f002526, 0x262e963c8da05d3d, 0x6f02dc07d141875c, 0x18a42105c31b7e88, 0x6f90ff3b6a65f108, 0xa3c95eaec244aa30, 0x40b9e922ed9771e2, 0x0]*; + pol constant C_2 = [0xb2fb0d31cee799b4, 0xc3b919ad495dc574, 0xf7d713e806391165, 0x19d5ee2af82ec1c, 0x6f49de07a6234346, 0xad535b24afc26bfb, 0xb60de3bcc5ea751c, 0x8f97fe408061f183, 0xd3f3e5e82920adc, 0xe273fd971bc8d0e7, 0x40bf0ab5fa14c112, 0xd5eca244c7a4ff1d, 0x2300e5d6baedf0fb, 0xb462065911d4e1fc, 0xb1204f603f51c080, 0xeec49b613478675b, 0x530fb67ea1809a81, 0xef87139ca9a3ab1e, 0x8470f39d2d5c900e, 0x65700b51aedfb5ef, 0xa7b0fb7883eee860, 0x83e8941918964615, 0xa0b1df81b6fe59fc, 0xca6637000eb4a9f8, 0x59e89b094d220ec2, 0x296a202ed8e556a2, 0x23e7414af663068, 0x42747a7245e7fa84, 0x230bca8f4df0544, 0x551f5b0fbe7b1840, 0x0]*; + pol constant C_3 = [0xf6760a4803427d7, 0xa484c4c5ef6a0781, 0x8297132b32825daf, 0x6f6f2ed772466352, 0x213ce7bede378d7b, 0x4627f5c6993e44be, 0xc99cab6aef6f58bc, 0xa93f485c96f37b89, 0x8f7d83bddee7bba8, 0x418f02702d227ed5, 0xb6b570fc7c5740d3, 0x7762638264d279bd, 0x2f63aa8647e15104, 0x49c24ae4437d8030, 0xef61ac8470250ecf, 0xf143933aed25e0b0, 0x410492299bb01f49, 0x3ba71336c34ee133, 0x25abb3f1d39fcb76, 0x911f451539869408, 0x7684403ec392950d, 0x5922040b47f150c1, 0x8ef1dd26770af2c2, 0x2f2339d624f91f78, 0x55d5b52b78b9c5e, 0x2afd67999bf32ee5, 0x15147108121967d7, 0xd1f507e43ab749b2, 0x4135c2bebfe148c6, 0x25032aa7c4cb1811, 0x0]*; + pol constant C_4 = [0xe10d666650f4e012, 0x308bbd23dc5416cc, 0xad6805e0e30b2c8a, 0x7cf416cfe7e14ca1, 0x5b0431345d4dea83, 0x645cf794b8f1cc58, 0x69a5ed92a72ee4ff, 0x6704e8ee8f18d563, 0x780f2243ea071d06, 0x8c25fda3b503038c, 0x5a27b9002de33454, 0xc1e434bedeefd767, 0xf1c36ce86ecec269, 0xd793862c112b0566, 0x1bbcd90f132c603f, 0xe4c5dd8255dfc622, 0x139542347424b9ac, 0x7d3a455d56b70238, 0x23eb8cc9b372442f, 0x7ae6849fbc3a0ec6, 0x5fa3f06f4fed3b52, 0xf97d750e3dd94521, 0x541a4f9cfbeed35, 0x6d1a7918c80df518, 0x82b27eb33514ef99, 0x7acfd96efa95491d, 0xe4a3dff1d7d6fef9, 0x1c86d265f15750cd, 0x166fc0cc438a3c72, 0xaaed34074b164346, 0x0]*; + pol constant C_5 = [0x8cae14cb07d09bf1, 0x6e4a40c18f30c09c, 0xac51d9f5fcf8535e, 0x61df517b86a46439, 0xa2de45780344d6a1, 0x241c70ed0af61617, 0x5e7b329c1ed4ad71, 0xcee3e9ac1e072119, 0xeb915845f3de1634, 0x2cbaed4daec8c07c, 0xb1a5b165b6d2b2d2, 0x299351a53b8ec22, 0x27181125183970c9, 0xaadd1106730d8feb, 0xcd1dabd964db557, 0xe7ad7756f193198e, 0x9cb0bd5ea1a1115e, 0x660d32e130182684, 0xd687ba55c64f6364, 0x3bb340eba06afe7e, 0x8df57ac11bc04831, 0x5080d4c2b86f56d7, 0x9e61106178bfc530, 0xdf9a4939342308e9, 0xd30094ca96b7ce7b, 0x6798ba0c0abb2c6d, 0x1a8d1a588085737, 0x3996ce73dd832c1c, 0x3762b59a8ae83efa, 0x8ffd96bbf9c9c81d, 0x0]*; + pol constant C_6 = [0xd438539c95f63e9f, 0x9a2eedb70d8f8cfa, 0x502ad7dc18c2ad87, 0x85dc499b11d77b75, 0x7103aaf94a7bf308, 0xacb8e076647905f1, 0x5fc0ac0800144885, 0x510d0e65e2b470c1, 0xd19e120d26b6f386, 0x5f58e6afcdd6ddc2, 0x8722e0ace9d1be22, 0xb2d456e4ad251b80, 0xe584029370dca96d, 0xc43b6e0e97b0d568, 0x11a3ae5beb9d1ec9, 0x92c2318b87fff9cb, 0x2e3f615c38f49a1, 0x297a863f48cd1f43, 0xda8d9e90fd8ff158, 0xb46e9d8b682ea65e, 0x2db01efa1e1e1897, 0xa7de115b56c78d70, 0xb3767e80935d8af2, 0xebc2151ee6c8398c, 0xcf5cb381cd0a1535, 0x34c6f57b26c92122, 0x11b4c74eda62beef, 0x8e7fba02983224bd, 0xe8928a4c89114750, 0x70fc91eb5937085c, 0x0]*; + pol constant C_7 = [0xef781c7ce35b4c3d, 0xe360c6e0ae486f38, 0x57a1550c110b3041, 0x4b959b48b9c10733, 0x5326fc0d97279301, 0x3737e9db4c4f474d, 0x32db829239774eca, 0xf6323f486b9038f0, 0x16ee53a7e5fecc6, 0x284650ac5e1b0eba, 0x788ee3b37e5680fb, 0x3e9ed1fda49cea0b, 0x4d9bbc3e02f1cfb2, 0xe29024c18ee6fca2, 0xf755bfeea585d11d, 0x739c25f8fd73596d, 0x985d4f4a9c5291ef, 0x90e0a736a751ebb7, 0xe3cbdc7d2fe45ea7, 0x8dcf22f9a3b34356, 0x54846de4aadb9ca2, 0x6a9242ac87538194, 0x98d5782065af06, 0x3cc2ba8a1116515, 0xfeed4db6919e5a7c, 0x5736e1bad206b5de, 0xe587cc0d69a73346, 0xba0dec7103255dd4, 0x2a440b51a4945ee5, 0x7f795e2a5f915440, 0x0]*; + pol constant C_8 = [0xcdc4a239b0c44426, 0xd5c7718fbfc647fb, 0x66bbd30e6ce0e583, 0xe8be3e5da8043e57, 0xa9ceb74fec024747, 0xe7ea5e33e75fffb6, 0xade699c5830f310, 0xb508cdeffa5ceef, 0xcb5fd54e7933e477, 0x635b337ee819dab5, 0x14a726661551e284, 0x2972a92ba450bed8, 0xea35bc29692af6f8, 0x5e50c27535b88c66, 0xa3b83250268ea4d7, 0x5636cac9f16dfed0, 0x775b9feafdcd26e7, 0x549f80ce550c4fd3, 0xb9a8c9b3aee52297, 0x77bdaeda586257a7, 0xba6745385893c784, 0xf7856ef7f9173e44, 0x31d191cd5c1466c7, 0xd341d037e840cf83, 0x41703f53753be59f, 0x20057d2a0056521b, 0x1ff7327017aa2a6e, 0x9e9cbd781628fc5b, 0x80cefd2b7d99ff83, 0x4543d9df5476d3cb, 0x0]*; + pol constant C_9 = [0x277fa208bf337bff, 0xc35eae071903ff0b, 0xda2abef589d644e, 0xf5c0bc1de6da8699, 0x27f8ec88bb21b1a3, 0x90dee49fc9bfc23a, 0x7cc5583b10415f21, 0xf2417089e4fb3cbd, 0xacb8417879fd449f, 0x9f9a036ed4f2d49f, 0x98b7672f9ef3b419, 0x20216dd77be493de, 0x18e21b4beabb4137, 0x10383f20a4ff9a87, 0x516306f4927c93af, 0xdd8f909a938e0172, 0x304265a6384f0f2d, 0xf73b2922f38bd64, 0xc0d28a5c10960bd3, 0xf19e400a5104d20d, 0x541d496344d2c75b, 0x2265fc92feb0dc09, 0x410fefafa319ac9d, 0x387cb5d25af4afcc, 0x5eeea940fcde8b6f, 0x3dea5bd5d0578bd7, 0x594e29c42473d06b, 0xdae8645996edd6a5, 0xbb9879c6e61fd62a, 0xf172d73e004fc90d, 0x0]*; + pol constant C_10 = [0xe17653a29da578a1, 0x849c2656969c4be7, 0xf061274fdb150d61, 0x40b12cbf09ef74bf, 0xfceb4fda1ded0893, 0xd1b1edf76bc09c92, 0x85df9ed2e166d64f, 0x60e75c2890d15730, 0x9c22190be7f74732, 0xb93e260cae5c170e, 0xbb93ae776bb30e3a, 0xadffe8cf28449ec6, 0x1e3b9fc625b554f4, 0x38e8ee9d71a45af8, 0xddb4ac49c9efa1da, 0xc6401fe115063f5b, 0x593664c39773012c, 0x16bf1f73fb7a9c3f, 0x45d7ac9b68f71a34, 0xc368a348e46d950f, 0xe909678474e687fe, 0x17dfc8e4f7ba8a57, 0xbdf8f242e316c4ab, 0xbba2515f22909e87, 0x4cd1f1b175100206, 0x16e50d897d4634ac, 0xf6f31db1899b12d5, 0xdebe0853b1a1d378, 0x6e7c8f1a84265034, 0xdfd1c4febcc81238, 0x0]*; + pol constant C_11 = [0xc54302f225db2c76, 0xc0572c8c08cbbbad, 0x28b8ec3ae9c29633, 0xa637093ecb2ad631, 0xfac6ff1346a41675, 0xb65481ba645c602, 0x6604df4fee32bcb1, 0xa6217d8bf660f29c, 0x5d693c1ba3ba3621, 0xb0a7eae879ddb76d, 0x28fd3b046380f850, 0x1c4dbb1c4c27d243, 0x25d64362697828fd, 0xdd5118375bf1a9b9, 0x64bb6dec369d4418, 0x8ad97b33f1ac1455, 0x4f0a2e5fb028f2ce, 0x6d1f5a59005bec17, 0xeeb76e397069e804, 0x9ef1cd60e679f284, 0xdfe89923f6c9c2ff, 0x9001a64209f21db8, 0x9e8cd55b57637ed0, 0x7248fe7705f38e47, 0x4a20358574454ec0, 0x29bff3ecb9b7a6e3, 0xc02ac5e47312d3ca, 0xa49229d24d014343, 0x164bb2de1bbeddc8, 0xbc8dfb627fe558fc, 0x0]*; + let C = [C_0, C_1, C_2, C_3, C_4, C_5, C_6, C_7, C_8, C_9, C_10, C_11]; + + // State of the Poseidon permutation (8 rate elements and 4 capacity elements) + pol commit state[STATE_SIZE]; + + // The first OUTPUT_SIZE elements of the *final* state + // (constrained to be constant within the block and equal to parts of the state in the last row) + pol commit output[OUTPUT_SIZE]; + + // Add round constants + let a: expr[STATE_SIZE] = array::zip(state, C, |state, C| state + C); + + // Compute S-Boxes (x^7) + let x2: expr[STATE_SIZE] = array::map(a, |a| a * a); + let x4: expr[STATE_SIZE] = array::map(x2, |x2| x2 * x2); + let x6: expr[STATE_SIZE] = array::zip(x4, x2, |x4, x2| x4 * x2); + let x7: expr[STATE_SIZE] = array::zip(x6, a, |x6, a| x6 * a); + + // Apply S-Boxes on the first element and otherwise if it is a full round. + let b: expr[STATE_SIZE] = array::new(STATE_SIZE, |i| if i == 0 { + x7[i] + } else { + PARTIAL * (a[i] - x7[i]) + x7[i] + }); + + // The MDS matrix + let M = [ + [25, 15, 41, 16, 2, 28, 13, 13, 39, 18, 34, 20], + [20, 17, 15, 41, 16, 2, 28, 13, 13, 39, 18, 34], + [34, 20, 17, 15, 41, 16, 2, 28, 13, 13, 39, 18], + [18, 34, 20, 17, 15, 41, 16, 2, 28, 13, 13, 39], + [39, 18, 34, 20, 17, 15, 41, 16, 2, 28, 13, 13], + [13, 39, 18, 34, 20, 17, 15, 41, 16, 2, 28, 13], + [13, 13, 39, 18, 34, 20, 17, 15, 41, 16, 2, 28], + [28, 13, 13, 39, 18, 34, 20, 17, 15, 41, 16, 2], + [ 2, 28, 13, 13, 39, 18, 34, 20, 17, 15, 41, 16], + [16, 2, 28, 13, 13, 39, 18, 34, 20, 17, 15, 41], + [41, 16, 2, 28, 13, 13, 39, 18, 34, 20, 17, 15], + [15, 41, 16, 2, 28, 13, 13, 39, 18, 34, 20, 17] + ]; + + // Multiply with MDS Matrix + let dot_product = |v1, v2| array::sum(array::zip(v1, v2, |v1_i, v2_i| v1_i * v2_i)); + let c: expr[STATE_SIZE] = array::map(M, |M_row_i| dot_product(M_row_i, b)); + + // Copy c to state in the next row + array::zip(state, c, |state, c| (state' - c) * (1-LAST) = 0); + + // In the last row, the first OUTPUT_SIZE elements of the state should equal output + array::zip(output, state, |output, state| LASTBLOCK * (output - state) = 0); + + // The output should stay constant in the block + array::map(output, |c| unchanged_until(c, LAST)); +} diff --git a/test_data/std/poseidon_gl_memory_test.asm b/test_data/std/poseidon_gl_memory_test.asm new file mode 100644 index 000000000..0166ee70c --- /dev/null +++ b/test_data/std/poseidon_gl_memory_test.asm @@ -0,0 +1,156 @@ +use std::machines::hash::poseidon_gl_memory::PoseidonGLMemory; +use std::machines::memory::Memory; +use std::machines::split::split_gl::SplitGL; + +machine Main with degree: 65536 { + reg pc[@pc]; + reg X1[<=]; + reg X2[<=]; + reg X3[<=]; + reg X4[<=]; + reg X5[<=]; + reg X6[<=]; + reg X7[<=]; + reg X8[<=]; + reg X9[<=]; + reg X10[<=]; + reg X11[<=]; + reg X12[<=]; + reg ADDR1[<=]; + reg ADDR2[<=]; + + SplitGL split; + + // Increase the time step by 2 in each row, so that the poseidon machine + // can read in the given time step and write in the next time step. + col fixed STEP(i) { 2 * i }; + Memory memory; + instr mstore_le ADDR1, X1, X2 -> + link ~> memory.mstore(ADDR1, STEP, X2) + link ~> memory.mstore(ADDR1 + 4, STEP, X1); + + PoseidonGLMemory poseidon(memory, split); + instr poseidon ADDR1, ADDR2 -> link ~> poseidon.poseidon_permutation(ADDR1, ADDR2, STEP); + + col witness val_low, val_high; + instr assert_eq ADDR1, X1 -> + link ~> val_low = memory.mload(ADDR1, STEP) + link ~> val_high = memory.mload(ADDR1 + 4, STEP) + { + val_low + 2**32 * val_high = X1 + } + + function main { + + // See test vectors at: + // https://github.com/0xPolygonHermez/zkevm-testvectors/blob/main/poseidon/raw-hash.json + + mstore_le 0, 0, 0; + mstore_le 8, 0, 0; + mstore_le 16, 0, 0; + mstore_le 24, 0, 0; + mstore_le 32, 0, 0; + mstore_le 40, 0, 0; + mstore_le 48, 0, 0; + mstore_le 56, 0, 0; + mstore_le 64, 0, 0; + mstore_le 72, 0, 0; + mstore_le 80, 0, 0; + mstore_le 88, 0, 0; + + poseidon 0, 0; + + assert_eq 0, 4330397376401421145; + assert_eq 8, 14124799381142128323; + assert_eq 16, 8742572140681234676; + assert_eq 24, 14345658006221440202; + + mstore_le 0, 0, 1; + mstore_le 8, 0, 1; + mstore_le 16, 0, 1; + mstore_le 24, 0, 1; + mstore_le 32, 0, 1; + mstore_le 40, 0, 1; + mstore_le 48, 0, 1; + mstore_le 56, 0, 1; + mstore_le 64, 0, 1; + mstore_le 72, 0, 1; + mstore_le 80, 0, 1; + mstore_le 88, 0, 1; + + poseidon 0, 0; + + assert_eq 0, 16428316519797902711; + assert_eq 8, 13351830238340666928; + assert_eq 16, 682362844289978626; + assert_eq 24, 12150588177266359240; + + // Pass test vector (-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1) + mstore_le 0, 0xffffffff, 0; + mstore_le 8, 0xffffffff, 0; + mstore_le 16, 0xffffffff, 0; + mstore_le 24, 0xffffffff, 0; + mstore_le 32, 0xffffffff, 0; + mstore_le 40, 0xffffffff, 0; + mstore_le 48, 0xffffffff, 0; + mstore_le 56, 0xffffffff, 0; + mstore_le 64, 0xffffffff, 0; + mstore_le 72, 0xffffffff, 0; + mstore_le 80, 0xffffffff, 0; + mstore_le 88, 0xffffffff, 0; + + poseidon 0, 0; + + assert_eq 0, 13691089994624172887; + assert_eq 8, 15662102337790434313; + assert_eq 16, 14940024623104903507; + assert_eq 24, 10772674582659927682; + + // Pass test vector (923978, 235763497586, 9827635653498, 112870, 289273673480943876, 230295874986745876, 6254867324987, 2087, 0, 0, 0, 0) + // = (0xe194a, 0x36_e4997a72, 0x8f0_2cbb6b7a, 0x1b8e6, 0x403b4df_96b43904, 0x3322ce0_cdd2a014, 0x5b0_5325203b, 0x827, 0x0, 0x0, 0x0, 0x0) + mstore_le 0, 0, 0xe194a; + mstore_le 8, 0x36, 0xe4997a72; + mstore_le 16, 0x8f0, 0x2cbb6b7a; + mstore_le 24, 0, 0x1b8e6; + mstore_le 32, 0x403b4df, 0x96b43904; + mstore_le 40, 0x3322ce0, 0xcdd2a014; + mstore_le 48, 0x5b0, 0x5325203b; + mstore_le 56, 0, 0x827; + mstore_le 64, 0, 0; + mstore_le 72, 0, 0; + mstore_le 80, 0, 0; + mstore_le 88, 0, 0; + + poseidon 0, 0; + + assert_eq 0, 1892171027578617759; + assert_eq 8, 984732815927439256; + assert_eq 16, 7866041765487844082; + assert_eq 24, 8161503938059336191; + + // Repeat the first test, but be fancy with the memory pointers being passed: + + mstore_le 100, 0, 0; + mstore_le 108, 0, 0; + mstore_le 116, 0, 0; + mstore_le 124, 0, 0; + mstore_le 132, 0, 0; + mstore_le 140, 0, 0; + mstore_le 148, 0, 0; + mstore_le 156, 0, 0; + mstore_le 164, 0, 0; + mstore_le 172, 0, 0; + mstore_le 180, 0, 0; + mstore_le 188, 0, 0; + + // This will read bytes [100, 191] and write the result to bytes [104, 131] + poseidon 100, 104; + + assert_eq 104, 4330397376401421145; + assert_eq 112, 14124799381142128323; + assert_eq 120, 8742572140681234676; + assert_eq 128, 14345658006221440202; + + return; + } +}