From c0e7d3b19a8d19f13a7d254d8af97d4fd6157c3b Mon Sep 17 00:00:00 2001 From: mohanson Date: Sun, 14 Sep 2025 16:35:18 +0800 Subject: [PATCH] Implement simd --- README.md | 1 + example/blake2b_simd.py | 23 + example/blake2b_simd/.cargo/config.toml | 5 + example/blake2b_simd/.gitignore | 1 + example/blake2b_simd/Cargo.lock | 16 + example/blake2b_simd/Cargo.toml | 10 + example/blake2b_simd/bin/blake2b_simd.wasm | Bin 0 -> 31645 bytes example/blake2b_simd/bin/blake2b_simd.wat | 11570 +++++++++++++++++++ example/blake2b_simd/src/lib.rs | 33 + pywasm/__init__.py | 1 + pywasm/arith.py | 222 + pywasm/core.py | 1807 ++- pywasm/opcode.py | 472 + script/build_spec.py | 3 + test/spec.py | 59 +- 15 files changed, 14050 insertions(+), 173 deletions(-) create mode 100644 example/blake2b_simd.py create mode 100644 example/blake2b_simd/.cargo/config.toml create mode 100644 example/blake2b_simd/.gitignore create mode 100644 example/blake2b_simd/Cargo.lock create mode 100644 example/blake2b_simd/Cargo.toml create mode 100755 example/blake2b_simd/bin/blake2b_simd.wasm create mode 100644 example/blake2b_simd/bin/blake2b_simd.wat create mode 100644 example/blake2b_simd/src/lib.rs create mode 100644 pywasm/arith.py diff --git a/README.md b/README.md index 97d6dd90..908327f4 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ A brief description for `example` | example/blake2b.py | Blake2b hashing algorithm | | example/blake2b_direct.py | Make the hash result returned as a value, not as an output parameter | | example/blake2b_iter.py | A benchmarking example using the blake2b hash function | +| example/blake2b_simd.py | Use SIMD instructions to accelerate the blake2b hash function | | example/fibonacci.py | Fibonacci, which contains loop and recursion | | example/fibonacci_env.py | Call python/native function in wasm | | example/pi.py | Calculate π using the agm algorithm | diff --git a/example/blake2b_simd.py b/example/blake2b_simd.py new file mode 100644 index 00000000..059e5490 
--- /dev/null +++ b/example/blake2b_simd.py @@ -0,0 +1,23 @@ +import pywasm + +runtime = pywasm.core.Runtime() +module = runtime.instance_from_file('example/blake2b_simd/bin/blake2b_simd.wasm') +memory = runtime.exported_memory(module, 'memory') + +data = bytearray(b'abc') +data_size = len(data) +data_ptr = runtime.invocate(module, 'alloc', [data_size])[0] +hash_size = 64 +hash_ptr = runtime.invocate(module, 'alloc', [hash_size])[0] +memory.put(data_ptr, data) +runtime.invocate(module, 'blake2b', [data_ptr, data_size, hash_ptr, hash_size]) +hash = memory.get(hash_ptr, hash_size) +print(hash.hex()) +assert hash == bytearray([ + 0xba, 0x80, 0xa5, 0x3f, 0x98, 0x1c, 0x4d, 0x0d, 0x6a, 0x27, 0x97, 0xb6, 0x9f, 0x12, 0xf6, 0xe9, + 0x4c, 0x21, 0x2f, 0x14, 0x68, 0x5a, 0xc4, 0xb7, 0x4b, 0x12, 0xbb, 0x6f, 0xdb, 0xff, 0xa2, 0xd1, + 0x7d, 0x87, 0xc5, 0x39, 0x2a, 0xab, 0x79, 0x2d, 0xc2, 0x52, 0xd5, 0xde, 0x45, 0x33, 0xcc, 0x95, + 0x18, 0xd3, 0x8a, 0xa8, 0xdb, 0xf1, 0x92, 0x5a, 0xb9, 0x23, 0x86, 0xed, 0xd4, 0x00, 0x99, 0x23, +]) +runtime.invocate(module, 'dealloc', [data_ptr, data_size]) +runtime.invocate(module, 'dealloc', [hash_ptr, hash_size]) diff --git a/example/blake2b_simd/.cargo/config.toml b/example/blake2b_simd/.cargo/config.toml new file mode 100644 index 00000000..5bd38ca9 --- /dev/null +++ b/example/blake2b_simd/.cargo/config.toml @@ -0,0 +1,5 @@ +[build] +target = "wasm32-unknown-unknown" + +[target.wasm32-unknown-unknown] +rustflags = ["-C", "target-feature=+simd128"] diff --git a/example/blake2b_simd/.gitignore b/example/blake2b_simd/.gitignore new file mode 100644 index 00000000..ea8c4bf7 --- /dev/null +++ b/example/blake2b_simd/.gitignore @@ -0,0 +1 @@ +/target diff --git a/example/blake2b_simd/Cargo.lock b/example/blake2b_simd/Cargo.lock new file mode 100644 index 00000000..3d1edd22 --- /dev/null +++ b/example/blake2b_simd/Cargo.lock @@ -0,0 +1,16 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "blake2b_simd" +version = "1.0.0" +dependencies = [ + "blake2ya", +] + +[[package]] +name = "blake2ya" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0e6144a361a49cc2a282f75205a8e8658dac95e99020acdd7c9831fc4baf01" diff --git a/example/blake2b_simd/Cargo.toml b/example/blake2b_simd/Cargo.toml new file mode 100644 index 00000000..14aa9bf4 --- /dev/null +++ b/example/blake2b_simd/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "blake2b_simd" +version = "1.0.0" +edition = "2024" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +blake2ya = "1.0" diff --git a/example/blake2b_simd/bin/blake2b_simd.wasm b/example/blake2b_simd/bin/blake2b_simd.wasm new file mode 100755 index 0000000000000000000000000000000000000000..3a10205985d8d6bc50f4e0b20844a159104f3fb7 GIT binary patch literal 31645 zcmc(Id6Zn&S?617>#lC8EK9a*ZMUA=c1ta_YG0fX^g1aS#W67?`F@)Wa1O&&!I1nJ5@gEMu%<&llWCjQZ%p3)5G{4_> z@2h&H?p7p`laXEh-n+|pzy15}cVBf|_2rYcWm)!jjXdmJxM1;zec@sEf?RCgM{ti0 zHd*=sjj;_-4eUQyl{&*f8pJumjAJ}OX!)v zw5_xLhvnfF{!INQr&vkfI%IwA`_XVGX4t8VY)x~O)T{=jZf1!uUt@_UH(KJKYzJ!a zeTI>9B`fs<#M(LHui3RxV)D8>tNYgMH;2JTJUnT$2_2~) z;$YJPUZy@AdUlkX`a$#jdc-g}`P~Cjbb3;>2c_urrT9je#flQMQ%?w7rzgAQtpLlu z)W_MAc9gRDUeEmMA^YC*Y}oG} z@nXu_?ZF=+xi*)f<;T`P^I2eG)6~R(HsXdse-@Dk>}P6T}q3W&jJ9l$RFh%8Rag9-)tW zJ6Baic^9Yt;vb&)qu=bm++Rn$g0|_wyF^swpER;dgE$~5Iu^OR=q78cGeqNRa?~Eg5ef*8#yy7 zfG~b9$Ae>c_u9d+{j1&q&V0xNm;-85ZC_QMx=R3WH^h7Z0z~6hfx1=gQ@g>JRd1i# zE}((#UF-zt2^k^)ANIDYu~oHO?GacQzlZCb@UXxhwL=E(P~&XgEnqNWjO`**up9r6 ztNqdiBoC?s(sh7Sq@BnsaCt9L79ePmWw!D8UZrHhURi06no#?I@~Wr$%iX)`O(^Ky zgc?)Omwgl$U|^7^z(>#U(Zdq=fPKUdQiQ-T0)i6o_zAVYN8P2OL2TY=&Cl5=3j9;FP{n!dXg6!-2h2k#D%)PsCCaRg`e zq;WE7Tpd)f+uID5Son4YyWGQP5OF&w9uvNM>NcShBgWVXEcB3ZkT}E{-~#}QQ8=U< z!U2T6M0FqW29waKNIxd;HM~F2NAw^$K1Mnx)IBn2Ou-I>6MzhU?f?}8e@1Pihh{kfC0+##Km|(e2fl|;;G51gv(E?R%rcfSo=63P{5QI~p z`}Q8!fvR)@8+583*8vp&-$jx%q7x)>j3PUz#toeZsKGta9H2}N^dJXv!*nRKv;(aj 
zn*QFTgD`nO+z&r6h$WhDH+0@kjTW8XRG#}mCk$YKQ?Q^lhdKu-s-gfwnIp48i`H6Qrilsusq;ugXE4dr=& zbRO75p3=UdJR!&lut)4shXy!`ePrkj$dh^tItMrf6T2ySLczw6Ly-G@dEVQ%&vE*p z@lE6@?HkGy>_g0=nGRqnTBrTcb=W8M7XD+s=Igk!S0RL5!#;(Iz&^3^7}*Z)Zz#|G z^msS!Z(-CEU=MU*087F8y8Z1q=oC#4`CIX9L7s2}{uY`vMrRac3}a+cNS-2-8~3+B zhyE7O2jmHLz5)BB-a=0YI3@Oa6Y>*5o@g+CzZ=tt{$kf{bd5RagZu>NPJHRQ4rvcY* zz&>G`V+?bf`&)?S#{Dhymi|`aWys$`IydNVg)<>}if6kC`vk|w=u-Rn(4aiW=?QM! z-wMP7>!O0l_qydty@j3*$WtQIO~@0Bm$1<|_ewomHq)sYDX zaZ~mwaRom%_qX8Sjr&{BNq-By9k5TykFHyu^ta-{ z;wSYMdOBdA(Bpo7djtL!^Tz1@H}|(t>KpgBAdvo6yhcdtB+6g6eNt~l(?gtsNN&nL zMQ_2aVT=YJB z9}9a(JTOAGq5ufOP%sz+oiM4}O_2y&=RNdndra1XDh?|ty}F6&l|)dC!T+KBRCwrACF7?ji&%JgVtI^#*8R1LCnv2DD6jPjGk967E8|1#|_~o#9o_ zr5?L#r@(SSF)BhkSG^HdL;(;ZZZqXc6e*>&qNou&$p>HqK19j6D7lO781P0*k4ZaK zOt6Vz5=)8^Q>Q>dZjZY2BH%!8iGw%96#-Fjg2uR{0}vfZ6xs%gT=H-bw^8LtmqQ36 z7t1axr7D5qqQ|67@Bqjp%B`j*3xF<`#A->Qk6jnlt)Lsi+G(~*z!OR?s0&F@m|Iyf z1wYu;BMG(JTd^if&uO(SV3U9cW#~bP7i?|xy`iqbN9QiI;rm`g&~@7nllGg3`=Ko{Ep`&GVu9FWCFeC9_z<90$gMyIm_9GS ze|Jq}^S4gKxnQ5i$=rE!GPh2H1EmEZC>gXU9R-y{>L_&%SK?t%h|KyHP)O23ida7# z#m&@(vS?1gesN&~iWpV#_g|MDV&D|cyFxnX`k7hi$)}y!w=BzgD+w>+`if1@4m73M z^mMpMS(vfr$Q0|4wPt&k|C`o(!B1%U{e9!JA$ntZgWMEVf98W>Rkz?Y{L8}OKu`eY zKPiI@&wtl-!1LZos5|F5fyV$dYn}_`ad;>xW=K{#{#P74mK=7x3L4g*kVb?Is06OG zWz~ZM`70ZMwO-(W@GZ-I(tc|s@%hjG-SIygntj`Izw`LTpE>@Sx4r{YpG3cmcm^XT zE`Q{ox_`Iu_P;6ZnZEDu-}^}!@k}t{d5rkO$Nubv3-8ZXaR<~vsz?H4^C?8uYT=?U;e#|ul)U=SQ3z?ElsSRPJ_S8z@X2min>52*V%?)B}XyLxT$7tVW$DfQ%Y zpbo%P;yJop{5uOcuyR1Bh~y+Ziq#|YC1B=P8!oSpC};2d%R+-#ps z{V4s4ijF%8-==HAT`KMHGLnwb0XhCdHznV9yDWBVg1Y(wUj;h&0>l#nBY7wcWZh5_8iqJT zENFnq7aiM$2Lgd)5!Rt?Q7(4c1uJz!iVco5_%67QF>8@yyV8h{4#N}pML{WNj(GsC z@L9zFhlP6r9HB#y(DAId5~CuZrH~}(r2rRAT;f|#0h8{17IXqn+s^o}^GWx`&h)w& zcx->9-(XAF4gu(mcx^)f_6URz7=TUPA+%s=XbPUN*d>|CGzoS#TY@%N$@goHYaJC1 zdaj`#gRZ9EB`dY{Q5H1gzzc_ge$F;fee2s8j)YMzW4@o9K)X zQD8*@VahCiAOJeM(h+7(NTqD}1vER-OlUUx@JQ%(0GQ^fwO+Y321HN{6vmz1zO&*# z{R1${S?zO82XrFHXdp{qF&L3}S;8=whLO`;^{>XQH<9v!^U=x)L--ic#3b9l 
zjP2619|lr}XFb7@Zz8%CkNjhxQbb3js+_b<4FyLMX_LM?AhKZ#eavW0k?;uiV z=J0dh(OoNPm!1I%wC9W;IfdN}tqMjfp$WYTflI5d0hQEpgAoWiMs~tq77vWxng|#-2ZV@7{(8)g!sPt35GF3G;@BMOG876-agGpIt^V5n=yi}0q1!?yScNwu zLox@e2JH{4X^FbJN9#Bj)5alGh%O@z44PlS$p~Dgn4rsrdL}4OJriUk1KI`#5d)5J z4SE%3_08d%Y6DS9WpHJTxku^-+l7qfn5fG7zylao!OD7!0Z4;(?>~!g+Q#<8y-mj zMlTatFY@51%O_cY_~9w-utiMTi~9RPtp7C8C*E8_yIzZSChi21o;^0oP$Pb1#1ONA zX=|*tBJ{9UxYJSr^RavDyE4%{;v(J(;^!Rq%!+4eR|eVkT$!7eYzS(XNF#2-%h8pA zr?geXM127}U=Gd^0@Wg6d?LxNs5tzCP~q7|Bo0NwA3c#E?*`K_$G=e0OiU*K@g8TW zzl9IKB~0b*{|EH{o`n@~(el4=<@*E<5KoZsB(@mP-L%L<{D_AvDgw{O4Zr2TVSJkl z-{y#Kv$Su6^tEq`QiDt;tF{ z!K#TvU2%@|MI2*t1y1;v-BY|dw##!NanCse8+H--!8&2ag=O*TvtGh?S1?8gKaW&B zFL6-(6Z5e+GaFRTw9m#ahe?@_DhYC$L@%L%GsQj%$kE@r60dPL3-2n9Ox(vv9C}4u zMS)>BTCgy3@YO^Onh;*pJu%4rxCsH2jf%5;1|ray@-UY-+vNt#+eu*yjd&OVJuya& znB;}RTT3U0amhL*VL>yfyox|hK`~0){;qVwJ)#onAr%J~gJcvKZ};Np1D~07#sk2x zo*|QUqMix=1zckkY&h!#&PaePbTLq=7?~U;4%@*Fhn$ipNE3-BHlBhX`~?6)*Hu)f zjsTkS;>-|b{|O`r0;!{riVmh|5ZQLqK@9`m5>BCxvc*=;F_Hma04#9>?nB2#uN;^S zg1C=74L$=hx8}O1PGGAIm2m=<0g#Z&0D8hpkQ$U+sWNeu7*Ls^kjhZow65R|sz~Jl zRzVJgotNyYwStS%=sH~tX5LHJs5%n9bfE3q!jO;3qPV-5c6TtTp1WhsI>_9>UMx&^ z7t?+Y5saw23N=V^E!1p~4@yk7MZ5$dxE@A{Am=$Ra_;BiMi;S7p$`CfLoe=V8%{5J zgH)_3kTjtQX6cC#q47SUP#et??bk^t%ou(>5fGZ_5gJd+CJOqNNP`wpkOYL&w`Zne zMn=F;tQNjPOc=U=IH^@eo3NNZcopygNDIDDIkA7}s%TBTE50ob-HG;eM@5hjAw*tn z-wZftx05h4=xd;&I7HGu^OzUq?hcQ~U~X`t3eQ7aM?53dAI8Y~OdKg-Vi#PigJHO# zp9S9;j1=u8tV+;13WbXA&Y^6I5n=y<2l|gV{YP9|xcC`u=CQv2AZz*M0_`BFGSn=3U&|Dii%%d2%dP(iJ&U?BpV=B`eB(4sdIDDb@^I~~{m*)=?@ zt*xF){UUc>pbHsKB#5me!;%4wq_JnGu3wT_%!#}q$=0NSUWmOfN%}?n9HtiXs4-)& zPRCe&F{zgY)dCHSYFfX%#4ILXR`kBCdtWiVX0WuP_lvssk4>)`EUoB$Rrh|`^qRrair&|B?_Zi; zGgw;tyb zAD=dla)lrVAD=OgaxFQZ7<_!zJjxaFHIL8p3i%H9N-MbWY2EwT!NJms-WPT6OQzQh zmR9s$(!F0Wy=JhqqW5Lp`-}GS_r7U*&0uLo?`7TlIPI-JSX%M8#%2iA^qRrairy!6??+6p z87!^neOmW^%=DVU(u&?^bnhojuNf?@Fyk1%!p5yTxATZ^!QRo#dREV(YnVf#_W`)3 
zS%f|h8G(2{1e73y!H`*h9OW@mv$lq<)XSXA?Fs@DcLE4hvJGUc!7Nq?a5ZAdWL$m34;N7(6gO3vrht!L8%${#^5dHA5ZV5QHr)gVhxMCO1PtP_u zfeb!JKpA$h0w94sir$%S7DoMp{Rn`H&q}^g%25kE8%6 zO3FDpiriHtm@eybhQn6YNi`voLxFRcs7s27Cc2`b_oJbWcsvL#&r|+h%!hEo8*%U? zl(5N0oLL3)B1nUoDcHgM0z%Oj7)FA-1n7jWN>Nl$3D#nbaLC7aFXK#TtrndIP^6#M z9IIlzXek9)K1phKlp7bRmjPRpfu;BcrKMrSbb$%Nc2V7;D#K2&JQFYu3F@zIFOhd~ z!Ui+}(KWn6dSG~kNp84kAqfxh=<|wOSZq>ta*I`aywXQhL^C00Q_Nlx$lGyYRx2#S zO5~O7?QxIOYNL}dO01$Y1$2l7%M*FtM?Kh~==7H9ez4!rshPlx`&(hhgxE`sd!-}r zB*>?6C%R3Y3gy$mW&#>4={H0wLI8&YSHhXH8R7_yQn2OLYJj|$!XF^YI5Jd4`ov)k ztTIO`v}Gc&0Vt5Z?M{^x!!VU z?Fp0(?t4(#0(P=|9{Te9KRPI0gTGK`bo}0x?}NIsij>0PyM>JrB+WxLR+U&f*F~jS ze`GfGpP0q#6Dk`z792ORf{Sxh1~Ui3lXCs4ROIRU00W&o^0(&1*Gu_WfeUk7k|?tn zJ}2kQ^gVief=Y&1&8e*(L1;;Rlz5x%Kj zco(5}#(i*Rf@RjuGF;eNgKYn`CoZi413=>s?1TA;;SyUYVv7M~BI|1pky zVM;xL`-8JDJdRf#(dqaK!opE3nCX9nXr(@@k=|#4MYcagtF^)3pD-&~T4R$-I3RV7 zU`%jSBgc*rAHPb0&J`z+_&z;K@&to}W2gt9-8+l7^&q;`k85cD=lN2HKj~lQHF&Yo zf09I?^+jHT*3ZQ4n5@WE&AL3-vT#1YfGR6TxpLgBEDNR+7%SyH)PlLXneJ%Ppl_Nq z52w*SSVO5lR|Y2cabjN*TmhN|L9tRZ;w$XlKY|_ zh7?n;DRVJOlGa|_M3w*-4b3q*1JVU`q)V-OQE*TVfAr2*I#rV2>AY)IS3{x9Nr&n=m;{#IO%{v!34BxkX#9fP}PNrxJAEI zRSYegfYcQ_DGUXvblH5iDrvU#n;)C%bA3WJ;W z*tN4PhT;jK2x#t>v=J$%=<^f@q{{L}z&Lw&@`HCwaEG<|cD&=G9p~r_qd}8$Q_J(l^;CA?~CDfQ?F`3S^g&&dUXB(Bj$pa zEEC<5|I-e0H5?8%W9rC;OvTR6-?<4>(Y`TLcVZI21x(!;V(NO1j*z1PZNekN(TxZU zqIdG6c0btTtxrb>b+H$?AEPup4YK=5hW@osIx&>k4P-EQgxbQyC{PXDk}$sc6*MXA z`sD>w3X&X*h--A!M70JqjwYHTBI=;@i<4&Ek%hi3jHCl zXp7uu?ZL&p&*B@iNX_tytcr^2r?`MjN0eg?X54v_zy@I;u^vPYp{R!$_68-5BO8rJ zV?Y4AkeUA+iGiaW1Yjk13v*My!0tOuryYIBBv7K|~y@ z|MZ7|0Abow5xylQ&Z3u@?zNmFJcQBY1g^zN{SskWNo^>pr}R_r_h7wz)$`}|^Up`^ zxXELgyQ8#%-vTE}Ls9GhKmc0Tv?^JL?Janl6ewdyz-*F?&`GkBx`bhHoj4+9JC!Hx z;KK$Cb5HSoZJ7~44#*CC;?1$2vh1nOcdBsz(MmgWD(g?G=Q(q_+w9-Q!l z+k1f9W;V%!Bf<s2*`nQTWQNVLI$G`?|0xEM7SVj_d%+r2TLQD=ds=`k5ZoaAbW77 zYRf^;SHwE>kK7`gD&0?oh^irc4@Mx$%RG7w4-WPi=Hc>QB-g^Tjn|M@Q3Od@0@xyC zhgT8aB_A9xBG(v^OC#cb1xAE-_-HvStVVpp&^E}yMC4Vq#Cjv)|c|9yR0n2kiEHm{iMsEQoT54MRd0@(Svzfi) 
zAFHwxP5mLUfNn=U8sWlYV`JsghC#c9kwIsVbCb7wg;v- zG9?;J5!MA$U~;aCzRli(%P=KR?rbMe$R8XV!ec5}Lvlcl01+E750+<`heH%79AAGD z)dpemYJO~2;TgOTkk^0-_8JUAXtbE9GX*xfZzunwyOBEj!+7NZ+!R&$3Qr|W zOAH zI}|nSe`>Jp0{k>w(3b%~PshZ>)f3OtnGZ~i02mOB`By&mzakI7Peq~T;uJ-SQGW=9 zaW+zP25ndcoTTmf4?znA(AdIn$~5=<=lb1^4@J&{&NQlX6)zAb^Hw?h&a{0Rium&ADS78!#r zk~n-Bfo8S?2YIl1jimJPb!cb+-(eQ0SdQ_Rm7Gh00m}%GIKG!bMbSJ%>>V!WaYk1Zh>4n=7;2e4g#oiG=6nkxzgi1i8hIQ;65+4Up=@Eh21DLiI zL8eEbE8y~wL~0a}dUPUSG5RH8B19U*2<3b=32VgW(xapdAK`#>_oJbOXgHcqlJ3!7 zG=%4h(~I__AzqM#b!&n}SyfUlh*5Bq*xwRETMffBh6ZCH$buV%k#j4N@U}chiCsB& z%4fZkg=Hib06_5gy`YR@s9-UOxT)W!faLvNGAkB}m3(g=@t7fUV0k~Y(eg-DWE8#@ zJOCMjgTF)^9*moS34Y=0<4LokMS;kQDAZWTfGsZXWvX;ds_}x0S$3mEY;c6JPRfWq z4xp7Bz>#Z+1D0aVE~9t(U!lj_eq8U8Ek96Fr_y}mTUsCkB#u1vu>(!Q^pINGVYLL{zFN98FDj-J^1TEl zl7(Fd0Kqk5<00ScCjXt?PlH*wi<-;=`!McFqs(WqKVV&XJ1*GLr=UW(V1w^}j9Gwe z@~2P}97D^$r=OG47H;x++EPKQlo?EGLRvF`6~#eL$x*oOL_QLvukEZKJshQ$W1$s?|v&~4dd$KQwco}UG==r zJRd%>c(Q%?bmR2G%IU+C&3gCv;^A)l_{{Q3_uOGZo>^$M&rUYG^_BMWW8wEnaP~Y(M`mtzP#M-uFL@EPJIRh-KVnZ?cbsn)IIm$ z2kuG*%X%C*PXnJn$A@?Z&-a26Td#WFdDSy^8@=&qjNc!6#-9tLV!r2}C|xBkfVnRfH=$;B3!^1Pk09s=A+$Fgq0HGD zh$om{Kii+)oB#8eKZ^PKuRkb3ur9)0>?qYZ2aPdex2%`3<_?VgAGmT36jpe&ywVbC zWHLy36aAEN3%n<8VcsP0%zwPHo&Ee5JX7x}Kk|}mY{4gm564oDyeGf6;nS=y)tfUb z=M=fpnO}SaZL4ld`uE?b#0Jr1{V3WXCMyGZXL6Z*rjRLSN||z|lBs4h*=#nK&1VbQ zVz!hmXDiuiE|bgVa=Co2kSpd&xpJBbEa-mYF7Bj_cF;~nN3&mowR4f-O#cC;2%9e7ae5p_>mP(~^sZy$zGv#bKSI(CU zMKwPtbT(=z)$wY+$`3oJA&gUI-P)mns7EiA66 zll7J63Ds&ZugolHr5hXym?P1Ybq}sNTqkgS2C{@&S=Pr7L>B5N+t#*QjHjAjo;lf? 
ze1w0W<w4=i5uw>9O~`f2_IKZBI^W?VOw!hdntNa@=F5bD6RFvRw4o&dHU@ z$#=CIr;m@_|NgN&9<${Wom{R{$Ts0Aik()cldswOPo+2me<{Tc{!mIJ_&X`l1Jm#O z_A~EXE>*ML6Yp=_pQ*K{%hS`-;*qjBb2lyc#SUI;Y#|tH{sK$<`bKy4-)l)^uLZjr zz!p5rpM4psTw_YEG&rTrAUJ)ZzR;R)cZUyy|CJ!Fl#Pr^-FEAAvkhhztGRYFlc^S4 z*+Q$3Yt)kEjV6>Is(0%rbB$7K<~Y;@6U(`Ju2pR|TZM8tm#?&HBO6bg(toa|bb1MY zqJ}G$>YZvSmn{^U&0Mos$<#)#J+Z%HJ6EZ;8ihZ45y>bn$f(So32}ZE#bcL$~AFNhvmz%{>p;<50 zE5%Z$UfZ!auv&TDY6y*!lS}o5ndY1z-#}Uh(%2AK%H-tH#V$Odrt6{lx%tIy+r|%%{@d|W>%@bJPkIHFNi$tcCnmm7csL~tld71iN%U& z;KmHy*a?aN1)GePV)I0u3zV`QSVJRI$mJW=O0HPj`LF+(3m7fY_?IiiOr}`QXS4N8 zv)#$o?#L1RfX-&KotcH1Zb`c0cWMEwkPeddR~ufVxdRtzb~ z4kjlNX^>o=TV83O3}@*50Y3jOW_@L`OEb}aA7%bchNTM3>@-nt$Y&8$atJNCX04Q8KQpKIQ#ztxQn6fbRgu|c;iL+HP%ASd)YurD zDrSl#OZ|!YN)ygFpYLP~oqV%dtGo*nbt);l?xJk=b0v8&?2@Kw?A)hW`or>>oCHB6 z!qsNB(`wYJQyW4lXef-+>tNJdokp|V&epTVYPnF)*6y1OV9@zgbvnG#?&g=E@gTWg z1aGU2Mgygoe6>=0+reObKN6XAxO}47K<3uyG^(9^sh%%XYH#oDNcyAoo!OCKPv2>` z%GE}$)oOII`P#SK6Tp`}jO4w*qf{vta_vR|DQ_E9m|X20Jq~hQe&O`V{LIOvoK`H{ z&5ItL&gZBw1+!Ycfh4ff$+a4F?3x^rkLBrfeD3C?G0Q%8b1#jv*Iw4Ak!7rU)?H_((yxsxx{s}Mt`l&_ZB zwRf^2)1x!sR#7{!9=F2v7Za2iHc-+EGt&gAY^GJnw_Eu}5qsQHu2Xy0F|JV%%N~2* z(XkEo+MD<|lzp>EKr5*5WlG4`YPI_bN@I6TP*+un8s#R^hg`8$&gL5h>`0Fu4b#1; zd~0k|J__7-<$pG*H9L zmg)RX&MZr*GSF@K-Etd6D{Oq=ZAzKid(M2rm)VHCe7;i7RdXejmQihPR%-EWwzYMd z&bm1_y|hSY(H$DLt@vbjk$;)ldT?k7cPP1amz~5s>c*$*$J=(~eTazG2)deW9PYFZ zHO|>l#_#5y>|~`fnNj1tgA#>mc=1rCbSRhKZ~eHv6;)^Cr_-HweFeLu<)MjWxNF+itELVx@k5apCyH@JaZc89Do!*n0BxO1&}PK6HG3 zu~DC2p15^kaRD2?r6tr%4t17RNZ7= size + min_overhead\00 \02\10\00)\00\00\00\ac\04\00\00\09\00\00\00assertion failed: psize <= size + max_overhead\00\00 \02\10\00)\00\00\00\b2\04\00\00\0d\00\00\00memory allocation of bytes 
failed\00\00\c8\02\10\00\15\00\00\00\dd\02\10\00\0d\00\00\00library/std/src/alloc.rs\fc\02\10\00\18\00\00\00d\01\00\00\09\00\00\00\04\00\00\00\0c\00\00\00\04\00\00\00\08\00\00\00\00\00\00\00\08\00\00\00\04\00\00\00\09\00\00\00\00\00\00\00\08\00\00\00\04\00\00\00\0a\00\00\00\0b\00\00\00\0c\00\00\00\0d\00\00\00\0e\00\00\00\10\00\00\00\04\00\00\00\0f\00\00\00\10\00\00\00\11\00\00\00\12\00\00\00capacity overflow\00\00\00|\03\10\00\11\00\00\00): \00\01\00\00\00\00\00\00\00\99\03\10\00\02\00\00\0000010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899range start index out of range for slice of length t\04\10\00\12\00\00\00\86\04\10\00\22\00\00\00range end index \b8\04\10\00\10\00\00\00\86\04\10\00\22\00\00\00slice index starts at but ends at \00\d8\04\10\00\16\00\00\00\ee\04\10\00\0d\00\00\00copy_from_slice: source slice length () does not match destination slice length (\00\00\00\0c\05\10\00&\00\00\002\05\10\00+\00\00\00\98\03\10\00\01\00\00\00")) diff --git a/example/blake2b_simd/src/lib.rs b/example/blake2b_simd/src/lib.rs new file mode 100644 index 00000000..5825b115 --- /dev/null +++ b/example/blake2b_simd/src/lib.rs @@ -0,0 +1,33 @@ +#[unsafe(no_mangle)] +pub extern "C" fn alloc(size: usize) -> *mut u8 { + let layout = std::alloc::Layout::array::(size).unwrap(); + unsafe { std::alloc::alloc(layout) } +} + +#[unsafe(no_mangle)] +pub extern "C" fn alloc_zeroed(size: usize) -> *mut u8 { + let layout = std::alloc::Layout::array::(size).unwrap(); + unsafe { std::alloc::alloc_zeroed(layout) } +} + +#[unsafe(no_mangle)] +pub extern "C" fn blake2b( + data_ptr: *mut u8, + data_size: usize, + hash_ptr: *mut u8, + hash_size: usize, +) { + let data_buf = unsafe { std::slice::from_raw_parts(data_ptr, data_size) }; + let hash_buf = unsafe { std::slice::from_raw_parts_mut(hash_ptr, hash_size) }; + let mut p = blake2ya::blake2b_params(); 
+ p.digest(64); + let mut h = blake2ya::blake2b(p); + h.update(data_buf); + h.digest(hash_buf); +} + +#[unsafe(no_mangle)] +pub extern "C" fn dealloc(ptr: *mut u8, size: usize) { + let layout = std::alloc::Layout::array::(size).unwrap(); + unsafe { std::alloc::dealloc(ptr, layout) } +} diff --git a/pywasm/__init__.py b/pywasm/__init__.py index aca036b1..4db1e28e 100644 --- a/pywasm/__init__.py +++ b/pywasm/__init__.py @@ -1,5 +1,6 @@ import platform +from . import arith from . import core from . import leb128 from . import log diff --git a/pywasm/arith.py b/pywasm/arith.py new file mode 100644 index 00000000..ce54dee9 --- /dev/null +++ b/pywasm/arith.py @@ -0,0 +1,222 @@ +import ctypes +import math +import struct + + +class I: + def __init__(self, blen: int) -> None: + self.bits_length = blen * 8 + self.byte_length = blen + self.mask = (1 << self.bits_length) - 1 + self.mask_sign = 1 << (self.bits_length - 1) + self.max = (1 << (self.bits_length - 1)) - 1 + self.min = -1 << (self.bits_length - 1) + + def from_bytearray(self, data: bytearray) -> int: + assert len(data) == self.byte_length + return int.from_bytes(data, 'little', signed=True) + + def into_bytearray(self, n: int) -> bytearray: + return bytearray(n.to_bytes(self.byte_length, 'little', signed=True)) + + def fit(self, n: int) -> int: + n = n & self.mask + n = n - ((n & self.mask_sign) << 1) + return n + + def sat(self, n: int) -> int: + return max(min(n, self.max), self.min) + + def add(self, a: int, b: int) -> int: + return self.fit(a + b) + + def add_sat(self, a: int, b: int) -> int: + return self.sat(a + b) + + def sub(self, a: int, b: int) -> int: + return self.fit(a - b) + + def sub_sat(self, a: int, b: int) -> int: + return self.sat(a - b) + + def mul(self, a: int, b: int) -> int: + return self.fit(a * b) + + def div(self, a: int, b: int) -> int: + assert a != self.min or b != -1 + # Python's default division of integers is return the floor (towards negative infinity) with no + # ability to change 
that. You can read the BDFL's reason why. + # See: https://python-history.blogspot.com/2010/08/why-pythons-integer-division-floors.html + # But in webassembly, it requires do truncation towards zero. + return a // b if a * b > 0 else (a + (-a % b)) // b + + def rem(self, a: int, b: int) -> int: + return a % b if a * b > 0 else -(-a % b) + + def shl(self, a: int, b: int) -> int: + return self.fit(a << (b & (self.bits_length - 1))) + + def shr(self, a: int, b: int) -> int: + return self.fit(a >> (b & (self.bits_length - 1))) + + +class U: + def __init__(self, blen: int) -> None: + self.bits_length = blen * 8 + self.byte_length = blen + self.mask = (1 << self.bits_length) - 1 + self.mask_sign = 1 << (self.bits_length - 1) + self.max = self.mask + self.min = 0 + + def from_bytearray(self, data: bytearray) -> int: + assert len(data) == self.byte_length + return int.from_bytes(data, 'little', signed=False) + + def into_bytearray(self, n: int) -> bytearray: + return bytearray(n.to_bytes(self.byte_length, 'little', signed=False)) + + def fit(self, n: int) -> int: + return n & self.mask + + def sat(self, n: int) -> int: + return max(min(n, self.max), self.min) + + def add(self, a: int, b: int) -> int: + return self.fit(a + b) + + def add_sat(self, a: int, b: int) -> int: + return self.sat(a + b) + + def sub(self, a: int, b: int) -> int: + return self.fit(a - b) + + def sub_sat(self, a: int, b: int) -> int: + return self.sat(a - b) + + def mul(self, a: int, b: int) -> int: + return self.fit(a * b) + + def div(self, a: int, b: int) -> int: + return a // b + + def rem(self, a: int, b: int) -> int: + return a % b + + def shl(self, a: int, b: int) -> int: + return self.fit(a << (b & (self.bits_length - 1))) + + def shr(self, a: int, b: int) -> int: + return self.fit(a >> (b & (self.bits_length - 1))) + + def rotl(self, a: int, b: int) -> int: + b = b % self.bits_length + return ((a << b) & self.mask) | (a >> (self.bits_length - b)) + + def rotr(self, a: int, b: int) -> int: + b 
= b % self.bits_length + return (a >> b) | ((a << (self.bits_length - b)) & self.mask) + + def clz(self, a: int) -> int: + b = 0 + for _ in range(self.bits_length): + if a & self.mask_sign != 0: + break + b += 1 + a = a << 1 + return b + + def ctz(self, a: int) -> int: + b = 0 + for _ in range(self.bits_length): + if a & 1 != 0: + break + b += 1 + a = a >> 1 + return b + + def popcnt(self, a: int) -> int: + b = 0 + for _ in range(self.bits_length): + if a & 1 != 0: + b += 1 + a = a >> 1 + return b + + +class F32: + def __init__(self): + pass + + def from_bytearray(self, data: bytearray) -> float: + assert len(data) == 4 + return struct.unpack('f', data)[0] + + def into_bytearray(self, n: float) -> bytearray: + return bytearray(struct.pack('f', n)) + + def fit(self, n: float) -> float: + return ctypes.c_float(n).value + + def div(self, a: float, b: float) -> float: + return self.fit(f64.div(a, b)) + + def min(self, a: float, b: float) -> float: + return self.fit(f64.min(a, b)) + + def max(self, a: float, b: float) -> float: + return self.fit(f64.max(a, b)) + + +class F64: + def __init__(self): + pass + + def from_bytearray(self, data: bytearray) -> float: + assert len(data) == 8 + return struct.unpack('d', data)[0] + + def into_bytearray(self, n: float) -> bytearray: + return bytearray(struct.pack('d', n)) + + def fit(self, n: float) -> float: + return n + + def div(self, a: float, b: float) -> float: + match b: + case 0: + s = +1 if math.copysign(1, a) == math.copysign(1, b) else -1 + c = math.copysign(math.inf, s) + if a == 0 or math.isnan(a): + c = math.copysign(math.nan, s) + case _: + c = a / b + return c + + def min(self, a: float, b: float) -> float: + c = min(a, b) + if math.isnan(a): + c = a + if math.isnan(b): + c = b + return c + + def max(self, a: float, b: float) -> float: + c = max(a, b) + if math.isnan(a): + c = a + if math.isnan(b): + c = b + return c + + +i8 = I(1) +u8 = U(1) +i16 = I(2) +u16 = U(2) +i32 = I(4) +i64 = I(8) +u32 = U(4) +u64 = U(8) 
+f32 = F32() +f64 = F64() diff --git a/pywasm/core.py b/pywasm/core.py index c4ef88bd..4241b4ef 100644 --- a/pywasm/core.py +++ b/pywasm/core.py @@ -1,6 +1,7 @@ import ctypes import io import math +import pywasm.arith import pywasm.leb128 import pywasm.log import pywasm.opcode @@ -12,7 +13,7 @@ class ValType: # Value types are encoded by a single byte. def __init__(self, data: int) -> typing.Self: - assert data in [0x7f, 0x7e, 0x7d, 0x7c, 0x70, 0x6f] + assert data in [0x7f, 0x7e, 0x7d, 0x7c, 0x7b, 0x70, 0x6f] self.data = data def __eq__(self, value: typing.Self) -> bool: @@ -27,6 +28,7 @@ def __repr__(self) -> str: 0x7e: 'i64', 0x7d: 'f32', 0x7c: 'f64', + 0x7b: 'v128', 0x70: 'ref.func', 0x6f: 'ref.extern', }[self.data] @@ -47,6 +49,10 @@ def f32(cls) -> typing.Self: def f64(cls) -> typing.Self: return cls(0x7c) + @classmethod + def v128(cls) -> typing.Self: + return cls(0x7b) + @classmethod def ref_func(cls) -> typing.Self: return cls(0x70) @@ -63,7 +69,7 @@ def from_reader(cls, r: typing.BinaryIO) -> typing.Self: class ValInst: # Values are represented by themselves. 
- blen = 8 + blen = 16 def __init__(self, type: ValType, data: bytearray) -> typing.Self: assert len(data) == self.blen @@ -83,6 +89,8 @@ def __repr__(self) -> str: return f'{self.type} {self.into_f32()}' case 0x7c: return f'{self.type} {self.into_f64()}' + case 0x7b: + return f'{self.type} {self.into_v128().hex()}' case 0x70: body = repr(self.into_ref()) if self.data[4] != 0x00 else 'null' return f'{self.type} {body}' @@ -92,25 +100,19 @@ def __repr__(self) -> str: @classmethod def from_i32(cls, n: int) -> typing.Self: - n = n & 0xffffffff - n = n - ((n & 0x80000000) << 1) - return cls(ValType.i32(), bytearray(struct.pack(' typing.Self: - n = n & 0xffffffffffffffff - n = n - ((n & 0x8000000000000000) << 1) - return cls(ValType.i64(), bytearray(struct.pack(' typing.Self: - n = n & 0xffffffff - return cls(ValType.i32(), bytearray(struct.pack(' typing.Self: - n = n & 0xffffffffffffffff - return cls(ValType.i64(), bytearray(struct.pack(' typing.Self: @@ -133,12 +135,77 @@ def from_f64_u64(cls, n: int) -> typing.Self: o.type = ValType.f64() return o + @classmethod + def from_v128(cls, n: bytearray) -> typing.Self: + assert len(n) == 16 + return cls(ValType.v128(), n + bytearray(cls.blen - 16)) + + @classmethod + def from_v128_i8(cls, n: typing.List[int]) -> typing.Self: + assert len(n) == 16 + data = bytearray().join([pywasm.arith.i8.into_bytearray(e) for e in n]) + bytearray(cls.blen - 16) + return cls(ValType.v128(), data) + + @classmethod + def from_v128_u8(cls, n: typing.List[int]) -> typing.Self: + assert len(n) == 16 + data = bytearray().join([pywasm.arith.u8.into_bytearray(e) for e in n]) + bytearray(cls.blen - 16) + return cls(ValType.v128(), data) + + @classmethod + def from_v128_i16(cls, n: typing.List[int]) -> typing.Self: + assert len(n) == 8 + data = bytearray().join([pywasm.arith.i16.into_bytearray(e) for e in n]) + bytearray(cls.blen - 16) + return cls(ValType.v128(), data) + + @classmethod + def from_v128_u16(cls, n: typing.List[int]) -> typing.Self: + 
assert len(n) == 8 + data = bytearray().join([pywasm.arith.u16.into_bytearray(e) for e in n]) + bytearray(cls.blen - 16) + return cls(ValType.v128(), data) + + @classmethod + def from_v128_i32(cls, n: typing.List[int]) -> typing.Self: + assert len(n) == 4 + data = bytearray().join([pywasm.arith.i32.into_bytearray(e) for e in n]) + bytearray(cls.blen - 16) + return cls(ValType.v128(), data) + + @classmethod + def from_v128_u32(cls, n: typing.List[int]) -> typing.Self: + assert len(n) == 4 + data = bytearray().join([pywasm.arith.u32.into_bytearray(e) for e in n]) + bytearray(cls.blen - 16) + return cls(ValType.v128(), data) + + @classmethod + def from_v128_i64(cls, n: typing.List[int]) -> typing.Self: + assert len(n) == 2 + data = bytearray().join([pywasm.arith.i64.into_bytearray(e) for e in n]) + bytearray(cls.blen - 16) + return cls(ValType.v128(), data) + + @classmethod + def from_v128_u64(cls, n: typing.List[int]) -> typing.Self: + assert len(n) == 2 + data = bytearray().join([pywasm.arith.u64.into_bytearray(e) for e in n]) + bytearray(cls.blen - 16) + return cls(ValType.v128(), data) + + @classmethod + def from_v128_f32(cls, n: typing.List[float]) -> typing.Self: + assert len(n) == 4 + data = bytearray().join([pywasm.arith.f32.into_bytearray(e) for e in n]) + bytearray(cls.blen - 16) + return cls(ValType.v128(), data) + + @classmethod + def from_v128_f64(cls, n: typing.List[float]) -> typing.Self: + assert len(n) == 2 + data = bytearray().join([pywasm.arith.f64.into_bytearray(e) for e in n]) + bytearray(cls.blen - 16) + return cls(ValType.v128(), data) + @classmethod def from_ref(cls, type: ValType, n: int) -> typing.Self: return cls(type, bytearray(struct.pack(' typing.Self: + def from_all(cls, type: ValType, n: typing.Union[int, float, bytearray]) -> typing.Self: match type.data: case 0x7f: return cls.from_i32(n) @@ -148,6 +215,8 @@ def from_all(cls, type: ValType, n: typing.Union[int, float]) -> typing.Self: return cls.from_f32(n) case 0x7c: return 
cls.from_f64(n) + case 0x7b: + return cls.from_v128(n) case 0x70: return cls.from_ref(type, n) case 0x6f: @@ -173,11 +242,44 @@ def into_f32(self) -> float: def into_f64(self) -> float: return struct.unpack(' bytearray: + return self.data[:16].copy() + + def into_v128_i8(self) -> typing.List[int]: + return [pywasm.arith.i8.fit(e) for e in self.data] + + def into_v128_u8(self) -> typing.List[int]: + return [pywasm.arith.u8.fit(e) for e in self.data] + + def into_v128_i16(self) -> typing.List[int]: + return [pywasm.arith.i16.from_bytearray(bytearray(self.data[i:i + 2])) for i in range(0, 16, 2)] + + def into_v128_u16(self) -> typing.List[int]: + return [pywasm.arith.u16.from_bytearray(bytearray(self.data[i:i + 2])) for i in range(0, 16, 2)] + + def into_v128_i32(self) -> typing.List[int]: + return [pywasm.arith.i32.from_bytearray(bytearray(self.data[i:i + 4])) for i in range(0, 16, 4)] + + def into_v128_u32(self) -> typing.List[int]: + return [pywasm.arith.u32.from_bytearray(bytearray(self.data[i:i + 4])) for i in range(0, 16, 4)] + + def into_v128_i64(self) -> typing.List[int]: + return [pywasm.arith.i64.from_bytearray(bytearray(self.data[i:i + 8])) for i in range(0, 16, 8)] + + def into_v128_u64(self) -> typing.List[int]: + return [pywasm.arith.u64.from_bytearray(bytearray(self.data[i:i + 8])) for i in range(0, 16, 8)] + + def into_v128_f32(self) -> typing.List[float]: + return [pywasm.arith.f32.from_bytearray(bytearray(self.data[i:i + 4])) for i in range(0, 16, 4)] + + def into_v128_f64(self) -> typing.List[float]: + return [pywasm.arith.f64.from_bytearray(bytearray(self.data[i:i + 8])) for i in range(0, 16, 8)] + def into_ref(self) -> int: assert self.data[4] == 0x01 return self.into_i32() - def into_all(self) -> typing.Union[int, float]: + def into_all(self) -> typing.Union[int, float, bytearray]: match self.type.data: case 0x7f: return self.into_i32() @@ -187,6 +289,8 @@ def into_all(self) -> typing.Union[int, float]: return self.into_f32() case 0x7c: return 
self.into_f64() + case 0x7b: + return self.into_v128() case 0x70: return self.into_ref() case 0x6f: @@ -223,7 +327,7 @@ def from_reader(cls, r: typing.BinaryIO) -> typing.Self: n = ord(r.read(1)) if n == 0x40: return cls(0x00, 0x40) - if n in [0x7f, 0x7e, 0x7d, 0x7c, 0x70, 0x6f]: + if n in [0x7f, 0x7e, 0x7d, 0x7c, 0x7b, 0x70, 0x6f]: return cls(0x01, n) r.seek(-1, 1) return cls(0x02, pywasm.leb128.i.decode_reader(r)[0]) @@ -263,6 +367,7 @@ def from_reader(cls, r: typing.BinaryIO) -> typing.Self: if b >= 0xfc: e = pywasm.leb128.u.encode(pywasm.leb128.u.decode_reader(r)[0]) b = int.from_bytes(bytearray([b]) + e) + assert b in pywasm.opcode.name, hex(b) o = Inst(b, []) match o.opcode: case pywasm.opcode.block: @@ -457,6 +562,149 @@ def from_reader(cls, r: typing.BinaryIO) -> typing.Self: o.args.append(pywasm.leb128.u.decode_reader(r)[0]) case pywasm.opcode.table_fill: o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + case pywasm.opcode.v128_load: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 5 + case pywasm.opcode.v128_load8x8_s: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 4 + case pywasm.opcode.v128_load8x8_u: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 4 + case pywasm.opcode.v128_load16x4_s: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 4 + case pywasm.opcode.v128_load16x4_u: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 4 + case pywasm.opcode.v128_load32x2_s: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 4 + case pywasm.opcode.v128_load32x2_u: + 
o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 4 + case pywasm.opcode.v128_load8_splat: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 1 + case pywasm.opcode.v128_load16_splat: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 2 + case pywasm.opcode.v128_load32_splat: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 3 + case pywasm.opcode.v128_load64_splat: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 4 + case pywasm.opcode.v128_store: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 5 + case pywasm.opcode.v128_const: + o.args.append(bytearray(r.read(16))) + case pywasm.opcode.i8x16_shuffle: + for _ in range(16): + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + for e in o.args: + assert e < 32 + case pywasm.opcode.i8x16_extract_lane_s: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 16 + case pywasm.opcode.i8x16_extract_lane_u: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 16 + case pywasm.opcode.i8x16_replace_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 16 + case pywasm.opcode.i16x8_extract_lane_s: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 8 + case pywasm.opcode.i16x8_extract_lane_u: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 8 + case pywasm.opcode.i16x8_replace_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 8 + case pywasm.opcode.i32x4_extract_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert 
o.args[0] < 4 + case pywasm.opcode.i32x4_replace_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 4 + case pywasm.opcode.i64x2_extract_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 2 + case pywasm.opcode.i64x2_replace_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 2 + case pywasm.opcode.f32x4_extract_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 4 + case pywasm.opcode.f32x4_replace_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 4 + case pywasm.opcode.f64x2_extract_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 2 + case pywasm.opcode.f64x2_replace_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[0] < 2 + case pywasm.opcode.v128_load8_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[2] < 16 + case pywasm.opcode.v128_load16_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[2] < 8 + case pywasm.opcode.v128_load32_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[2] < 4 + case pywasm.opcode.v128_load64_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[2] < 2 + case pywasm.opcode.v128_store8_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[2] < 16 + case pywasm.opcode.v128_store16_lane: + 
o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[2] < 8 + case pywasm.opcode.v128_store32_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[2] < 4 + case pywasm.opcode.v128_store64_lane: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + assert o.args[2] < 2 + case pywasm.opcode.v128_load32_zero: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + case pywasm.opcode.v128_load64_zero: + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) + o.args.append(pywasm.leb128.u.decode_reader(r)[0]) return o @@ -1674,16 +1922,16 @@ def evaluate(self) -> None: b = self.stack.value.pop().into_u32() tabl.elem[b] = a case pywasm.opcode.i32_load: - a = ValInst.from_i32(struct.unpack(' None: self.stack.value.append(c) case pywasm.opcode.i32_clz: a = self.stack.value.pop().into_u32() - b = 0 - for _ in range(32): - if a & 0x80000000 != 0: - break - b += 1 - a = a << 1 + b = pywasm.arith.u32.clz(a) c = ValInst.from_i32(b) self.stack.value.append(c) case pywasm.opcode.i32_ctz: a = self.stack.value.pop().into_u32() - b = 0 - for _ in range(32): - if a & 0x00000001 != 0: - break - b += 1 - a = a >> 1 + b = pywasm.arith.u32.ctz(a) c = ValInst.from_i32(b) self.stack.value.append(c) case pywasm.opcode.i32_popcnt: a = self.stack.value.pop().into_u32() - b = 0 - for _ in range(32): - if a & 0x00000001 != 0: - b += 1 - a = a >> 1 + b = pywasm.arith.u32.popcnt(a) c = ValInst.from_i32(b) self.stack.value.append(c) case pywasm.opcode.i32_add: b = self.stack.value.pop().into_i32() a = self.stack.value.pop().into_i32() - c = ValInst.from_i32(a + b) + c = 
ValInst.from_i32(pywasm.arith.i32.add(a, b)) self.stack.value.append(c) case pywasm.opcode.i32_sub: b = self.stack.value.pop().into_i32() a = self.stack.value.pop().into_i32() - c = ValInst.from_i32(a - b) + c = ValInst.from_i32(pywasm.arith.i32.sub(a, b)) self.stack.value.append(c) case pywasm.opcode.i32_mul: b = self.stack.value.pop().into_i32() a = self.stack.value.pop().into_i32() - c = ValInst.from_i32(a * b) + c = ValInst.from_i32(pywasm.arith.i32.mul(a, b)) self.stack.value.append(c) case pywasm.opcode.i32_div_s: b = self.stack.value.pop().into_i32() a = self.stack.value.pop().into_i32() - assert a != -1 << 31 or b != -1 - # Python's default division of integers is return the floor (towards negative infinity) with no - # ability to change that. You can read the BDFL's reason why. - # See: https://python-history.blogspot.com/2010/08/why-pythons-integer-division-floors.html - # But in webassembly, it requires do truncation towards zero. - c = a // b if a * b > 0 else (a + (-a % b)) // b - d = ValInst.from_i32(c) - self.stack.value.append(d) + c = ValInst.from_i32(pywasm.arith.i32.div(a, b)) + self.stack.value.append(c) case pywasm.opcode.i32_div_u: b = self.stack.value.pop().into_u32() a = self.stack.value.pop().into_u32() - c = ValInst.from_i32(a // b) + c = ValInst.from_u32(pywasm.arith.u32.div(a, b)) self.stack.value.append(c) case pywasm.opcode.i32_rem_s: b = self.stack.value.pop().into_i32() a = self.stack.value.pop().into_i32() - c = a % b if a * b > 0 else -(-a % b) - d = ValInst.from_i32(c) - self.stack.value.append(d) + c = ValInst.from_i32(pywasm.arith.i32.rem(a, b)) + self.stack.value.append(c) case pywasm.opcode.i32_rem_u: b = self.stack.value.pop().into_u32() a = self.stack.value.pop().into_u32() - c = ValInst.from_i32(a % b) + c = ValInst.from_u32(pywasm.arith.u32.rem(a, b)) self.stack.value.append(c) case pywasm.opcode.i32_and: b = self.stack.value.pop().into_i32() @@ -2020,94 +2247,77 @@ def evaluate(self) -> None: case pywasm.opcode.i32_shl: b 
= self.stack.value.pop().into_i32() a = self.stack.value.pop().into_i32() - c = ValInst.from_i32(a << (b % 0x20)) + c = ValInst.from_i32(pywasm.arith.i32.shl(a, b)) self.stack.value.append(c) case pywasm.opcode.i32_shr_s: b = self.stack.value.pop().into_i32() a = self.stack.value.pop().into_i32() - c = ValInst.from_i32(a >> (b % 0x20)) + c = ValInst.from_i32(pywasm.arith.i32.shr(a, b)) self.stack.value.append(c) case pywasm.opcode.i32_shr_u: b = self.stack.value.pop().into_u32() a = self.stack.value.pop().into_u32() - c = ValInst.from_i32(a >> (b % 0x20)) + c = ValInst.from_u32(pywasm.arith.u32.shr(a, b)) self.stack.value.append(c) case pywasm.opcode.i32_rotl: - b = self.stack.value.pop().into_i32() + b = self.stack.value.pop().into_u32() a = self.stack.value.pop().into_u32() - c = ValInst.from_i32((((a << (b % 0x20)) & 0xffffffff) | (a >> (0x20 - (b % 0x20))))) + c = ValInst.from_u32(pywasm.arith.u32.rotl(a, b)) self.stack.value.append(c) case pywasm.opcode.i32_rotr: - b = self.stack.value.pop().into_i32() + b = self.stack.value.pop().into_u32() a = self.stack.value.pop().into_u32() - c = ValInst.from_i32(((a >> (b % 0x20)) | ((a << (0x20 - (b % 0x20))) & 0xffffffff))) + c = ValInst.from_u32(pywasm.arith.u32.rotr(a, b)) self.stack.value.append(c) case pywasm.opcode.i64_clz: a = self.stack.value.pop().into_u64() - b = 0 - for _ in range(64): - if a & 0x8000000000000000 != 0: - break - b += 1 - a = a << 1 + b = pywasm.arith.u64.clz(a) c = ValInst.from_i64(b) self.stack.value.append(c) case pywasm.opcode.i64_ctz: a = self.stack.value.pop().into_u64() - b = 0 - for _ in range(64): - if a & 0x0000000000000001 != 0: - break - b += 1 - a = a >> 1 + b = pywasm.arith.u64.ctz(a) c = ValInst.from_i64(b) self.stack.value.append(c) case pywasm.opcode.i64_popcnt: a = self.stack.value.pop().into_u64() - b = 0 - for _ in range(64): - if a & 0x0000000000000001 != 0: - b += 1 - a = a >> 1 + b = pywasm.arith.u64.popcnt(a) c = ValInst.from_i64(b) self.stack.value.append(c) case 
pywasm.opcode.i64_add: b = self.stack.value.pop().into_i64() a = self.stack.value.pop().into_i64() - c = ValInst.from_i64(a + b) + c = ValInst.from_i64(pywasm.arith.i64.add(a, b)) self.stack.value.append(c) case pywasm.opcode.i64_sub: b = self.stack.value.pop().into_i64() a = self.stack.value.pop().into_i64() - c = ValInst.from_i64(a - b) + c = ValInst.from_i64(pywasm.arith.i64.sub(a, b)) self.stack.value.append(c) case pywasm.opcode.i64_mul: b = self.stack.value.pop().into_i64() a = self.stack.value.pop().into_i64() - c = ValInst.from_i64(a * b) + c = ValInst.from_i64(pywasm.arith.i64.mul(a, b)) self.stack.value.append(c) case pywasm.opcode.i64_div_s: b = self.stack.value.pop().into_i64() a = self.stack.value.pop().into_i64() - assert a != -1 << 63 or b != -1 - c = a // b if a * b > 0 else (a + (-a % b)) // b - d = ValInst.from_i64(c) - self.stack.value.append(d) + c = ValInst.from_i64(pywasm.arith.i64.div(a, b)) + self.stack.value.append(c) case pywasm.opcode.i64_div_u: b = self.stack.value.pop().into_u64() a = self.stack.value.pop().into_u64() - c = ValInst.from_i64(a // b) + c = ValInst.from_i64(pywasm.arith.u64.div(a, b)) self.stack.value.append(c) case pywasm.opcode.i64_rem_s: b = self.stack.value.pop().into_i64() a = self.stack.value.pop().into_i64() - c = a % b if a * b > 0 else -(-a % b) - d = ValInst.from_i64(c) - self.stack.value.append(d) + c = ValInst.from_i64(pywasm.arith.i64.rem(a, b)) + self.stack.value.append(c) case pywasm.opcode.i64_rem_u: b = self.stack.value.pop().into_u64() a = self.stack.value.pop().into_u64() - c = ValInst.from_i64(a % b) + c = ValInst.from_u64(pywasm.arith.u64.rem(a, b)) self.stack.value.append(c) case pywasm.opcode.i64_and: b = self.stack.value.pop().into_i64() @@ -2127,27 +2337,27 @@ def evaluate(self) -> None: case pywasm.opcode.i64_shl: b = self.stack.value.pop().into_i64() a = self.stack.value.pop().into_i64() - c = ValInst.from_i64(a << (b % 0x40)) + c = ValInst.from_i64(pywasm.arith.i64.shl(a, b)) 
self.stack.value.append(c) case pywasm.opcode.i64_shr_s: b = self.stack.value.pop().into_i64() a = self.stack.value.pop().into_i64() - c = ValInst.from_i64(a >> (b % 0x40)) + c = ValInst.from_i64(pywasm.arith.i64.shr(a, b)) self.stack.value.append(c) case pywasm.opcode.i64_shr_u: b = self.stack.value.pop().into_u64() a = self.stack.value.pop().into_u64() - c = ValInst.from_i64(a >> (b % 0x40)) + c = ValInst.from_u64(pywasm.arith.u64.shr(a, b)) self.stack.value.append(c) case pywasm.opcode.i64_rotl: - b = self.stack.value.pop().into_i64() + b = self.stack.value.pop().into_u64() a = self.stack.value.pop().into_u64() - c = ValInst.from_i64((((a << (b % 0x40)) & 0xffffffffffffffff) | (a >> (0x40 - (b % 0x40))))) + c = ValInst.from_u64(pywasm.arith.u64.rotl(a, b)) self.stack.value.append(c) case pywasm.opcode.i64_rotr: - b = self.stack.value.pop().into_i64() + b = self.stack.value.pop().into_u64() a = self.stack.value.pop().into_u64() - c = ValInst.from_i64(((a >> (b % 0x40)) | ((a << (0x40 - (b % 0x40))) & 0xffffffffffffffff))) + c = ValInst.from_u64(pywasm.arith.u64.rotr(a, b)) self.stack.value.append(c) case pywasm.opcode.f32_abs: a = self.stack.value.pop() @@ -2202,34 +2412,19 @@ def evaluate(self) -> None: case pywasm.opcode.f32_div: b = self.stack.value.pop().into_f32() a = self.stack.value.pop().into_f32() - match b: - case 0: - s = +1 if math.copysign(1, a) == math.copysign(1, b) else -1 - c = math.copysign(math.inf, s) - if a == 0 or math.isnan(a): - c = math.copysign(math.nan, s) - case _: - c = a / b + c = pywasm.arith.f32.div(a, b) d = ValInst.from_f32(c) self.stack.value.append(d) case pywasm.opcode.f32_min: b = self.stack.value.pop().into_f32() a = self.stack.value.pop().into_f32() - c = min(a, b) - if math.isnan(a): - c = a - if math.isnan(b): - c = b + c = pywasm.arith.f32.min(a, b) d = ValInst.from_f32(c) self.stack.value.append(d) case pywasm.opcode.f32_max: b = self.stack.value.pop().into_f32() a = self.stack.value.pop().into_f32() - c = max(a, b) - 
if math.isnan(a): - c = a - if math.isnan(b): - c = b + c = pywasm.arith.f32.max(a, b) d = ValInst.from_f32(c) self.stack.value.append(d) case pywasm.opcode.f32_copysign: @@ -2290,34 +2485,19 @@ def evaluate(self) -> None: case pywasm.opcode.f64_div: b = self.stack.value.pop().into_f64() a = self.stack.value.pop().into_f64() - match b: - case 0: - s = +1 if math.copysign(1, a) == math.copysign(1, b) else -1 - c = math.copysign(math.inf, s) - if a == 0 or math.isnan(a): - c = math.copysign(math.nan, s) - case _: - c = a / b + c = pywasm.arith.f64.div(a, b) d = ValInst.from_f64(c) self.stack.value.append(d) case pywasm.opcode.f64_min: b = self.stack.value.pop().into_f64() a = self.stack.value.pop().into_f64() - c = min(a, b) - if math.isnan(a): - c = a - if math.isnan(b): - c = b + c = pywasm.arith.f64.min(a, b) d = ValInst.from_f64(c) self.stack.value.append(d) case pywasm.opcode.f64_max: b = self.stack.value.pop().into_f64() a = self.stack.value.pop().into_f64() - c = max(a, b) - if math.isnan(a): - c = a - if math.isnan(b): - c = b + c = pywasm.arith.f64.max(a, b) d = ValInst.from_f64(c) self.stack.value.append(d) case pywasm.opcode.f64_copysign: @@ -2327,7 +2507,7 @@ def evaluate(self) -> None: self.stack.value.append(c) case pywasm.opcode.i32_wrap_i64: a = self.stack.value.pop().into_i64() - b = ValInst.from_i32(a) + b = ValInst.from_i32(pywasm.arith.i32.fit(a)) self.stack.value.append(b) case pywasm.opcode.i32_trunc_f32_s: a = self.stack.value.pop().into_f32() @@ -2443,34 +2623,29 @@ def evaluate(self) -> None: self.stack.value.append(b) case pywasm.opcode.i32_extend8_s: a = self.stack.value.pop().into_i32() - b = a & 0xff - c = b - ((b & 0x80) << 1) - d = ValInst.from_i32(c) - self.stack.value.append(d) + b = pywasm.arith.i8.fit(a) + c = ValInst.from_i32(b) + self.stack.value.append(c) case pywasm.opcode.i32_extend16_s: a = self.stack.value.pop().into_i32() - b = a & 0xffff - c = b - ((b & 0x8000) << 1) - d = ValInst.from_i32(c) - self.stack.value.append(d) + 
b = pywasm.arith.i16.fit(a) + c = ValInst.from_i32(b) + self.stack.value.append(c) case pywasm.opcode.i64_extend8_s: a = self.stack.value.pop().into_i64() - b = a & 0xff - c = b - ((b & 0x80) << 1) - d = ValInst.from_i64(c) - self.stack.value.append(d) + b = pywasm.arith.i8.fit(a) + c = ValInst.from_i64(b) + self.stack.value.append(c) case pywasm.opcode.i64_extend16_s: a = self.stack.value.pop().into_i64() - b = a & 0xffff - c = b - ((b & 0x8000) << 1) - d = ValInst.from_i64(c) - self.stack.value.append(d) + b = pywasm.arith.i16.fit(a) + c = ValInst.from_i64(b) + self.stack.value.append(c) case pywasm.opcode.i64_extend32_s: a = self.stack.value.pop().into_i64() - b = a & 0xffffffff - c = b - ((b & 0x80000000) << 1) - d = ValInst.from_i64(c) - self.stack.value.append(d) + b = pywasm.arith.i32.fit(a) + c = ValInst.from_i64(b) + self.stack.value.append(c) case pywasm.opcode.ref_null: a = ValInst.zero(ValType(instr.args[0])) self.stack.value.append(a) @@ -2488,56 +2663,56 @@ def evaluate(self) -> None: a = self.stack.value.pop().into_f32() if math.isnan(a): a = 0.0 - b = int(max(-0x80000000, min(a, +0x7fffffff))) + b = int(max(pywasm.arith.i32.min, min(a, pywasm.arith.i32.max))) c = ValInst.from_i32(b) self.stack.value.append(c) case pywasm.opcode.i32_trunc_sat_f32_u: a = self.stack.value.pop().into_f32() if math.isnan(a): a = 0.0 - b = int(max(+0x00000000, min(a, +0xffffffff))) + b = int(max(pywasm.arith.u32.min, min(a, pywasm.arith.u32.max))) c = ValInst.from_u32(b) self.stack.value.append(c) case pywasm.opcode.i32_trunc_sat_f64_s: a = self.stack.value.pop().into_f64() if math.isnan(a): a = 0.0 - b = int(max(-0x80000000, min(a, +0x7fffffff))) + b = int(max(pywasm.arith.i32.min, min(a, pywasm.arith.i32.max))) c = ValInst.from_i32(b) self.stack.value.append(c) case pywasm.opcode.i32_trunc_sat_f64_u: a = self.stack.value.pop().into_f64() if math.isnan(a): a = 0.0 - b = int(max(+0x00000000, min(a, +0xffffffff))) + b = int(max(pywasm.arith.u32.min, min(a, 
pywasm.arith.u32.max))) c = ValInst.from_u32(b) self.stack.value.append(c) case pywasm.opcode.i64_trunc_sat_f32_s: a = self.stack.value.pop().into_f32() if math.isnan(a): a = 0.0 - b = int(max(-0x8000000000000000, min(a, +0x7fffffffffffffff))) + b = int(max(pywasm.arith.i64.min, min(a, pywasm.arith.i64.max))) c = ValInst.from_i64(b) self.stack.value.append(c) case pywasm.opcode.i64_trunc_sat_f32_u: a = self.stack.value.pop().into_f32() if math.isnan(a): a = 0.0 - b = int(max(+0x0000000000000000, min(a, +0xffffffffffffffff))) + b = int(max(pywasm.arith.u64.min, min(a, pywasm.arith.u64.max))) c = ValInst.from_u64(b) self.stack.value.append(c) case pywasm.opcode.i64_trunc_sat_f64_s: a = self.stack.value.pop().into_f64() if math.isnan(a): a = 0.0 - b = int(max(-0x8000000000000000, min(a, +0x7fffffffffffffff))) + b = int(max(pywasm.arith.i64.min, min(a, pywasm.arith.i64.max))) c = ValInst.from_i64(b) self.stack.value.append(c) case pywasm.opcode.i64_trunc_sat_f64_u: a = self.stack.value.pop().into_f64() if math.isnan(a): a = 0.0 - b = int(max(+0x0000000000000000, min(a, +0xffffffffffffffff))) + b = int(max(pywasm.arith.u64.min, min(a, pywasm.arith.u64.max))) c = ValInst.from_u64(b) self.stack.value.append(c) case pywasm.opcode.memory_init: @@ -2616,6 +2791,1318 @@ def evaluate(self) -> None: assert d + n <= len(tabl.elem) for i in range(n): tabl.elem[d+i] = s + case pywasm.opcode.v128_load: + a = ValInst.from_v128(self.evaluate_mem_load(instr.args[1], 16)) + self.stack.value.append(a) + case pywasm.opcode.v128_load8x8_s: + a = self.evaluate_mem_load(instr.args[1], 8) + b = bytearray() + for i in range(8): + n = a[i] + n = n - ((n & 0x80) << 1) + b.extend(bytearray(n.to_bytes(2, 'little', signed=True))) + self.stack.value.append(ValInst.from_v128(b)) + case pywasm.opcode.v128_load8x8_u: + a = self.evaluate_mem_load(instr.args[1], 8) + b = bytearray() + for i in range(8): + n = a[i] + b.extend(bytearray(n.to_bytes(2, 'little'))) + 
self.stack.value.append(ValInst.from_v128(b)) + case pywasm.opcode.v128_load16x4_s: + a = self.evaluate_mem_load(instr.args[1], 8) + b = bytearray() + for i in range(4): + n = int.from_bytes(a[i*2:i*2+2], 'little') + n = n - ((n & 0x8000) << 1) + b.extend(bytearray(n.to_bytes(4, 'little', signed=True))) + self.stack.value.append(ValInst.from_v128(b)) + case pywasm.opcode.v128_load16x4_u: + a = self.evaluate_mem_load(instr.args[1], 8) + b = bytearray() + for i in range(4): + n = int.from_bytes(a[i*2:i*2+2], 'little') + b.extend(bytearray(n.to_bytes(4, 'little'))) + self.stack.value.append(ValInst.from_v128(b)) + case pywasm.opcode.v128_load32x2_s: + a = self.evaluate_mem_load(instr.args[1], 8) + b = bytearray() + for i in range(2): + n = int.from_bytes(a[i*4:i*4+4], 'little') + n = n - ((n & 0x80000000) << 1) + b.extend(bytearray(n.to_bytes(8, 'little', signed=True))) + self.stack.value.append(ValInst.from_v128(b)) + case pywasm.opcode.v128_load32x2_u: + a = self.evaluate_mem_load(instr.args[1], 8) + b = bytearray() + for i in range(2): + n = int.from_bytes(a[i*4:i*4+4], 'little') + b.extend(bytearray(n.to_bytes(8, 'little'))) + self.stack.value.append(ValInst.from_v128(b)) + case pywasm.opcode.v128_load8_splat: + a = self.evaluate_mem_load(instr.args[1], 1) + self.stack.value.append(ValInst.from_v128(a * 16)) + case pywasm.opcode.v128_load16_splat: + a = self.evaluate_mem_load(instr.args[1], 2) + self.stack.value.append(ValInst.from_v128(a * 8)) + case pywasm.opcode.v128_load32_splat: + a = self.evaluate_mem_load(instr.args[1], 4) + self.stack.value.append(ValInst.from_v128(a * 4)) + case pywasm.opcode.v128_load64_splat: + a = self.evaluate_mem_load(instr.args[1], 8) + self.stack.value.append(ValInst.from_v128(a * 2)) + case pywasm.opcode.v128_store: + self.evaluate_mem_save(instr.args[1], 16) + case pywasm.opcode.v128_const: + a = ValInst.from_v128(instr.args[0]) + self.stack.value.append(a) + case pywasm.opcode.i8x16_shuffle: + b = 
self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = a + b + d = [c[instr.args[i]] for i in range(16)] + e = ValInst.from_v128_i8(d) + self.stack.value.append(e) + case pywasm.opcode.i8x16_swizzle: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_i8() + c = [a[b[i]] if b[i] < 16 else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_splat: + a = self.stack.value.pop().into_i32() + b = ValInst.from_v128_i8([pywasm.arith.i8.fit(a)] * 16) + self.stack.value.append(b) + case pywasm.opcode.i16x8_splat: + a = self.stack.value.pop().into_i32() + b = ValInst.from_v128_i16([pywasm.arith.i16.fit(a)] * 8) + self.stack.value.append(b) + case pywasm.opcode.i32x4_splat: + a = self.stack.value.pop().into_i32() + b = ValInst.from_v128_i32([a] * 4) + self.stack.value.append(b) + case pywasm.opcode.i64x2_splat: + a = self.stack.value.pop().into_i64() + b = ValInst.from_v128_i64([a] * 2) + self.stack.value.append(b) + case pywasm.opcode.f32x4_splat: + a = self.stack.value.pop().data[0:4] + b = ValInst.from_v128(a * 4) + self.stack.value.append(b) + case pywasm.opcode.f64x2_splat: + a = self.stack.value.pop().data[0:8] + b = ValInst.from_v128(a * 2) + self.stack.value.append(b) + case pywasm.opcode.i8x16_extract_lane_s: + a = self.stack.value.pop().into_v128_i8() + b = ValInst.from_i32(a[instr.args[0]]) + self.stack.value.append(b) + case pywasm.opcode.i8x16_extract_lane_u: + a = self.stack.value.pop().into_v128_u8() + b = ValInst.from_i32(a[instr.args[0]]) + self.stack.value.append(b) + case pywasm.opcode.i8x16_replace_lane: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i8() + a[instr.args[0]] = pywasm.arith.i8.fit(b) + c = ValInst.from_v128_i8(a) + self.stack.value.append(c) + case pywasm.opcode.i16x8_extract_lane_s: + a = self.stack.value.pop().into_v128_i16() + b = ValInst.from_i32(a[instr.args[0]]) + 
self.stack.value.append(b) + case pywasm.opcode.i16x8_extract_lane_u: + a = self.stack.value.pop().into_v128_u16() + b = ValInst.from_i32(a[instr.args[0]]) + self.stack.value.append(b) + case pywasm.opcode.i16x8_replace_lane: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i16() + a[instr.args[0]] = pywasm.arith.i16.fit(b) + c = ValInst.from_v128_i16(a) + self.stack.value.append(c) + case pywasm.opcode.i32x4_extract_lane: + a = self.stack.value.pop().into_v128_i32() + b = ValInst.from_i32(a[instr.args[0]]) + self.stack.value.append(b) + case pywasm.opcode.i32x4_replace_lane: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i32() + a[instr.args[0]] = b + c = ValInst.from_v128_i32(a) + self.stack.value.append(c) + case pywasm.opcode.i64x2_extract_lane: + a = self.stack.value.pop().into_v128_i64() + b = ValInst.from_i64(a[instr.args[0]]) + self.stack.value.append(b) + case pywasm.opcode.i64x2_replace_lane: + b = self.stack.value.pop().into_i64() + a = self.stack.value.pop().into_v128_i64() + a[instr.args[0]] = b + c = ValInst.from_v128_i64(a) + self.stack.value.append(c) + case pywasm.opcode.f32x4_extract_lane: + a = self.stack.value.pop().into_v128() + b = instr.args[0] + c = ValInst(ValType.f32(), a[b * 4: b * 4 + 4] + bytearray(ValInst.blen - 4)) + self.stack.value.append(c) + case pywasm.opcode.f32x4_replace_lane: + b = self.stack.value.pop().into_u32() + a = self.stack.value.pop().into_v128() + c = instr.args[0] + a[c * 4: c * 4 + 4] = pywasm.arith.u32.into_bytearray(b) + d = ValInst(ValType.v128(), a) + self.stack.value.append(d) + case pywasm.opcode.f64x2_extract_lane: + a = self.stack.value.pop().into_v128() + b = instr.args[0] + c = ValInst(ValType.f64(), a[b * 8: b * 8 + 8] + bytearray(ValInst.blen - 8)) + self.stack.value.append(c) + case pywasm.opcode.f64x2_replace_lane: + b = self.stack.value.pop().into_u64() + a = self.stack.value.pop().into_v128() + c = instr.args[0] + a[c * 8: c * 8 + 8] = 
pywasm.arith.u64.into_bytearray(b) + d = ValInst(ValType.v128(), a) + self.stack.value.append(d) + case pywasm.opcode.i8x16_eq: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [-1 if a[i] == b[i] else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_ne: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [-1 if a[i] != b[i] else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_lt_s: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [-1 if a[i] < b[i] else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_lt_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [-1 if a[i] < b[i] else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_gt_s: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [-1 if a[i] > b[i] else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_gt_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [-1 if a[i] > b[i] else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_le_s: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [-1 if a[i] <= b[i] else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_le_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [-1 if a[i] <= b[i] else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_ge_s: + b = 
self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [-1 if a[i] >= b[i] else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_ge_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [-1 if a[i] >= b[i] else 0 for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_eq: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [-1 if a[i] == b[i] else 0 for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_ne: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [-1 if a[i] != b[i] else 0 for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_lt_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [-1 if a[i] < b[i] else 0 for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_lt_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [-1 if a[i] < b[i] else 0 for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_gt_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [-1 if a[i] > b[i] else 0 for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_gt_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [-1 if a[i] > b[i] else 0 for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_le_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [-1 if a[i] <= b[i] else 0 for i in 
range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_le_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [-1 if a[i] <= b[i] else 0 for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_ge_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [-1 if a[i] >= b[i] else 0 for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_ge_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [-1 if a[i] >= b[i] else 0 for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_eq: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [-1 if a[i] == b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_ne: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [-1 if a[i] != b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_lt_s: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [-1 if a[i] < b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_lt_u: + b = self.stack.value.pop().into_v128_u32() + a = self.stack.value.pop().into_v128_u32() + c = [-1 if a[i] < b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_gt_s: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [-1 if a[i] > b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_gt_u: + b = 
self.stack.value.pop().into_v128_u32() + a = self.stack.value.pop().into_v128_u32() + c = [-1 if a[i] > b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_le_s: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [-1 if a[i] <= b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_le_u: + b = self.stack.value.pop().into_v128_u32() + a = self.stack.value.pop().into_v128_u32() + c = [-1 if a[i] <= b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_ge_s: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [-1 if a[i] >= b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_ge_u: + b = self.stack.value.pop().into_v128_u32() + a = self.stack.value.pop().into_v128_u32() + c = [-1 if a[i] >= b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_eq: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [-1 if a[i] == b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_ne: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [-1 if a[i] != b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_lt: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [-1 if a[i] < b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_gt: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [-1 if a[i] > b[i] else 0 for i in 
range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_le: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [-1 if a[i] <= b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_ge: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [-1 if a[i] >= b[i] else 0 for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_eq: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [-1 if a[i] == b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_ne: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [-1 if a[i] != b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_lt: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [-1 if a[i] < b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_gt: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [-1 if a[i] > b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_le: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [-1 if a[i] <= b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_ge: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [-1 if a[i] >= b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.v128_not: + a = 
self.stack.value.pop().into_v128() + b = ValInst.from_v128(bytearray([pywasm.arith.u8.fit(~e) for e in a])) + self.stack.value.append(b) + case pywasm.opcode.v128_and: + b = self.stack.value.pop().into_v128() + a = self.stack.value.pop().into_v128() + c = ValInst.from_v128(bytearray([a[i] & b[i] for i in range(16)])) + self.stack.value.append(c) + case pywasm.opcode.v128_andnot: + b = self.stack.value.pop().into_v128() + a = self.stack.value.pop().into_v128() + c = ValInst.from_v128(bytearray([a[i] & ~b[i] for i in range(16)])) + self.stack.value.append(c) + case pywasm.opcode.v128_or: + b = self.stack.value.pop().into_v128() + a = self.stack.value.pop().into_v128() + c = ValInst.from_v128(bytearray([a[i] | b[i] for i in range(16)])) + self.stack.value.append(c) + case pywasm.opcode.v128_xor: + b = self.stack.value.pop().into_v128() + a = self.stack.value.pop().into_v128() + c = ValInst.from_v128(bytearray([a[i] ^ b[i] for i in range(16)])) + self.stack.value.append(c) + case pywasm.opcode.v128_bitselect: + c = self.stack.value.pop().into_v128() + b = self.stack.value.pop().into_v128() + a = self.stack.value.pop().into_v128() + d = ValInst.from_v128(bytearray([(a[i] & c[i]) | (b[i] & ~c[i]) for i in range(16)])) + self.stack.value.append(d) + case pywasm.opcode.v128_any_true: + a = self.stack.value.pop().into_v128() + b = 1 if any(e != 0 for e in a) else 0 + c = ValInst.from_i32(b) + self.stack.value.append(c) + case pywasm.opcode.v128_load8_lane: + a = self.stack.value.pop().into_v128() + b = self.evaluate_mem_load(instr.args[1], 1) + c = instr.args[2] + a[1 * c: 1 * c + 1] = b + d = ValInst(ValType.v128(), a + bytearray(ValInst.blen - 16)) + self.stack.value.append(d) + case pywasm.opcode.v128_load16_lane: + a = self.stack.value.pop().into_v128() + b = self.evaluate_mem_load(instr.args[1], 2) + c = instr.args[2] + a[2 * c: 2 * c + 2] = b + d = ValInst(ValType.v128(), a + bytearray(ValInst.blen - 16)) + self.stack.value.append(d) + case 
pywasm.opcode.v128_load32_lane: + a = self.stack.value.pop().into_v128() + b = self.evaluate_mem_load(instr.args[1], 4) + c = instr.args[2] + a[4 * c: 4 * c + 4] = b + d = ValInst(ValType.v128(), a + bytearray(ValInst.blen - 16)) + self.stack.value.append(d) + case pywasm.opcode.v128_load64_lane: + a = self.stack.value.pop().into_v128() + b = self.evaluate_mem_load(instr.args[1], 8) + c = instr.args[2] + a[8 * c: 8 * c + 8] = b + d = ValInst(ValType.v128(), a + bytearray(ValInst.blen - 16)) + self.stack.value.append(d) + case pywasm.opcode.v128_store8_lane: + a = self.stack.value.pop().into_v128() + b = instr.args[2] + a[:1] = a[1 * b: 1 * b + 1] + self.stack.value.append(ValInst.from_v128(a)) + self.evaluate_mem_save(instr.args[1], 1) + case pywasm.opcode.v128_store16_lane: + a = self.stack.value.pop().into_v128() + b = instr.args[2] + a[:2] = a[2 * b: 2 * b + 2] + self.stack.value.append(ValInst.from_v128(a)) + self.evaluate_mem_save(instr.args[1], 2) + case pywasm.opcode.v128_store32_lane: + a = self.stack.value.pop().into_v128() + b = instr.args[2] + a[:4] = a[4 * b: 4 * b + 4] + self.stack.value.append(ValInst.from_v128(a)) + self.evaluate_mem_save(instr.args[1], 4) + case pywasm.opcode.v128_store64_lane: + a = self.stack.value.pop().into_v128() + b = instr.args[2] + a[:8] = a[8 * b: 8 * b + 8] + self.stack.value.append(ValInst.from_v128(a)) + self.evaluate_mem_save(instr.args[1], 8) + case pywasm.opcode.v128_load32_zero: + a = self.evaluate_mem_load(instr.args[1], 4) + b = a + bytearray(ValInst.blen - 4) + c = ValInst.from_v128(b) + self.stack.value.append(c) + case pywasm.opcode.v128_load64_zero: + a = self.evaluate_mem_load(instr.args[1], 8) + b = a + bytearray(ValInst.blen - 8) + c = ValInst.from_v128(b) + self.stack.value.append(c) + case pywasm.opcode.f32x4_demote_f64x2_zero: + a = self.stack.value.pop().into_v128_f64() + b = a + [0.0, 0.0] + c = ValInst.from_v128_f32(b) + self.stack.value.append(c) + case pywasm.opcode.f64x2_promote_low_f32x4: + a = 
self.stack.value.pop().into_v128_f32() + b = a[:2] + c = ValInst.from_v128_f64(b) + self.stack.value.append(c) + case pywasm.opcode.i8x16_abs: + a = self.stack.value.pop().into_v128_i8() + b = [pywasm.arith.i8.fit(abs(e)) for e in a] + c = ValInst.from_v128_i8(b) + self.stack.value.append(c) + case pywasm.opcode.i8x16_neg: + a = self.stack.value.pop().into_v128_i8() + b = [pywasm.arith.i8.fit(-e) for e in a] + c = ValInst.from_v128_i8(b) + self.stack.value.append(c) + case pywasm.opcode.i8x16_popcnt: + a = self.stack.value.pop().into_v128_u8() + b = [pywasm.arith.u8.popcnt(e) for e in a] + c = ValInst.from_v128_u8(b) + self.stack.value.append(c) + case pywasm.opcode.i8x16_all_true: + a = self.stack.value.pop().into_v128_i8() + b = 1 if all(e != 0 for e in a) else 0 + c = ValInst.from_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i8x16_bitmask: + a = self.stack.value.pop().into_v128_i8() + b = 0 + for i in range(16): + if a[i] < 0: + b |= 1 << i + c = ValInst.from_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i8x16_narrow_i16x8_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = a + b + d = [pywasm.arith.i8.sat(e) for e in c] + e = ValInst.from_v128_i8(d) + self.stack.value.append(e) + case pywasm.opcode.i8x16_narrow_i16x8_u: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = a + b + d = [pywasm.arith.u8.sat(e) for e in c] + e = ValInst.from_v128_u8(d) + self.stack.value.append(e) + case pywasm.opcode.f32x4_ceil: + a = self.stack.value.pop().into_v128_f32() + b = [e if math.isnan(e) or math.isinf(e) else float(math.ceil(e)) for e in a] + c = ValInst.from_v128_f32(b) + self.stack.value.append(c) + case pywasm.opcode.f32x4_floor: + a = self.stack.value.pop().into_v128_f32() + b = [e if math.isnan(e) or math.isinf(e) else float(math.floor(e)) for e in a] + c = ValInst.from_v128_f32(b) + self.stack.value.append(c) + case pywasm.opcode.f32x4_trunc: + a = 
self.stack.value.pop().into_v128_f32() + b = [e if math.isnan(e) or math.isinf(e) else float(math.trunc(e)) for e in a] + c = ValInst.from_v128_f32(b) + self.stack.value.append(c) + case pywasm.opcode.f32x4_nearest: + a = self.stack.value.pop().into_v128_f32() + b = [e if math.isnan(e) or math.isinf(e) else float(round(e)) for e in a] + c = ValInst.from_v128_f32(b) + self.stack.value.append(c) + case pywasm.opcode.i8x16_shl: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i8() + c = ValInst.from_v128_i8([pywasm.arith.i8.shl(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i8x16_shr_s: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i8() + c = ValInst.from_v128_i8([pywasm.arith.i8.shr(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i8x16_shr_u: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_u8() + c = ValInst.from_v128_u8([pywasm.arith.u8.shr(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i8x16_add: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [pywasm.arith.i8.add(a[i], b[i]) for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_add_sat_s: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [pywasm.arith.i8.add_sat(a[i], b[i]) for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_add_sat_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [pywasm.arith.u8.add_sat(a[i], b[i]) for i in range(16)] + d = ValInst.from_v128_u8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_sub: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [pywasm.arith.i8.sub(a[i], b[i]) for i in range(16)] + d = ValInst.from_v128_i8(c) + 
self.stack.value.append(d) + case pywasm.opcode.i8x16_sub_sat_s: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [pywasm.arith.i8.sub_sat(a[i], b[i]) for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_sub_sat_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [pywasm.arith.u8.sub_sat(a[i], b[i]) for i in range(16)] + d = ValInst.from_v128_u8(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_ceil: + a = self.stack.value.pop().into_v128_f64() + b = [e if math.isnan(e) or math.isinf(e) else float(math.ceil(e)) for e in a] + c = ValInst.from_v128_f64(b) + self.stack.value.append(c) + case pywasm.opcode.f64x2_floor: + a = self.stack.value.pop().into_v128_f64() + b = [e if math.isnan(e) or math.isinf(e) else float(math.floor(e)) for e in a] + c = ValInst.from_v128_f64(b) + self.stack.value.append(c) + case pywasm.opcode.i8x16_min_s: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [min(a[i], b[i]) for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_min_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [min(a[i], b[i]) for i in range(16)] + d = ValInst.from_v128_u8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_max_s: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [max(a[i], b[i]) for i in range(16)] + d = ValInst.from_v128_i8(c) + self.stack.value.append(d) + case pywasm.opcode.i8x16_max_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [max(a[i], b[i]) for i in range(16)] + d = ValInst.from_v128_u8(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_trunc: + a = self.stack.value.pop().into_v128_f64() + b = [e if math.isnan(e) or math.isinf(e) else 
float(math.trunc(e)) for e in a] + c = ValInst.from_v128_f64(b) + self.stack.value.append(c) + case pywasm.opcode.i8x16_avgr_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [(a[i] + b[i] + 1) // 2 for i in range(16)] + d = ValInst.from_v128_u8(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_extadd_pairwise_i8x16_s: + a = self.stack.value.pop().into_v128_i8() + b = [a[i] + a[i+1] for i in range(0, 16, 2)] + c = ValInst.from_v128_i16(b) + self.stack.value.append(c) + case pywasm.opcode.i16x8_extadd_pairwise_i8x16_u: + a = self.stack.value.pop().into_v128_u8() + b = [a[i] + a[i+1] for i in range(0, 16, 2)] + c = ValInst.from_v128_u16(b) + self.stack.value.append(c) + case pywasm.opcode.i32x4_extadd_pairwise_i16x8_s: + a = self.stack.value.pop().into_v128_i16() + b = [a[i] + a[i+1] for i in range(0, 8, 2)] + c = ValInst.from_v128_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i32x4_extadd_pairwise_i16x8_u: + a = self.stack.value.pop().into_v128_u16() + b = [a[i] + a[i+1] for i in range(0, 8, 2)] + c = ValInst.from_v128_u32(b) + self.stack.value.append(c) + case pywasm.opcode.i16x8_abs: + a = self.stack.value.pop().into_v128_i16() + b = [pywasm.arith.i16.fit(abs(e)) for e in a] + c = ValInst.from_v128_i16(b) + self.stack.value.append(c) + case pywasm.opcode.i16x8_neg: + a = self.stack.value.pop().into_v128_i16() + b = [pywasm.arith.i16.fit(-e) for e in a] + c = ValInst.from_v128_i16(b) + self.stack.value.append(c) + case pywasm.opcode.i16x8_q15mulr_sat_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [pywasm.arith.i16.sat((a[i] * b[i] + 0x4000) >> 15) for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_all_true: + a = self.stack.value.pop().into_v128_i16() + b = 1 if all(e != 0 for e in a) else 0 + c = ValInst.from_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i16x8_bitmask: + a = 
self.stack.value.pop().into_v128_i16() + b = 0 + for i in range(8): + if a[i] < 0: + b |= 1 << i + c = ValInst.from_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i16x8_narrow_i32x4_s: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = a + b + d = [pywasm.arith.i16.sat(e) for e in c] + e = ValInst.from_v128_i16(d) + self.stack.value.append(e) + case pywasm.opcode.i16x8_narrow_i32x4_u: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = a + b + d = [pywasm.arith.u16.sat(e) for e in c] + e = ValInst.from_v128_u16(d) + self.stack.value.append(e) + case pywasm.opcode.i16x8_extend_low_i8x16_s: + a = self.stack.value.pop().into_v128_i8()[:8] + b = ValInst.from_v128_i16(a) + self.stack.value.append(b) + case pywasm.opcode.i16x8_extend_high_i8x16_s: + a = self.stack.value.pop().into_v128_i8()[8:] + b = ValInst.from_v128_i16(a) + self.stack.value.append(b) + case pywasm.opcode.i16x8_extend_low_i8x16_u: + a = self.stack.value.pop().into_v128_u8()[:8] + b = ValInst.from_v128_i16(a) + self.stack.value.append(b) + case pywasm.opcode.i16x8_extend_high_i8x16_u: + a = self.stack.value.pop().into_v128_u8()[8:] + b = ValInst.from_v128_i16(a) + self.stack.value.append(b) + case pywasm.opcode.i16x8_shl: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i16() + c = ValInst.from_v128_i16([pywasm.arith.i16.shl(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i16x8_shr_s: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i16() + c = ValInst.from_v128_i16([pywasm.arith.i16.shr(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i16x8_shr_u: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_u16() + c = ValInst.from_v128_u16([pywasm.arith.u16.shr(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i16x8_add: + b = self.stack.value.pop().into_v128_i16() + a 
= self.stack.value.pop().into_v128_i16() + c = [pywasm.arith.i16.add(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_add_sat_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [pywasm.arith.i16.add_sat(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_add_sat_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [pywasm.arith.u16.add_sat(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_u16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_sub: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [pywasm.arith.i16.sub(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_sub_sat_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [pywasm.arith.i16.sub_sat(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_sub_sat_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [pywasm.arith.u16.sub_sat(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_u16(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_nearest: + a = self.stack.value.pop().into_v128_f64() + b = [e if math.isnan(e) or math.isinf(e) else float(round(e)) for e in a] + c = ValInst.from_v128_f64(b) + self.stack.value.append(c) + case pywasm.opcode.i16x8_mul: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [pywasm.arith.i16.mul(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_min_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = 
[min(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_min_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [min(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_u16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_max_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [max(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_max_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [max(a[i], b[i]) for i in range(8)] + d = ValInst.from_v128_u16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_avgr_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [(a[i] + b[i] + 1) // 2 for i in range(8)] + d = ValInst.from_v128_u16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_extmul_low_i8x16_s: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [a[i] * b[i] for i in range(8)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_extmul_high_i8x16_s: + b = self.stack.value.pop().into_v128_i8() + a = self.stack.value.pop().into_v128_i8() + c = [a[i] * b[i] for i in range(8, 16)] + d = ValInst.from_v128_i16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_extmul_low_i8x16_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [a[i] * b[i] for i in range(8)] + d = ValInst.from_v128_u16(c) + self.stack.value.append(d) + case pywasm.opcode.i16x8_extmul_high_i8x16_u: + b = self.stack.value.pop().into_v128_u8() + a = self.stack.value.pop().into_v128_u8() + c = [a[i] * b[i] for i in range(8, 16)] + d = ValInst.from_v128_u16(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_abs: + a = 
self.stack.value.pop().into_v128_i32() + b = [pywasm.arith.i32.fit(abs(e)) for e in a] + c = ValInst.from_v128_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i32x4_neg: + a = self.stack.value.pop().into_v128_i32() + b = [pywasm.arith.i32.fit(-e) for e in a] + c = ValInst.from_v128_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i32x4_all_true: + a = self.stack.value.pop().into_v128_i32() + b = 1 if all(e != 0 for e in a) else 0 + c = ValInst.from_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i32x4_bitmask: + a = self.stack.value.pop().into_v128_i32() + b = 0 + for i in range(4): + if a[i] < 0: + b |= 1 << i + c = ValInst.from_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i32x4_extend_low_i16x8_s: + a = self.stack.value.pop().into_v128_i16()[:4] + b = ValInst.from_v128_i32(a) + self.stack.value.append(b) + case pywasm.opcode.i32x4_extend_high_i16x8_s: + a = self.stack.value.pop().into_v128_i16()[4:] + b = ValInst.from_v128_i32(a) + self.stack.value.append(b) + case pywasm.opcode.i32x4_extend_low_i16x8_u: + a = self.stack.value.pop().into_v128_u16()[:4] + b = ValInst.from_v128_i32(a) + self.stack.value.append(b) + case pywasm.opcode.i32x4_extend_high_i16x8_u: + a = self.stack.value.pop().into_v128_u16()[4:] + b = ValInst.from_v128_i32(a) + self.stack.value.append(b) + case pywasm.opcode.i32x4_shl: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i32() + c = ValInst.from_v128_i32([pywasm.arith.i32.shl(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i32x4_shr_s: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i32() + c = ValInst.from_v128_i32([pywasm.arith.i32.shr(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i32x4_shr_u: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_u32() + c = ValInst.from_v128_u32([pywasm.arith.u32.shr(e, b) for e in a]) + self.stack.value.append(c) + case 
pywasm.opcode.i32x4_add: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [pywasm.arith.i32.add(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_sub: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [pywasm.arith.i32.sub(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_mul: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [pywasm.arith.i32.mul(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_min_s: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [min(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_min_u: + b = self.stack.value.pop().into_v128_u32() + a = self.stack.value.pop().into_v128_u32() + c = [min(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_u32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_max_s: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [max(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_max_u: + b = self.stack.value.pop().into_v128_u32() + a = self.stack.value.pop().into_v128_u32() + c = [max(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_u32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_dot_i16x8_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [pywasm.arith.i32.fit(a[i*2] * b[i*2] + a[i*2+1] * b[i*2+1]) for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_extmul_low_i16x8_s: + b = self.stack.value.pop().into_v128_i16() + a = 
self.stack.value.pop().into_v128_i16() + c = [a[i] * b[i] for i in range(4)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_extmul_high_i16x8_s: + b = self.stack.value.pop().into_v128_i16() + a = self.stack.value.pop().into_v128_i16() + c = [a[i] * b[i] for i in range(4, 8)] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_extmul_low_i16x8_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [a[i] * b[i] for i in range(4)] + d = ValInst.from_v128_u32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_extmul_high_i16x8_u: + b = self.stack.value.pop().into_v128_u16() + a = self.stack.value.pop().into_v128_u16() + c = [a[i] * b[i] for i in range(4, 8)] + d = ValInst.from_v128_u32(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_abs: + a = self.stack.value.pop().into_v128_i64() + b = [pywasm.arith.i64.fit(abs(e)) for e in a] + c = ValInst.from_v128_i64(b) + self.stack.value.append(c) + case pywasm.opcode.i64x2_neg: + a = self.stack.value.pop().into_v128_i64() + b = [pywasm.arith.i64.fit(-e) for e in a] + c = ValInst.from_v128_i64(b) + self.stack.value.append(c) + case pywasm.opcode.i64x2_all_true: + a = self.stack.value.pop().into_v128_i64() + b = 1 if all(e != 0 for e in a) else 0 + c = ValInst.from_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i64x2_bitmask: + a = self.stack.value.pop().into_v128_i64() + b = 0 + for i in range(2): + if a[i] < 0: + b |= 1 << i + c = ValInst.from_i32(b) + self.stack.value.append(c) + case pywasm.opcode.i64x2_extend_low_i32x4_s: + a = self.stack.value.pop().into_v128_i32()[:2] + b = ValInst.from_v128_i64(a) + self.stack.value.append(b) + case pywasm.opcode.i64x2_extend_high_i32x4_s: + a = self.stack.value.pop().into_v128_i32()[2:] + b = ValInst.from_v128_i64(a) + self.stack.value.append(b) + case pywasm.opcode.i64x2_extend_low_i32x4_u: + a = self.stack.value.pop().into_v128_u32()[:2] + 
b = ValInst.from_v128_i64(a) + self.stack.value.append(b) + case pywasm.opcode.i64x2_extend_high_i32x4_u: + a = self.stack.value.pop().into_v128_u32()[2:] + b = ValInst.from_v128_i64(a) + self.stack.value.append(b) + case pywasm.opcode.i64x2_shl: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i64() + c = ValInst.from_v128_i64([pywasm.arith.i64.shl(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i64x2_shr_s: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_i64() + c = ValInst.from_v128_i64([pywasm.arith.i64.shr(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i64x2_shr_u: + b = self.stack.value.pop().into_i32() + a = self.stack.value.pop().into_v128_u64() + c = ValInst.from_v128_u64([pywasm.arith.u64.shr(e, b) for e in a]) + self.stack.value.append(c) + case pywasm.opcode.i64x2_add: + b = self.stack.value.pop().into_v128_i64() + a = self.stack.value.pop().into_v128_i64() + c = [pywasm.arith.i64.add(a[i], b[i]) for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_sub: + b = self.stack.value.pop().into_v128_i64() + a = self.stack.value.pop().into_v128_i64() + c = [pywasm.arith.i64.sub(a[i], b[i]) for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_mul: + b = self.stack.value.pop().into_v128_i64() + a = self.stack.value.pop().into_v128_i64() + c = [pywasm.arith.i64.mul(a[i], b[i]) for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_eq: + b = self.stack.value.pop().into_v128_i64() + a = self.stack.value.pop().into_v128_i64() + c = [-1 if a[i] == b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_ne: + b = self.stack.value.pop().into_v128_i64() + a = self.stack.value.pop().into_v128_i64() + c = [-1 if a[i] != b[i] else 0 for i in range(2)] + d = 
ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_lt_s: + b = self.stack.value.pop().into_v128_i64() + a = self.stack.value.pop().into_v128_i64() + c = [-1 if a[i] < b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_gt_s: + b = self.stack.value.pop().into_v128_i64() + a = self.stack.value.pop().into_v128_i64() + c = [-1 if a[i] > b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_le_s: + b = self.stack.value.pop().into_v128_i64() + a = self.stack.value.pop().into_v128_i64() + c = [-1 if a[i] <= b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_ge_s: + b = self.stack.value.pop().into_v128_i64() + a = self.stack.value.pop().into_v128_i64() + c = [-1 if a[i] >= b[i] else 0 for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_extmul_low_i32x4_s: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [a[i] * b[i] for i in range(2)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_extmul_high_i32x4_s: + b = self.stack.value.pop().into_v128_i32() + a = self.stack.value.pop().into_v128_i32() + c = [a[i] * b[i] for i in range(2, 4)] + d = ValInst.from_v128_i64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_extmul_low_i32x4_u: + b = self.stack.value.pop().into_v128_u32() + a = self.stack.value.pop().into_v128_u32() + c = [a[i] * b[i] for i in range(2)] + d = ValInst.from_v128_u64(c) + self.stack.value.append(d) + case pywasm.opcode.i64x2_extmul_high_i32x4_u: + b = self.stack.value.pop().into_v128_u32() + a = self.stack.value.pop().into_v128_u32() + c = [a[i] * b[i] for i in range(2, 4)] + d = ValInst.from_v128_u64(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_abs: + a = 
self.stack.value.pop().into_v128_f32() + b = [abs(e) for e in a] + c = ValInst.from_v128_f32(b) + self.stack.value.append(c) + case pywasm.opcode.f32x4_neg: + a = self.stack.value.pop().into_v128_f32() + b = [-e for e in a] + c = ValInst.from_v128_f32(b) + self.stack.value.append(c) + case pywasm.opcode.f32x4_sqrt: + a = self.stack.value.pop().into_v128_f32() + b = [math.sqrt(e) if e >= 0 else math.nan for e in a] + c = ValInst.from_v128_f32(b) + self.stack.value.append(c) + case pywasm.opcode.f32x4_add: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [a[i] + b[i] for i in range(4)] + d = ValInst.from_v128_f32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_sub: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [a[i] - b[i] for i in range(4)] + d = ValInst.from_v128_f32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_mul: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [a[i] * b[i] for i in range(4)] + d = ValInst.from_v128_f32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_div: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [pywasm.arith.f32.div(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_f32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_min: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [pywasm.arith.f32.min(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_f32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_max: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [pywasm.arith.f32.max(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_f32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_pmin: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = 
[min(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_f32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_pmax: + b = self.stack.value.pop().into_v128_f32() + a = self.stack.value.pop().into_v128_f32() + c = [max(a[i], b[i]) for i in range(4)] + d = ValInst.from_v128_f32(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_abs: + a = self.stack.value.pop().into_v128_f64() + b = [abs(e) for e in a] + c = ValInst.from_v128_f64(b) + self.stack.value.append(c) + case pywasm.opcode.f64x2_neg: + a = self.stack.value.pop().into_v128_f64() + b = [-e for e in a] + c = ValInst.from_v128_f64(b) + self.stack.value.append(c) + case pywasm.opcode.f64x2_sqrt: + a = self.stack.value.pop().into_v128_f64() + b = [math.sqrt(e) if e >= 0 else math.nan for e in a] + c = ValInst.from_v128_f64(b) + self.stack.value.append(c) + case pywasm.opcode.f64x2_add: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [a[i] + b[i] for i in range(2)] + d = ValInst.from_v128_f64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_sub: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [a[i] - b[i] for i in range(2)] + d = ValInst.from_v128_f64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_mul: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [a[i] * b[i] for i in range(2)] + d = ValInst.from_v128_f64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_div: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [pywasm.arith.f64.div(a[i], b[i]) for i in range(2)] + d = ValInst.from_v128_f64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_min: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [pywasm.arith.f64.min(a[i], b[i]) for i in range(2)] + d = ValInst.from_v128_f64(c) + self.stack.value.append(d) + case 
pywasm.opcode.f64x2_max: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [pywasm.arith.f64.max(a[i], b[i]) for i in range(2)] + d = ValInst.from_v128_f64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_pmin: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [min(a[i], b[i]) for i in range(2)] + d = ValInst.from_v128_f64(c) + self.stack.value.append(d) + case pywasm.opcode.f64x2_pmax: + b = self.stack.value.pop().into_v128_f64() + a = self.stack.value.pop().into_v128_f64() + c = [max(a[i], b[i]) for i in range(2)] + d = ValInst.from_v128_f64(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_trunc_sat_f32x4_s: + a = self.stack.value.pop().into_v128_f32() + b = [0 if math.isnan(e) else e for e in a] + c = [int(max(pywasm.arith.i32.min, min(e, pywasm.arith.i32.max))) for e in b] + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_trunc_sat_f32x4_u: + a = self.stack.value.pop().into_v128_f32() + b = [0 if math.isnan(e) else e for e in a] + c = [int(max(pywasm.arith.u32.min, min(e, pywasm.arith.u32.max))) for e in b] + d = ValInst.from_v128_u32(c) + self.stack.value.append(d) + case pywasm.opcode.f32x4_convert_i32x4_s: + a = self.stack.value.pop().into_v128_i32() + b = ValInst.from_v128_f32([float(e) for e in a]) + self.stack.value.append(b) + case pywasm.opcode.f32x4_convert_i32x4_u: + a = self.stack.value.pop().into_v128_u32() + b = ValInst.from_v128_f32([float(e) for e in a]) + self.stack.value.append(b) + case pywasm.opcode.i32x4_trunc_sat_f64x2_s_zero: + a = self.stack.value.pop().into_v128_f64() + b = [0 if math.isnan(e) else e for e in a] + c = [int(max(pywasm.arith.i32.min, min(e, pywasm.arith.i32.max))) for e in b] + c.extend([0, 0]) + d = ValInst.from_v128_i32(c) + self.stack.value.append(d) + case pywasm.opcode.i32x4_trunc_sat_f64x2_u_zero: + a = self.stack.value.pop().into_v128_f64() + b = [0 if math.isnan(e) 
# WebAssembly SIMD (v128) opcodes, appended to pywasm/opcode.py.
#
# Every SIMD instruction is encoded as the prefix byte 0xfd followed by a
# LEB128-encoded u32 opcode id.  The constants below store the raw byte
# sequence as one integer: ids 0x00..0x7f fit in a single byte (0xfdNN),
# while ids >= 0x80 take the two-byte LEB128 form, e.g. id 0x80 encodes as
# bytes 80 01 and is stored as 0xfd8001.  Gaps in the numbering (e.g. no
# 0xfda201) correspond to opcode ids the spec leaves unassigned.

# --- memory: whole-vector and widening loads, stores, splat-loads ---
v128_load = 0xfd00
v128_load8x8_s = 0xfd01
v128_load8x8_u = 0xfd02
v128_load16x4_s = 0xfd03
v128_load16x4_u = 0xfd04
v128_load32x2_s = 0xfd05
v128_load32x2_u = 0xfd06
v128_load8_splat = 0xfd07
v128_load16_splat = 0xfd08
v128_load32_splat = 0xfd09
v128_load64_splat = 0xfd0a
v128_store = 0xfd0b
# --- constants, shuffles and lane construction/access ---
v128_const = 0xfd0c
i8x16_shuffle = 0xfd0d
i8x16_swizzle = 0xfd0e
i8x16_splat = 0xfd0f
i16x8_splat = 0xfd10
i32x4_splat = 0xfd11
i64x2_splat = 0xfd12
f32x4_splat = 0xfd13
f64x2_splat = 0xfd14
i8x16_extract_lane_s = 0xfd15
i8x16_extract_lane_u = 0xfd16
i8x16_replace_lane = 0xfd17
i16x8_extract_lane_s = 0xfd18
i16x8_extract_lane_u = 0xfd19
i16x8_replace_lane = 0xfd1a
i32x4_extract_lane = 0xfd1b
i32x4_replace_lane = 0xfd1c
i64x2_extract_lane = 0xfd1d
i64x2_replace_lane = 0xfd1e
f32x4_extract_lane = 0xfd1f
f32x4_replace_lane = 0xfd20
f64x2_extract_lane = 0xfd21
f64x2_replace_lane = 0xfd22
# --- lane-wise comparisons (result is an all-ones/all-zeros mask per lane) ---
i8x16_eq = 0xfd23
i8x16_ne = 0xfd24
i8x16_lt_s = 0xfd25
i8x16_lt_u = 0xfd26
i8x16_gt_s = 0xfd27
i8x16_gt_u = 0xfd28
i8x16_le_s = 0xfd29
i8x16_le_u = 0xfd2a
i8x16_ge_s = 0xfd2b
i8x16_ge_u = 0xfd2c
i16x8_eq = 0xfd2d
i16x8_ne = 0xfd2e
i16x8_lt_s = 0xfd2f
i16x8_lt_u = 0xfd30
i16x8_gt_s = 0xfd31
i16x8_gt_u = 0xfd32
i16x8_le_s = 0xfd33
i16x8_le_u = 0xfd34
i16x8_ge_s = 0xfd35
i16x8_ge_u = 0xfd36
i32x4_eq = 0xfd37
i32x4_ne = 0xfd38
i32x4_lt_s = 0xfd39
i32x4_lt_u = 0xfd3a
i32x4_gt_s = 0xfd3b
i32x4_gt_u = 0xfd3c
i32x4_le_s = 0xfd3d
i32x4_le_u = 0xfd3e
i32x4_ge_s = 0xfd3f
i32x4_ge_u = 0xfd40
f32x4_eq = 0xfd41
f32x4_ne = 0xfd42
f32x4_lt = 0xfd43
f32x4_gt = 0xfd44
f32x4_le = 0xfd45
f32x4_ge = 0xfd46
f64x2_eq = 0xfd47
f64x2_ne = 0xfd48
f64x2_lt = 0xfd49
f64x2_gt = 0xfd4a
f64x2_le = 0xfd4b
f64x2_ge = 0xfd4c
# --- bitwise operations on the full 128-bit value ---
v128_not = 0xfd4d
v128_and = 0xfd4e
v128_andnot = 0xfd4f
v128_or = 0xfd50
v128_xor = 0xfd51
v128_bitselect = 0xfd52
v128_any_true = 0xfd53
# --- single-lane loads/stores and zero-extending loads ---
v128_load8_lane = 0xfd54
v128_load16_lane = 0xfd55
v128_load32_lane = 0xfd56
v128_load64_lane = 0xfd57
v128_store8_lane = 0xfd58
v128_store16_lane = 0xfd59
v128_store32_lane = 0xfd5a
v128_store64_lane = 0xfd5b
v128_load32_zero = 0xfd5c
v128_load64_zero = 0xfd5d
f32x4_demote_f64x2_zero = 0xfd5e
f64x2_promote_low_f32x4 = 0xfd5f
# --- i8x16 and f32x4/f64x2 unary, shift, saturating arithmetic ---
i8x16_abs = 0xfd60
i8x16_neg = 0xfd61
i8x16_popcnt = 0xfd62
i8x16_all_true = 0xfd63
i8x16_bitmask = 0xfd64
i8x16_narrow_i16x8_s = 0xfd65
i8x16_narrow_i16x8_u = 0xfd66
f32x4_ceil = 0xfd67
f32x4_floor = 0xfd68
f32x4_trunc = 0xfd69
f32x4_nearest = 0xfd6a
i8x16_shl = 0xfd6b
i8x16_shr_s = 0xfd6c
i8x16_shr_u = 0xfd6d
i8x16_add = 0xfd6e
i8x16_add_sat_s = 0xfd6f
i8x16_add_sat_u = 0xfd70
i8x16_sub = 0xfd71
i8x16_sub_sat_s = 0xfd72
i8x16_sub_sat_u = 0xfd73
f64x2_ceil = 0xfd74
f64x2_floor = 0xfd75
i8x16_min_s = 0xfd76
i8x16_min_u = 0xfd77
i8x16_max_s = 0xfd78
i8x16_max_u = 0xfd79
f64x2_trunc = 0xfd7a
i8x16_avgr_u = 0xfd7b
i16x8_extadd_pairwise_i8x16_s = 0xfd7c
i16x8_extadd_pairwise_i8x16_u = 0xfd7d
i32x4_extadd_pairwise_i16x8_s = 0xfd7e
i32x4_extadd_pairwise_i16x8_u = 0xfd7f
# From here on the opcode id is >= 0x80, so the trailing 01 is the second
# LEB128 byte of the id (see header comment).
# --- i16x8 lane arithmetic ---
i16x8_abs = 0xfd8001
i16x8_neg = 0xfd8101
i16x8_q15mulr_sat_s = 0xfd8201
i16x8_all_true = 0xfd8301
i16x8_bitmask = 0xfd8401
i16x8_narrow_i32x4_s = 0xfd8501
i16x8_narrow_i32x4_u = 0xfd8601
i16x8_extend_low_i8x16_s = 0xfd8701
i16x8_extend_high_i8x16_s = 0xfd8801
i16x8_extend_low_i8x16_u = 0xfd8901
i16x8_extend_high_i8x16_u = 0xfd8a01
i16x8_shl = 0xfd8b01
i16x8_shr_s = 0xfd8c01
i16x8_shr_u = 0xfd8d01
i16x8_add = 0xfd8e01
i16x8_add_sat_s = 0xfd8f01
i16x8_add_sat_u = 0xfd9001
i16x8_sub = 0xfd9101
i16x8_sub_sat_s = 0xfd9201
i16x8_sub_sat_u = 0xfd9301
f64x2_nearest = 0xfd9401
i16x8_mul = 0xfd9501
i16x8_min_s = 0xfd9601
i16x8_min_u = 0xfd9701
i16x8_max_s = 0xfd9801
i16x8_max_u = 0xfd9901
i16x8_avgr_u = 0xfd9b01
i16x8_extmul_low_i8x16_s = 0xfd9c01
i16x8_extmul_high_i8x16_s = 0xfd9d01
i16x8_extmul_low_i8x16_u = 0xfd9e01
i16x8_extmul_high_i8x16_u = 0xfd9f01
# --- i32x4 lane arithmetic ---
i32x4_abs = 0xfda001
i32x4_neg = 0xfda101
i32x4_all_true = 0xfda301
i32x4_bitmask = 0xfda401
i32x4_extend_low_i16x8_s = 0xfda701
i32x4_extend_high_i16x8_s = 0xfda801
i32x4_extend_low_i16x8_u = 0xfda901
i32x4_extend_high_i16x8_u = 0xfdaa01
i32x4_shl = 0xfdab01
i32x4_shr_s = 0xfdac01
i32x4_shr_u = 0xfdad01
i32x4_add = 0xfdae01
i32x4_sub = 0xfdb101
i32x4_mul = 0xfdb501
i32x4_min_s = 0xfdb601
i32x4_min_u = 0xfdb701
i32x4_max_s = 0xfdb801
i32x4_max_u = 0xfdb901
i32x4_dot_i16x8_s = 0xfdba01
i32x4_extmul_low_i16x8_s = 0xfdbc01
i32x4_extmul_high_i16x8_s = 0xfdbd01
i32x4_extmul_low_i16x8_u = 0xfdbe01
i32x4_extmul_high_i16x8_u = 0xfdbf01
# --- i64x2 lane arithmetic and comparisons ---
i64x2_abs = 0xfdc001
i64x2_neg = 0xfdc101
i64x2_all_true = 0xfdc301
i64x2_bitmask = 0xfdc401
i64x2_extend_low_i32x4_s = 0xfdc701
i64x2_extend_high_i32x4_s = 0xfdc801
i64x2_extend_low_i32x4_u = 0xfdc901
i64x2_extend_high_i32x4_u = 0xfdca01
i64x2_shl = 0xfdcb01
i64x2_shr_s = 0xfdcc01
i64x2_shr_u = 0xfdcd01
i64x2_add = 0xfdce01
i64x2_sub = 0xfdd101
i64x2_mul = 0xfdd501
i64x2_eq = 0xfdd601
i64x2_ne = 0xfdd701
i64x2_lt_s = 0xfdd801
i64x2_gt_s = 0xfdd901
i64x2_le_s = 0xfdda01
i64x2_ge_s = 0xfddb01
i64x2_extmul_low_i32x4_s = 0xfddc01
i64x2_extmul_high_i32x4_s = 0xfddd01
i64x2_extmul_low_i32x4_u = 0xfdde01
i64x2_extmul_high_i32x4_u = 0xfddf01
# --- f32x4 / f64x2 lane arithmetic ---
f32x4_abs = 0xfde001
f32x4_neg = 0xfde101
f32x4_sqrt = 0xfde301
f32x4_add = 0xfde401
f32x4_sub = 0xfde501
f32x4_mul = 0xfde601
f32x4_div = 0xfde701
f32x4_min = 0xfde801
f32x4_max = 0xfde901
f32x4_pmin = 0xfdea01
f32x4_pmax = 0xfdeb01
f64x2_abs = 0xfdec01
f64x2_neg = 0xfded01
f64x2_sqrt = 0xfdef01
f64x2_add = 0xfdf001
f64x2_sub = 0xfdf101
f64x2_mul = 0xfdf201
f64x2_div = 0xfdf301
f64x2_min = 0xfdf401
f64x2_max = 0xfdf501
f64x2_pmin = 0xfdf601
f64x2_pmax = 0xfdf701
# --- saturating conversions between integer and float lanes ---
i32x4_trunc_sat_f32x4_s = 0xfdf801
i32x4_trunc_sat_f32x4_u = 0xfdf901
f32x4_convert_i32x4_s = 0xfdfa01
f32x4_convert_i32x4_u = 0xfdfb01
i32x4_trunc_sat_f64x2_s_zero = 0xfdfc01
i32x4_trunc_sat_f64x2_u_zero = 0xfdfd01
f64x2_convert_low_i32x4_s = 0xfdfe01
f64x2_convert_low_i32x4_u = 0xfdff01

# Human-readable mnemonics for the SIMD opcodes above, appended to the
# module-level `name` table (declared earlier in this file).  The display
# name is the constant's identifier with the first underscore replaced by a
# dot, matching the text-format spelling used elsewhere in the table.
name[v128_load] = 'v128.load'
name[v128_load8x8_s] = 'v128.load8x8_s'
name[v128_load8x8_u] = 'v128.load8x8_u'
name[v128_load16x4_s] = 'v128.load16x4_s'
name[v128_load16x4_u] = 'v128.load16x4_u'
name[v128_load32x2_s] = 'v128.load32x2_s'
name[v128_load32x2_u] = 'v128.load32x2_u'
name[v128_load8_splat] = 'v128.load8_splat'
name[v128_load16_splat] = 'v128.load16_splat'
name[v128_load32_splat] = 'v128.load32_splat'
name[v128_load64_splat] = 'v128.load64_splat'
name[v128_store] = 'v128.store'
name[v128_const] = 'v128.const'
name[i8x16_shuffle] = 'i8x16.shuffle'
name[i8x16_swizzle] = 'i8x16.swizzle'
name[i8x16_splat] = 'i8x16.splat'
name[i16x8_splat] = 'i16x8.splat'
name[i32x4_splat] = 'i32x4.splat'
name[i64x2_splat] = 'i64x2.splat'
name[f32x4_splat] = 'f32x4.splat'
name[f64x2_splat] = 'f64x2.splat'
name[i8x16_extract_lane_s] = 'i8x16.extract_lane_s'
name[i8x16_extract_lane_u] = 'i8x16.extract_lane_u'
name[i8x16_replace_lane] = 'i8x16.replace_lane'
name[i16x8_extract_lane_s] = 'i16x8.extract_lane_s'
name[i16x8_extract_lane_u] = 'i16x8.extract_lane_u'
name[i16x8_replace_lane] = 'i16x8.replace_lane'
name[i32x4_extract_lane] = 'i32x4.extract_lane'
name[i32x4_replace_lane] = 'i32x4.replace_lane'
name[i64x2_extract_lane] = 'i64x2.extract_lane'
name[i64x2_replace_lane] = 'i64x2.replace_lane'
name[f32x4_extract_lane] = 'f32x4.extract_lane'
name[f32x4_replace_lane] = 'f32x4.replace_lane'
name[f64x2_extract_lane] = 'f64x2.extract_lane'
name[f64x2_replace_lane] = 'f64x2.replace_lane'
name[i8x16_eq] = 'i8x16.eq'
name[i8x16_ne] = 'i8x16.ne'
name[i8x16_lt_s] = 'i8x16.lt_s'
name[i8x16_lt_u] = 'i8x16.lt_u'
name[i8x16_gt_s] = 'i8x16.gt_s'
name[i8x16_gt_u] = 'i8x16.gt_u'
name[i8x16_le_s] = 'i8x16.le_s'
name[i8x16_le_u] = 'i8x16.le_u'
name[i8x16_ge_s] = 'i8x16.ge_s'
name[i8x16_ge_u] = 'i8x16.ge_u'
name[i16x8_eq] = 'i16x8.eq'
name[i16x8_ne] = 'i16x8.ne'
name[i16x8_lt_s] = 'i16x8.lt_s'
name[i16x8_lt_u] = 'i16x8.lt_u'
name[i16x8_gt_s] = 'i16x8.gt_s'
name[i16x8_gt_u] = 'i16x8.gt_u'
name[i16x8_le_s] = 'i16x8.le_s'
name[i16x8_le_u] = 'i16x8.le_u'
name[i16x8_ge_s] = 'i16x8.ge_s'
name[i16x8_ge_u] = 'i16x8.ge_u'
name[i32x4_eq] = 'i32x4.eq'
name[i32x4_ne] = 'i32x4.ne'
name[i32x4_lt_s] = 'i32x4.lt_s'
name[i32x4_lt_u] = 'i32x4.lt_u'
name[i32x4_gt_s] = 'i32x4.gt_s'
name[i32x4_gt_u] = 'i32x4.gt_u'
name[i32x4_le_s] = 'i32x4.le_s'
name[i32x4_le_u] = 'i32x4.le_u'
name[i32x4_ge_s] = 'i32x4.ge_s'
name[i32x4_ge_u] = 'i32x4.ge_u'
name[f32x4_eq] = 'f32x4.eq'
name[f32x4_ne] = 'f32x4.ne'
name[f32x4_lt] = 'f32x4.lt'
name[f32x4_gt] = 'f32x4.gt'
name[f32x4_le] = 'f32x4.le'
name[f32x4_ge] = 'f32x4.ge'
name[f64x2_eq] = 'f64x2.eq'
name[f64x2_ne] = 'f64x2.ne'
name[f64x2_lt] = 'f64x2.lt'
name[f64x2_gt] = 'f64x2.gt'
name[f64x2_le] = 'f64x2.le'
name[f64x2_ge] = 'f64x2.ge'
name[v128_not] = 'v128.not'
name[v128_and] = 'v128.and'
name[v128_andnot] = 'v128.andnot'
name[v128_or] = 'v128.or'
name[v128_xor] = 'v128.xor'
name[v128_bitselect] = 'v128.bitselect'
name[v128_any_true] = 'v128.any_true'
name[v128_load8_lane] = 'v128.load8_lane'
name[v128_load16_lane] = 'v128.load16_lane'
name[v128_load32_lane] = 'v128.load32_lane'
name[v128_load64_lane] = 'v128.load64_lane'
name[v128_store8_lane] = 'v128.store8_lane'
name[v128_store16_lane] = 'v128.store16_lane'
name[v128_store32_lane] = 'v128.store32_lane'
name[v128_store64_lane] = 'v128.store64_lane'
name[v128_load32_zero] = 'v128.load32_zero'
name[v128_load64_zero] = 'v128.load64_zero'
name[f32x4_demote_f64x2_zero] = 'f32x4.demote_f64x2_zero'
name[f64x2_promote_low_f32x4] = 'f64x2.promote_low_f32x4'
name[i8x16_abs] = 'i8x16.abs'
name[i8x16_neg] = 'i8x16.neg'
name[i8x16_popcnt] = 'i8x16.popcnt'
name[i8x16_all_true] = 'i8x16.all_true'
name[i8x16_bitmask] = 'i8x16.bitmask'
name[i8x16_narrow_i16x8_s] = 'i8x16.narrow_i16x8_s'
name[i8x16_narrow_i16x8_u] = 'i8x16.narrow_i16x8_u'
name[f32x4_ceil] = 'f32x4.ceil'
name[f32x4_floor] = 'f32x4.floor'
name[f32x4_trunc] = 'f32x4.trunc'
name[f32x4_nearest] = 'f32x4.nearest'
name[i8x16_shl] = 'i8x16.shl'
name[i8x16_shr_s] = 'i8x16.shr_s'
name[i8x16_shr_u] = 'i8x16.shr_u'
name[i8x16_add] = 'i8x16.add'
name[i8x16_add_sat_s] = 'i8x16.add_sat_s'
name[i8x16_add_sat_u] = 'i8x16.add_sat_u'
name[i8x16_sub] = 'i8x16.sub'
name[i8x16_sub_sat_s] = 'i8x16.sub_sat_s'
name[i8x16_sub_sat_u] = 'i8x16.sub_sat_u'
name[f64x2_ceil] = 'f64x2.ceil'
name[f64x2_floor] = 'f64x2.floor'
name[i8x16_min_s] = 'i8x16.min_s'
name[i8x16_min_u] = 'i8x16.min_u'
name[i8x16_max_s] = 'i8x16.max_s'
name[i8x16_max_u] = 'i8x16.max_u'
name[f64x2_trunc] = 'f64x2.trunc'
name[i8x16_avgr_u] = 'i8x16.avgr_u'
name[i16x8_extadd_pairwise_i8x16_s] = 'i16x8.extadd_pairwise_i8x16_s'
name[i16x8_extadd_pairwise_i8x16_u] = 'i16x8.extadd_pairwise_i8x16_u'
name[i32x4_extadd_pairwise_i16x8_s] = 'i32x4.extadd_pairwise_i16x8_s'
name[i32x4_extadd_pairwise_i16x8_u] = 'i32x4.extadd_pairwise_i16x8_u'
name[i16x8_abs] = 'i16x8.abs'
name[i16x8_neg] = 'i16x8.neg'
name[i16x8_q15mulr_sat_s] = 'i16x8.q15mulr_sat_s'
name[i16x8_all_true] = 'i16x8.all_true'
name[i16x8_bitmask] = 'i16x8.bitmask'
name[i16x8_narrow_i32x4_s] = 'i16x8.narrow_i32x4_s'
name[i16x8_narrow_i32x4_u] = 'i16x8.narrow_i32x4_u'
name[i16x8_extend_low_i8x16_s] = 'i16x8.extend_low_i8x16_s'
name[i16x8_extend_high_i8x16_s] = 'i16x8.extend_high_i8x16_s'
name[i16x8_extend_low_i8x16_u] = 'i16x8.extend_low_i8x16_u'
name[i16x8_extend_high_i8x16_u] = 'i16x8.extend_high_i8x16_u'
name[i16x8_shl] = 'i16x8.shl'
name[i16x8_shr_s] = 'i16x8.shr_s'
name[i16x8_shr_u] = 'i16x8.shr_u'
name[i16x8_add] = 'i16x8.add'
name[i16x8_add_sat_s] = 'i16x8.add_sat_s'
name[i16x8_add_sat_u] = 'i16x8.add_sat_u'
name[i16x8_sub] = 'i16x8.sub'
name[i16x8_sub_sat_s] = 'i16x8.sub_sat_s'
name[i16x8_sub_sat_u] = 'i16x8.sub_sat_u'
name[f64x2_nearest] = 'f64x2.nearest'
name[i16x8_mul] = 'i16x8.mul'
name[i16x8_min_s] = 'i16x8.min_s'
name[i16x8_min_u] = 'i16x8.min_u'
name[i16x8_max_s] = 'i16x8.max_s'
name[i16x8_max_u] = 'i16x8.max_u'
name[i16x8_avgr_u] = 'i16x8.avgr_u'
name[i16x8_extmul_low_i8x16_s] = 'i16x8.extmul_low_i8x16_s'
name[i16x8_extmul_high_i8x16_s] = 'i16x8.extmul_high_i8x16_s'
name[i16x8_extmul_low_i8x16_u] = 'i16x8.extmul_low_i8x16_u'
name[i16x8_extmul_high_i8x16_u] = 'i16x8.extmul_high_i8x16_u'
name[i32x4_abs] = 'i32x4.abs'
name[i32x4_neg] = 'i32x4.neg'
name[i32x4_all_true] = 'i32x4.all_true'
name[i32x4_bitmask] = 'i32x4.bitmask'
name[i32x4_extend_low_i16x8_s] = 'i32x4.extend_low_i16x8_s'
name[i32x4_extend_high_i16x8_s] = 'i32x4.extend_high_i16x8_s'
name[i32x4_extend_low_i16x8_u] = 'i32x4.extend_low_i16x8_u'
name[i32x4_extend_high_i16x8_u] = 'i32x4.extend_high_i16x8_u'
name[i32x4_shl] = 'i32x4.shl'
name[i32x4_shr_s] = 'i32x4.shr_s'
name[i32x4_shr_u] = 'i32x4.shr_u'
name[i32x4_add] = 'i32x4.add'
name[i32x4_sub] = 'i32x4.sub'
name[i32x4_mul] = 'i32x4.mul'
name[i32x4_min_s] = 'i32x4.min_s'
name[i32x4_min_u] = 'i32x4.min_u'
name[i32x4_max_s] = 'i32x4.max_s'
name[i32x4_max_u] = 'i32x4.max_u'
name[i32x4_dot_i16x8_s] = 'i32x4.dot_i16x8_s'
name[i32x4_extmul_low_i16x8_s] = 'i32x4.extmul_low_i16x8_s'
name[i32x4_extmul_high_i16x8_s] = 'i32x4.extmul_high_i16x8_s'
name[i32x4_extmul_low_i16x8_u] = 'i32x4.extmul_low_i16x8_u'
name[i32x4_extmul_high_i16x8_u] = 'i32x4.extmul_high_i16x8_u'
name[i64x2_abs] = 'i64x2.abs'
name[i64x2_neg] = 'i64x2.neg'
name[i64x2_all_true] = 'i64x2.all_true'
name[i64x2_bitmask] = 'i64x2.bitmask'
name[i64x2_extend_low_i32x4_s] = 'i64x2.extend_low_i32x4_s'
name[i64x2_extend_high_i32x4_s] = 'i64x2.extend_high_i32x4_s'
name[i64x2_extend_low_i32x4_u] = 'i64x2.extend_low_i32x4_u'
name[i64x2_extend_high_i32x4_u] = 'i64x2.extend_high_i32x4_u'
name[i64x2_shl] = 'i64x2.shl'
name[i64x2_shr_s] = 'i64x2.shr_s'
name[i64x2_shr_u] = 'i64x2.shr_u'
name[i64x2_add] = 'i64x2.add'
name[i64x2_sub] = 'i64x2.sub'
name[i64x2_mul] = 'i64x2.mul'
name[i64x2_eq] = 'i64x2.eq'
name[i64x2_ne] = 'i64x2.ne'
name[i64x2_lt_s] = 'i64x2.lt_s'
name[i64x2_gt_s] = 'i64x2.gt_s'
name[i64x2_le_s] = 'i64x2.le_s'
name[i64x2_ge_s] = 'i64x2.ge_s'
name[i64x2_extmul_low_i32x4_s] = 'i64x2.extmul_low_i32x4_s'
name[i64x2_extmul_high_i32x4_s] = 'i64x2.extmul_high_i32x4_s'
name[i64x2_extmul_low_i32x4_u] = 'i64x2.extmul_low_i32x4_u'
name[i64x2_extmul_high_i32x4_u] = 'i64x2.extmul_high_i32x4_u'
name[f32x4_abs] = 'f32x4.abs'
name[f32x4_neg] = 'f32x4.neg'
name[f32x4_sqrt] = 'f32x4.sqrt'
name[f32x4_add] = 'f32x4.add'
name[f32x4_sub] = 'f32x4.sub'
name[f32x4_mul] = 'f32x4.mul'
name[f32x4_div] = 'f32x4.div'
name[f32x4_min] = 'f32x4.min'
name[f32x4_max] = 'f32x4.max'
name[f32x4_pmin] = 'f32x4.pmin'
name[f32x4_pmax] = 'f32x4.pmax'
name[f64x2_abs] = 'f64x2.abs'
name[f64x2_neg] = 'f64x2.neg'
name[f64x2_sqrt] = 'f64x2.sqrt'
name[f64x2_add] = 'f64x2.add'
name[f64x2_sub] = 'f64x2.sub'
name[f64x2_mul] = 'f64x2.mul'
name[f64x2_div] = 'f64x2.div'
name[f64x2_min] = 'f64x2.min'
name[f64x2_max] = 'f64x2.max'
name[f64x2_pmin] = 'f64x2.pmin'
name[f64x2_pmax] = 'f64x2.pmax'
name[i32x4_trunc_sat_f32x4_s] = 'i32x4.trunc_sat_f32x4_s'
name[i32x4_trunc_sat_f32x4_u] = 'i32x4.trunc_sat_f32x4_u'
name[f32x4_convert_i32x4_s] = 'f32x4.convert_i32x4_s'
name[f32x4_convert_i32x4_u] = 'f32x4.convert_i32x4_u'
name[i32x4_trunc_sat_f64x2_s_zero] = 'i32x4.trunc_sat_f64x2_s_zero'
name[i32x4_trunc_sat_f64x2_u_zero] = 'i32x4.trunc_sat_f64x2_u_zero'
name[f64x2_convert_low_i32x4_s] = 'f64x2.convert_low_i32x4_s'
name[f64x2_convert_low_i32x4_u] = 'f64x2.convert_low_i32x4_u'
pywasm.ValInst.from_f64_u64(int(j['value'])) + case 'v128': + match j['lane_type']: + case 'i8': + return pywasm.ValInst.from_v128_i8([pywasm.arith.i8.fit(int(c)) for c in j['value']]) + case 'i16': + return pywasm.ValInst.from_v128_i16([pywasm.arith.i16.fit(int(c)) for c in j['value']]) + case 'i32': + return pywasm.ValInst.from_v128_i32([pywasm.arith.i32.fit(int(c)) for c in j['value']]) + case 'i64': + return pywasm.ValInst.from_v128_i64([pywasm.arith.i64.fit(int(c)) for c in j['value']]) + case 'f32': + data = [valj({'type': 'f32', 'value': e}).into_u32() for e in j['value']] + return pywasm.ValInst.from_v128_u32(data) + case 'f64': + data = [valj({'type': 'f64', 'value': e}).into_u64() for e in j['value']] + return pywasm.ValInst.from_v128_u64(data) + case _: + assert 0 case 'funcref': match j['value']: case 'null': @@ -63,6 +80,18 @@ def vale(a: pywasm.ValInst, b: pywasm.ValInst) -> bool: if math.isnan(a.into_f64()): return math.isnan(b.into_f64()) return math.isclose(a.into_f64(), b.into_f64(), rel_tol=1e-6) + if a.type == pywasm.core.ValType.v128(): + if a.into_v128() == b.into_v128(): + return True + x = [pywasm.core.ValInst.from_f32(e) for e in a.into_v128_f32()] + y = [pywasm.core.ValInst.from_f32(e) for e in b.into_v128_f32()] + if all([vale(x, y) for x, y in zip(x, y)]): + return True + x = [pywasm.core.ValInst.from_f64(e) for e in a.into_v128_f64()] + y = [pywasm.core.ValInst.from_f64(e) for e in b.into_v128_f64()] + if all([vale(x, y) for x, y in zip(x, y)]): + return True + return False if a.type == pywasm.core.ValType.ref_func(): return a.into_i64() == b.into_i64() if a.type == pywasm.core.ValType.ref_extern(): @@ -115,11 +144,12 @@ def host(runtime: pywasm.core.Runtime) -> typing.Dict[str, typing.Dict[str, pywa } -for name in sorted(glob.glob('res/spec/test/core/*.json')): - if not re.match(unittest_regex, name): - continue - with open(name) as f: - suit = json.load(f)['commands'] +all_test = [ + *sorted(glob.glob('res/spec/test/core/*.json')), + 
*sorted(glob.glob('res/spec/test/core/simd/*.json')) +] +for desc in all_test: + suit = json.loads(pathlib.Path(desc).read_text())['commands'] runtime = pywasm.Runtime() cmodule: pywasm.ModuleInst lmodule: pywasm.ModuleInst @@ -208,8 +238,9 @@ def host(runtime: pywasm.core.Runtime) -> typing.Dict[str, typing.Dict[str, pywa case _: assert 0 case 'assert_uninstantiable': + name = pathlib.Path(desc).parent.joinpath(elem['filename']) try: - lmodule = runtime.instance_from_file(f'res/spec/test/core/{elem['filename']}') + lmodule = runtime.instance_from_file(name) except: runtime.machine.stack.frame.clear() runtime.machine.stack.label.clear() @@ -217,8 +248,9 @@ def host(runtime: pywasm.core.Runtime) -> typing.Dict[str, typing.Dict[str, pywa else: assert 0 case 'assert_unlinkable': + name = pathlib.Path(desc).parent.joinpath(elem['filename']) try: - lmodule = runtime.instance_from_file(f'res/spec/test/core/{elem['filename']}') + lmodule = runtime.instance_from_file(name) except: runtime.machine.stack.frame.clear() runtime.machine.stack.label.clear() @@ -226,7 +258,8 @@ def host(runtime: pywasm.core.Runtime) -> typing.Dict[str, typing.Dict[str, pywa else: assert 0 case 'module': - lmodule = runtime.instance_from_file(f'res/spec/test/core/{elem['filename']}') + name = pathlib.Path(desc).parent.joinpath(elem['filename']) + lmodule = runtime.instance_from_file(name) if 'name' in elem: mmodule[elem['name']] = lmodule case 'register':