chore: recompile all wasm

mnater · Oct 10, 2021 · 90facef · 90facef
1 parent 451674b
commit 90facef
Show file tree

Hide file tree

Showing 276 changed files with 11,178 additions and 7,314 deletions.
diff --git a/lang/af/af.wasm b/lang/af/af.wasm
diff --git a/lang/af/src/hyphenEngine.ts b/lang/af/src/hyphenEngine.ts
@@ -5,21 +5,106 @@
  * declare function logc(arg0: i32): void;
  */
 
+/*
+ * MEMORY LAYOUT (static)
+ *
+ * #--------------------# <- Offset 0
+ * |        word        |
+ * | 64 * Uint16 = 128B |
+ * #--------------------# <- 128 (tw)
+ * |   translatedWord   |
+ * |  64 * Uint8 = 64B  |
+ * #--------------------# <- 192 (hp)
+ * |    hyphenPoints    |
+ * |  64 * Uint8 = 64B  |
+ * #--------------------# <- 256 (translateMapOffset)
+ * |    translateMap    |
+ * |         keys:      |
+ * | 256 chars * 2Bytes |
+ * |          +         |
+ * |       values:      | 1024B
+ * | 256 chars * 1Byte  |
+ * |          +         |
+ * |     collisions:    |
+ * | 64 buckets * 4Byte |
+ * #--------------------# <- 1280 (alphabetOffset)
+ * |      alphabet      |
+ * | 256 chars * 2Bytes | 512B
+ * #--------------------# <- 1792 (originalWordOffset)
+ * |    originalWord    |
+ * | 64 * Uint16 = 128B |
+ * #--------------------# <- 1920   - DATAOFFSET
+ * |      licence       |           |
+ * #--------------------#           |
+ * |      alphabet      |    (ao)   |
+ * #--------------------#           |
+ * |     STrieBits      |    (bm)   |
+ * #--------------------#           |
+ * |     STrieChars     |    (cm)   } pattern data (succinct value trie)
+ * #--------------------#           |
+ * |    hasValueBits    |    (hv)   |
+ * #--------------------#           |
+ * |    valuesBitMap    |    (vm)   |
+ * #--------------------#           |
+ * |       values       |    (va)   |
+ * #--------------------# <- dataEnd-
+ * |   alignment bytes  |
+ * #--------------------# <- heapSize
+ *
+ * USAGE:
+ * Each module created from this source is language specific.
+ * 1. Write a UTF-16 String to memory starting at index 0 (64 chars max)
+ * 2. Call hyphenate(), which returns the length of the hyphenated string
+ * 3. Read the hyphenated UTF-16 string from memory starting at index 0
+ *
+ * INTERNALS:
+ * Upon instantiation the module builds a translate map that maps UTF-16 chars
+ * to 8bit numbers.
+ * This limits the size of the alphabet to a theoretical maximum of
+ * 255 characters (practically the number is lower to prevent hash collisions).
+ * Hyphenation patterns are stored in and read from a static succinct trie.
+ */
+
+/*
+ * Import the offsets and left-/rightmin of the language specific data.
+ * The import file is created by the createWasmData.js script
+ */
 import {ao, bm, cm, hv, lm, rm, va, vm} from "./g";
+
+/*
+ * Export the variables essential for the user of the module:
+ * lmi: leftmin - the number of characters before the first hyphenation point
+ * rmi: rightmin - the number of characters after the last hyphenation point
+ * lct: lettercount - number of letters in the alphabet
+ */
 export const lmi: i32 = lm;
 export const rmi: i32 = rm;
 export let lct: i32 = 0;
 
+/*
+ * Define the offsets into memory
+ */
 const tw: i32 = 128;
 const hp: i32 = 192;
 const translateMapOffset:i32 = 256;
+const alphabetOffset: i32 = 1280;
 const originalWordOffset: i32 = 1792;
 
+/*
+ * Minimalistic hash function to map 16-bit to 8-bit
+ *
+ * The magic numbers are found by tools/searchHashSeeds.*
+ * with the goal of having as few collisions as possible.
+ */
 function hashCharCode(cc: i32): i32 {
-    // Hashes charCodes to [0, 256[
     return ((19441 * cc) % 19559) & 255;
 }
 
+/*
+ * Store a k/v pair in translateMap
+ * k is the utf-16 char
+ * v is its 8-bit representation
+ */
 function pushToTranslateMap(cc: i32, id: i32): void {
     let ptr: i32 = hashCharCode(cc) << 1;
     if (load<u16>(ptr, translateMapOffset) === 0) {
@@ -40,6 +125,10 @@ function pushToTranslateMap(cc: i32, id: i32): void {
     }
 }
 
+/*
+ * Retrieve the 8-bit value for a UTF-16 char
+ * Returns 255 if the char is not in the translateMap
+ */
 function pullFromTranslateMap(cc: i32): i32 {
     let ptr: i32 = hashCharCode(cc) << 1;
     const val = load<u16>(ptr, translateMapOffset);
@@ -62,7 +151,10 @@ function pullFromTranslateMap(cc: i32): i32 {
     return load<u16>(ptr, translateMapOffset + 770);
 }
 
-
+/*
+ * Creates the translateMap for the language specific alphabet.
+ * This function is called upon instantiation of the module.
+ */
 function createTranslateMap(): void {
     let i: i32 = 0;
     let k: i32 = 1;
@@ -83,20 +175,20 @@ function createTranslateMap(): void {
         if (pullFromTranslateMap(first) !== 255) {
             // This is a substitution
             pushToTranslateMap(second, pullFromTranslateMap(first));
-            store<u16>(lct, second, 1280);
+            store<u16>(lct, second, alphabetOffset);
         } else if (secondInt === 255) {
             //  There's no such char yet in the TranslateMap
             pushToTranslateMap(first, k);
             if (second !== 0) {
                 // Set upperCase representation
                 pushToTranslateMap(second, k);
             }
-            store<u16>(lct, first, 1280);
+            store<u16>(lct, first, alphabetOffset);
             k += 1;
         } else {
             // Sigma
             pushToTranslateMap(first, k);
-            store<u16>(lct, first, 1280);
+            store<u16>(lct, first, alphabetOffset);
             k += 1;
         }
         lct += 2;
@@ -105,13 +197,25 @@ function createTranslateMap(): void {
     lct >>= 1;
 }
 
+/*
+ * Returns the bit at pos starting at startByte
+ * For our purposes the bits are numbered from left to right
+ */
 function getBitAtPos(pos: i32, startByte: i32): i32 {
     const numBytes: i32 = pos >> 3;
     // BitHack: pos % 8 === pos & (8 - 1)
     const numBits: i32 = 7 - (pos & 7);
     return (load<u8>(startByte + numBytes) >> numBits) & 1;
 }
 
+/*
+ * Computes the rank at pos starting at startByte.
+ * The rank is the number of bits set up to the given position.
+ * We first count the bits set in the 32-bit blocks,
+ * then we count the bits set until the final pos.
+ * Since byte ordering in WebAssembly is little-endian but we count bits from
+ * left to right, we need to byte-swap the last number read from memory.
+ */
 function rank1(pos: i32, startByte: i32): i32 {
     // (pos / 32) << 2 === (pos >> 5) << 2
     const numBytes: i32 = (pos >> 5) << 2;
@@ -131,34 +235,14 @@ function rank1(pos: i32, startByte: i32): i32 {
     return count;
 }
 
-/*
- * Loop based search for select0 in 32 bit dWord
- *
- * function get1PosIndDWord(dWord: i32, nth: i32): i32 {
- *     let count: i32 = 0;
- *     let pos: i32 = 0;
- *     const dWordBigEnd: i32 = bswap<i32>(dWord);
- *     while (pos < 32) {
- *         const mask: i32 = 1 << (31 - pos);
- *         if ((dWordBigEnd & mask) === mask) {
- *             count += 1;
- *         }
- *         if (count === nth) {
- *             break;
- *         }
- *         pos += 1;
- *     }
- *     return pos;
- * }
- */
-
 /*
  * Select the bit position (from the most-significant bit)
  * with the given count (rank)
  * Adapted for wasm from
  * https://graphics.stanford.edu/~seander/bithacks.html#SelectPosFromMSBRank
+ * This is faster than a loop based approach but the code is some bytes bigger.
  */
-function get1PosIndDWord(dWord: i32, nth: i32): i32 {
+function get1PosInDWord(dWord: i32, nth: i32): i32 {
     const v: i32 = bswap<i32>(dWord);
     let r: i32 = nth;
     let s: i32 = 0;
@@ -207,55 +291,60 @@ function select0(ith: i32, startByte: i32, endByte: i32): i32 {
     let count: i32 = 0;
     let dWord: i32 = 0;
     let dWord0Count: i32 = 0;
+    let run: i32 = 0;
+    let posInByte: i32 = 0;
+    let pos: i32 = 0;
+    let firstPos: i32 = 0;
+    let secndPos: i32 = 0;
 
-    // Find pos of ith 0 (first 0)
-    while (count < ith) {
-        if (bytePos > endByte) {
-            return 0;
+    while (run < 2) {
+        ith += run;
+        while (count < ith) {
+            if (bytePos > endByte) {
+                return 0;
+            }
+            dWord = ~load<u32>(bytePos);
+            dWord0Count = popcnt<i32>(dWord);
+            count += dWord0Count;
+            bytePos += 4;
         }
-        dWord = ~load<u32>(bytePos);
-        dWord0Count = popcnt<i32>(dWord);
-        count += dWord0Count;
-        bytePos += 4;
-    }
-    count -= dWord0Count;
-    bytePos -= 4;
-    const firstPosInByte: i32 = get1PosIndDWord(dWord, ith - count);
-    const firstPos: i32 = ((bytePos - startByte) << 3) + firstPosInByte;
-    // Find pos of ith + 1 0 (second 0)
-    ith += 1;
-    while (count < ith) {
-        if (bytePos > endByte) {
-            return 0;
+        count -= dWord0Count;
+        bytePos -= 4;
+        posInByte = get1PosInDWord(dWord, ith - count);
+        pos = ((bytePos - startByte) << 3) + posInByte;
+        if (run === 0) {
+            firstPos = pos;
+        } else {
+            secndPos = pos;
         }
-        dWord = ~load<u32>(bytePos);
-        dWord0Count = popcnt<i32>(dWord);
-        count += dWord0Count;
-        bytePos += 4;
+        run += 1;
     }
-    count -= dWord0Count;
-    bytePos -= 4;
-    const secndPosInByte: i32 = get1PosIndDWord(dWord, ith - count);
-    const secndPos: i32 = ((bytePos - startByte) << 3) + secndPosInByte;
-
     return (firstPos << 8) + (secndPos - firstPos - 1);
 }
 
+/*
+ * Get the values from memory and copy to hp if greater than value in hp
+ *
+ * To save space the values are stored in a compact form:
+ * Values range from 0 to 11, so we only need 4bits for each value
+ * Leading zeroes are compressed to a number, trailing zeroes are left out
+ * [0,0,0,1,0,2,0,0] -> [3,1,0,2] -> [0011,0001,0000,0010] -> [49,2]
+ */
 function extractValuesToHp(valIdx: i32, length: i32, startOffset: i32): void {
     let byteIdx: i32 = valIdx >> 1;
     let currentByte: i32 = load<u8>(byteIdx, va);
     let pos: i32 = valIdx & 1;
-    let valuesWritten: i32 = 0;
+    let leadingZeros: i32 = 0;
     let newValue: i32 = 0;
     if (pos) {
         // Second (right) half of byte
-        valuesWritten = currentByte & 15;
+        leadingZeros = currentByte & 15;
     } else {
         // First (left) half of byte
-        valuesWritten = currentByte >> 4;
+        leadingZeros = currentByte >> 4;
     }
     let i: i32 = 1;
-    let addr: i32 = startOffset + valuesWritten;
+    let addr: i32 = startOffset + leadingZeros;
     while (i < length) {
         if (pos) {
             byteIdx += 1;
@@ -273,6 +362,10 @@ function extractValuesToHp(valIdx: i32, length: i32, startOffset: i32): void {
     }
 }
 
+/*
+ * Method to define character substitutions
+ * e.g. é/É -> e
+ */
 export function subst(ccl: i32, ccu: i32, replcc: i32): i32 {
     const replccInt: i32 = pullFromTranslateMap(replcc);
     lct <<= 1;
@@ -282,13 +375,23 @@ export function subst(ccl: i32, ccu: i32, replcc: i32): i32 {
             pushToTranslateMap(ccu, replccInt);
         }
         // Add to alphabet
-        store<u16>(lct, ccl, 1280);
+        store<u16>(lct, ccl, alphabetOffset);
         lct += 2;
     }
     lct >>= 1;
     return lct;
 }
 
+/*
+ * The main hyphenate function
+ * lmin: leftmin - the number of characters before the first hyphenation point
+ * rmin: rightmin - the number of characters after the last hyphenation point
+ * hc: hyphenchar – the char to insert as hyphen (usually soft hyphen \00AD)
+ *
+ * Reads the word from memory[0] until 0 termination and writes back to memory
+ * starting at address 0.
+ * Returns the new length of the hyphenated word.
+ */
 export function hyphenate(lmin: i32, rmin: i32, hc: i32): i32 {
     let patternStartPos: i32 = 0;
     let wordLength: i32 = 0;

diff --git a/lang/af/src/mytransform.js b/lang/af/src/mytransform.js
@@ -2,53 +2,6 @@
 /* eslint-disable security/detect-non-literal-fs-filename */
 /* eslint-env node */
 
-/*
- * Memory layout (static)
- *
- * #--------------------# <- Offset 0
- * |        word        |
- * | 64 * Uint16 = 128B |
- * #--------------------# <- 128
- * |   translatedWord   |
- * |  64 * Uint8 = 64B  |
- * #--------------------# <- 192
- * |    hyphenPoints    |
- * |  64 * Uint8 = 64B  |
- * #--------------------# <- 256
- * |    translateMap    |
- * |         keys:      |
- * | 256 chars * 2Bytes |
- * |          +         |
- * |       values:      | 1024B
- * | 256 chars * 1Byte  |
- * |          +         |
- * |     collisions:    |
- * | 64 buckets * 4Byte |
- * #--------------------# <- 1280
- * |      alphabet      |
- * | 256 chars * 2Bytes | 512B
- * #--------------------# <- 1792
- * |    originalWord    |
- * | 64 * Uint16 = 128B |
- * #--------------------# <- 1920   - DATAOFFSET
- * |      licence       |           |
- * #--------------------#           |
- * |      alphabet      |           |
- * #--------------------#           |
- * |     STrieBits      |           |
- * #--------------------#           |
- * |     STrieChars     |           } pattern data (succinct value trie)
- * #--------------------#           |
- * |    hasValueBits    |           |
- * #--------------------#           |
- * |    valuesBitMap    |           |
- * #--------------------#           |
- * |       values       |           |
- * #--------------------# <- dataEnd-
- * |   alignment bytes  |
- * #--------------------# <- heapSize
- */
-
 "use strict";
 const {Transform} = require("assemblyscript/cli/transform");
 const fs = require("fs");

diff --git a/lang/as/as.wasm b/lang/as/as.wasm