Skip to content

Commit

Permalink
chore: recompile all wasm
Browse files Browse the repository at this point in the history
  • Loading branch information
mnater committed Oct 10, 2021
1 parent 451674b commit 90facef
Show file tree
Hide file tree
Showing 276 changed files with 11,178 additions and 7,314 deletions.
Binary file modified lang/af/af.wasm
Binary file not shown.
221 changes: 162 additions & 59 deletions lang/af/src/hyphenEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,106 @@
* declare function logc(arg0: i32): void;
*/

/*
* MEMORY LAYOUT (static)
*
* #--------------------# <- Offset 0
* | word |
* | 64 * Uint16 = 128B |
* #--------------------# <- 128 (tw)
* | translatedWord |
* | 64 * Uint8 = 64B |
* #--------------------# <- 192 (hp)
* | hyphenPoints |
* | 64 * Uint8 = 64B |
* #--------------------# <- 256 (translateMapOffset)
* | translateMap |
* | keys: |
* | 256 chars * 2Bytes |
* | + |
* | values: | 1024B
* | 256 chars * 1Byte |
* | + |
* | collisions: |
* | 64 buckets * 4Byte |
* #--------------------# <- 1280 (alphabetOffset)
* | alphabet |
* | 256 chars * 2Bytes | 512B
* #--------------------# <- 1792 (originalWordOffset)
* | originalWord |
* | 64 * Uint16 = 128B |
* #--------------------# <- 1920 - DATAOFFSET
* | licence | |
* #--------------------# |
* | alphabet | (ao) |
* #--------------------# |
* | STrieBits | (bm) | (bm)
* #--------------------# |
* | STrieChars | (cm) } pattern data (succinct value trie)
* #--------------------# |
* | hasValueBits | (hv) |
* #--------------------# |
* | valuesBitMap | (vm) |
* #--------------------# |
* | values | (va) |
* #--------------------# <- dataEnd-
* | alignment bytes |
* #--------------------# <- heapSize
*
* USAGE:
* Each module created from this source is language specific.
* 1. Write a UTF-16 String to memory starting at index 0 (64 chars max)
* 2. Call hyphenate(), which returns the length of the hyphenated string
* 3. Read the hyphenated UTF-16 string from memory starting at index 0
*
* INTERNALS:
* Upon instantiation the module builds a translate map that maps UTF-16 chars
* to 8bit numbers.
* This limits the size of the alphabet to a theoretical maximum of
* 255 characters (practically the number is lower to prevent hash collisions).
* Hyphenation patterns are stored in and read from a static succinct trie.
*/

/*
* Import the offsets and left-/rightmin of the language specific data.
* The import file is created by the createWasmData.js script
*/
import {ao, bm, cm, hv, lm, rm, va, vm} from "./g";

/*
* Export the variables essential for the user of the module:
* lmi: leftmin - the number of characters before the first hyphenation point
* rmi: rightmin - the number of characters after the last hyphenation point
* lct: lettercount - number of letters in the alphabet
*/
// leftmin, re-exported from the generated language data ("./g")
export const lmi: i32 = lm;
// rightmin, re-exported from the generated language data ("./g")
export const rmi: i32 = rm;
// lettercount; starts at 0 and is updated while the alphabet is filled
export let lct: i32 = 0;

/*
* Define the offsets into memory
*/
// Start of translatedWord (64 * Uint8), see MEMORY LAYOUT
const tw: i32 = 128;
// Start of hyphenPoints (64 * Uint8), see MEMORY LAYOUT
const hp: i32 = 192;
// Start of translateMap (keys, values and collision buckets; 1024B)
const translateMapOffset:i32 = 256;
// Start of the alphabet (256 chars * 2 bytes = 512B)
const alphabetOffset: i32 = 1280;
// Start of originalWord (64 * Uint16 = 128B)
const originalWordOffset: i32 = 1792;

/*
* Minimalistic hash function to map 16-bit to 8-bit
*
* The magic numbers are found by tools/searchHashSeeds.*
* with the goal of having as few collisions as possible.
*/
function hashCharCode(cc: i32): i32 {
    // Scale by the first seed, reduce modulo the second seed,
    // then keep the low 8 bits so the result lies in [0, 256[
    const scaled: i32 = 19441 * cc;
    const reduced: i32 = scaled % 19559;
    return reduced & 255;
}

/*
* Store a k/v pair in translateMap
* k is the utf-16 char
* v is its 8-bit representation
*/
function pushToTranslateMap(cc: i32, id: i32): void {
let ptr: i32 = hashCharCode(cc) << 1;
if (load<u16>(ptr, translateMapOffset) === 0) {
Expand All @@ -40,6 +125,10 @@ function pushToTranslateMap(cc: i32, id: i32): void {
}
}

/*
* Retrieve the 8-bit value for a UTF-16 char
* Returns 255 if the char is not in the translateMap
*/
function pullFromTranslateMap(cc: i32): i32 {
let ptr: i32 = hashCharCode(cc) << 1;
const val = load<u16>(ptr, translateMapOffset);
Expand All @@ -62,7 +151,10 @@ function pullFromTranslateMap(cc: i32): i32 {
return load<u16>(ptr, translateMapOffset + 770);
}


/*
* Creates the translateMap for the language specific alphabet.
* This function is called upon instantiation of the module.
*/
function createTranslateMap(): void {
let i: i32 = 0;
let k: i32 = 1;
Expand All @@ -83,20 +175,20 @@ function createTranslateMap(): void {
if (pullFromTranslateMap(first) !== 255) {
// This is a substitution
pushToTranslateMap(second, pullFromTranslateMap(first));
store<u16>(lct, second, 1280);
store<u16>(lct, second, alphabetOffset);
} else if (secondInt === 255) {
// There's no such char yet in the TranslateMap
pushToTranslateMap(first, k);
if (second !== 0) {
// Set upperCase representation
pushToTranslateMap(second, k);
}
store<u16>(lct, first, 1280);
store<u16>(lct, first, alphabetOffset);
k += 1;
} else {
// Sigma
pushToTranslateMap(first, k);
store<u16>(lct, first, 1280);
store<u16>(lct, first, alphabetOffset);
k += 1;
}
lct += 2;
Expand All @@ -105,13 +197,25 @@ function createTranslateMap(): void {
lct >>= 1;
}

/*
* Returns the bit at pos starting at startByte
* For our purposes the bits are numbered from left to right
*/
function getBitAtPos(pos: i32, startByte: i32): i32 {
    // Index of the byte holding the requested bit: pos / 8
    const byteOffset: i32 = pos >> 3;
    // Bits are numbered from the left, so in-byte index 0 is the
    // most significant bit: 7 - (pos % 8) === ~pos & 7
    const shift: i32 = ~pos & 7;
    return (load<u8>(startByte + byteOffset) >> shift) & 1;
}

/*
* Computes the rank at pos starting at startByte.
* The rank is the number of bits set up to the given position.
* We first count the bits set in the 32-bit blocks,
* then we count the bits set until the final pos.
* Since byte ordering in webassembly is little-endian, but we count from
* left to right we need to byteswap the last number read from memory.
*/
function rank1(pos: i32, startByte: i32): i32 {
// (pos / 32) << 2 === (pos >> 5) << 2
const numBytes: i32 = (pos >> 5) << 2;
Expand All @@ -131,34 +235,14 @@ function rank1(pos: i32, startByte: i32): i32 {
return count;
}

/*
* Loop based search for select0 in 32 bit dWord
*
* function get1PosIndDWord(dWord: i32, nth: i32): i32 {
* let count: i32 = 0;
* let pos: i32 = 0;
* const dWordBigEnd: i32 = bswap<i32>(dWord);
* while (pos < 32) {
* const mask: i32 = 1 << (31 - pos);
* if ((dWordBigEnd & mask) === mask) {
* count += 1;
* }
* if (count === nth) {
* break;
* }
* pos += 1;
* }
* return pos;
* }
*/

/*
* Select the bit position (from the most-significant bit)
* with the given count (rank)
* Adapted for wasm from
* https://graphics.stanford.edu/~seander/bithacks.html#SelectPosFromMSBRank
* This is faster than a loop based approach but the code is some bytes bigger.
*/
function get1PosIndDWord(dWord: i32, nth: i32): i32 {
function get1PosInDWord(dWord: i32, nth: i32): i32 {
const v: i32 = bswap<i32>(dWord);
let r: i32 = nth;
let s: i32 = 0;
Expand Down Expand Up @@ -207,55 +291,60 @@ function select0(ith: i32, startByte: i32, endByte: i32): i32 {
let count: i32 = 0;
let dWord: i32 = 0;
let dWord0Count: i32 = 0;
let run: i32 = 0;
let posInByte: i32 = 0;
let pos: i32 = 0;
let firstPos: i32 = 0;
let secndPos: i32 = 0;

// Find pos of ith 0 (first 0)
while (count < ith) {
if (bytePos > endByte) {
return 0;
while (run < 2) {
ith += run;
while (count < ith) {
if (bytePos > endByte) {
return 0;
}
dWord = ~load<u32>(bytePos);
dWord0Count = popcnt<i32>(dWord);
count += dWord0Count;
bytePos += 4;
}
dWord = ~load<u32>(bytePos);
dWord0Count = popcnt<i32>(dWord);
count += dWord0Count;
bytePos += 4;
}
count -= dWord0Count;
bytePos -= 4;
const firstPosInByte: i32 = get1PosIndDWord(dWord, ith - count);
const firstPos: i32 = ((bytePos - startByte) << 3) + firstPosInByte;
// Find pos of ith + 1 0 (second 0)
ith += 1;
while (count < ith) {
if (bytePos > endByte) {
return 0;
count -= dWord0Count;
bytePos -= 4;
posInByte = get1PosInDWord(dWord, ith - count);
pos = ((bytePos - startByte) << 3) + posInByte;
if (run === 0) {
firstPos = pos;
} else {
secndPos = pos;
}
dWord = ~load<u32>(bytePos);
dWord0Count = popcnt<i32>(dWord);
count += dWord0Count;
bytePos += 4;
run += 1;
}
count -= dWord0Count;
bytePos -= 4;
const secndPosInByte: i32 = get1PosIndDWord(dWord, ith - count);
const secndPos: i32 = ((bytePos - startByte) << 3) + secndPosInByte;

return (firstPos << 8) + (secndPos - firstPos - 1);
}

/*
* Get the values from memory and copy to hp if greater than value in hp
*
* To save space the values are stored in a compact form:
* Values range from 0 to 11, so we only need 4bits for each value
* Leading zeroes are compressed to a number, trailing zeroes are left out
* [0,0,0,1,0,2,0,0] -> [3,1,0,2] -> [0011,0001,0000,0010] -> [49,2]
*/
function extractValuesToHp(valIdx: i32, length: i32, startOffset: i32): void {
let byteIdx: i32 = valIdx >> 1;
let currentByte: i32 = load<u8>(byteIdx, va);
let pos: i32 = valIdx & 1;
let valuesWritten: i32 = 0;
let leadingZeros: i32 = 0;
let newValue: i32 = 0;
if (pos) {
// Second (right) half of byte
valuesWritten = currentByte & 15;
leadingZeros = currentByte & 15;
} else {
// First (left) half of byte
valuesWritten = currentByte >> 4;
leadingZeros = currentByte >> 4;
}
let i: i32 = 1;
let addr: i32 = startOffset + valuesWritten;
let addr: i32 = startOffset + leadingZeros;
while (i < length) {
if (pos) {
byteIdx += 1;
Expand All @@ -273,6 +362,10 @@ function extractValuesToHp(valIdx: i32, length: i32, startOffset: i32): void {
}
}

/*
* Method to define character substitutions
* e.g. é/É -> e
*/
export function subst(ccl: i32, ccu: i32, replcc: i32): i32 {
const replccInt: i32 = pullFromTranslateMap(replcc);
lct <<= 1;
Expand All @@ -282,13 +375,23 @@ export function subst(ccl: i32, ccu: i32, replcc: i32): i32 {
pushToTranslateMap(ccu, replccInt);
}
// Add to alphabet
store<u16>(lct, ccl, 1280);
store<u16>(lct, ccl, alphabetOffset);
lct += 2;
}
lct >>= 1;
return lct;
}

/*
* The main hyphenate function
* lmin: leftmin - the number of characters before the first hyphenation point
* rmin: rightmin - the number of characters after the last hyphenation point
* hc: hyphenchar – the char to insert as hyphen (usually soft hyphen \00AD)
*
* Reads the word from memory[0] until 0 termination and writes back to memory
* starting at address 0.
* Returns the new length of the hyphenated word.
*/
export function hyphenate(lmin: i32, rmin: i32, hc: i32): i32 {
let patternStartPos: i32 = 0;
let wordLength: i32 = 0;
Expand Down
47 changes: 0 additions & 47 deletions lang/af/src/mytransform.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,6 @@
/* eslint-disable security/detect-non-literal-fs-filename */
/* eslint-env node */

/*
* Memory layout (static)
*
* #--------------------# <- Offset 0
* | word |
* | 64 * Uint16 = 128B |
* #--------------------# <- 128
* | translatedWord |
* | 64 * Uint8 = 64B |
* #--------------------# <- 192
* | hyphenPoints |
* | 64 * Uint8 = 64B |
* #--------------------# <- 256
* | translateMap |
* | keys: |
* | 256 chars * 2Bytes |
* | + |
* | values: | 1024B
* | 256 chars * 1Byte |
* | + |
* | collisions: |
* | 64 buckets * 4Byte |
* #--------------------# <- 1280
* | alphabet |
* | 256 chars * 2Bytes | 512B
* #--------------------# <- 1792
* | originalWord |
* | 64 * Uint16 = 128B |
* #--------------------# <- 1920 - DATAOFFSET
* | licence | |
* #--------------------# |
* | alphabet | |
* #--------------------# |
* | STrieBits | |
* #--------------------# |
* | STrieChars | } pattern data (succinct value trie)
* #--------------------# |
* | hasValueBits | |
* #--------------------# |
* | valuesBitMap | |
* #--------------------# |
* | values | |
* #--------------------# <- dataEnd-
* | alignment bytes |
* #--------------------# <- heapSize
*/

"use strict";
const {Transform} = require("assemblyscript/cli/transform");
const fs = require("fs");
Expand Down
Binary file modified lang/as/as.wasm
Binary file not shown.
Loading

0 comments on commit 90facef

Please sign in to comment.