Skip to content

Commit

Permalink
Code Point Mapper; #529
Browse files Browse the repository at this point in the history
  • Loading branch information
the-moisrex committed Apr 7, 2024
1 parent d6c3ae4 commit 15152c0
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 14 deletions.
90 changes: 77 additions & 13 deletions webpp/uri/idna/details/generate_idna_mapping_table.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,54 @@ const splitLine = line => line.split(';').map(seg => seg.trim());
/**
 * Pull the dotted three-part version number (e.g. "15.1.0") that follows the
 * "Version: " marker out of the file text.
 *
 * Throws a TypeError when the marker is absent, because the match result is
 * null in that case.
 */
const findVersion = fileContent => {
  const versionMatch = fileContent.match(/Version: (\d+\.\d+\.\d+)/);
  return versionMatch[1];
};
/**
 * Return everything after the "Date: " marker up to (not including) the end
 * of that line.
 *
 * Throws a TypeError when the marker is absent, because the match result is
 * null in that case.
 */
const findDate = fileContent => {
  const dateMatch = fileContent.match(/Date: ([^\n\r]+)/);
  return dateMatch[1];
};
/**
 * Parse a hexadecimal code point field, either a single value ("00AD") or a
 * range written as "XXXX..YYYY", into a two-element array [first, last].
 *
 * When no upper bound is given (or it parses to a falsy number) the lower
 * bound is reused, so a single code point yields [cp, cp].
 */
const parseCodePoints = codePoints => {
  const [lowHex, highHex] = codePoints.split('..');
  const low = parseInt(lowHex, 16);
  const high = parseInt(highHex, 16) || low;
  return [low, high];
};

/**
 * Convert a code point field into numeric bounds.
 *
 * The field is either one hexadecimal code point or two of them joined by
 * "..". The result is always [rangeStart, rangeEnd]; a missing (or falsy)
 * upper bound falls back to the lower bound.
 */
const parseCodePoints = codePoints => {
  const bounds = codePoints.split('..').map(hex => parseInt(hex, 16));
  const first = bounds[0];
  const last = bounds[1] || first;
  return [first, last];
};
/**
 * Parse a list of hexadecimal code points separated by single spaces into an
 * array of numbers. Segments that are not valid hexadecimal become NaN.
 */
const parseMappedCodePoints = codePoints => {
  const parsed = [];
  for (const hex of codePoints.split(" ")) {
    parsed.push(parseInt(hex, 16));
  }
  return parsed;
};

/**
 * Bit-packed flag table: one bit per code point, eight flags per byte.
 *
 * Flags are stored in the order they are appended (bit 0 of byte 0 first), so
 * the bit position of a code point equals the number of code points appended
 * before it. For bit N to describe code point N, callers must append every
 * range in ascending order and without gaps.
 */
class codePointMapper {
  /**
   * @param {number} max Capacity of the backing buffer in bytes (8 flags each).
   */
  constructor(max) {
    this.bytes = new Uint8Array(max);
    this.index = 0; // number of bits appended so far
  }

  /**
   * Append one flag for every code point in the inclusive range [start, end].
   *
   * The bounds are inclusive because that is what the mapping-file parser
   * produces: a single code point arrives as start === end and must still
   * occupy one bit, and "XXXX..YYYY" includes YYYY.
   *
   * @param {number} start First code point of the range.
   * @param {number} end Last code point of the range (inclusive).
   * @param {boolean} [isMapped=true] Value of the flag stored for each code
   *     point; falsy leaves the bits at 0.
   * @throws {RangeError} If the bounds are not integers, if end < start, or
   *     if the range does not fit in the remaining capacity.
   */
  append(start, end, isMapped = true) {
    // A reversed or NaN range must be rejected up front: it has no sensible
    // length, and a "loop until start equals end" would never terminate.
    if (!Number.isInteger(start) || !Number.isInteger(end) || end < start) {
      throw new RangeError(`Invalid code point range: ${start}..${end}`);
    }

    const count = end - start + 1;
    const capacity = this.bytes.length * 8;
    // Out-of-bounds typed-array writes are silently dropped, which would
    // corrupt the table without any signal; fail loudly instead.
    if (this.index + count > capacity) {
      throw new RangeError(
          `Code point table overflow: ${this.index + count} bits needed, ` +
          `${capacity} available`);
    }

    if (isMapped) {
      const stop = this.index + count;
      for (let bit = this.index; bit < stop; ++bit) {
        this.bytes[Math.floor(bit / 8)] |= 1 << (bit % 8);
      }
    }
    // Unset flags need no write: the buffer is zero-initialised.
    this.index += count;
  }

  /// Get the length of the table (number of bytes in use)
  get length() { return Math.ceil(this.index / 8); }

  /// Get how many bits are in the whole table
  get bitLength() { return this.index; }

  /**
   * Emit the used bytes as decimal literals, each followed by ", ", with a
   * newline after every `cols` values.
   *
   * @param {(chunk: string) => void} appendFunc Receives each output chunk.
   * @param {number} [cols=16] Number of values per output line.
   */
  serializeTable(appendFunc, cols = 16) {
    const byteCount = this.length;
    for (let pos = 0; pos < byteCount; ++pos) {
      appendFunc(`${this.bytes[pos]}, `);
      if ((pos + 1) % cols === 0) {
        appendFunc('\n');
      }
    }
  }
}

/**
 * Build the code point flag table from the text of the mapping file and hand
 * it to createTableFile.
 *
 * Visible steps: split the text into lines, log the version and creation
 * date, strip comments from each line, append a flag per status to a
 * codePointMapper, log the table sizes and call createTableFile.
 *
 * Parts of this function are not shown in this view (where `version`,
 * `creationDate`, `status`, `rangeStart` and `rangeEnd` are computed), so the
 * notes below only cover what is visible.
 *
 * @param fileContent Full text of the mapping file.
 */
function processCachedFile(fileContent) {
const lines = fileContent.split('\n');

Expand All @@ -78,7 +115,8 @@ function processCachedFile(fileContent) {
console.log(`Version: ${version}`);
console.log(`Creation Date: ${creationDate}`);

const table = lines.map((line, index) => {
// 200000 is passed to the codePointMapper constructor as the buffer size in
// bytes.
const table = new codePointMapper(200000);
lines.forEach((line, index) => {
line = cleanComments(line)

// ignore empty lines
Expand All @@ -92,18 +130,22 @@ function processCachedFile(fileContent) {

// Flag passed to table.append: false for 'deviation' and 'valid', true for
// the mapped, ignored and disallowed statuses.
switch (status) {
// NOTE(review): as shown here this case appends nothing. codePointMapper
// assigns bit positions in call order, so skipping a range would shift the
// bit of every later code point - confirm this is intended.
case 'disallowed_STD3_valid':
break;
case 'deviation': // https://www.unicode.org/reports/tr46/#Deviations
// Deviations are considered valid in IDNA2008 and UTS #46.
case 'valid':
table.append(rangeStart, rangeEnd, false);
break;
case 'disallowed_STD3_mapped':
table.append(rangeStart, rangeEnd, true);
break;
case 'mapped':
table.append(rangeStart, rangeEnd, true);
break;
case 'ignored':
table.append(rangeStart, rangeEnd, true);
break;
case 'disallowed':
table.append(rangeStart, rangeEnd, true);
break;
default:
console.error(`Invalid 'status' found: ${status}; line: ${line}`);
Expand All @@ -117,14 +159,17 @@ function processCachedFile(fileContent) {
return `${codePoints}`;
});

console.log(`Table Length: ${table.length}`);
console.log(`Table Bit Length: ${table.bitLength}`);

// NOTE(review): createTableFile is async and is not awaited here, so the
// completion message below can be logged before the file has been written,
// and a write failure would surface as an unhandled rejection.
createTableFile(version, creationDate, table);

console.log('File processing complete.');
}

/**
 * Write the generated C++ header to outFilePath.
 *
 * The file is produced in steps: the opening boilerplate is written with
 * fs.writeFile, then the serialized table bytes and the closing boilerplate
 * are added with fs.appendFile. Finally clang-format is run in place on the
 * output file; a formatting failure is only logged.
 *
 * version / creationDate: presumably interpolated into the generated file's
 * header comment - that part of the template is not shown here, confirm.
 * table: must provide a length property (used as the std::array size) and a
 * serializeTable(appendFunc) method (used to collect the array elements).
 *
 * NOTE(review): the exec call is callback-based and not awaited, so the
 * returned promise settles before formatting has finished.
 * NOTE(review): outFilePath is interpolated into a shell command string; if
 * it can ever come from outside this script, prefer execFile with an
 * argument array.
 */
const createTableFile =
async (version, creationDate, table) => {
const fileContent = `
const begContent = `
/**
* Attention: Auto-generated file, don't modify.
*
Expand All @@ -141,14 +186,33 @@ const createTableFile =
#ifndef WEBPP_URI_IDNA_MAPPING_TABLE_HPP
#define WEBPP_URI_IDNA_MAPPING_TABLE_HPP
#include <array>
#include <cstdint>
namespace webpp::uri::idna::details {
static constexpr std::array<std::uint8_t, ${
table.length}> idna_mapping_table{
`;

const endContent = `
};
} // webpp::uri::idna::details
#endif // WEBPP_URI_IDNA_MAPPING_TABLE_HPP
`;

await fs.writeFile(outFilePath, fileContent);
await fs.writeFile(outFilePath, begContent);
let content = "";
table.serializeTable(line => content += line);
await fs.appendFile(outFilePath, content);
await fs.appendFile(outFilePath, endContent);

// Reformat the file
require('child_process').exec(`clang-format -i "${outFilePath}"`, err => {
if (err) {
console.error("Could not re-format the file.", err);
}
});
}

start();
16 changes: 15 additions & 1 deletion webpp/uri/idna/idna_ascii.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,20 @@
#ifndef WEBPP_URI_IDNA_ASCII_HPP
#define WEBPP_URI_IDNA_ASCII_HPP

namespace webpp::uri::idna {}
namespace webpp::uri::idna {

// Status codes; presumably the result of domain_to_ascii (the function does
// not return this type yet - confirm once it is implemented).
// NOTE(review): "dissallowed" is misspelled; renaming the enumerator would
// change the public name, so it is only flagged here.
enum struct domain_to_ascii_status {
success = 0,
invalid_code_point,
dissallowed_code_point_found
};

/**
* Placeholder for the domain-to-ASCII conversion: it currently takes no
* arguments and has an empty body, so it performs no work.
*
* RFC: https://www.rfc-editor.org/rfc/rfc3490.html#section-4.1
*/
static constexpr auto domain_to_ascii() {}

} // namespace webpp::uri::idna

#endif // WEBPP_URI_IDNA_ASCII_HPP

0 comments on commit 15152c0

Please sign in to comment.