Skip to content

Commit

Permalink
Straightforward implementation of IDNA mapping, for tests only
Browse files Browse the repository at this point in the history
As described in UTS #46, https://www.unicode.org/reports/tr46

This is working towards OkHttp's own implementation of what
IDN.toASCII() does on the JVM.
  • Loading branch information
swankjesse committed Apr 23, 2023
1 parent afcc2df commit 46db0d1
Show file tree
Hide file tree
Showing 4 changed files with 9,353 additions and 0 deletions.
26 changes: 26 additions & 0 deletions okhttp/src/jvmTest/java/okhttp3/internal/idn/IdnaMappingTable.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (C) 2023 Square, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3.internal.idn

import okio.BufferedSink

/** A table that can map individual code points as part of IDNA processing. */
interface IdnaMappingTable {
  /**
   * Applies this table to a single code point.
   *
   * @param codePoint the code point to look up.
   * @param sink receives whatever output the mapping produces for [codePoint].
   * @return true if [codePoint] was applied successfully, or false if it was disallowed.
   */
  fun apply(codePoint: Int, sink: BufferedSink): Boolean
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright (C) 2023 Square, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3.internal.idn

import assertk.assertThat
import assertk.assertions.isEqualTo
import assertk.assertions.isGreaterThan
import okio.Buffer
import okio.FileSystem
import okio.Path.Companion.toPath
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test

/** Exercises each mapping type of the [IdnaMappingTable] loaded from the plain text resource. */
class IdnaMappingTableTest {
  private lateinit var table: IdnaMappingTable

  /** Parses the plain text mapping table from test resources before each test. */
  @BeforeEach
  fun setUp() {
    val tablePath = "/okhttp3/internal/idna/IdnaMappingTable.txt".toPath()
    table = FileSystem.RESOURCES.read(tablePath) {
      readPlainTextIdnaMappingTable()
    }
  }

  @Test
  fun regularMappings() {
    assertThat("hello".map()).isEqualTo("hello")
    assertThat("hello-world".map()).isEqualTo("hello-world")
    assertThat("HELLO".map()).isEqualTo("hello")
    assertThat("Hello".map()).isEqualTo("hello")
    assertThat("¼".map()).isEqualTo("1⁄4")
  }

  @Test
  fun deviations() {
    assertThat("ß".map()).isEqualTo("ss")
    assertThat("ς".map()).isEqualTo("σ")
    assertThat("\u200c".map()).isEqualTo("")
    assertThat("\u200d".map()).isEqualTo("")
  }

  @Test
  fun ignored() {
    assertThat("\u200b".map()).isEqualTo("")
    assertThat("\ufeff".map()).isEqualTo("")
  }

  @Test
  fun disallowed() {
    assertThat("\u0080".mapExpectingErrors()).isEqualTo("")
  }

  @Test
  fun disallowedStd3Valid() {
    assertThat("/".map()).isEqualTo("/")
  }

  @Test
  fun disallowedStd3Mapped() {
    assertThat("\u00b8".map()).isEqualTo("\u0020\u0327")
  }

  /** Maps every code point of this string, failing if the table rejects any of them. */
  private fun String.map(): String {
    val mapped = Buffer()
    codePoints().toArray().forEach { codePoint ->
      require(table.apply(codePoint, mapped))
    }
    return mapped.readUtf8()
  }

  /**
   * Maps every code point of this string, asserting that the table rejects at least one of them.
   * Returns whatever output the accepted code points produced.
   */
  private fun String.mapExpectingErrors(): String {
    val mapped = Buffer()
    val errorCount = codePoints().toArray().count { codePoint ->
      !table.apply(codePoint, mapped)
    }
    assertThat(errorCount).isGreaterThan(0)
    return mapped.readUtf8()
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
/*
* Copyright (C) 2023 Square, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3.internal.idn

import okio.Buffer
import okio.BufferedSink
import okio.BufferedSource
import okio.ByteString
import okio.ByteString.Companion.encodeUtf8
import okio.IOException
import okio.Options

/**
 * A decoded [mapping table] that can perform the [mapping step] of IDNA processing.
 *
 * This implementation is optimized for readability over efficiency.
 *
 * [mapping table]: https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
 * [mapping step]: https://www.unicode.org/reports/tr46/#ProcessingStepMap
 *
 * @param mappings the rows of the table. [apply] binary searches this list, so the rows must be
 *     ordered by code point and their ranges must not overlap.
 */
class PlainTextIdnaMappingTable internal constructor(
  private val mappings: List<Mapping>,
) : IdnaMappingTable {
  /**
   * Looks up the row whose range contains [codePoint] and writes its mapped form to [sink].
   *
   * Ignored code points write nothing. Mapped and deviation code points write the row's
   * replacement bytes. Valid code points are written through unchanged.
   *
   * @return false if [codePoint] is disallowed, in which case nothing is written; true otherwise.
   * @throws IllegalArgumentException if no row of the table covers [codePoint].
   */
  override fun apply(codePoint: Int, sink: BufferedSink): Boolean {
    val index = mappings.binarySearch {
      when {
        it.sourceCodePoint1 < codePoint -> -1
        it.sourceCodePoint0 > codePoint -> 1
        else -> 0
      }
    }

    // binarySearch() returns a negative value when no row matches. Fail with a useful message
    // instead of letting the list lookup below throw an opaque index-out-of-bounds error.
    require(index in mappings.indices) { "unexpected code point: $codePoint" }

    val mapping = mappings[index]
    when (mapping.type) {
      TYPE_DISALLOWED -> return false
      TYPE_DEVIATION, TYPE_MAPPED, TYPE_DISALLOWED_STD3_MAPPED -> {
        sink.write(mapping.mappedTo)
      }
      TYPE_DISALLOWED_STD3_VALID, TYPE_VALID -> {
        sink.writeUtf8CodePoint(codePoint)
      }
      TYPE_IGNORED -> Unit
    }

    return true
  }
}


// Indexes of each delimiter within [optionsDelimeter]. The parser compares the result of
// `select()` against these.
private const val DELIMITER_DOT = 0
private const val DELIMITER_SPACE = 1
private const val DELIMITER_SEMICOLON = 2
private const val DELIMITER_HASH = 3
private const val DELIMITER_NEWLINE = 4

/** The delimiters of the table file, listed in the same order as the `DELIMITER_*` constants. */
private val optionsDelimeter = Options.of(
  *listOf(
    ".", // DELIMITER_DOT.
    " ", // DELIMITER_SPACE.
    ";", // DELIMITER_SEMICOLON.
    "#", // DELIMITER_HASH.
    "\n", // DELIMITER_NEWLINE.
  ).map { it.encodeUtf8() }.toTypedArray(),
)

/** Matches a single `.` only, at index [DELIMITER_DOT]. */
private val optionsDot = Options.of(".".encodeUtf8())

// Indexes of each mapping type within [optionsType]. These are also the values stored in
// [Mapping.type].
private const val TYPE_DEVIATION = 0
private const val TYPE_DISALLOWED = 1
private const val TYPE_DISALLOWED_STD3_MAPPED = 2
private const val TYPE_DISALLOWED_STD3_VALID = 3
private const val TYPE_IGNORED = 4
private const val TYPE_MAPPED = 5
private const val TYPE_VALID = 6

/**
 * The mapping type names, listed in the same order as the `TYPE_*` constants. Each name includes
 * a trailing space, so `disallowed ` does not match the longer `disallowed_STD3_*` names.
 */
private val optionsType = Options.of(
  *listOf(
    "deviation ", // TYPE_DEVIATION.
    "disallowed ", // TYPE_DISALLOWED.
    "disallowed_STD3_mapped ", // TYPE_DISALLOWED_STD3_MAPPED.
    "disallowed_STD3_valid ", // TYPE_DISALLOWED_STD3_VALID.
    "ignored ", // TYPE_IGNORED.
    "mapped ", // TYPE_MAPPED.
    "valid ", // TYPE_VALID.
  ).map { it.encodeUtf8() }.toTypedArray(),
)

/** Discards consecutive space characters. Stops at the first other byte or when exhausted. */
private fun BufferedSource.skipWhitespace() {
  val space = ' '.code.toByte()
  // exhausted() is false only if at least one byte is buffered, so buffer[0] is safe to read.
  while (!exhausted() && buffer[0] == space) {
    skip(1L)
  }
}

/**
 * Discards everything up to and including the next `\n`. If there is no further newline, discards
 * everything that remains.
 */
private fun BufferedSource.skipRestOfLine() {
  val newlineIndex = indexOf('\n'.code.toByte())
  if (newlineIndex == -1L) {
    skip(buffer.size) // Exhaust this source.
  } else {
    skip(newlineIndex + 1)
  }
}

/**
 * Reads lines from `IdnaMappingTable.txt`.
 *
 * Comment lines are either blank or start with a `#` character. Lines may also end with a comment.
 * All comments are ignored.
 *
 * Regular lines contain fields separated by semicolons.
 *
 * The first element on each line is a single hex code point (like 0041) or a hex code point range
 * (like 0030..0039).
 *
 * The second element on each line is a mapping type, like `valid` or `mapped`.
 *
 * For lines that contain a mapping target, the next thing is a sequence of hex code points (like
 * 0031 2044 0034). The sequence ends at a trailing comment, at the end of the line, or at the end
 * of the input. It may be empty.
 *
 * All other data is ignored.
 *
 * @return a table holding one [Mapping] per regular line, in the order the lines were read.
 * @throws IOException if a line starts with an unexpected delimiter, is missing an expected `;` or
 *     `..`, names an unknown mapping type, or has an unexpected delimiter in its mapping target.
 */
fun BufferedSource.readPlainTextIdnaMappingTable(): PlainTextIdnaMappingTable {
  // Scratch space for the current line's mapping target. It is drained into each Mapping.
  val mappedTo = Buffer()
  val result = mutableListOf<Mapping>()

  while (!exhausted()) {
    // Skip comment and empty lines.
    when (select(optionsDelimeter)) {
      DELIMITER_HASH -> {
        skipRestOfLine()
        continue
      }
      DELIMITER_NEWLINE -> {
        continue
      }
      DELIMITER_DOT, DELIMITER_SPACE, DELIMITER_SEMICOLON -> {
        throw IOException("unexpected delimiter")
      }
    }

    // "002F" or "0000..002C"
    val sourceCodePoint0 = readHexadecimalUnsignedLong()
    val sourceCodePoint1 = when (select(optionsDot)) {
      DELIMITER_DOT -> {
        if (readByte() != '.'.code.toByte()) throw IOException("expected '..'")
        readHexadecimalUnsignedLong()
      }
      else -> sourceCodePoint0
    }

    skipWhitespace()
    if (readByte() != ';'.code.toByte()) throw IOException("expected ';'")

    // "valid" or "mapped"
    skipWhitespace()
    val type = select(optionsType)

    when (type) {
      TYPE_DEVIATION, TYPE_MAPPED, TYPE_DISALLOWED_STD3_MAPPED -> {
        skipWhitespace()
        if (readByte() != ';'.code.toByte()) throw IOException("expected ';'")

        // Like "0061" or "0031 2044 0034".
        while (true) {
          skipWhitespace()

          // The last line may end without a comment or a newline.
          if (exhausted()) break

          // Peek at the next byte rather than consuming it. A comment or a newline both end the
          // mapping target, and leaving them in place lets skipRestOfLine() below consume exactly
          // the remainder of this line and nothing from the next one.
          when (buffer[0].toInt().toChar()) {
            '#', '\n' -> break
            '.', ';' -> throw IOException("unexpected delimiter")
          }

          mappedTo.writeUtf8CodePoint(readHexadecimalUnsignedLong().toInt())
        }
      }

      TYPE_DISALLOWED, TYPE_DISALLOWED_STD3_VALID, TYPE_IGNORED, TYPE_VALID -> Unit

      else -> throw IOException("unexpected type")
    }

    skipRestOfLine()

    result += Mapping(
      sourceCodePoint0.toInt(),
      sourceCodePoint1.toInt(),
      type,
      mappedTo.readByteString(), // Also empties the scratch buffer for the next line.
    )
  }

  return PlainTextIdnaMappingTable(result)
}

/**
 * One row of the mapping table, covering the inclusive code point range
 * [sourceCodePoint0]..[sourceCodePoint1].
 *
 * @property sourceCodePoint0 the first code point covered by this row.
 * @property sourceCodePoint1 the last code point covered by this row. This is equal to
 *     [sourceCodePoint0] when the row covers a single code point.
 * @property type the index of the mapping type, as selected when the row was parsed.
 * @property mappedTo the UTF-8 bytes written in place of a covered code point. This is empty for
 *     rows that have no mapping target.
 */
internal data class Mapping(
  val sourceCodePoint0: Int,
  val sourceCodePoint1: Int,
  val type: Int,
  val mappedTo: ByteString,
)
Loading

0 comments on commit 46db0d1

Please sign in to comment.