-
Notifications
You must be signed in to change notification settings - Fork 9.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Straightforward implementation of IDNA mapping, for tests only
As described in UTS #46 (https://www.unicode.org/reports/tr46). This is working towards OkHttp's own implementation of what IDN.toASCII() does on the JVM.
- Loading branch information
1 parent
afcc2df
commit 46db0d1
Showing
4 changed files
with
9,353 additions
and
0 deletions.
There are no files selected for viewing
26 changes: 26 additions & 0 deletions
26
okhttp/src/jvmTest/java/okhttp3/internal/idn/IdnaMappingTable.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/* | ||
* Copyright (C) 2023 Square, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package okhttp3.internal.idn | ||
|
||
import okio.BufferedSink | ||
|
||
/** Maps individual code points and writes the mapped result to a sink. */
interface IdnaMappingTable {
  /**
   * Applies this table to [codePoint], writing any resulting output to [sink].
   *
   * @return true if [codePoint] was applied successfully, false if it was disallowed.
   */
  fun apply(codePoint: Int, sink: BufferedSink): Boolean
}
86 changes: 86 additions & 0 deletions
86
okhttp/src/jvmTest/java/okhttp3/internal/idn/IdnaMappingTableTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
/* | ||
* Copyright (C) 2023 Square, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package okhttp3.internal.idn | ||
|
||
import assertk.assertThat | ||
import assertk.assertions.isEqualTo | ||
import assertk.assertions.isGreaterThan | ||
import okio.Buffer | ||
import okio.FileSystem | ||
import okio.Path.Companion.toPath | ||
import org.junit.jupiter.api.BeforeEach | ||
import org.junit.jupiter.api.Test | ||
|
||
/** Checks code point mappings produced by the table read from `IdnaMappingTable.txt`. */
class IdnaMappingTableTest {
  private lateinit var table: IdnaMappingTable

  @BeforeEach
  fun setUp() {
    val tablePath = "/okhttp3/internal/idna/IdnaMappingTable.txt".toPath()
    table = FileSystem.RESOURCES.read(tablePath) {
      readPlainTextIdnaMappingTable()
    }
  }

  @Test fun regularMappings() {
    assertThat("hello".map()).isEqualTo("hello")
    assertThat("hello-world".map()).isEqualTo("hello-world")
    assertThat("HELLO".map()).isEqualTo("hello")
    assertThat("Hello".map()).isEqualTo("hello")
    assertThat("¼".map()).isEqualTo("1⁄4")
  }

  @Test fun deviations() {
    assertThat("ß".map()).isEqualTo("ss")
    assertThat("ς".map()).isEqualTo("σ")
    assertThat("\u200c".map()).isEqualTo("")
    assertThat("\u200d".map()).isEqualTo("")
  }

  @Test fun ignored() {
    assertThat("\u200b".map()).isEqualTo("")
    assertThat("\ufeff".map()).isEqualTo("")
  }

  @Test fun disallowed() {
    assertThat("\u0080".mapExpectingErrors()).isEqualTo("")
  }

  @Test fun disallowedStd3Valid() {
    assertThat("/".map()).isEqualTo("/")
  }

  @Test fun disallowedStd3Mapped() {
    assertThat("\u00b8".map()).isEqualTo("\u0020\u0327")
  }

  /** Maps every code point of this string, failing if the table rejects any of them. */
  private fun String.map(): String {
    val mapped = Buffer()
    codePoints().toArray().forEach { codePoint ->
      require(table.apply(codePoint, mapped))
    }
    return mapped.readUtf8()
  }

  /**
   * Maps every code point of this string and asserts that the table rejected at least one of
   * them. Returns whatever output was written for the code points that were accepted.
   */
  private fun String.mapExpectingErrors(): String {
    val mapped = Buffer()
    val rejectedCount = codePoints().toArray().count { codePoint ->
      !table.apply(codePoint, mapped)
    }
    assertThat(rejectedCount).isGreaterThan(0)
    return mapped.readUtf8()
  }
}
214 changes: 214 additions & 0 deletions
214
okhttp/src/jvmTest/java/okhttp3/internal/idn/PlainTextIdnaMappingTable.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
/* | ||
* Copyright (C) 2023 Square, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package okhttp3.internal.idn | ||
|
||
import okio.Buffer | ||
import okio.BufferedSink | ||
import okio.BufferedSource | ||
import okio.ByteString | ||
import okio.ByteString.Companion.encodeUtf8 | ||
import okio.IOException | ||
import okio.Options | ||
|
||
/**
 * A decoded [mapping table] that can perform the [mapping step] of IDNA processing.
 *
 * This implementation is optimized for readability over efficiency.
 *
 * Lookups use a binary search over [mappings], so the list must be sorted by code point range and
 * the ranges must not overlap.
 *
 * [mapping table]: https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
 * [mapping step]: https://www.unicode.org/reports/tr46/#ProcessingStepMap
 */
class PlainTextIdnaMappingTable internal constructor(
  private val mappings: List<Mapping>,
) : IdnaMappingTable {
  /**
   * Looks up the mapping whose range contains [codePoint] and writes its result to [sink].
   *
   * @return true if [codePoint] was applied successfully, false if it was disallowed.
   * @throws IllegalArgumentException if no mapping range contains [codePoint].
   * @throws IllegalStateException if the matching mapping has an unknown type.
   */
  override fun apply(codePoint: Int, sink: BufferedSink): Boolean {
    val index = mappings.binarySearch {
      when {
        it.sourceCodePoint1 < codePoint -> -1
        it.sourceCodePoint0 > codePoint -> 1
        else -> 0
      }
    }

    // binarySearch returns a negative value when no range matches; fail with a clear message
    // instead of an index-out-of-bounds error from the list lookup below.
    require(index >= 0) { "no mapping for code point 0x${codePoint.toString(16)}" }

    val mapping = mappings[index]

    return when (mapping.type) {
      // Ignored code points are accepted but produce no output.
      TYPE_IGNORED -> true

      TYPE_DEVIATION, TYPE_MAPPED, TYPE_DISALLOWED_STD3_MAPPED -> {
        sink.write(mapping.mappedTo)
        true
      }

      TYPE_DISALLOWED_STD3_VALID, TYPE_VALID -> {
        sink.writeUtf8CodePoint(codePoint)
        true
      }

      TYPE_DISALLOWED -> false

      else -> error("unexpected mapping type: ${mapping.type}")
    }
  }
}
|
||
|
||
// Positions of the entries in [optionsDelimeter]. Each value must equal the index of the
// corresponding entry because callers compare the result of select() against these constants.
private const val DELIMITER_DOT = 0
private const val DELIMITER_SPACE = 1
private const val DELIMITER_SEMICOLON = 2
private const val DELIMITER_HASH = 3
private const val DELIMITER_NEWLINE = 4

private val optionsDelimeter = Options.of(
  ".".encodeUtf8(), // DELIMITER_DOT
  " ".encodeUtf8(), // DELIMITER_SPACE
  ";".encodeUtf8(), // DELIMITER_SEMICOLON
  "#".encodeUtf8(), // DELIMITER_HASH
  "\n".encodeUtf8(), // DELIMITER_NEWLINE
)

// Matches only a dot, which sits at the same index as DELIMITER_DOT.
private val optionsDot = Options.of(
  ".".encodeUtf8(), // DELIMITER_DOT
)
|
||
// Positions of the entries in [optionsType]. Each value must equal the index of the
// corresponding entry because the parser stores the result of select() as the mapping type.
private const val TYPE_DEVIATION = 0
private const val TYPE_DISALLOWED = 1
private const val TYPE_DISALLOWED_STD3_MAPPED = 2
private const val TYPE_DISALLOWED_STD3_VALID = 3
private const val TYPE_IGNORED = 4
private const val TYPE_MAPPED = 5
private const val TYPE_VALID = 6

// Every name includes its trailing space.
private val optionsType = Options.of(
  "deviation ".encodeUtf8(), // TYPE_DEVIATION
  "disallowed ".encodeUtf8(), // TYPE_DISALLOWED
  "disallowed_STD3_mapped ".encodeUtf8(), // TYPE_DISALLOWED_STD3_MAPPED
  "disallowed_STD3_valid ".encodeUtf8(), // TYPE_DISALLOWED_STD3_VALID
  "ignored ".encodeUtf8(), // TYPE_IGNORED
  "mapped ".encodeUtf8(), // TYPE_MAPPED
  "valid ".encodeUtf8(), // TYPE_VALID
)
|
||
/** Consumes space characters until the next byte is not a space or the source is exhausted. */
private fun BufferedSource.skipWhitespace() {
  val space = ' '.code.toByte()
  // exhausted() is checked first so that buffer[0] is only read when a byte is available.
  while (!exhausted() && buffer[0] == space) {
    skip(1L)
  }
}
|
||
/** Consumes everything up to and including the next `\n`, or all remaining bytes if there is none. */
private fun BufferedSource.skipRestOfLine() {
  val newlineIndex = indexOf('\n'.code.toByte())
  val byteCount = if (newlineIndex == -1L) {
    buffer.size // No newline was found: exhaust this source.
  } else {
    newlineIndex + 1
  }
  skip(byteCount)
}
|
||
/**
 * Parses the contents of `IdnaMappingTable.txt` into a [PlainTextIdnaMappingTable].
 *
 * Blank lines and lines that start with `#` are comments. A regular line may also carry a
 * trailing comment. Comments are never interpreted.
 *
 * Regular lines are made of fields separated by semicolons:
 *
 *  1. A single hex code point (like 0041) or a hex code point range (like 0030..0039).
 *  2. A mapping type, like `valid` or `mapped`.
 *  3. Only for types that have a mapping target: a sequence of hex code points (like
 *     0031 2044 0034), terminated by a `#` comment.
 *
 * Anything else on the line is skipped.
 *
 * @throws IOException if a line does not have the expected delimiters or names an unknown type.
 */
fun BufferedSource.readPlainTextIdnaMappingTable(): PlainTextIdnaMappingTable {
  val semicolon = ';'.code.toByte()
  val targetBytes = Buffer()
  val mappings = mutableListOf<Mapping>()

  // Consumes optional padding followed by the ';' that separates two fields.
  fun expectSemicolon() {
    skipWhitespace()
    if (readByte() != semicolon) throw IOException("expected ';'")
  }

  while (!exhausted()) {
    // Blank lines and comment lines carry no mapping.
    when (select(optionsDelimeter)) {
      DELIMITER_NEWLINE -> continue

      DELIMITER_HASH -> {
        skipRestOfLine()
        continue
      }

      DELIMITER_DOT, DELIMITER_SPACE, DELIMITER_SEMICOLON -> {
        throw IOException("unexpected delimiter")
      }
    }

    // Field 1: "002F" or "0000..002C". A single code point is a range of one.
    val firstCodePoint = readHexadecimalUnsignedLong()
    val lastCodePoint = if (select(optionsDot) == DELIMITER_DOT) {
      // The first dot was consumed by select(); the second must follow immediately.
      if (readByte() != '.'.code.toByte()) throw IOException("expected '..'")
      readHexadecimalUnsignedLong()
    } else {
      firstCodePoint
    }

    expectSemicolon()

    // Field 2: "valid" or "mapped".
    skipWhitespace()
    val type = select(optionsType)

    when (type) {
      TYPE_DISALLOWED, TYPE_DISALLOWED_STD3_VALID, TYPE_IGNORED, TYPE_VALID -> Unit

      TYPE_DEVIATION, TYPE_MAPPED, TYPE_DISALLOWED_STD3_MAPPED -> {
        expectSemicolon()

        // Field 3: like "0061" or "0031 2044 0034". The sequence ends at the trailing comment.
        while (true) {
          skipWhitespace()

          when (select(optionsDelimeter)) {
            DELIMITER_HASH -> break

            DELIMITER_DOT, DELIMITER_SEMICOLON, DELIMITER_NEWLINE -> {
              throw IOException("unexpected delimiter")
            }
          }

          targetBytes.writeUtf8CodePoint(readHexadecimalUnsignedLong().toInt())
        }
      }

      else -> throw IOException("unexpected type")
    }

    skipRestOfLine()

    // readByteString() drains targetBytes, leaving it empty for the next line.
    mappings.add(
      Mapping(
        firstCodePoint.toInt(),
        lastCodePoint.toInt(),
        type,
        targetBytes.readByteString(),
      ),
    )
  }

  return PlainTextIdnaMappingTable(mappings)
}
|
||
/**
 * One entry of the mapping table: a range of source code points and what to do with them.
 *
 * @property sourceCodePoint0 the first code point of the range.
 * @property sourceCodePoint1 the last code point of the range, inclusive. Equal to
 *     [sourceCodePoint0] when the entry covers a single code point.
 * @property type one of the `TYPE_*` constants.
 * @property mappedTo the UTF-8 encoded replacement for types that have a mapping target; empty
 *     for the other types.
 */
internal data class Mapping(
  val sourceCodePoint0: Int,
  val sourceCodePoint1: Int,
  val type: Int,
  val mappedTo: ByteString,
)
Oops, something went wrong.