Skip to content

Commit 8fbcc73

Browse files
committed
[SE-0163] Add Latin1 to Unicode codecs
1 parent 241c5d9 commit 8fbcc73

File tree

4 files changed

+147
-2
lines changed

4 files changed

+147
-2
lines changed

stdlib/public/core/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# This source file is part of the Swift.org open source project
44
#
5-
# Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors
5+
# Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
66
# Licensed under Apache License v2.0 with Runtime Library Exception
77
#
88
# See https://swift.org/LICENSE.txt for license information
@@ -219,6 +219,7 @@ set(SWIFTLIB_SOURCES
219219
SliceBuffer.swift
220220
SIMDVector.swift
221221
UnfoldSequence.swift
222+
UnicodeLatin1.swift
222223
VarArgs.swift
223224
Zip.swift
224225
"${SWIFT_SOURCE_DIR}/stdlib/linker-support/magic-symbols-for-install-name.c"

stdlib/public/core/GroupInfo.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
"Unicode.swift",
4444
"UnicodeEncoding.swift",
4545
"UnicodeHelpers.swift",
46+
"UnicodeLatin1.swift",
4647
"UnicodeParser.swift",
4748
"UnicodeScalar.swift",
4849
"UnicodeScalarProperties.swift",

stdlib/public/core/UnicodeLatin1.swift

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
//===--- UnicodeLatin1.swift ----------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2020 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
14+
extension Unicode {
15+
16+
@frozen
17+
public enum Latin1 {}
18+
}
19+
20+
@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
21+
extension Unicode.Latin1: Unicode.Encoding {
22+
23+
@frozen
24+
public struct Parser {
25+
26+
@inlinable
27+
public init() {}
28+
}
29+
30+
public typealias CodeUnit = UInt8
31+
public typealias EncodedScalar = CollectionOfOne<CodeUnit>
32+
public typealias ForwardParser = Parser
33+
public typealias ReverseParser = Parser
34+
35+
@inlinable
36+
public static var encodedReplacementCharacter: EncodedScalar {
37+
EncodedScalar(0x1A) // U+001A SUBSTITUTE
38+
}
39+
40+
@inlinable
41+
public static func decode(_ encodedScalar: EncodedScalar) -> Unicode.Scalar {
42+
Unicode.Scalar(encodedScalar[0])
43+
}
44+
45+
@inlinable
46+
public static func encode(_ unicodeScalar: Unicode.Scalar) -> EncodedScalar? {
47+
CodeUnit(exactly: unicodeScalar.value).map { EncodedScalar($0) }
48+
}
49+
50+
@inlinable
51+
public static func _isScalar(_: CodeUnit) -> Bool { true }
52+
}
53+
54+
@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
55+
extension Unicode.Latin1.Parser: Unicode.Parser {
56+
57+
public typealias Encoding = Unicode.Latin1
58+
59+
@inlinable
60+
public mutating func parseScalar<I: IteratorProtocol>(
61+
from codeUnits: inout I
62+
) -> Unicode.ParseResult<Encoding.EncodedScalar>
63+
where I.Element == Encoding.CodeUnit {
64+
codeUnits.next().map { .valid(Encoding.EncodedScalar($0)) } ?? .emptyInput
65+
}
66+
}

test/stdlib/Unicode.swift

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
@@ -110,4 +110,81 @@ UnicodeAPIs.test("UTF-8 and UTF-16 queries") {
110110
}
111111
}
112112

113+
if #available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *) {
114+
var UnicodeLatin1 = TestSuite("UnicodeLatin1")
115+
116+
UnicodeLatin1.test("Encoding") {
117+
let unicodeScalars = (UInt8.min ... UInt8.max).map { Unicode.Scalar($0) }
118+
let encodedScalars = unicodeScalars.compactMap { Unicode.Latin1.encode($0) }
119+
let decodedScalars = encodedScalars.map { Unicode.Latin1.decode($0) }
120+
expectEqualSequence(unicodeScalars, decodedScalars)
121+
expectNil(Unicode.Latin1.encode("\u{0100}"))
122+
expectNil(Unicode.Latin1.encode("\u{10FFFF}"))
123+
}
124+
125+
UnicodeLatin1.test("Parser") {
126+
let codeUnits = UInt8.min ... UInt8.max
127+
var codeUnitsIterator = codeUnits.makeIterator()
128+
var encodedScalars: [Unicode.Latin1.EncodedScalar] = []
129+
var forwardParser = Unicode.Latin1.ForwardParser()
130+
loop: while true {
131+
switch forwardParser.parseScalar(from: &codeUnitsIterator) {
132+
case .valid(let encodedScalar):
133+
encodedScalars.append(encodedScalar)
134+
case .emptyInput:
135+
expectEqualSequence(codeUnits, encodedScalars.joined())
136+
break loop
137+
case .error:
138+
expectUnreachable()
139+
break loop
140+
}
141+
}
142+
}
143+
144+
UnicodeLatin1.test("Transcode") {
145+
let codeUnitsAndText: [ClosedRange<UInt8>: String] = [
146+
UInt8(0x20) ... UInt8(0x7E):
147+
"""
148+
\u{20}!"#$%&'()*+,-./0123456789:;<=>?\
149+
@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_\
150+
`abcdefghijklmnopqrstuvwxyz{|}~
151+
""",
152+
UInt8(0xA0) ... UInt8(0xFF):
153+
"""
154+
\u{A0}¡¢£¤¥¦§¨©ª«¬\u{AD}®¯°±²³´µ¶·¸¹º»¼½¾¿\
155+
ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß\
156+
àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
157+
""",
158+
]
159+
for (codeUnits, expectedText) in codeUnitsAndText {
160+
let actualText = String(decoding: codeUnits, as: Unicode.Latin1.self)
161+
expectEqual(expectedText, actualText)
162+
}
163+
for (expectedCodeUnits, text) in codeUnitsAndText {
164+
var actualCodeUnits: [UInt8] = []
165+
let hadError = transcode(
166+
text.utf8.makeIterator(),
167+
from: Unicode.UTF8.self,
168+
to: Unicode.Latin1.self,
169+
stoppingOnError: false,
170+
into: { actualCodeUnits.append($0) }
171+
)
172+
expectEqualSequence(expectedCodeUnits, actualCodeUnits)
173+
expectFalse(hadError)
174+
}
175+
do {
176+
var actualCodeUnits: [UInt8] = []
177+
let hadError = transcode(
178+
"A\u{0100}B\u{10FFFF}C".utf8.makeIterator(),
179+
from: Unicode.UTF8.self,
180+
to: Unicode.Latin1.self,
181+
stoppingOnError: false,
182+
into: { actualCodeUnits.append($0) }
183+
)
184+
expectEqualSequence([0x41, 0x1A, 0x42, 0x1A, 0x43], actualCodeUnits)
185+
expectFalse(hadError)
186+
}
187+
}
188+
}
189+
113190
runAllTests()

0 commit comments

Comments
 (0)