Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
8303018: Unicode Emoji Properties
Reviewed-by: prr, erikj, rriggs
- Loading branch information
Showing
19 changed files
with
643 additions
and
278 deletions.
There are no files selected for viewing
93 changes: 93 additions & 0 deletions
93
make/jdk/src/classes/build/tools/generatecharacter/EmojiData.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* | ||
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved. | ||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||
* | ||
* This code is free software; you can redistribute it and/or modify it | ||
* under the terms of the GNU General Public License version 2 only, as | ||
* published by the Free Software Foundation. Oracle designates this | ||
* particular file as subject to the "Classpath" exception as provided | ||
* by Oracle in the LICENSE file that accompanied this code. | ||
* | ||
* This code is distributed in the hope that it will be useful, but WITHOUT | ||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||
* version 2 for more details (a copy is included in the LICENSE file that | ||
* accompanied this code). | ||
* | ||
* You should have received a copy of the GNU General Public License version | ||
* 2 along with this work; if not, write to the Free Software Foundation, | ||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
* | ||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | ||
* or visit www.oracle.com if you need additional information or have any | ||
* questions. | ||
*/ | ||
|
||
package build.tools.generatecharacter; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.nio.file.StandardOpenOption; | ||
import java.util.AbstractMap; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.function.Predicate; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.IntStream; | ||
import java.util.stream.Stream; | ||
|
||
/** | ||
* A class holding emoji character properties | ||
* https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files | ||
*/ | ||
class EmojiData { | ||
// Emoji properties map | ||
private final Map<Integer, Long> emojiProps; | ||
|
||
static EmojiData readSpecFile(Path file, int plane) throws IOException { | ||
return new EmojiData(file, plane); | ||
} | ||
|
||
EmojiData(Path file, int plane) throws IOException { | ||
emojiProps = Files.readAllLines(file).stream() | ||
.map(line -> line.split("#", 2)[0]) | ||
.filter(Predicate.not(String::isBlank)) | ||
.map(line -> line.split("[ \t]*;[ \t]*", 2)) | ||
.flatMap(map -> { | ||
var range = map[0].split("\\.\\.", 2); | ||
var start = Integer.valueOf(range[0], 16); | ||
if ((start >> 16) != plane) { | ||
return Stream.empty(); | ||
} else { | ||
return range.length == 1 ? | ||
Stream.of(new AbstractMap.SimpleEntry<>(start, convertType(map[1].trim()))) : | ||
IntStream.rangeClosed(start, Integer.valueOf(range[1], 16)) | ||
.mapToObj(cp -> new AbstractMap.SimpleEntry<>(cp, convertType(map[1].trim()))); | ||
} | ||
}) | ||
.collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, | ||
AbstractMap.SimpleEntry::getValue, | ||
(v1, v2) -> v1 | v2)); | ||
} | ||
|
||
long properties(int cp) { | ||
return emojiProps.get(cp); | ||
} | ||
|
||
Set<Integer> codepoints() { | ||
return emojiProps.keySet(); | ||
} | ||
|
||
private static long convertType(String type) { | ||
return switch (type) { | ||
case "Emoji" -> GenerateCharacter.maskEmoji; | ||
case "Emoji_Presentation" -> GenerateCharacter.maskEmojiPresentation; | ||
case "Emoji_Modifier" -> GenerateCharacter.maskEmojiModifier; | ||
case "Emoji_Modifier_Base" -> GenerateCharacter.maskEmojiModifierBase; | ||
case "Emoji_Component" -> GenerateCharacter.maskEmojiComponent; | ||
case "Extended_Pictographic" -> GenerateCharacter.maskExtendedPictographic; | ||
default -> throw new InternalError("Unrecognizable Emoji type: " + type); | ||
}; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
f593a6b
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Review
Issues