From c437d68a8737ba7ea0a8c3babe1c322cbdba114d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?TingI=C4=81u=20=22Ting=22=20K=C3=AC?= <51072200+akechetaki10736@users.noreply.github.com> Date: Mon, 21 Nov 2022 10:47:06 +0800 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20=E6=96=B0=E6=89=8B?= =?UTF-8?q?=E5=98=97=E8=A9=A6=E9=87=8D=E6=A7=8B=20(#7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Modify some functions to make them more semantic * Extract crawler logic and filter logic to new class file * Extract writing enum file logic and clean up the code * Replace manual list building with buildList and make it more compact * Replace explicit return with implicit return, make it more "Kotlin" * Replace explicit return with implicit return, make it more "Kotlin" * Introduce multiline string template and do some code styling --- src/main/kotlin/net/purefunc/generate/Main.kt | 113 +----------------- .../generate/util/EmojiFileCreator.kt | 77 ++++++++++++ .../net/purefunc/generate/util/EmojiReader.kt | 23 ++++ .../generate/util/ValidEmojiCollector.kt | 50 ++++++++ 4 files changed, 156 insertions(+), 107 deletions(-) create mode 100644 src/main/kotlin/net/purefunc/generate/util/EmojiFileCreator.kt create mode 100644 src/main/kotlin/net/purefunc/generate/util/EmojiReader.kt create mode 100644 src/main/kotlin/net/purefunc/generate/util/ValidEmojiCollector.kt diff --git a/src/main/kotlin/net/purefunc/generate/Main.kt b/src/main/kotlin/net/purefunc/generate/Main.kt index af2e62f..ba9eaf4 100644 --- a/src/main/kotlin/net/purefunc/generate/Main.kt +++ b/src/main/kotlin/net/purefunc/generate/Main.kt @@ -1,112 +1,11 @@ package net.purefunc.generate -import java.io.BufferedReader -import java.io.File -import java.io.FileOutputStream -import java.io.InputStreamReader -import java.net.URL -import java.util.Locale +import net.purefunc.generate.util.EmojiFileCreator +import net.purefunc.generate.util.EmojiReader +import 
net.purefunc.generate.util.ValidEmojiCollector fun main() { - // collect emoji lines - // 1F636 200D 1F32B FE0F ; fully-qualified # πŸ˜Άβ€πŸŒ«οΈ E13.1 face in clouds - var flag = false - val lines = mutableListOf() - val url = URL("https://unicode.org/Public/emoji/15.0/emoji-test.txt") - val reader = BufferedReader(InputStreamReader(url.openConnection().getInputStream())) - reader.useLines { readLines -> - readLines.forEach { line -> - if (line == "") flag = false - if (flag) lines.add(line) - if (line.startsWith("# subgroup: ")) flag = true - } - } - - val bigEnum = lines.filter { - it.contains("fully-qualified") - }.map { - it.split(" ") - }.map { elements -> - // [1F636, 200D, 1F32B, FE0F, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ;, fully-qualified, , , , , #, πŸ˜Άβ€πŸŒ«οΈ, E13.1, face, in, cloud] - - val items = mutableListOf() - - val codePartIdxs = elements.mapIndexed { idx, element -> if (element == "" || element == ";") idx else -1 } - val codeIdx = codePartIdxs.filter { idx -> idx != -1 }[0] - - (0 until codeIdx).forEach { i -> items.add(elements[i]) } - items.add(";") - - val namePartIdxs = - elements.mapIndexed { idx, element -> if (element.startsWith("E") && element.contains(".")) idx else -1 } - val nameIdx = namePartIdxs.filter { idx -> idx != -1 }[0] - - items.addAll( - // replace item name contains invalid char - elements.subList(nameIdx + 1, elements.size).map { str -> - str.replace("β€œ", "") - .replace("”", "") - .replace("’", "") - .replace("-", "_") - .replace(":", "") - .replace(".", "") - .replace("!", "") - .replace("(", "") - .replace(")", "") - .replace("1st", "first") - .replace("2nd", "second") - .replace("3rd", "third") - .replace("package", "packages") - .replace("#", "hash") - .replace("*", "star") - .replace(",", "comma") - .replace("&", "and") - } - ) - - items - } - - // all enum in one .kt is will exceed jvm limit 64K - val pageCount = 1000 - val pageSize = (bigEnum.size / 
pageCount) - val pagingItems = (0..pageSize).map { page -> - if (pageCount * (page + 1) > bigEnum.size) { - bigEnum.subList(pageCount * page, bigEnum.size) - } else { - bigEnum.subList(pageCount * page, pageCount * (page + 1)) - } - } - - pagingItems.forEachIndexed { fileIdx, item -> - val fos = FileOutputStream(File("src/main/kotlin/net/purefunc/emoji/Emoji$fileIdx.kt")) - fos.write("package net.purefunc.emoji\n".toByteArray()) - fos.write("\n".toByteArray()) - fos.write("enum class Emoji$fileIdx(\n".toByteArray()) - fos.write(" private val intArray: IntArray,\n".toByteArray()) - fos.write(") {\n".toByteArray()) - fos.write("\n".toByteArray()) - - item.forEachIndexed { idx, element -> - val idxListThird = element.mapIndexed { i, e -> if (e == ";") i else -1 } - val splitIdx = idxListThird.filter { i -> i != -1 }[0] - - val name = element.subList(splitIdx + 1, element.size).joinToString("_").uppercase(Locale.getDefault()) - val hexs = element.subList(0, splitIdx).map { hex -> "0x$hex" }.joinToString(",") - val emojiIntArr = element.subList(0, splitIdx).map { hex -> hex.toInt(16) }.toIntArray() - val emoji = String(emojiIntArr, 0, emojiIntArr.size) - - val comment = " // $emoji $emoji $emoji" - val enum = " $name(intArrayOf($hexs))" - if (idx == item.size - 1) { - fos.write("$comment\n$enum;\n".toByteArray()) - } else { - fos.write("$comment\n$enum,\n".toByteArray()) - } - } - - fos.write("\n".toByteArray()) - fos.write(" override fun toString() = String(intArray, 0, intArray.size)\n".toByteArray()) - fos.write("}\n".toByteArray()) - } + val roughList = EmojiReader("https://unicode.org/Public/emoji/15.0/emoji-test.txt").readTargetUrl() + val bigEnum = ValidEmojiCollector(roughList).filter() + EmojiFileCreator(1000).writeAsEnumFile(bigEnum) } diff --git a/src/main/kotlin/net/purefunc/generate/util/EmojiFileCreator.kt b/src/main/kotlin/net/purefunc/generate/util/EmojiFileCreator.kt new file mode 100644 index 0000000..a219786 --- /dev/null +++ 
b/src/main/kotlin/net/purefunc/generate/util/EmojiFileCreator.kt @@ -0,0 +1,77 @@ +package net.purefunc.generate.util + +import java.io.File +import java.io.FileOutputStream +import java.util.* + +class EmojiFileCreator(private val pageCount: Int) { + // all enum in one .kt is will exceed jvm limit 64K + fun writeAsEnumFile(source: List>): Unit { + val pagingItems = allocatePerPageItem(source) + + pagingItems.forEachIndexed { fileIdx, item -> + val fos = FileOutputStream(File("src/main/kotlin/net/purefunc/emoji/Emoji$fileIdx.kt")) + fos.generateFileHeader(fileIdx) + + item.forEachIndexed { idx, element -> + val idxListThird = element.mapIndexed { i, e -> if (e == ";") i else -1 } + val splitIdx = idxListThird.first { i -> i != -1 } + + val name = element.subList(splitIdx + 1, element.size).joinToString("_").uppercase(Locale.getDefault()) + val hexs = element.subList(0, splitIdx).joinToString(",") { hex -> "0x$hex" } + val emojiIntArr = element.subList(0, splitIdx).map { hex -> hex.toInt(16) }.toIntArray() + val emoji = String(emojiIntArr, 0, emojiIntArr.size) + + val comment = " // $emoji $emoji $emoji" + val enum = " $name(intArrayOf($hexs))" + if (idx == item.size - 1) { + fos.writeLastLine(comment, enum) + } else { + fos.writeNextLine(comment, enum) + } + } + + fos.generateFileFooter() + } + } + + private fun allocatePerPageItem(source: List>): List>> { + val pageSize = (source.size / pageCount) + return (0..pageSize).map { page -> + val pageList: (Int) -> List> = { source.subList(pageCount * page, it) } + if (pageCount * (page + 1) > source.size) + pageList(source.size) + else + pageList(pageCount * (page + 1)) + + } + } + + private fun FileOutputStream.generateFileHeader(fileIdx: Int) = + write( + """ + package net.purefunc.emoji + + enum class Emoji$fileIdx( + private val intArray: IntArray, + ) { + + + """.trimIndent().toByteArray() + ) + + private fun FileOutputStream.writeNextLine(comment: String, enum: String) = write("$comment\n$enum,\n".toByteArray()) + 
+ private fun FileOutputStream.writeLastLine(comment: String, enum: String) = write("$comment\n$enum;\n".toByteArray()) + + private fun FileOutputStream.generateFileFooter() = + write( + """ + + override fun toString() = String(intArray, 0, intArray.size) + } + + """.trimIndent().toByteArray() + ) + +} diff --git a/src/main/kotlin/net/purefunc/generate/util/EmojiReader.kt b/src/main/kotlin/net/purefunc/generate/util/EmojiReader.kt new file mode 100644 index 0000000..a4f18fc --- /dev/null +++ b/src/main/kotlin/net/purefunc/generate/util/EmojiReader.kt @@ -0,0 +1,23 @@ +package net.purefunc.generate.util + +import java.io.BufferedReader +import java.io.InputStreamReader +import java.net.URL + +class EmojiReader(private val targetUrl: String) { + + fun readTargetUrl(): List = + // collect emoji lines + // 1F636 200D 1F32B FE0F ; fully-qualified # πŸ˜Άβ€πŸŒ«οΈ E13.1 face in clouds + buildList { + var flag = false + val reader = BufferedReader(InputStreamReader(URL(targetUrl).openConnection().getInputStream())) + reader.useLines { readLines -> + readLines.forEach { line -> + if (line.isEmpty()) flag = false + if (flag) add(line) + if (line.startsWith("# subgroup: ")) flag = true + } + } + } +} diff --git a/src/main/kotlin/net/purefunc/generate/util/ValidEmojiCollector.kt b/src/main/kotlin/net/purefunc/generate/util/ValidEmojiCollector.kt new file mode 100644 index 0000000..0f95a13 --- /dev/null +++ b/src/main/kotlin/net/purefunc/generate/util/ValidEmojiCollector.kt @@ -0,0 +1,50 @@ +package net.purefunc.generate.util + +class ValidEmojiCollector(private val source: List) { + + fun filter() = + source + .filter { + it.contains("fully-qualified") + }.map { + it.split(" ") + }.map { elements -> + // [1F636, 200D, 1F32B, FE0F, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ;, fully-qualified, , , , , #, πŸ˜Άβ€πŸŒ«οΈ, E13.1, face, in, cloud] + + val items = mutableListOf() + + val codePartIdxs = elements.mapIndexed { idx, 
element -> if (element.isEmpty() || element == ";") idx else -1 } + val codeIdx = codePartIdxs.first { idx -> idx != -1 } + + (0 until codeIdx).forEach { i -> items.add(elements[i]) } + items.add(";") + + val namePartIdxs = + elements.mapIndexed { idx, element -> if (element.startsWith("E") && element.contains(".")) idx else -1 } + val nameIdx = namePartIdxs.first { idx -> idx != -1 } + + items.plus(elements.subList(nameIdx + 1, elements.size).map { str -> convertToValidChar(str) }) + } + + + private fun convertToValidChar(str: String) = + // replace item name contains invalid char + str.replace("β€œ", "") + .replace("”", "") + .replace("’", "") + .replace("-", "_") + .replace(":", "") + .replace(".", "") + .replace("!", "") + .replace("(", "") + .replace(")", "") + .replace("1st", "first") + .replace("2nd", "second") + .replace("3rd", "third") + .replace("package", "packages") + .replace("#", "hash") + .replace("*", "star") + .replace(",", "comma") + .replace("&", "and") + +}