-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Modify some functions to make them more semantic * Extract crawler logic and filter logic to new class files * Extract the enum-file writing logic and clean up the code * Replace with buildList functions and make it more compact * Replace explicit return with implicit return, make it more "Kotlin" * Replace explicit return with implicit return, make it more "Kotlin" * Introduce multiline string template and do some code styling
- Loading branch information
1 parent
b898377
commit c437d68
Showing
4 changed files
with
156 additions
and
107 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,112 +1,11 @@ | ||
package net.purefunc.generate | ||
|
||
import java.io.BufferedReader | ||
import java.io.File | ||
import java.io.FileOutputStream | ||
import java.io.InputStreamReader | ||
import java.net.URL | ||
import java.util.Locale | ||
import net.purefunc.generate.util.EmojiFileCreator | ||
import net.purefunc.generate.util.EmojiReader | ||
import net.purefunc.generate.util.ValidEmojiCollector | ||
|
||
/**
 * Entry point of the emoji enum generator.
 *
 * The work is delegated to three collaborators:
 * 1. [EmojiReader] downloads the Unicode `emoji-test.txt` data file and collects its emoji lines.
 * 2. [ValidEmojiCollector] keeps the fully-qualified entries and tokenizes them.
 * 3. [EmojiFileCreator] writes the entries out as enum source files, at most 1000 entries per file.
 *
 * The previous inline implementation of these steps was dropped from this function: it declared
 * `bigEnum` a second time in the same scope and ran the whole download/generate pipeline twice.
 */
fun main() {
    val roughList = EmojiReader("https://unicode.org/Public/emoji/15.0/emoji-test.txt").readTargetUrl()
    val bigEnum = ValidEmojiCollector(roughList).filter()
    EmojiFileCreator(1000).writeAsEnumFile(bigEnum)
}
77 changes: 77 additions & 0 deletions
77
src/main/kotlin/net/purefunc/generate/util/EmojiFileCreator.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package net.purefunc.generate.util | ||
|
||
import java.io.File | ||
import java.io.FileOutputStream | ||
import java.util.* | ||
|
||
/**
 * Writes tokenized emoji records out as generated Kotlin enum source files.
 *
 * Putting every enum entry in one .kt file would exceed the JVM 64K limit, so the records are
 * split into pages and each page is written to its own `Emoji<N>.kt` file.
 *
 * @property pageCount maximum number of enum entries written into a single generated file;
 * must be positive.
 */
class EmojiFileCreator(private val pageCount: Int) {

    /**
     * Generates one `src/main/kotlin/net/purefunc/emoji/Emoji<N>.kt` file per page of [source].
     *
     * Each record is a token list of the form `[hexCodePoint..., ";", nameWord...]`, e.g.
     * `[1F636, 200D, 1F32B, FE0F, ;, face, in, clouds]`. The name words are joined with `_` and
     * upper-cased to form the enum entry name; the hex tokens become the entry's `intArrayOf(...)`.
     *
     * The target directory is not created here, so it presumably has to exist already.
     *
     * @param source tokenized emoji records; an empty list produces no files.
     * @throws NoSuchElementException if a record contains no `";"` separator.
     * @throws NumberFormatException if a code point token is not valid hexadecimal.
     * @throws IllegalArgumentException if [pageCount] is not positive.
     */
    fun writeAsEnumFile(source: List<List<String>>) {
        allocatePerPageItem(source).forEachIndexed { fileIdx, page ->
            // `use` guarantees the stream is flushed and closed, even when a record is malformed.
            FileOutputStream(File("src/main/kotlin/net/purefunc/emoji/Emoji$fileIdx.kt")).use { fos ->
                fos.generateFileHeader(fileIdx)

                page.forEachIndexed { idx, element ->
                    val splitIdx = element.indexOf(";")
                    if (splitIdx == -1) {
                        throw NoSuchElementException("Emoji record has no ';' separator: $element")
                    }

                    val codePoints = element.subList(0, splitIdx)
                    // Locale.ROOT keeps identifiers stable on every machine; a Turkish default
                    // locale would otherwise upper-case "i" to the dotted capital "İ".
                    val name = element.subList(splitIdx + 1, element.size)
                        .joinToString("_")
                        .uppercase(Locale.ROOT)
                    val hexs = codePoints.joinToString(",") { hex -> "0x$hex" }
                    val emojiIntArr = codePoints.map { hex -> hex.toInt(16) }.toIntArray()
                    val emoji = String(emojiIntArr, 0, emojiIntArr.size)

                    val comment = " // $emoji $emoji $emoji"
                    val entry = " $name(intArrayOf($hexs))"
                    // The last entry is terminated with ';' so that members can follow it.
                    if (idx == page.lastIndex) {
                        fos.writeLastLine(comment, entry)
                    } else {
                        fos.writeNextLine(comment, entry)
                    }
                }

                fos.generateFileFooter()
            }
        }
    }

    /**
     * Splits [source] into consecutive pages of at most [pageCount] records.
     *
     * `chunked` never yields an empty page, so no entry-less enum file is generated when the
     * record count is an exact multiple of [pageCount] or when [source] is empty.
     */
    private fun allocatePerPageItem(source: List<List<String>>): List<List<List<String>>> =
        source.chunked(pageCount)

    /** Writes the package line and the opening of the `Emoji<fileIdx>` enum declaration. */
    private fun FileOutputStream.generateFileHeader(fileIdx: Int) =
        write(
            """
            package net.purefunc.emoji
            enum class Emoji$fileIdx(
            private val intArray: IntArray,
            ) {
            """.trimIndent().plus("\n").toByteArray() // trimIndent drops the final line break
        )

    /** Writes an entry (preceded by its comment line) that is followed by further entries. */
    private fun FileOutputStream.writeNextLine(comment: String, enum: String) =
        write("$comment\n$enum,\n".toByteArray())

    /** Writes the final entry of a file (preceded by its comment line), terminated with `;`. */
    private fun FileOutputStream.writeLastLine(comment: String, enum: String) =
        write("$comment\n$enum;\n".toByteArray())

    /** Writes the `toString` override and the closing brace of the enum. */
    private fun FileOutputStream.generateFileFooter() =
        write(
            """
            override fun toString() = String(intArray, 0, intArray.size)
            }
            """.trimIndent().plus("\n").toByteArray() // end the generated file with a newline
        )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package net.purefunc.generate.util | ||
|
||
import java.io.BufferedReader | ||
import java.io.InputStreamReader | ||
import java.net.URL | ||
|
||
/**
 * Reads the emoji lines of a Unicode `emoji-test.txt` style document.
 *
 * @property targetUrl URL of the document to read.
 */
class EmojiReader(private val targetUrl: String) {

    /**
     * Opens [targetUrl] and collects the lines that belong to a subgroup, i.e. every line after
     * a `# subgroup: ` header up to (not including) the next empty line. A collected line looks like
     * `1F636 200D 1F32B FE0F ; fully-qualified # 😶‍🌫️ E13.1 face in clouds`.
     *
     * The stream is decoded explicitly as UTF-8 rather than with the platform default charset,
     * so the non-ASCII characters in the data survive on machines whose default is not UTF-8.
     *
     * @return the collected lines in document order.
     */
    fun readTargetUrl(): List<String> {
        val stream = URL(targetUrl).openConnection().getInputStream()
        val reader = BufferedReader(InputStreamReader(stream, Charsets.UTF_8))
        // useLines closes the reader once the lambda has finished.
        return reader.useLines { lines ->
            buildList<String> {
                var insideSubgroup = false
                lines.forEach { line ->
                    // The order of these checks matters: the header line itself is only collected
                    // when it directly follows another subgroup's lines without a blank line.
                    if (line.isEmpty()) insideSubgroup = false
                    if (insideSubgroup) add(line)
                    if (line.startsWith("# subgroup: ")) insideSubgroup = true
                }
            }
        }
    }
}
50 changes: 50 additions & 0 deletions
50
src/main/kotlin/net/purefunc/generate/util/ValidEmojiCollector.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package net.purefunc.generate.util | ||
|
||
/**
 * Turns raw `emoji-test.txt` lines into token lists suitable for generating enum entries.
 *
 * @property source raw emoji lines, e.g.
 * `1F636 200D 1F32B FE0F ; fully-qualified # 😶‍🌫️ E13.1 face in clouds`.
 */
class ValidEmojiCollector(private val source: List<String>) {

    /**
     * Keeps the lines containing `fully-qualified` and converts each into
     * `[hexCodePoint..., ";", nameWord...]`.
     *
     * A line is split on single spaces, so the column padding shows up as empty tokens:
     * `[1F636, 200D, 1F32B, FE0F, , , ..., ;, fully-qualified, , ..., #, 😶‍🌫️, E13.1, face, in, clouds]`.
     * The code points are the tokens before the first empty token or `;`; the name words are the
     * tokens after the version token (the first token that starts with `E` and contains `.`),
     * with characters that are unsuitable for an identifier replaced.
     *
     * @return one token list per fully-qualified line, in input order.
     * @throws NoSuchElementException if a fully-qualified line has no code point terminator or no
     * version token.
     */
    fun filter(): List<List<String>> =
        source
            .filter { it.contains("fully-qualified") }
            .map { line ->
                val elements = line.split(" ")

                val codeIdx = elements.indexOfFirst { it.isEmpty() || it == ";" }
                if (codeIdx == -1) {
                    throw NoSuchElementException("No code point terminator in emoji line: $line")
                }

                val nameIdx = elements.indexOfFirst { it.startsWith("E") && it.contains(".") }
                if (nameIdx == -1) {
                    throw NoSuchElementException("No version token (e.g. E13.1) in emoji line: $line")
                }

                val nameWords = elements.subList(nameIdx + 1, elements.size).map { convertToValidChar(it) }
                elements.subList(0, codeIdx) + ";" + nameWords
            }

    /**
     * Replacements applied, in this order, to every name word so that the joined name can be
     * used as an enum entry identifier.
     */
    private val replacements: List<Pair<String, String>> = listOf(
        "“" to "",
        "”" to "",
        "’" to "",
        "-" to "_",
        ":" to "",
        "." to "",
        "!" to "",
        "(" to "",
        ")" to "",
        "1st" to "first",
        "2nd" to "second",
        "3rd" to "third",
        "package" to "packages",
        "#" to "hash",
        "*" to "star",
        "," to "comma",
        "&" to "and",
    )

    /** Replaces or removes the characters of [str] that are not valid in an enum entry name. */
    private fun convertToValidChar(str: String) =
        replacements.fold(str) { acc, (from, to) -> acc.replace(from, to) }
}