Skip to content

Commit

Permalink
♻️ 新手嘗試重構 (#7)
Browse files Browse the repository at this point in the history
* Modify some functions to make them more semantic

* Extract crawler logic and filter logic to new class file

* Extract the enum-file writing logic and clean up the code

* Replace with buildList functions and make it more compact

* Replace explicit return with implicit return, make it more "Kotlin"

* Replace explicit return with implicit return, make it more "Kotlin"

* Introduce multiline string template and do some code styling
  • Loading branch information
frankvicky committed Nov 21, 2022
1 parent b898377 commit c437d68
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 107 deletions.
113 changes: 6 additions & 107 deletions src/main/kotlin/net/purefunc/generate/Main.kt
Original file line number Diff line number Diff line change
@@ -1,112 +1,11 @@
package net.purefunc.generate

import java.io.BufferedReader
import java.io.File
import java.io.FileOutputStream
import java.io.InputStreamReader
import java.net.URL
import java.util.Locale
import net.purefunc.generate.util.EmojiFileCreator
import net.purefunc.generate.util.EmojiReader
import net.purefunc.generate.util.ValidEmojiCollector

/**
 * Entry point of the emoji enum generator.
 *
 * Runs the three pipeline stages in order:
 * 1. [EmojiReader] downloads the Unicode `emoji-test.txt` file and collects its subgroup lines.
 * 2. [ValidEmojiCollector] keeps the `fully-qualified` entries and tokenizes them.
 * 3. [EmojiFileCreator] writes the entries out as enum source files, 1000 entries per file.
 *
 * The whole pipeline lives in those helper classes; keeping a second, inline copy of it here
 * would redeclare `bigEnum` in the same scope and repeat the download and the file writing.
 */
fun main() {
    // Sample input line:
    // 1F636 200D 1F32B FE0F ; fully-qualified # 😶‍🌫️ E13.1 face in clouds
    val roughList = EmojiReader("https://unicode.org/Public/emoji/15.0/emoji-test.txt").readTargetUrl()
    val bigEnum = ValidEmojiCollector(roughList).filter()

    // The entries are split across several files (1000 per file) rather than one big enum.
    EmojiFileCreator(1000).writeAsEnumFile(bigEnum)
}
77 changes: 77 additions & 0 deletions src/main/kotlin/net/purefunc/generate/util/EmojiFileCreator.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package net.purefunc.generate.util

import java.io.File
import java.io.FileOutputStream
import java.util.*

/**
 * Writes emoji entries out as Kotlin enum source files, at most [pageCount] entries per file.
 *
 * All entries in a single enum file would exceed the JVM 64K limit, so they are spread over
 * `Emoji0.kt`, `Emoji1.kt`, ... instead.
 *
 * @property pageCount maximum number of enum entries per generated file; must be positive.
 */
class EmojiFileCreator(private val pageCount: Int) {

    /**
     * Generates one `Emoji<N>.kt` file per page of [source] under `src/main/kotlin/net/purefunc/emoji/`.
     *
     * Each element of [source] is expected to look like `[hex code points..., ";", name words...]`.
     * The name words are joined with `_` and upper-cased to form the enum entry name; the hex
     * code points become the entry's `intArrayOf(...)` argument.
     *
     * No file is written for an empty [source].
     *
     * @param source tokenized emoji entries.
     * @throws NoSuchElementException if an entry has no `";"` separator.
     * @throws IllegalArgumentException if [pageCount] is not positive.
     */
    fun writeAsEnumFile(source: List<List<String>>) {
        allocatePerPageItem(source).forEachIndexed { fileIdx, page ->
            // `use` closes the stream even when a malformed entry makes the loop throw.
            FileOutputStream(File("src/main/kotlin/net/purefunc/emoji/Emoji$fileIdx.kt")).use { fos ->
                fos.generateFileHeader(fileIdx)

                page.forEachIndexed { idx, element ->
                    val splitIdx = element.indexOf(";")
                    if (splitIdx < 0) throw NoSuchElementException("Emoji entry has no ';' separator: $element")

                    val codePoints = element.subList(0, splitIdx)
                    // Locale.ROOT keeps the generated identifiers stable regardless of the machine's
                    // default locale (e.g. Turkish would upper-case "i" to a dotted capital I).
                    val name = element.subList(splitIdx + 1, element.size).joinToString("_").uppercase(Locale.ROOT)
                    val hexs = codePoints.joinToString(",") { hex -> "0x$hex" }
                    val emojiIntArr = codePoints.map { hex -> hex.toInt(16) }.toIntArray()
                    val emoji = String(emojiIntArr, 0, emojiIntArr.size)

                    val comment = " // $emoji $emoji $emoji"
                    val entry = " $name(intArrayOf($hexs))"
                    // The last entry of a file is terminated with ';', all others with ','.
                    if (idx == page.lastIndex) {
                        fos.writeLastLine(comment, entry)
                    } else {
                        fos.writeNextLine(comment, entry)
                    }
                }

                fos.generateFileFooter()
            }
        }
    }

    /**
     * Splits [source] into consecutive pages of at most [pageCount] entries.
     *
     * `chunked` never yields an empty page, so a source whose size is an exact multiple of
     * [pageCount] (or zero) does not produce a trailing enum file without entries.
     */
    private fun allocatePerPageItem(source: List<List<String>>): List<List<List<String>>> =
        source.chunked(pageCount)

    /**
     * Writes the package line and the opening of `enum class Emoji<fileIdx>`.
     *
     * NOTE(review): `trimIndent()` drops the blank first and last lines of the template, so the
     * header ends right after `) {` without a newline and the first entry's comment continues
     * on that same line — confirm this layout is intended.
     */
    private fun FileOutputStream.generateFileHeader(fileIdx: Int) =
        write(
            """
            package net.purefunc.emoji
            enum class Emoji$fileIdx(
            private val intArray: IntArray,
            ) {
            """.trimIndent().toByteArray()
        )

    /** Writes a non-final enum entry: its comment line, then the entry followed by `,`. */
    private fun FileOutputStream.writeNextLine(comment: String, enum: String) =
        write("$comment\n$enum,\n".toByteArray())

    /** Writes the final enum entry of a file: its comment line, then the entry followed by `;`. */
    private fun FileOutputStream.writeLastLine(comment: String, enum: String) =
        write("$comment\n$enum;\n".toByteArray())

    /** Writes the `toString()` override and the closing brace of the enum class. */
    private fun FileOutputStream.generateFileFooter() =
        write(
            """
            override fun toString() = String(intArray, 0, intArray.size)
            }
            """.trimIndent().toByteArray()
        )
}
23 changes: 23 additions & 0 deletions src/main/kotlin/net/purefunc/generate/util/EmojiReader.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package net.purefunc.generate.util

import java.io.BufferedReader
import java.io.InputStreamReader
import java.net.URL

/**
 * Reads an emoji test data file from a URL and extracts the lines that belong to subgroups.
 *
 * @property targetUrl location of the data file, e.g. the Unicode `emoji-test.txt`.
 */
class EmojiReader(private val targetUrl: String) {

    /**
     * Opens [targetUrl] and returns, in file order, every line that follows a line starting
     * with `# subgroup: ` up to (but not including) the next empty line.
     *
     * Sample collected line:
     * `1F636 200D 1F32B FE0F ; fully-qualified # 😶‍🌫️ E13.1 face in clouds`
     *
     * @return the collected lines; empty when the file contains no subgroup section.
     */
    fun readTargetUrl(): List<String> =
        buildList {
            var insideSubgroup = false
            // Decode explicitly as UTF-8: without a charset InputStreamReader falls back to the
            // platform default, which garbles the non-ASCII emoji names on non-UTF-8 platforms.
            val reader = BufferedReader(
                InputStreamReader(URL(targetUrl).openConnection().getInputStream(), Charsets.UTF_8)
            )
            // useLines closes the reader once every line has been consumed.
            reader.useLines { readLines ->
                readLines.forEach { line ->
                    // An empty line ends the current subgroup section.
                    if (line.isEmpty()) insideSubgroup = false
                    if (insideSubgroup) add(line)
                    // The header itself is not collected; collection starts with the next line.
                    if (line.startsWith("# subgroup: ")) insideSubgroup = true
                }
            }
        }
}
50 changes: 50 additions & 0 deletions src/main/kotlin/net/purefunc/generate/util/ValidEmojiCollector.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package net.purefunc.generate.util

/**
 * Converts raw emoji test lines into token lists suitable for enum generation.
 *
 * @property source raw lines, e.g.
 * `1F636 200D 1F32B FE0F ; fully-qualified # 😶‍🌫️ E13.1 face in clouds`
 */
class ValidEmojiCollector(private val source: List<String>) {

    /**
     * Keeps the lines of [source] that contain `fully-qualified` and turns each of them into
     * `[hex code points..., ";", sanitized name words...]`.
     *
     * @throws NoSuchElementException if a kept line has neither an empty token nor a `;` token,
     * or no version token (one starting with `E` and containing `.`).
     */
    fun filter(): List<List<String>> =
        source
            .filter { line -> "fully-qualified" in line }
            .map { line -> toEnumTokens(line.split(" ")) }

    /**
     * Builds the output tokens for one line that was split on single spaces.
     *
     * A split line looks like
     * `[1F636, 200D, 1F32B, FE0F, , , ..., ;, fully-qualified, , ..., #, 😶‍🌫️, E13.1, face, in, clouds]`.
     */
    private fun toEnumTokens(tokens: List<String>): List<String> {
        // The code points end at the first padding (empty) token or at the ';' separator.
        val codeEnd = tokens.indexOfFirst { token -> token.isEmpty() || token == ";" }
        if (codeEnd == -1) throw NoSuchElementException("No code point terminator found in $tokens")

        // The emoji name starts right after the version token such as "E13.1".
        val versionIdx = tokens.indexOfFirst { token -> token.startsWith("E") && token.contains(".") }
        if (versionIdx == -1) throw NoSuchElementException("No version token found in $tokens")

        val nameWords = tokens.subList(versionIdx + 1, tokens.size).map { word -> convertToValidChar(word) }
        return tokens.subList(0, codeEnd) + ";" + nameWords
    }

    /** Applies every entry of [REPLACEMENTS] to [str], in table order. */
    private fun convertToValidChar(str: String): String =
        REPLACEMENTS.fold(str) { cleaned, (target, replacement) -> cleaned.replace(target, replacement) }

    private companion object {
        /**
         * Ordered substitutions that strip or spell out characters of an emoji name word.
         *
         * NOTE(review): the first three entries have an empty search string as seen here, which
         * makes them no-ops; they presumably targeted characters that are not visible in this
         * copy of the file — confirm against the repository.
         */
        val REPLACEMENTS = listOf(
            "" to "",
            "" to "",
            "" to "",
            "-" to "_",
            ":" to "",
            "." to "",
            "!" to "",
            "(" to "",
            ")" to "",
            "1st" to "first",
            "2nd" to "second",
            "3rd" to "third",
            "package" to "packages",
            "#" to "hash",
            "*" to "star",
            "," to "comma",
            "&" to "and",
        )
    }
}

0 comments on commit c437d68

Please sign in to comment.