Skip to content

Commit

Permalink
Fix regex
Browse files Browse the repository at this point in the history
  • Loading branch information
rakuishi committed Feb 12, 2024
1 parent 1a0fc13 commit 75745d7
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 28 deletions.
2 changes: 1 addition & 1 deletion app/src/main/java/com/rakuishi/nreader/model/Novel.kt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ data class Novel(

val url: String
get() = when (site) {
Site.NCODE -> "https://ncode.syosetu.com/${nid}/"
Site.NCODE -> "https://ncode.syosetu.com/novelview/infotop/ncode/${nid}/"
Site.KAKUYOMU -> "https://kakuyomu.jp/works/${nid}"
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,19 @@ import com.rakuishi.nreader.database.NovelDao
import com.rakuishi.nreader.model.Novel
import com.rakuishi.nreader.model.Site
import com.rakuishi.nreader.util.await
import kotlinx.coroutines.*
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.withContext
import okhttp3.OkHttpClient
import okhttp3.Request
import java.text.ParseException
import java.text.SimpleDateFormat
import java.time.ZoneId
import java.time.ZonedDateTime
import java.util.*
import java.util.Date
import java.util.Locale

class NovelRepository(
private val dao: NovelDao,
Expand Down Expand Up @@ -41,7 +46,7 @@ class NovelRepository(

private suspend fun fetchNarouNewNovel(nid: String) =
withContext(Dispatchers.IO) {
val url = "https://ncode.syosetu.com/${nid}/"
val url = "https://ncode.syosetu.com/novelview/infotop/ncode/${nid}/"
val request = Request.Builder().url(url).get().build()
val response = client.newCall(request).await()
val body = response.body?.string() ?: ""
Expand Down Expand Up @@ -111,15 +116,15 @@ class NovelRepository(
)

val titleRegex =
Regex("""<h1 id="workTitle"><a href="/works/(\d+)">(.+?)</a></h1>""")
Regex(""""__typename":"Work","id":"\d+","title":"(.+?)"""")
titleRegex.find(body)?.let {
novel.title = it.groups[2]?.value ?: ""
novel.title = it.groups[1]?.value ?: ""
}

val authorNameRegex =
Regex("""<span id="workAuthor-activityName"><a href="/users/(.+?)">(.+?)</a></span>""")
Regex(""""activityName":"(.+?)"""")
authorNameRegex.find(body)?.let {
novel.authorName = it.groups[2]?.value ?: ""
novel.authorName = it.groups[1]?.value ?: ""
}

parseKakuyomuEpisode(body)?.let {
Expand Down Expand Up @@ -192,33 +197,38 @@ class NovelRepository(
)

/**
 * Extracts episode information from the HTML of a Narou novel info page.
 *
 * Two independent lookups are performed on [body]:
 *  - the total episode count, captured from the `全N部分` marker;
 *  - the latest-episode timestamp, captured from the `最新部分掲載日` table row.
 *
 * Either lookup falls back to a default when its pattern does not match or its
 * capture cannot be parsed: `0` for the episode count and the current time for
 * the timestamp. As a result this function always returns a non-null value; the
 * nullable return type is kept so existing callers keep compiling.
 *
 * @param body raw HTML text of the info page.
 * @return the parsed [Episodes]; the first episode is always reported as id "1" / number 1.
 */
private fun parseNarouEpisode(body: String): Episodes? {
    // toIntOrNull (not toInt) so an absurdly long digit run degrades to 0 instead of throwing.
    val episodeNumber = Regex("""</span>全(\d+)部分""")
        .find(body)
        ?.groups?.get(1)
        ?.value
        ?.toIntOrNull()
        ?: 0

    // NOTE(review): SimpleDateFormat parses in the JVM default time zone; the page
    // presumably shows Japan time — confirm whether an explicit zone should be set.
    val updatedAt: Date = Regex("""<th>最新部分掲載日</th>\s+<td>(\d{4}年 \d{2}月\d{2}日 \d{2}時\d{2}分)</td>""")
        .find(body)
        ?.groups?.get(1)
        ?.value
        ?.let { updatedAtString ->
            try {
                SimpleDateFormat("yyyy年 MM月dd日 HH時mm分", Locale.JAPAN).parse(updatedAtString)
            } catch (e: ParseException) {
                null
            }
        }
        ?: Date()

    return Episodes(
        firstEpisodeId = "1",
        firstEpisodeNumber = 1,
        latestEpisodeId = episodeNumber.toString(),
        latestEpisodeNumber = episodeNumber,
        latestEpisodeUpdatedAt = updatedAt,
    )
}

private fun parseKakuyomuEpisode(body: String): Episodes? {
val regex =
Regex("""<li class="widget-toc-episode">\s+<a href="/works/\d+/episodes/(\d+)" class="widget-toc-episode-episodeTitle">\s+<span class="widget-toc-episode-titleLabel js-vertical-composition-item">(.+?)</span>\s+<time class="widget-toc-episode-datePublished" datetime="(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z">.+?</time>\s+</a>\s+</li>""")
Regex(""""__typename":"Episode","id":"(\d+)","title":".+?","publishedAt":"(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z"""")
regex.findAll(body).let { results ->
val firstEpisodeId = results.firstOrNull()?.let {
it.groups[1]?.value
Expand All @@ -229,12 +239,12 @@ class NovelRepository(
val episodeNumber = results.count()

val updatedAt = ZonedDateTime.of(
it.groups[2]?.value?.toInt() ?: 0,
it.groups[3]?.value?.toInt() ?: 0,
it.groups[4]?.value?.toInt() ?: 0,
it.groups[5]?.value?.toInt() ?: 0,
it.groups[6]?.value?.toInt() ?: 0,
it.groups[7]?.value?.toInt() ?: 0,
it.groups[8]?.value?.toInt() ?: 0,
0,
ZoneId.of("GMT0")
)
Expand Down Expand Up @@ -298,7 +308,7 @@ class NovelRepository(
val body = response.body?.string() ?: ""

val regex =
Regex("""<a href="/works/\d+/episodes/(\d+)" class="widget-toc-episode-episodeTitle">""")
Regex(""""__typename":"Episode","id":"(\d+)","title":".+?","publishedAt":"(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z"""")
regex.findAll(body).forEachIndexed { index, result ->
val id = result.groups[1]?.value ?: ""
if (episodeId == id) return@withContext index + 1
Expand Down

0 comments on commit 75745d7

Please sign in to comment.