In [None]:
%use coroutines

##### Субтитры

In [None]:
import com.github.pgreze.process.Redirect
import com.github.pgreze.process.process
import kotlinx.coroutines.runBlocking

/**
 * Invokes `yt-dlp` for [url] to fetch auto-generated Russian subtitles only.
 *
 * The command goes through the proxy at `localhost:8888`, skips the media download
 * (`--skip-download`), writes files using the `%(channel_id)s/%(id)s/%(id)s.%(ext)s`
 * output template and records processed videos in `archive.txt`.
 * Both stdout and stderr are captured, and every line handed to the consumer is printed.
 *
 * @param url address of the video passed to `yt-dlp`
 * @return the result object produced by [process]
 */
suspend fun getRussianSubtitles(url: String) = process(
    *arrayOf(
        "yt-dlp",
        "--proxy", "localhost:8888",
        url,
        "--write-auto-subs",
        "--skip-download",
        "--sub-lang", "ru",
        "-o", "%(channel_id)s/%(id)s/%(id)s.%(ext)s",
        "--download-archive", "archive.txt",
    ),
    stdout = Redirect.CAPTURE,
    stderr = Redirect.CAPTURE,
    consumer = { outputLine: String -> println(outputLine) },
)


In [None]:
/**
 * Formats a duration given in whole seconds as `HH:MM:SS`.
 *
 * Each field is zero-padded to at least two digits. Hours are not wrapped at 24,
 * so a duration of a day or more simply produces a larger hour field.
 *
 * @param s duration in seconds
 * @return the formatted duration, e.g. `05:41:46` for `20506`
 */
fun sToTime(s: Long): String {
    val hours = s / 3600
    val minutes = (s % 3600) / 60
    // The last field is the remainder within the current minute, not the total number of seconds.
    val seconds = s % 60
    return String.format("%02d:%02d:%02d", hours, minutes, seconds)
}

#### DTO

In [None]:
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

/**
 * One JSON object from the yt-dlp listing, as decoded line by line from `streams.json`.
 *
 * Properties without a default value must be present in the JSON; every other property
 * falls back to `null` (or an empty list for [thumbnails]) when the key is absent.
 * Snake-case and underscore-prefixed JSON keys are mapped through [SerialName].
 */
@Serializable
data class YtDlpEntry(
    // --- Entry identity ---
    @SerialName("_type") val type: String,
    @SerialName("ie_key") val ieKey: String,
    val id: String,
    val url: String,
    val title: String,
    val description: String? = null,
    val duration: Double? = null,

    // --- Channel / uploader ---
    @SerialName("channel_id") val channelId: String? = null,
    val channel: String? = null,
    @SerialName("channel_url") val channelUrl: String? = null,
    val uploader: String? = null,
    @SerialName("uploader_id") val uploaderId: String? = null,
    @SerialName("uploader_url") val uploaderUrl: String? = null,

    val thumbnails: List<Thumbnail> = emptyList(),

    // --- Timing, availability and live state ---
    val timestamp: Long? = null,
    @SerialName("release_timestamp") val releaseTimestamp: Long? = null,
    val availability: String? = null,
    @SerialName("concurrent_view_count") val concurrentViewCount: Int? = null,
    @SerialName("live_status") val liveStatus: String? = null,
    @SerialName("channel_is_verified") val channelIsVerified: Boolean? = null,
    @SerialName("__x_forwarded_for_ip") val xForwardedForIp: String? = null,

    // --- Page and extractor (all required) ---
    @SerialName("webpage_url") val webpageUrl: String,
    @SerialName("original_url") val originalUrl: String,
    @SerialName("webpage_url_basename") val webpageUrlBasename: String,
    @SerialName("webpage_url_domain") val webpageUrlDomain: String,
    val extractor: String,
    @SerialName("extractor_key") val extractorKey: String,

    // --- Playlist context ---
    @SerialName("playlist_count") val playlistCount: Int? = null,
    val playlist: String? = null,
    @SerialName("playlist_id") val playlistId: String? = null,
    @SerialName("playlist_title") val playlistTitle: String? = null,
    @SerialName("playlist_uploader") val playlistUploader: String? = null,
    @SerialName("playlist_uploader_id") val playlistUploaderId: String? = null,
    @SerialName("playlist_channel") val playlistChannel: String? = null,
    @SerialName("playlist_channel_id") val playlistChannelId: String? = null,
    @SerialName("playlist_webpage_url") val playlistWebpageUrl: String? = null,
    @SerialName("n_entries") val nEntries: Int? = null,
    @SerialName("playlist_index") val playlistIndex: Int? = null,
    @SerialName("__last_playlist_index") val lastPlaylistIndex: Int? = null,
    @SerialName("playlist_autonumber") val playlistAutonumber: Int? = null,

    // --- Remaining metadata ---
    val epoch: Long? = null,
    @SerialName("release_date") val releaseDate: String? = null,
    @SerialName("release_year") val releaseYear: Int? = null,
    @SerialName("is_live") val isLive: Boolean? = null,
    @SerialName("was_live") val wasLive: Boolean? = null,
    @SerialName("_version") val version: VersionInfo? = null,
)

/**
 * A single element of the `thumbnails` list of a [YtDlpEntry].
 *
 * Only [url] is required; [height] and [width] default to `null` when the keys are absent.
 */
@Serializable
data class Thumbnail(val url: String, val height: Int? = null, val width: Int? = null)

/**
 * The object stored under the `_version` key of a [YtDlpEntry].
 *
 * [version] and [repository] are required; the two git-head properties default to `null`
 * when the keys are absent.
 */
@Serializable
data class VersionInfo(
    val version: String,
    @SerialName("current_git_head") val currentGitHead: String? = null,
    @SerialName("release_git_head") val releaseGitHead: String? = null,
    val repository: String,
)



#### Download Missing Subtitles

In [None]:


import kotlinx.serialization.json.Json
import java.io.File

// Decoder that ignores JSON keys which YtDlpEntry does not declare.
val json = Json { ignoreUnknownKeys = true }

// TODO: rewrite the .sh script as a Kotlin Process
val streamsFile = File("./streams.json")

// The file is read as JSON Lines: every non-blank line is decoded into one YtDlpEntry.
// Blank lines are skipped so that stray empty lines do not abort the whole parse.
val parsedList: List<YtDlpEntry> = streamsFile.useLines { lines ->
    lines
        .filter { it.isNotBlank() }
        .map { line -> json.decodeFromString<YtDlpEntry>(line) }
        .toList()
}
println("JSON file from yt-dlp parsed. Size: ${parsedList.size}")

In [19]:
// Total length of all parsed entries. Each duration is truncated to a whole number
// (entries without a duration count as 0) and accumulated as Long.
val totalDuration = parsedList.sumOf { entry -> (entry.duration?.toInt() ?: 0).toLong() }

// The sum is in the same unit as YtDlpEntry.duration (seconds in yt-dlp output), so it is
// formatted with sToTime, the notebook's seconds-to-HH:MM:SS helper.
sToTime(totalDuration)

05:41:46

In [None]:
@file:Suppress("RECEIVER_NULLABILITY_MISMATCH_BASED_ON_JAVA_ANNOTATIONS")

val downloadedSubtitlesFolder = File("./UCSQGkViib9XLwjDfIahtLdw/")

// Names of the sub-folders that already contain at least one file.
// File.listFiles() returns null when the path is not a directory or cannot be read,
// so a missing channel folder or a stray regular file is treated as "nothing downloaded".
val existingSubtitles: List<String> = downloadedSubtitlesFolder.listFiles().orEmpty()
    .filter { videoFolder -> !videoFolder.listFiles().isNullOrEmpty() }
    .map { it.nameWithoutExtension }

println("Existing subtitles count: ${existingSubtitles.size}")

runBlocking {
    // Set lookup keeps the membership test cheap for long listings.
    val alreadyDownloaded = existingSubtitles.toSet()
    parsedList
        .filter { it.id !in alreadyDownloaded }
        .forEach { entry ->
            val result = getRussianSubtitles(entry.url)
            // Surface failed downloads instead of silently dropping the exit code.
            if (result.resultCode != 0) {
                println("yt-dlp exited with code ${result.resultCode} for ${entry.url}")
            }
        }
}


#### Post-process Subtitles to TXT

In [None]:
@file:Suppress("RECEIVER_NULLABILITY_MISMATCH_BASED_ON_JAVA_ANNOTATIONS")

import com.github.pgreze.process.process
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.runBlocking
import java.io.File

/**
 * Converts one `.vtt` subtitle file into a sibling `.txt` file.
 *
 * Runs `npx tsx ../ragtitles/cli.ts <input> --time` and writes every line handed to the
 * consumer into the `.txt` file, each followed by a newline. The output file is emptied
 * first, so running the function again replaces the previous result instead of appending
 * to it.
 *
 * Inputs that are not regular files with the `vtt` extension are skipped: for such a path
 * the output file would be the input itself.
 *
 * @param input the `.vtt` file to convert
 */
suspend fun postProcessSubtitles(input: File) {
    if (!input.isFile || input.extension != "vtt") {
        println("Skipping (not a .vtt file): ${input.path}")
        return
    }

    println("Processing: ${input.path}")
    // Swap only the trailing extension; a plain string replace would touch ".vtt" anywhere in the path.
    val txtFile = input.resolveSibling("${input.nameWithoutExtension}.txt")
    // Create the file or truncate an existing one.
    txtFile.writeText("")

    // NOTE(review): stderr is captured as well; if the library forwards captured stderr lines
    // to the consumer, tool warnings end up in the .txt file — confirm this is intended.
    process(
        "npx",
        "tsx",
        "../ragtitles/cli.ts",
        input.path,
        "--time",
        stdout = Redirect.CAPTURE,
        stderr = Redirect.CAPTURE,
        consumer = { line: String -> txtFile.appendText("$line\n") },
    )
}

// Convert every .vtt file found one level below the channel folder, one after another.
runBlocking {
    val channelFolder = File("./UCSQGkViib9XLwjDfIahtLdw/")
    // File.listFiles() returns null for a missing folder or a regular file; treat both as empty.
    // The listing is taken up front, so .txt files produced during this run are never picked up.
    channelFolder.listFiles().orEmpty()
        .flatMap { videoFolder -> videoFolder.listFiles().orEmpty().toList() }
        .filter { candidate -> candidate.isFile && candidate.extension == "vtt" }
        .forEach { subtitleFile -> postProcessSubtitles(subtitleFile) }
}

#### Clean-up Empty Folders

In [None]:
@file:Suppress("RECEIVER_NULLABILITY_MISMATCH_BASED_ON_JAVA_ANNOTATIONS")

// Remove every directory under the channel folder (the folder itself included) that has no entries.
val gopsterFolder = File("./UCSQGkViib9XLwjDfIahtLdw/")
gopsterFolder.walkTopDown()
    // listFiles() returns null when a directory cannot be read; such directories are left alone.
    .filter { it.isDirectory && it.listFiles()?.isEmpty() == true }
    // Snapshot the candidates so the tree is not modified while it is still being walked.
    .toList()
    .forEach { emptyDir ->
        // The directory is known to be empty, so a plain delete is enough.
        if (!emptyDir.delete()) {
            println("Could not delete empty folder: ${emptyDir.path}")
        }
    }
