<a href="https://colab.research.google.com/github/shinysky73/youtube-subtitle-downloader/blob/main/youtube-subtitle-downloader.ipynb">
<img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
</a>

## 사용방법
이 노트북은 한글 자막이 없는 영어 유튜브 영상의 자막을 생성하고, 그 자막이 입혀진 영상을 얻기 위해 만들어졌습니다.


Google Colab의 GPU 환경을 이용하면 더 빠르게 실행할 수 있습니다. 
([Run in Colab](https://colab.research.google.com/github/shinysky73/youtube-subtitle-downloader/blob/main/youtube-subtitle-downloader.ipynb) 링크를 이용하세요)

아래의 입력란과 선택사항을 설정한 후 Colab 상단 메뉴의 Runtime > Run all을 실행해주세요.


In [None]:
# 예시 URL : https://www.youtube.com/watch?v=jC4v5AS4RIM

In [2]:
#@title 유튜브 URL 입력(필수 입력사항)
# URL of the YouTube video to process (required). The run cell only starts the
# pipeline when this string is non-empty; otherwise it prints a prompt.
youtube_url = "" #@param {type:"string"}

In [3]:
#@title 유튜브 영상의 언어를 선택하세요.
# Spoken language of the video. The run cell maps "English" to "en" and any
# other value to "ko".
language = "English" # @param ["English", "Korean"]

In [4]:
#@title (선택) 영상 추출 모델의 크기를 선택하세요. large 로 갈수록 성능은 좋아지지만 오래 걸림
# Name of the Whisper checkpoint intended for whisper.load_model(model_size)
# in the model-load cell.
model_size = "base" # @param ["base","small","medium","large"]

In [5]:
#@title (선택) 영어 유튜브 영상일 경우 한글 자막 생성 여부를 선택하세요.
# "Yes" asks the run cell to create Korean subtitles with SubtitleTranslator;
# "No" skips the translation step.
is_translate = "Yes"  # @param ["Yes", "No"]

In [6]:
#@title (선택) 영상과 자막을 합칠지 여부를 선택하세요 (자막을 입히는 경우 시간이 오래걸림)
# "Yes" makes the run cell burn the subtitles into the video with ffmpeg and
# download the result; "No" downloads the video and the .srt files separately.
is_merge = "Yes"  # @param ["Yes", "No"]

In [7]:
#@title (옵션) 영상과 자막을 합칠 경우 자막의 언어를 선택하세요.
# Language of the subtitle file that is burned into the video when is_merge
# is "Yes".
merge_language = "Korean" # @param ["English", "Korean"]

## 패키지 설치

In [None]:
# @title
# Installs the system font and the Python packages used by the later cells and
# prints how long each of the three steps took.
from datetime import datetime
# Start of the whole run; the final cell reports the total time relative to this.
total_start_time = datetime.now()

# Step 1/3: Nanum fonts via apt (presumably so ffmpeg can render Korean
# subtitle glyphs - TODO confirm).
start_time = datetime.now()
!sudo apt-get install -y fonts-nanum
end_time = datetime.now()
elapsed_time = end_time - start_time
print("(1/3) 폰트 설치 완료", elapsed_time)

# Step 2/3: pytube (download), translate (translation) and ffmpeg-python.
# The commented-out line is the earlier googletrans-based dependency set.
start_time = datetime.now()
#! pip install -q pytube googletrans==4.0.0rc1 ffmpeg-python
! pip install -q pytube translate ffmpeg-python
end_time = datetime.now()
elapsed_time = end_time - start_time
print("(2/3) Pytube, 구글번역, FFMPEG 설치 완료", elapsed_time)

# Step 3/3: Whisper, installed straight from the GitHub repository.
start_time = datetime.now()
! pip install git+https://github.com/openai/whisper.git -q
end_time = datetime.now()
elapsed_time = end_time - start_time
print("(3/3) Whisper 설치 완료", elapsed_time)
print("")
# Total installation time, measured from the first timestamp of this cell.
elapsed_time = end_time - total_start_time
print("패키지 설치에 걸린 전체시간", elapsed_time)

## 클래스 모음

In [None]:
# @title
from IPython.display import display
from pytube import YouTube
import math
import whisper
import pandas as pd
import requests
from pathlib import Path

from google.colab import files

#from googletrans import Translator
from translate import Translator
import re
import threading

class YoutubeDownloader:
    """Download a YouTube video and its audio track into the working directory.

    ``title``, ``filename``, ``video_name`` and ``audio_name`` are ``None``
    until :meth:`download` has run.
    """

    def __init__(self, url):
        """Create the pytube ``YouTube`` handle for *url*; nothing is downloaded yet."""
        self.title = None
        self.filename = None
        self.audio_name = None
        # Was misspelled ``vide_name``, which left ``video_name`` undefined
        # until download() ran.
        self.video_name = None
        self.youtube_video = YouTube(url)

    def make_safe_filename(self, s: str) -> str:
        """Return *s* reduced to alphanumerics joined by single underscores.

        Every non-alphanumeric character becomes ``_``; runs of underscores
        of any length collapse to one and leading/trailing underscores are
        removed. If nothing alphanumeric remains, ``"video"`` is returned so
        the derived file names never have an empty stem.
        """
        replaced = "".join(c if c.isalnum() else "_" for c in s)
        # A regex collapses runs of any length; two chained
        # ``replace("__", "_")`` calls left "__" behind for five or more.
        collapsed = re.sub(r"_+", "_", replaced).strip("_")
        return collapsed or "video"

    def download(self):
        """Download the best progressive mp4 stream and the audio-only stream.

        Sets ``title``, ``filename``, ``video_name`` (``<filename>.mp4``) and
        ``audio_name`` (``<filename>.mp3``).

        Raises:
            RuntimeError: if the video offers no progressive mp4 stream or no
                audio-only stream.
        """
        self.title = self.youtube_video.title
        self.filename = self.make_safe_filename(self.title)

        streams = self.youtube_video.streams
        # Highest resolution first; first() yields None instead of raising
        # IndexError when the filter matches nothing.
        video = (
            streams.filter(progressive=True, file_extension="mp4")
            .order_by("resolution")
            .desc()
            .first()
        )
        audio = streams.get_audio_only()
        if video is None:
            raise RuntimeError(f"No progressive mp4 stream available for: {self.title}")
        if audio is None:
            raise RuntimeError(f"No audio-only stream available for: {self.title}")

        self.video_name = f"{self.filename}.mp4"
        video.download(filename=self.video_name)

        # NOTE(review): the ".mp3" suffix is only the file name chosen here;
        # the container is whatever get_audio_only() returns - confirm the
        # consumers do not rely on the extension.
        self.audio_name = f"{self.filename}.mp3"
        audio.download(filename=self.audio_name)

class Transcriber:
    """Run a speech-to-text model over the audio file fetched by a downloader."""

    def __init__(self, model, downloader: "YoutubeDownloader", language: str = "en"):
        """Remember the model, the downloader's audio path and the language.

        Args:
            model: object exposing ``transcribe(path, word_timestamps=..., language=...)``.
            downloader: source of ``audio_name``; it is read once, here.
            language: language code forwarded to the model.
        """
        self.model = model
        self.audio_name = downloader.audio_name
        self.language = language
        # Defined up front so reading it before transcribe() gives None
        # instead of raising AttributeError.
        self.transcribed_result = None

    def transcribe(self):
        """Transcribe the audio file, store the result and return it.

        Raises:
            ValueError: if the downloader had no audio file when this object
                was created.
        """
        if not self.audio_name:
            raise ValueError(
                "No audio file to transcribe; run the downloader before creating the Transcriber."
            )
        self.transcribed_result = self.model.transcribe(
            self.audio_name, word_timestamps=False, language=self.language
        )
        return self.transcribed_result

class SubtileMaker:
    """Write a transcription result to ``<filename>_<language>.srt``."""

    def __init__(self, script, downloader: "YoutubeDownloader", language: str = "en"):
        """Store the transcription and the output naming parts.

        Args:
            script: mapping with a ``'segments'`` list whose items carry
                ``'start'``, ``'end'`` and ``'text'``.
            downloader: source of the file-name stem (``filename``).
            language: suffix used in the .srt file name.
        """
        self.script = script
        self.filename = downloader.filename
        self.language = language

    def reformat_time(self, second):
        """Format a time in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

        The value is rounded to whole milliseconds before it is split, so a
        value such as 59.9996 carries into the minute (``00:01:00,000``)
        instead of producing an invalid ``00:00:60,000``. Negative values are
        clamped to zero.
        """
        total_ms = max(0, int(round(second * 1000)))
        total_seconds, ms = divmod(total_ms, 1000)
        minutes, s = divmod(total_seconds, 60)
        h, m = divmod(minutes, 60)
        return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

    def execute(self):
        """Write all segments as numbered SRT cues and return the file path."""
        srt_path = f"{self.filename}_{self.language}.srt"
        cues = [
            f"{index}\n"
            f"{self.reformat_time(segment['start'])} --> {self.reformat_time(segment['end'])}\n"
            # Strip surrounding whitespace so it does not end up in the cue text.
            f"{segment['text'].strip()}\n\n"
            for index, segment in enumerate(self.script['segments'], start=1)
        ]
        with open(srt_path, 'w', encoding='utf-8') as f:
            f.writelines(cues)
        return srt_path


class SubtitleTranslator:
    """Translate ``<filename>_<from>.srt`` into ``<filename>_<to>.srt`` line by line.

    Cue numbers, timing lines and blank lines are copied through unchanged;
    every other line is sent to the translator. The lines are split into
    contiguous chunks that are translated in parallel worker threads.
    """

    # Structural SRT lines that must not be sent to the translator.
    _CUE_INDEX = re.compile(r"^[0-9]+$")
    _CUE_TIMING = re.compile(r"\d{2}:\d{2}:\d{2},\d{3}\s-->\s\d{2}:\d{2}:\d{2},\d{3}")

    def __init__(self, downloader: "YoutubeDownloader", from_language: str = "en", to_language: str = "ko"):
        """Store the file-name stem and the source/target language codes."""
        self.filename = downloader.filename
        self.from_language = from_language
        self.to_language = to_language

    def __translate(self, translator, text, n):
        """Translate one line of an .srt file.

        Args:
            translator: object exposing ``translate(text)``.
            text: the raw line, usually ending in a newline.
            n: index of the line within its chunk (unused; part of the
                ``translator_fun`` calling convention).

        Returns:
            Blank lines unchanged; cue numbers and timing lines unchanged
            (with a trailing newline); otherwise the translated text with a
            trailing newline.
        """
        # Blank or whitespace-only lines separate cues; nothing to translate.
        if not text.strip():
            return text

        text = text.rstrip('\n')
        if self._CUE_INDEX.match(text) or self._CUE_TIMING.match(text):
            return self.add_newline_if_missing(text)

        return self.add_newline_if_missing(translator.translate(text))

    def add_newline_if_missing(self, s):
        """Return ``str(s)`` guaranteed to end with a newline."""
        s = str(s)
        return s if s.endswith('\n') else s + '\n'

    def translate_task(self, lines, translator_fun, result_map, i, translator):
        """Translate one chunk of lines and store the result under key *i*."""
        result_map[i] = [translator_fun(translator, line, n) for n, line in enumerate(lines)]

    def translate_file(self, translator_fun, file1, file2, thread_nums, translator=None):
        """Read *file1*, translate every line and write the result to *file2*.

        The output file is opened only after translation has finished, so a
        failure does not leave an empty or truncated file behind.
        """
        with open(file1, 'r', encoding='utf-8') as source:
            lines = source.readlines()
        result = self.get_translate_result(lines, thread_nums, translator, translator_fun)
        with open(file2, 'w', encoding='utf-8') as target:
            target.writelines(result)

    def get_translate_result(self, lines, thread_nums, translator, translator_fun):
        """Return the translated lines in their original order."""
        result_map = self.get_translate_threads_result(lines, thread_nums, translator, translator_fun)
        result = []
        for key in sorted(result_map):
            result.extend(result_map[key])
        return result

    def get_translate_threads_result(self, lines, thread_nums, translator, translator_fun):
        """Translate *lines* in ``thread_nums`` chunks, one worker thread each.

        Returns:
            dict mapping the 1-based chunk number to its translated lines.

        Raises:
            Whatever exception a worker raised. Previously a failing worker
            only lost its chunk, silently dropping those lines from the output.
        """
        from concurrent.futures import ThreadPoolExecutor

        # At least one worker; also avoids dividing by zero below.
        thread_nums = max(1, int(thread_nums))
        n = len(lines) // thread_nums
        result_map = {}
        with ThreadPoolExecutor(max_workers=thread_nums) as pool:
            futures = [
                pool.submit(
                    self.translate_task,
                    self.get_split_lines(i, lines, n, thread_nums),
                    translator_fun,
                    result_map,
                    i,
                    translator,
                )
                for i in range(1, thread_nums + 1)
            ]
        # Leaving the ``with`` block waits for all workers; result() then
        # re-raises the first failure, if any.
        for future in futures:
            future.result()
        return result_map

    def get_split_lines(self, i, lines, n, thread_nums):
        """Return chunk *i* (1-based) of *lines* for a chunk size of *n*.

        The last chunk, or any chunk that would run past the end, takes all
        remaining lines.
        """
        start = (i - 1) * n
        if i == thread_nums or n * i > len(lines):
            return lines[start:]
        return lines[start:i * n]

    def translate(self, thread_nums=2):
        """Translate the source-language .srt into the target-language .srt.

        Uses ``from_language`` / ``to_language`` given to the constructor for
        both the translator and the file names; the defaults reproduce the
        previous fixed en -> ko behavior.
        """
        translator = Translator(from_lang=self.from_language, to_lang=self.to_language)
        source_srt = f"{self.filename}_{self.from_language}.srt"
        target_srt = f"{self.filename}_{self.to_language}.srt"
        self.translate_file(self.__translate, source_srt, target_srt, thread_nums, translator)


## 모델 로드

In [None]:
# @title
# Load the Whisper checkpoint selected in the model-size form cell and report
# how long loading took. `model_size` is deliberately NOT reassigned here: the
# previous hard-coded `model_size = "base"` overrode the user's selection.
print("모델 로딩중...")

start_time = datetime.now()
model = whisper.load_model(model_size)
end_time = datetime.now()
elapsed_time = end_time - start_time
print("모델 로딩에 걸린 시간", elapsed_time)

## 실행코드

In [None]:
# @title
from torch import e
import ffmpeg
import os


# Language codes used for Whisper and in the .srt file names.
from_language = "en" if language == "English" else "ko"
# Kept in its own variable so the form value `merge_language` is not
# overwritten; overwriting it turned "en" into "ko" when the cell was re-run.
# The bare codes are accepted too, in case an earlier run already replaced the
# form value.
merge_language_code = "en" if merge_language in ("English", "en") else "ko"


def _print_elapsed(started_at):
    """Print the time elapsed since *started_at*, followed by a blank line."""
    print("걸린 시간", datetime.now() - started_at)
    print("")


if youtube_url:

    print("(1/6) 유튜브 영상 다운로드 진행중 ...")
    start_time = datetime.now()
    downloader = YoutubeDownloader(youtube_url)
    downloader.download()
    _print_elapsed(start_time)

    print("(2/6) 스크립트 추출 진행중 ...")
    start_time = datetime.now()
    transcriber = Transcriber(model=model, downloader=downloader, language=from_language)
    transcriber.transcribe()
    _print_elapsed(start_time)

    print("(3/6) 자막 생성 진행중 ...")
    start_time = datetime.now()
    srtmaker = SubtileMaker(transcriber.transcribed_result, downloader=downloader, language=from_language)
    srtmaker.execute()
    _print_elapsed(start_time)
    # The subtitle file that is guaranteed to exist after step 3.
    source_srt_path = f"{downloader.filename}_{from_language}.srt"

    # Translation goes from English to Korean only; for a Korean video there is
    # no "_en.srt" to read, so the step is skipped instead of failing.
    if is_translate == "Yes" and from_language == "en":
        print("(4/6) 자막 번역 진행중 ...")
        start_time = datetime.now()
        translator = SubtitleTranslator(downloader)
        translator.translate(2)
        _print_elapsed(start_time)
    else:
        print("(4/6) 번역 Skipped.")

    if is_merge == "Yes":
        print("(5/6) 영상에 자막 추가 하는 중(시간이 오래걸립니다. 기다려주세요.) ...")
        start_time = datetime.now()
        video = ffmpeg.input(downloader.video_name)
        audio = video.audio
        subtitle_video_path = f"{downloader.filename}_subtitle.mp4"
        subtitle_path = f"{downloader.filename}_{merge_language_code}.srt"
        # The requested subtitle may not exist (translation skipped, or the
        # video is not in that language); fall back to the generated one.
        if not os.path.isfile(subtitle_path):
            print(f"'{subtitle_path}' 자막 파일이 없어 '{source_srt_path}' 자막을 사용합니다.")
            subtitle_path = source_srt_path
        try:
            ffmpeg.concat(
                video.filter('subtitles', subtitle_path, force_style="OutlineColour=&H40000000,BorderStyle=3"),
                audio,
                v=1,
                a=1
            ).output(subtitle_video_path).run(quiet=True, overwrite_output=True)
        except ffmpeg.Error as err:
            # quiet=True hides ffmpeg's output; show its stderr before
            # re-raising so the failure can be diagnosed.
            if err.stderr:
                print(err.stderr.decode("utf-8", errors="replace"))
            raise
        _print_elapsed(start_time)

        print("(6/6) 영상 다운로드중 ...")
        files.download(subtitle_video_path)

    else:
        print("(5/6) 자막추가 Skipped.")
        print("(6/6) 영상 및 자막 다운로드중 ...")
        subtitle_en_path = f"{downloader.filename}_en.srt"
        subtitle_ko_path = f"{downloader.filename}_ko.srt"
        files.download(downloader.video_name)
        # Only the subtitle files that were actually produced are offered.
        if os.path.isfile(subtitle_en_path):
            files.download(subtitle_en_path)
        if os.path.isfile(subtitle_ko_path):
            files.download(subtitle_ko_path)

    # Total time since the first timestamp taken in the install cell.
    end_time = datetime.now()
    elapsed_time = end_time - total_start_time
    print("전체 걸린 시간", elapsed_time)

else:
    print("유튜브 URL을 입력하세요.")
