In [1]:
# 예시 URL : https://www.youtube.com/watch?v=jC4v5AS4RIM

## 사용방법
아래의 입력 항목과 선택 사항을 지정한 후 상단 메뉴의 Runtime > Run all을 실행해 주세요.

GPU 서버를 선택하면 음성 추출을 더 빠르게 수행할 수 있습니다.

In [2]:
#@title 유튜브 URL 입력(필수 입력사항)
# URL of the YouTube video to process (required). When left empty, the run
# cell only prints a prompt asking for a URL and does nothing else.
youtube_url = "" #@param {type:"string"}

In [3]:
#@title 유튜브 영상의 언어를 선택하세요.
# Spoken language of the video. The run cell maps "English" to the code "en"
# and any other value to "ko" before passing it to the transcriber.
language = "English" # @param ["English", "Korean"]

In [4]:
#@title (선택) 영상 추출 모델의 크기를 선택하세요. large 로 갈수록 성능은 좋아지지만 오래 걸림
# Name of the Whisper model handed to whisper.load_model() in the model-load
# cell (optional; defaults to "base").
model_size = "base" # @param ["base","small","medium","large"]

In [5]:
#@title (선택) 영어 유튜브 영상일 경우 한글 자막 생성 여부를 선택하세요.
# "Yes" makes the run cell call SubtitleTranslator after the transcript .srt
# has been written; "No" skips the translation step.
is_translate = "Yes"  # @param ["Yes", "No"]

In [6]:
#@title (선택) 영상과 자막을 합칠지 여부를 선택하세요 (자막을 입히는 경우 시간이 오래걸림)
# "Yes": the run cell renders the subtitles onto the video with ffmpeg and
# downloads the merged file. "No": it downloads the plain video plus whichever
# of the _en.srt / _ko.srt files exist.
is_merge = "Yes"  # @param ["Yes", "No"]

In [7]:
#@title (옵션) 영상과 자막을 합칠 경우 자막의 언어를 선택하세요.
# Language of the subtitle file used when merging; the run cell turns it into
# the "en"/"ko" suffix of the .srt file passed to ffmpeg.
merge_language = "Korean" # @param ["English", "Korean"]

## 패키지 설치 (설치시간 몇분 걸림)

In [8]:
# @title
#pip install git+https://github.com/openai/whisper.git -q
! sudo apt-get install -y fonts-nanum
! pip install -U openai-whisper
! pip install pytube googletrans==4.0.0rc1 ffmpeg-python

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  fonts-nanum
0 upgraded, 1 newly installed, 0 to remove and 19 not upgraded.
Need to get 10.3 MB of archives.
After this operation, 34.1 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 fonts-nanum all 20200506-1 [10.3 MB]
Fetched 10.3 MB in 1s (12.1 MB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package fonts-nanum.
(Reading database ... 120874 files and direc

## 클래스 모음

In [9]:
# @title
from IPython.display import display
from pytube import YouTube
import math
import whisper
import pandas as pd
import requests
from pathlib import Path

from google.colab import files

from googletrans import Translator
import re
import threading

class YoutubeDownloader:
    """Download the video and the audio track of one YouTube URL.

    Attributes (all ``None`` until :meth:`download` has run):
        title: Title reported by pytube for the video.
        filename: File-system-safe stem derived from ``title``.
        video_name: Name of the downloaded video file (``<filename>.mp4``).
        audio_name: Name of the downloaded audio file (``<filename>.mp3``).
    """

    def __init__(self, url):
        """Create the pytube handle for ``url``; nothing is downloaded yet."""
        self.title = None
        self.filename = None
        self.audio_name = None
        # Previously misspelled as ``vide_name``, which left ``video_name``
        # undefined until download() had been called.
        self.video_name = None
        self.youtube_video = YouTube(url)

    def make_safe_filename(self, s: str) -> str:
        """Return ``s`` reduced to alphanumerics separated by single underscores.

        Every non-alphanumeric character becomes ``_``, runs of underscores
        of any length are collapsed to one, and leading/trailing underscores
        are removed. If nothing is left, ``"video"`` is returned so that the
        derived file names never consist of an extension only.
        """
        replaced = "".join(c if c.isalnum() else "_" for c in s)
        # A regex collapses runs of any length; chaining two
        # ``replace("__", "_")`` calls left "__" behind for runs of 5+.
        collapsed = re.sub(r"_+", "_", replaced).strip("_")
        return collapsed or "video"

    def download(self):
        """Download the video and audio streams into the working directory.

        Picks the highest-resolution progressive MP4 stream for the video
        and pytube's audio-only stream for the audio, and stores the
        resulting file names in ``video_name`` / ``audio_name``.

        Raises:
            RuntimeError: If no matching video stream or no audio stream is
                available for this video.
        """
        self.title = self.youtube_video.title
        self.filename = self.make_safe_filename(self.title)

        streams = self.youtube_video.streams
        video = (
            streams.filter(progressive=True, file_extension="mp4")
            .order_by('resolution')
            .desc()
            .first()
        )
        audio = streams.get_audio_only()
        if video is None:
            raise RuntimeError("no progressive mp4 video stream available for this video")
        if audio is None:
            raise RuntimeError("no audio-only stream available for this video")

        self.video_name = f"{self.filename}.mp4"
        video.download(filename=self.video_name)

        # NOTE(review): the audio stream is saved under an .mp3 name whatever
        # its real container is; the name is kept because callers read it.
        self.audio_name = f"{self.filename}.mp3"
        audio.download(filename=self.audio_name)

class Transcriber:
    """Run a speech-to-text model over the audio file of a downloader.

    Attributes:
        model: Object exposing ``transcribe(path, word_timestamps=..., language=...)``.
        audio_name: Audio file name copied from the downloader at construction.
        language: Language code passed to the model.
        transcribed_result: Result of the last :meth:`transcribe` call, or
            ``None`` if it has not been called yet.
    """

    def __init__(self, model, downloader: "YoutubeDownloader", language: str = "en"):
        """Store the model and read ``audio_name`` from ``downloader``.

        ``downloader.audio_name`` is read here, so the downloader must
        already have downloaded its files.
        """
        self.model = model
        self.audio_name = downloader.audio_name
        self.language = language
        # Defined up front so reading it before transcribe() yields None
        # instead of raising AttributeError.
        self.transcribed_result = None

    def transcribe(self):
        """Transcribe the audio file and return the model's result.

        The result is also stored in ``transcribed_result``.

        Raises:
            ValueError: If no audio file name is available.
        """
        if not self.audio_name:
            raise ValueError(
                "audio_name is not set; call downloader.download() before creating the Transcriber"
            )
        self.transcribed_result = self.model.transcribe(
            self.audio_name, word_timestamps=False, language=self.language
        )
        return self.transcribed_result

class SubtileMaker:
    """Write the segments of a transcription result to an SRT file.

    Attributes:
        script: Mapping with a ``'segments'`` list; each segment provides
            ``'start'``, ``'end'`` (seconds) and ``'text'``.
        filename: File name stem taken from the downloader.
        language: Language code used as suffix of the SRT file name.
    """

    def __init__(self, script, downloader: "YoutubeDownloader", language: str = "en"):
        """Store the transcript and take the file name stem from ``downloader``."""
        self.script = script
        self.filename = downloader.filename
        self.language = language

    def reformat_time(self, second):
        """Format a number of seconds as an SRT timestamp ``HH:MM:SS,mmm``.

        The value is rounded to whole milliseconds *before* it is split into
        fields, so the carry propagates correctly (59.9996 s becomes
        ``00:01:00,000`` rather than the invalid ``00:00:60,000``). Negative
        inputs are clamped to zero.
        """
        total_ms = max(0, int(round(second * 1000)))
        hours, rest = divmod(total_ms, 3_600_000)
        minutes, rest = divmod(rest, 60_000)
        seconds, millis = divmod(rest, 1000)
        return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, millis)

    def execute(self):
        """Write ``<filename>_<language>.srt`` and return its path.

        Each segment becomes one cue: a 1-based index, the time range, the
        segment text and a blank separator line.
        """
        segments = self.script['segments']
        srt_path = f"{self.filename}_{self.language}.srt"
        with open(srt_path, 'w', encoding='utf-8') as f:
            for index, segment in enumerate(segments, start=1):
                start = self.reformat_time(segment['start'])
                end = self.reformat_time(segment['end'])
                f.write(f"{index}\n{start} --> {end}\n{segment['text']}\n\n")
        return srt_path


class SubtitleTranslator:
    """Translate an SRT file line by line using several threads.

    Cue numbers, timestamp lines and blank lines are copied unchanged; every
    other line is sent to the translator.

    Attributes:
        filename: File name stem taken from the downloader.
        from_language: Language code of the source SRT (file name suffix).
        to_language: Target language code (translator destination and file
            name suffix of the output SRT).
    """

    # A line consisting only of digits is treated as a cue number.
    _CUE_INDEX = re.compile(r"^[0-9]+$")
    # "HH:MM:SS,mmm --> HH:MM:SS,mmm" cue timing line.
    _CUE_TIMING = re.compile(r"\d{2}:\d{2}:\d{2},\d{3}\s-->\s\d{2}:\d{2}:\d{2},\d{3}")

    def __init__(self, downloader: "YoutubeDownloader", from_language: str = "en", to_language: str = "ko"):
        """Take the file name stem from ``downloader`` and store the language pair."""
        self.filename = downloader.filename
        self.from_language = from_language
        self.to_language = to_language

    def __translate(self, translator, text, n):
        """Return the translated form of one SRT line, newline-terminated.

        Blank lines are returned as-is; cue numbers and timing lines are
        returned untranslated. ``n`` (the line's index within its chunk) is
        accepted for interface compatibility and not used.
        """
        if not text.strip():
            return text

        text = text.rstrip('\n')
        if self._CUE_INDEX.match(text) or self._CUE_TIMING.match(text):
            return self.add_newline_if_missing(text)

        translated = translator.translate(text=text, dest=self.to_language).text
        return self.add_newline_if_missing(translated)

    def add_newline_if_missing(self, s):
        """Return ``str(s)`` with a trailing newline appended if it has none."""
        s = str(s)
        if not s.endswith('\n'):
            s += '\n'
        return s

    def translate_task(self, lines, translator_fun, result_map, i, translator):
        """Thread body: translate ``lines`` and store the list in ``result_map[i]``.

        A line whose translation raises is kept in its original form. Without
        this, one failed request would abort the thread and its whole chunk
        would be missing from the output file.
        """
        print("thread id: ", i, "lines num: ", len(lines))
        translated = []
        for n, line in enumerate(lines):
            try:
                translated.append(translator_fun(translator, line, n))
            except Exception as exc:  # translator back-ends raise varied error types
                print("translate failed, keeping original line: ", repr(line), "error: ", exc)
                translated.append(self.add_newline_if_missing(line))
        result_map[i] = translated

    def translate_file(self, translator_fun, file1, file2, thread_nums, translator=None):
        """Translate the lines of ``file1`` and write them to ``file2``.

        The input is read completely and translated before the output file
        is opened, so a failure does not leave a truncated output behind.
        """
        with open(file1, 'r', encoding='utf-8') as f1:
            lines = f1.readlines()
        print("translate file total lines: ", len(lines))
        result = self.get_translate_result(lines, thread_nums, translator, translator_fun)
        with open(file2, 'w', encoding='utf-8') as f2:
            f2.writelines(result)
        print("\ntranslate write file done")

    def get_translate_result(self, lines, thread_nums, translator, translator_fun):
        """Return all translated lines in their original order."""
        result_map = self.get_translate_threads_result(lines, thread_nums, translator, translator_fun)
        result = []
        for key in sorted(result_map):
            result.extend(result_map.get(key))
        return result

    def get_translate_threads_result(self, lines, thread_nums, translator, translator_fun):
        """Split ``lines`` into chunks, translate each in its own thread.

        Returns a dict mapping the 1-based chunk number to its translated
        lines. The thread count is clamped to ``1..len(lines)`` so that zero
        or oversized values neither divide by zero nor spawn idle threads.
        """
        thread_nums = max(1, min(int(thread_nums), len(lines)))
        result_map = {}
        chunk_size = len(lines) // thread_nums
        threads = [
            threading.Thread(
                target=self.translate_task,
                args=(self.get_split_lines(i, lines, chunk_size, thread_nums),
                      translator_fun, result_map, i, translator),
            )
            for i in range(1, thread_nums + 1)
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        return result_map

    def get_split_lines(self, i, lines, n, thread_nums):
        """Return chunk ``i`` (1-based) of ``lines`` for a chunk size of ``n``.

        The last chunk (``i == thread_nums``) also receives the remainder
        left over by the integer division.
        """
        start = (i - 1) * n
        end = i * n
        if i == thread_nums or end > len(lines):
            return lines[start:]
        return lines[start:end]

    def translate(self, thread_nums=2):
        """Translate ``<filename>_<from_language>.srt`` into ``<filename>_<to_language>.srt``.

        With the default language pair these are the ``_en.srt`` and
        ``_ko.srt`` files.
        """
        translator = Translator()
        source_srt = f"{self.filename}_{self.from_language}.srt"
        target_srt = f"{self.filename}_{self.to_language}.srt"
        self.translate_file(self.__translate, source_srt, target_srt, thread_nums, translator)


## 모델 로드

In [10]:
# @title
# Load the Whisper model selected in the `model_size` form field. The model
# object is kept in the global `model`, which the run cell passes to Transcriber.
print("모델 로딩중...")
model = whisper.load_model(model_size)

모델 로딩중...


100%|████████████████████████████████████████| 139M/139M [00:01<00:00, 113MiB/s]


## 실행코드

In [11]:
# @title
from torch import e
import ffmpeg
import os


# Convert the form's display names into the two-letter codes used for the
# transcriber and for the subtitle file suffixes. The form variables are left
# untouched: overwriting merge_language with its code made a second run of
# this cell fall through to "ko" even when "English" was selected.
from_language = "en" if language == "English" else "ko"
merge_language_code = "en" if merge_language in ("English", "en") else "ko"

if youtube_url:

  print("유튜브 영상 다운로드 진행중 ...")
  downloader = YoutubeDownloader(youtube_url)
  downloader.download()

  print("음성 추출 진행중 ...")
  transcriber = Transcriber(model=model, downloader=downloader, language=from_language)
  transcriber.transcribe()

  print("자막 생성 진행중 ...")
  srtmaker = SubtileMaker(transcriber.transcribed_result, downloader=downloader, language=from_language)
  srtmaker.execute()

  if is_translate == "Yes":
    if from_language == "en":
      print("번역 진행중 ...")
      translator = SubtitleTranslator(downloader)
      translator.translate(2)
    else:
      # The translator reads the English .srt, which only exists for English
      # videos; running it for a Korean video raised FileNotFoundError.
      print("영어 영상이 아니므로 번역을 건너뜁니다.")

  if is_merge == "Yes":
    print("영상에 자막 추가 하는중 ...")
    subtitle_video_path = f"{downloader.filename}_subtitle.mp4"
    subtitle_path = f"{downloader.filename}_{merge_language_code}.srt"
    if not os.path.isfile(subtitle_path):
      # The requested subtitle was never generated (translation disabled or
      # skipped); fall back to the transcript written by SubtileMaker.
      fallback_path = f"{downloader.filename}_{from_language}.srt"
      print(f"{subtitle_path} 파일이 없어 {fallback_path} 자막을 사용합니다.")
      subtitle_path = fallback_path

    video = ffmpeg.input(downloader.video_name)
    audio = video.audio
    merged = ffmpeg.concat(
      video.filter('subtitles', subtitle_path, force_style="OutlineColour=&H40000000,BorderStyle=3"),
      audio,
      v=1,
      a=1
    ).output(subtitle_video_path)
    try:
      merged.run(quiet=True, overwrite_output=True)
    except ffmpeg.Error as err:
      # quiet=True captures ffmpeg's output; show it before re-raising so
      # the reason for the failure is visible in the notebook.
      if err.stderr:
        print(err.stderr.decode("utf-8", errors="replace"))
      raise

    print("영상 다운로드중 ...")
    files.download(subtitle_video_path)
  else:
    # No merge requested: hand over the plain video plus every subtitle file
    # that was actually produced.
    files.download(downloader.video_name)
    for code in ("en", "ko"):
      subtitle_file = f"{downloader.filename}_{code}.srt"
      if os.path.isfile(subtitle_file):
        files.download(subtitle_file)
else:
  print("유튜브 URL을 입력하세요.")


유튜브 영상 다운로드 진행중 ...
음성 추출 진행중 ...
자막 생성 진행중 ...
번역 진행중 ...
translate file total lines:  612
thread id:  1 lines num:  306
thread id:  2 lines num:  306

translate write file done
영상에 자막 추가 하는중 ...
영상 다운로드중 ...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>