<a href="https://colab.research.google.com/github/starirene9/DeepLearningAssignment/blob/main/2%EC%A1%B0_7_8%EC%A3%BC%EC%B0%A8_%EA%B3%BC%EC%A0%9C_K2024504.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

        ## 사용 방법 안내 ✨

        - 기본으로 제공된 1분짜리 YouTube URL이 입력되어 있습니다.
        - 다른 URL을 입력하면 새로운 동영상 다운로드 및 자막 추출이 가능합니다.
        - **1️⃣ 다운로드 & 자막 추출** 버튼을 누르면
            - 동영상 재생 화면
            - 자막 원문 텍스트
            - 시간별로 분리된 자막 테이블 (클릭 시 해당 구간 영상 클립 재생)
            - 자막 파일 미리보기, 비디오 파일 미리보기가 생성됩니다.
        - 다운로드가 완료되면 **자막 다운로드** 버튼과 **비디오 다운로드** 버튼이 활성화됩니다.
        - **2️⃣ Gemini로 요약하기** 버튼을 누르면:
            - `gemini-2.0-flash-001` 모델을 사용하여 자막을 간결하게 요약해줍니다.

In [1]:
# 설치
!pip install git+https://github.com/openai/whisper.git
!pip install pydub
!pip install pytubefix
!pip install --upgrade gradio
!pip install --upgrade google-genai
!pip install moviepy

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-eqy5ot_b
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-eqy5ot_b
  Resolved https://github.com/openai/whisper.git to commit 517a43ecd132a2089d85f4ebc044728a71d49f6e
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper==20240930)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->openai-whisper==20240930)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->openai-whisper==20240930)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-

In [2]:
from pytubefix import YouTube
from pydub import AudioSegment
import gradio as gr
from IPython.display import Audio, Video, display
import os, tempfile
os.makedirs(os.path.join(tempfile.gettempdir(), "gradio"), exist_ok=True)
import whisper

from google import genai
from google.genai.types import GenerateContentConfig, HttpOptions

In [3]:
# ────────────────────────────────────────────────
# 2. Model creation
# ────────────────────────────────────────────────

# Whisper model ("small" checkpoint), loaded once at module level;
# download_youtube() calls whisper_model.transcribe() on the extracted audio.
whisper_model = whisper.load_model("small")


100%|███████████████████████████████████████| 461M/461M [00:19<00:00, 24.8MiB/s]


In [4]:
# ────────────────────────────────────────────────
# 3. 기능 함수
# ────────────────────────────────────────────────

def make_subtitle_segments(result):
    """Turn a Whisper transcription result into a list of subtitle tuples.

    Args:
        result: Mapping with a ``"segments"`` entry; every segment must
            provide ``"start"``, ``"end"`` and ``"text"``.

    Returns:
        A list of ``(start, end, text)`` tuples in segment order, where
        ``start``/``end`` are rounded to two decimals and ``text`` has its
        surrounding whitespace stripped.
    """
    return [
        (round(entry["start"], 2), round(entry["end"], 2), entry["text"].strip())
        for entry in result["segments"]
    ]

In [5]:
def _sec_to_srt_time(t: float) -> str:
    ms = int(round((t - int(t)) * 1000))
    total_sec = int(t)
    h = total_sec // 3600
    m = (total_sec % 3600) // 60
    s = total_sec % 60
    return f"{h:02}:{m:02}:{s:02},{ms:03}"

def write_srt(segments, srt_path):
    """Write subtitle segments to ``srt_path`` as a UTF-8 SubRip file.

    Args:
        segments: Iterable of ``(start, end, text)`` tuples; ``start`` and
            ``end`` are passed to ``_sec_to_srt_time`` for formatting.
        srt_path: Destination path; the file is created or overwritten.

    Each cue is written as its 1-based index, a ``start --> end`` line, the
    text, and a blank separator line.
    """
    with open(srt_path, "w", encoding="utf-8") as srt_file:
        for number, (begin, finish, caption) in enumerate(segments, start=1):
            time_range = f"{_sec_to_srt_time(begin)} --> {_sec_to_srt_time(finish)}"
            srt_file.write(f"{number}\n{time_range}\n{caption}\n\n")

In [6]:
def download_youtube(url: str):
    """Download a YouTube video and derive audio, transcript and SRT from it.

    Steps, in order:
      1. Take the first progressive MP4 stream after ordering by resolution
         and download it to a temporary name, then move it to ``video.mp4``.
      2. Export the video's audio track to ``audio.mp3``.
      3. Transcribe the audio with the module-level ``whisper_model``.
      4. Save the full transcript to ``subtitle.txt`` and the timed segments
         to ``subtitle.srt``.

    All files are written into the current working directory.

    Args:
        url: YouTube video URL.

    Returns:
        ``(video_path, audio_path, txt_path, srt_path, transcript, segments)``
        where ``segments`` is the ``(start, end, text)`` list produced by
        ``make_subtitle_segments``.

    Raises:
        ValueError: If no progressive MP4 stream is available.
    """
    yt = YouTube(url)
    progressive_mp4 = yt.streams.filter(progressive=True, file_extension="mp4")
    stream = progressive_mp4.order_by("resolution").first()
    if stream is None:
        raise ValueError("다운로드할 수 있는 영상이 없습니다!")

    download_folder = "./"
    video_path, temp_video, audio_path, txt_path, srt_path = (
        os.path.join(download_folder, file_name)
        for file_name in (
            "video.mp4",
            "_temp_video.mp4",
            "audio.mp3",
            "subtitle.txt",
            "subtitle.srt",
        )
    )

    # 1. Download under a temporary name, then move into place so that
    #    video.mp4 is only replaced once the download has finished.
    stream.download(
        output_path=download_folder,
        filename=os.path.basename(temp_video),
    )
    os.replace(temp_video, video_path)

    # 2. Extract the audio track.
    source_audio = AudioSegment.from_file(video_path)
    source_audio.export(audio_path, format="mp3")

    # 3. Speech-to-text with Whisper.
    result = whisper_model.transcribe(audio_path)
    transcript = result["text"].strip()
    segments = make_subtitle_segments(result)  # list[(start, end, text)]

    # 4. Persist the plain transcript and the timed subtitles.
    with open(txt_path, "w", encoding="utf-8") as txt_file:
        txt_file.write(transcript)
    write_srt(segments, srt_path)

    # 5. Hand everything back to the caller.
    return video_path, audio_path, txt_path, srt_path, transcript, segments


In [7]:
# Character budget for the transcript sent in one quick-summary request
MAX_CHUNK_CHARS = 12000

def gemini_summary(transcript: str, api_key: str, max_sentences: int = 7) -> str:
    """Summarise a transcript in Korean with the Gemini API.

    A transcript longer than ``MAX_CHUNK_CHARS`` is shortened to its first
    and last ``MAX_CHUNK_CHARS // 2`` characters joined by an ellipsis line
    before it is placed in the prompt.

    Args:
        transcript: Subtitle text to summarise.
        api_key: Gemini API key used to create the client.
        max_sentences: Upper sentence count mentioned in the prompt.

    Returns:
        The stripped summary text, or a user-facing message string when the
        transcript is empty, the response has no text, or the API call
        raises. Exceptions from the API call are not propagated.
    """
    if not transcript:
        return "❗ 자막이 비어 있습니다."

    if len(transcript) > MAX_CHUNK_CHARS:
        keep = MAX_CHUNK_CHARS // 2
        head, tail = transcript[:keep], transcript[-keep:]
        transcript = head + "\n…\n" + tail

    prompt = "".join([
        f"당신은 유튜브 자막 요약가입니다.\n",
        f"아래 자막을 기반으로 핵심 내용을 5~{max_sentences}문장 이내로 요약해 주세요.\n",
        f"자막에 없는 내용은 추가하거나 창작하지 마세요.\n",
        f"요약 결과는 반드시 한국어로 작성하세요.\n\n",
        f"{transcript}",
    ])

    try:
        client = genai.Client(api_key=api_key, http_options=HttpOptions(api_version="v1"))
        response = client.models.generate_content(
            model="gemini-2.0-flash-001",
            contents=prompt,
        )
        summary = response.text
        if summary is not None:
            return summary.strip()
        return "❗ Gemini 응답에 요약 결과가 없습니다."
    except Exception as e:
        # Callback boundary: report the failure as text instead of raising.
        return f"🚨 Gemini API 호출 실패: {type(e).__name__}: {e}"

In [9]:
# ────────────────────────────────────────────────
# 4. Gradio 콜백
# ────────────────────────────────────────────────
def cb_download(url: str):
    """Gradio callback for the download button.

    Runs ``download_youtube(url)`` and returns one value per component wired
    in ``btn_dl.click(outputs=[...])``, in this order:

      1. ``(video_path, srt_path)`` for the main video component
      2. transcript text
      3. transcript file path for the subtitle file component
      4. update that sets and shows the subtitle download button
      5. video path for the video file component
      6. update that sets and shows the video download button
      7. subtitle segments for the table
      8. video path stored in state
    """
    video_path, _audio_path, txt_path, srt_path, transcript, segments = download_youtube(url)

    subtitle_button = gr.update(value=txt_path, visible=True)
    video_button = gr.update(value=video_path, visible=True)
    video_with_subtitles = (video_path, srt_path)

    return (
        video_with_subtitles,
        transcript,
        txt_path,
        subtitle_button,
        video_path,
        video_button,
        segments,
        video_path,
    )


In [10]:
def cb_summary(transcript: str | None, api_key: str | None):
    """Gradio callback for the summary button.

    Args:
        transcript: Current content of the transcript textbox.
        api_key: Gemini API key typed by the user.

    Returns:
        A ``gr.update`` that makes the summary textbox visible and sets its
        value to the summary, or to a guidance/error message when the
        transcript is blank, the key is missing, or summarisation raises.
    """
    def show(message):
        # Every outcome is rendered in the same (visible) summary textbox.
        return gr.update(value=message, visible=True)

    if not (transcript and transcript.strip()):
        return show("❗ 먼저 1️⃣ 버튼으로 자막 추출 완료 후, 2️⃣ 버튼을 누르세요.")
    if not api_key:
        return show("❗ Gemini API 키를 입력해 주세요.")

    try:
        summary = gemini_summary(transcript, api_key)
    except Exception as e:
        return show(f"🚨 Gemini 오류: {type(e).__name__}: {e}")
    return show(summary)

In [11]:
from moviepy.editor import VideoFileClip

def cb_play_segment(evt: gr.SelectData, segments, video_path):
    """Gradio callback: cut the clicked subtitle row out of the video and play it.

    Args:
        evt: Selection event; ``evt.index[0]`` is used as the row index.
        segments: Current value of the subtitle table. Either an object with
            ``.iloc`` (DataFrame-like) or a sequence of rows; columns 0 and 1
            hold the start and end offsets in seconds.
        video_path: Path of the downloaded video held in state; falsy when
            nothing has been downloaded yet.

    Returns:
        A ``gr.update`` pointing the clip player at the newly written
        ``temp_<start>_<end>.mp4`` file with autoplay enabled, or a no-op
        ``gr.update()`` when there is no video yet or the selected segment
        has no positive duration.
    """
    # A row can be clicked before any download has populated the state.
    if not video_path:
        return gr.update()

    idx = evt.index[0]  # index of the selected row

    if hasattr(segments, "iloc"):
        start, end = segments.iloc[idx, 0], segments.iloc[idx, 1]
    else:
        start, end = segments[idx][0], segments[idx][1]
    # Normalise both table representations to plain floats.
    start, end = float(start), float(end)

    # Segment times are rounded, so start and end can coincide; an empty
    # range cannot be encoded into a clip.
    if end <= start:
        return gr.update()

    clip_path = f"temp_{start}_{end}.mp4"

    # Close the source clip after writing so that the reader resources
    # opened for it are released on every call instead of accumulating.
    with VideoFileClip(video_path) as source:
        source.subclip(start, end).write_videofile(
            clip_path,
            codec="libx264",
            audio_codec="aac",
            temp_audiofile="temp-audio.m4a",
            remove_temp=True,
            logger=None,
        )

    # Play the freshly written clip.
    return gr.update(value=clip_path, autoplay=True)

  if event.key is 'enter':



In [None]:
# ────────────────────────────────────────────────
# 5. UI layout
# ────────────────────────────────────────────────

# Components are created inside the Blocks context in the order they are
# declared below.
with gr.Blocks() as demo:
    gr.Markdown("# 🎥 YouTube 동영상 다운로드 · 자막 추출 · Gemini 요약")

    # Gemini API key, entered by the user as a password field.
    api_key_input = gr.Textbox(label="🔑 Gemini API Key 입력", placeholder="여기에 Gemini API Key를 입력하세요", type="password")

    # Source URL (pre-filled with a default video) and the download trigger.
    url_in = gr.Textbox(label="YouTube URL", value="https://www.youtube.com/watch?v=TAx_QgQmAIw")
    btn_dl = gr.Button("1️⃣ 다운로드 & 자막 추출")

    # Downloaded video next to the raw transcript.
    with gr.Row():
        video_out = gr.Video(label="동영상", height=460)
        subtitle_out = gr.Textbox(label="자막 (원문)", lines=20, interactive=False, value="")

    # Timed subtitle table; selecting a row triggers cb_play_segment.
    with gr.Row():
        subtitle_table = gr.Dataframe(headers=["Start (sec)", "End (sec)", "Text"], datatype=["number", "number", "str"], interactive=False)

    # Player for the clip cut from the selected subtitle row.
    with gr.Row():
        video_player = gr.Video(label="🎬 선택 구간 재생", height=240, autoplay=True)

    # File components for the transcript and the video.
    with gr.Row():
        subtitle_file = gr.File(label="자막 보기 (파일 미리보기)", value=None)
        video_file = gr.File(label="비디오 보기 (파일 미리보기)", value=None)

    # Download buttons start hidden; cb_download makes them visible.
    with gr.Row():
        subtitle_download = gr.DownloadButton(label="자막 다운로드", value=None, visible=False)
        video_download = gr.DownloadButton(label="비디오 다운로드", value=None, visible=False)

    btn_sum = gr.Button("2️⃣ Gemini로 요약하기")
    summary_out = gr.Textbox(label="요약", lines=6)
    # Holds the downloaded video path for cb_play_segment.
    video_path_state = gr.State()

    # The outputs list must stay in the same order as the tuple returned by
    # cb_download.
    btn_dl.click(
        cb_download,
        inputs=[url_in],
        outputs=[video_out, subtitle_out, subtitle_file, subtitle_download, video_file, video_download, subtitle_table, video_path_state],
    )

    # Summarise whatever is currently in the transcript textbox.
    btn_sum.click(
        cb_summary,
        inputs=[subtitle_out, api_key_input],
        outputs=[summary_out],
    )

    # Row selection in the table -> cut and play the matching clip.
    subtitle_table.select(
        fn=cb_play_segment,
        inputs=[subtitle_table, video_path_state],
        outputs=[video_player]
    )

# ────────────────────────────────────────────────
# 6. Launch
# ────────────────────────────────────────────────

if __name__ == "__main__":
    demo.launch(debug=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://c8eeb598b4c621d6f3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)



