In [1]:
import requests
from bs4 import BeautifulSoup

url = "https://www.youtube.com/watch?v=sTadZA-uYfA"

# Download the watch page. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() surfaces HTTP errors instead of
# letting an error page be parsed as if it were the video page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# <title> may be missing, or have no plain string, on some responses; fall back
# to an empty title instead of raising AttributeError on None.
page_title = soup.title.string if soup.title and soup.title.string else ""
title = page_title.replace(" - YouTube", "").strip()

In [2]:
import re

# get video title name
def clean_title(text):
    """Strip ``text`` down to ASCII letters, digits, Thai characters and dots.

    Every other character (spaces, punctuation, underscores, ...) is removed,
    so the result is safe to embed in a file name.
    """
    disallowed = re.compile(r'[^0-9a-zA-Z\u0E00-\u0E7F\.]')
    return disallowed.sub('', text)
title = clean_title(text=title)

# get video id
# The pattern captures the 11-character id that follows "v=" or a "/" in the URL.
match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
if match:
    video_id = match.group(1)
else:
    # Fail here: otherwise later cells either hit a NameError or silently
    # reuse a video_id left over from a previous run of this cell.
    raise ValueError(f"Could not extract a YouTube video id from URL: {url!r}")

In [3]:
from youtube_transcript_api import YouTubeTranscriptApi

# Fetch transcript (auto-captions or uploaded), trying Thai first, then English
preferred_languages = ['th', 'en']
if hasattr(YouTubeTranscriptApi, "get_transcript"):
    # Older youtube-transcript-api releases expose this static helper.
    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=preferred_languages)
else:
    # Newer releases dropped the static helper in favour of an instance-level
    # fetch(); to_raw_data() converts the result back to the list-of-dicts
    # shape ('text', 'start', ...) that the loop below indexes into.
    transcript = YouTubeTranscriptApi().fetch(video_id, languages=preferred_languages).to_raw_data()

# Save to file: one "<start seconds>s: <text>" line per caption entry.
# Later cells read this file back, so this step is required, not optional.
file_name = f'{title}_{video_id}'
with open(f"{file_name}_subtitle.txt", "w", encoding="utf-8") as f:
    for entry in transcript:
        f.write(f"{entry['start']:.2f}s: {entry['text']}\n")


In [4]:
# Recover the title: it is everything in file_name before the first underscore
title = file_name.partition('_')[0]

In [5]:
# Load the saved subtitle file back into memory as a single string
with open(f"{file_name}_subtitle.txt", mode="r", encoding="utf-8") as subtitle_file:
    text_content = subtitle_file.read()

In [8]:
import ollama

# User message: task description, the video title, the full subtitle text and
# the required output format.
user_prompt = f"""
You are an expert summarizer.
Your task is to summarize the given YouTube video subtitle into clear, structured topics with as much important detail as possible.

**Video Title:** {title}

**Subtitle Text (may include timestamps):**
{text_content}

**Instructions:**
- Ignore timestamps if present.
- Identify all main topics or sections discussed in the video.
- For each topic, write a clear **topic heading** and detailed bullet points (include sub-points if needed).
- Remove filler words and repetitions.
- Combine related lines into clear, complete thoughts.
- Keep the style clear and easy to read.
- Cover ALL important points in the subtitle — do not skip parts.
- Finish with a short **Conclusion** section that summarizes the overall message in 2–4 sentences.
- ⚠️ **Important:** DO NOT add any introduction, opening phrase, or explanation before the first topic. Start the output IMMEDIATELY with the first topic heading.
- The first line must be EXACTLY in this format:
  **Topic 1: [Title]**
- Never write any other sentence before that line.

**Format Example:**

**Topic 1: Investment Strategy**
- Point 1
- Point 2
  - Sub-point

**Topic 2: Earnings Per Share (EPS)**
- Point 1
...

**Conclusion**
Short final summary.
"""

# System message: fixes the model's role and repeats the hard formatting rules.
system_prompt = """
You are a helpful, precise summarizer.
Your role is to read YouTube subtitles and produce an accurate, detailed summary with clear topic headings and bullet points.
Ignore timestamps and filler words.
NEVER write an introduction or any opening sentence. Only output the formatted summary.
Always start DIRECTLY with the first topic heading: **Topic 1: [Title]**
Your summaries must be detailed, well-organized, and cover all key points.
Always end with a short conclusion.
"""

# The whole transcript is embedded in the prompt. Request an explicit context
# window so a long transcript is not cut off by the server's default limit
# before the model sees it.
# NOTE(review): 8192 assumes the selected model supports that window - confirm
# and raise/lower it to match the model actually pulled.
NUM_CTX = 8192

response = ollama.chat(
    model="llama3",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ],
    options={"num_ctx": NUM_CTX},
)

summary = response["message"]["content"]

print(summary)

Here is the summarized version of the YouTube video subtitle:

**Topic 1: Investment Strategy**
• The speaker emphasizes the importance of understanding fundamental analysis when investing in stocks, rather than relying on market trends or emotions.
• They encourage investors to focus on long-term growth and ignore short-term fluctuations.
• A key takeaway from this topic is that a good investment strategy should be based on thorough research and a deep understanding of the company's financials.

**Topic 2: Earnings Per Share (EPS)**
• The speaker discusses the importance of EPS in evaluating a stock's potential for growth.
• They provide examples of how EPS can be used to identify undervalued or overvalued stocks, highlighting the need for thorough analysis and patience when investing.
• A key point made is that investors should consider multiple factors beyond just EPS, such as company fundamentals, market trends, and industry outlook.

**Topic 3: Healthcare Industry**
• The speaker 

เอาสิ่งที่สรุปได้ไปหากูเกิลเพื่อทำ research อีกที

In [9]:
# Pull the generated text out of the chat response
summary_text = response["message"]["content"]

# Write the summary to "<file_name>_summary.txt" as UTF-8
with open(f"{file_name}_summary.txt", mode="w", encoding="utf-8") as summary_file:
    summary_file.write(summary_text)