In [8]:
import requests
import pandas as pd
import re
import json
import time


In [9]:
# Browser-like request headers passed to the requests calls in this notebook.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)
HEADERS = {"User-Agent": USER_AGENT}

In [10]:
def get_initial_data(video_url, timeout=10):
    """Download the HTML of a video page so the comment token can be extracted.

    Args:
        video_url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so without this an unresponsive
            server would block the notebook cell indefinitely.

    Returns:
        str: The response body decoded as text.

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: Raised by requests on
            connection failures or when the timeout elapses.
    """
    response = requests.get(video_url, headers=HEADERS, timeout=timeout)
    if response.status_code != 200:
        raise Exception("Gagal mengakses halaman video.")
    return response.text

def extract_comments_data(html):
    """Extract the initial continuation token and client version from page HTML.

    Args:
        html: Text of the video page.

    Returns:
        tuple[str, str]: ``(token, session_id)`` where ``token`` is the first
        ``"continuation"`` value and ``session_id`` is the first
        ``"INNERTUBE_CONTEXT_CLIENT_VERSION"`` value found in ``html``.

    Raises:
        Exception: If either value cannot be found. The message keeps the
            original text and appends the name of each missing key, so the
            failing pattern can be identified.
    """
    # NOTE(review): both patterns scrape markup embedded in the page rather
    # than a documented API; if this raises, check that the keys still appear
    # in the HTML being fetched.
    patterns = {
        "continuation": r'"continuation":"(.*?)"',
        "INNERTUBE_CONTEXT_CLIENT_VERSION": r'"INNERTUBE_CONTEXT_CLIENT_VERSION":"(.*?)"',
    }
    matches = {key: re.search(pattern, html) for key, pattern in patterns.items()}

    # Check each lookup on its own instead of catching one shared
    # AttributeError, which hid which of the two values was absent.
    missing = [key for key, match in matches.items() if match is None]
    if missing:
        raise Exception(
            "Gagal menemukan token atau session ID."
            f" (tidak ditemukan: {', '.join(missing)})"
        )

    token = matches["continuation"].group(1)
    session_id = matches["INNERTUBE_CONTEXT_CLIENT_VERSION"].group(1)
    return token, session_id

def _continuation_items(data):
    """Return the continuationItems list of the first response endpoint, or []."""
    endpoints = data.get("onResponseReceivedEndpoints") or []
    if not endpoints:
        # Missing/empty endpoint list: treat as "no items" instead of IndexError.
        return []
    action = endpoints[0].get("appendContinuationItemsAction") or {}
    return action.get("continuationItems") or []


def _comment_text(item):
    """Return the text of one comment-thread item, or "" if it carries none."""
    renderer = (
        item.get("commentThreadRenderer", {})
        .get("comment", {})
        .get("commentRenderer", {})
    )
    runs = renderer.get("contentText", {}).get("runs") or []
    # Join every run rather than keeping only the first, so comments split
    # into several fragments are not truncated (assumes the fragments
    # concatenate to the full text -- TODO confirm against a live response).
    return "".join(run.get("text") or "" for run in runs)


def _next_token(items):
    """Return the first continuation token present in items, or None."""
    for item in items:
        if "continuationItemRenderer" not in item:
            continue
        renderer = item["continuationItemRenderer"] or {}
        token = (
            renderer.get("continuationEndpoint", {})
            .get("continuationCommand", {})
            .get("token")
        )
        if token:
            return token
    return None


def get_comments(video_url, max_comments=100, timeout=10):
    """Collect comment texts for a video, following continuation tokens.

    Fetches the video page, extracts the first continuation token, then
    repeatedly POSTs the current token to the ``youtubei/v1/next`` endpoint
    and reads the comments and the next token from each response.

    Args:
        video_url: URL of the video page.
        max_comments: Maximum number of comment texts to return.
        timeout: Seconds to wait on each POST request before giving up
            (applies to the comment requests made here, not to the initial
            page download).

    Returns:
        list[str]: Comment texts in the order received. May be shorter than
        ``max_comments`` when the token chain ends, a token repeats, or a
        request returns a non-200 status.

    Raises:
        Exception: Propagated from get_initial_data / extract_comments_data
            when the page cannot be fetched or the initial values are missing.
    """
    html = get_initial_data(video_url)
    token, session_id = extract_comments_data(html)

    # NOTE(review): session_id is the value extract_comments_data captures
    # from "INNERTUBE_CONTEXT_CLIENT_VERSION", yet it is also sent as the
    # `key` query parameter below -- confirm that is what the endpoint expects.
    ajax_url = f"https://www.youtube.com/youtubei/v1/next?key={session_id}"
    context = {
        "client": {
            "clientName": "WEB",
            "clientVersion": session_id,
        }
    }

    comments = []
    while token and len(comments) < max_comments:
        params = {"continuation": token, "context": context}
        response = requests.post(ajax_url, headers=HEADERS, json=params, timeout=timeout)
        if response.status_code != 200:
            break

        items = _continuation_items(response.json())
        for item in items:
            text = _comment_text(item)
            if text:
                comments.append(text)
            if len(comments) >= max_comments:
                break

        # Look for a continuation token to fetch the next page of comments.
        next_token = _next_token(items)
        if next_token == token:
            # The same token would repeat the same request; stop rather than
            # loop forever or collect duplicates.
            break
        token = next_token

        if token and len(comments) < max_comments:
            time.sleep(0.5)  # Pause between requests to avoid being rate limited.

    return comments

def save_to_csv(comments, filename="youtube_comments.csv"):
    """Write the comments to a one-column CSV file and print a confirmation.

    Args:
        comments: Comment values, one per output row.
        filename: Destination path of the CSV file.
    """
    comment_table = pd.DataFrame(data=comments, columns=["Comment"])
    comment_table.to_csv(path_or_buf=filename, index=False)
    confirmation = f"Komentar berhasil disimpan di {filename}"
    print(confirmation)




In [11]:
# Example usage: fetch up to 100 comments for one video and save them to CSV.
video_url = "https://www.youtube.com/watch?v=i6IOiUi6IYY"  # Replace the v= value with the ID of the desired video
comments = get_comments(video_url=video_url, max_comments=100)
save_to_csv(comments=comments)

Exception: Gagal menemukan token atau session ID.