## Import Libraries

In [12]:
import csv
import os
from getpass import getpass

import pandas as pd
import sqlalchemy
from googleapiclient.discovery import build

## API Key

In [13]:
# Load the YouTube Data API key from the environment instead of embedding it in
# the notebook, so the secret is never saved or committed with the file.
# SECURITY: a key was previously hardcoded in this cell; treat it as leaked and
# revoke/rotate it in the Google Cloud console.
# Fallback: prompt for the key without echoing it (interactive sessions only).
API_KEY = os.environ.get('YOUTUBE_API_KEY') or getpass('YouTube Data API key: ')
if not API_KEY:
    raise RuntimeError(
        "No YouTube API key provided; set the YOUTUBE_API_KEY environment variable."
    )

# Initialise the YouTube Data API v3 client
youtube = build('youtube', 'v3', developerKey=API_KEY)

## Crawl Data

In [14]:
# Search for videos matching a keyword
def search_videos(query, max_results=50):
    """Return up to ``max_results`` video IDs matching ``query``, ordered by relevance.

    Uses the module-level ``youtube`` client. The search endpoint returns at
    most 50 results per request, so larger ``max_results`` values are served by
    following ``nextPageToken`` until enough IDs are collected or the results
    run out.

    Args:
        query: Search keyword(s) passed as the ``q`` parameter.
        max_results: Maximum number of video IDs to return. Values <= 0 yield
            an empty list without calling the API.

    Returns:
        A list of video ID strings, at most ``max_results`` long.

    Raises:
        Whatever ``request.execute()`` raises on API/network failure; errors
        are not swallowed here.
    """
    page_size_limit = 50  # per-request ceiling of search.list's maxResults
    video_ids = []
    page_token = None

    while len(video_ids) < max_results:
        params = {
            'part': 'id',
            'q': query,
            'type': 'video',
            # Only ask for what is still missing, capped at the page limit.
            'maxResults': min(page_size_limit, max_results - len(video_ids)),
            'order': 'relevance',
        }
        if page_token:
            params['pageToken'] = page_token

        response = youtube.search().list(**params).execute()
        items = response.get('items', [])

        for item in items:
            resource = item.get('id', {})
            # Keep only video resources that actually carry a videoId.
            if resource.get('kind') == 'youtube#video' and 'videoId' in resource:
                video_ids.append(resource['videoId'])

        page_token = response.get('nextPageToken')
        # Stop when there are no further pages, or a page came back empty
        # (avoids spinning on a token that yields nothing).
        if not page_token or not items:
            break

    return video_ids[:max_results]

# Fetch the comments of a single video
def get_video_comments(video_id, max_comments=500):
    """Collect up to ``max_comments`` comments (top-level and replies) for a video.

    Uses the module-level ``youtube`` client to page through the video's
    comment threads, 100 threads per request. For every thread the top-level
    comment is recorded first, followed by the replies included in the
    response.

    NOTE(review): only the replies embedded in the thread response are
    collected; confirm against the API docs whether that covers all replies.

    Args:
        video_id: ID of the video whose comments are fetched.
        max_comments: Hard upper bound on the number of records returned.
            Values <= 0 return an empty list without calling the API.

    Returns:
        A list of dicts with keys ``video_id``, ``created_at``,
        ``author_name``, ``full_text`` and ``like_count``; never longer than
        ``max_comments``. On an API error, the error is printed and whatever
        was collected so far is returned (best-effort).
    """
    comments = []
    if max_comments <= 0:
        return comments

    def _to_record(snippet):
        # Flatten one comment snippet into the row schema used downstream.
        return {
            'video_id': video_id,
            'created_at': snippet['publishedAt'],
            'author_name': snippet['authorDisplayName'],
            'full_text': snippet['textDisplay'],
            'like_count': snippet['likeCount'],
        }

    page_token = None
    try:
        while True:
            params = {
                'part': 'snippet,replies',
                'videoId': video_id,
                'textFormat': 'plainText',
                'maxResults': 100,
            }
            if page_token:
                params['pageToken'] = page_token
            response = youtube.commentThreads().list(**params).execute()

            for item in response.get('items', []):
                # Top-level comment
                comments.append(_to_record(item['snippet']['topLevelComment']['snippet']))

                # Replies, when the thread carries any
                for reply in item.get('replies', {}).get('comments', []):
                    comments.append(_to_record(reply['snippet']))

                # A thread's replies can overshoot the target, so trim to
                # return exactly max_comments.
                if len(comments) >= max_comments:
                    return comments[:max_comments]

            # Pagination
            page_token = response.get('nextPageToken')
            if not page_token:
                break
    except Exception as e:
        # Deliberately best-effort: a failure on one video is reported and
        # must not abort the whole crawl.
        print(f"Error fetching comments for video {video_id}: {e}")

    return comments[:max_comments]

## Search YouTube & Collect Comments

In [15]:
# Search keyword and crawl limits
search_keyword = 'technology'
max_results = 50  # maximum number of videos to search for
max_total_comments = 500  # overall cap on comments collected across all videos

# Find candidate videos
video_ids = search_videos(search_keyword, max_results)

# Collect comments video by video until the overall cap is reached
all_comments = []
for video_id in video_ids:
    remaining = max_total_comments - len(all_comments)
    if remaining <= 0:
        break
    comments_data = get_video_comments(video_id, max_comments=remaining)
    all_comments.extend(comments_data)

# Defensive trim: guarantees the cap even if a call returned more than requested
all_comments = all_comments[:max_total_comments]

# Convert to a DataFrame; the explicit column list keeps the expected schema
# even when no comments were collected, so later column lookups do not fail
comment_columns = ['video_id', 'created_at', 'author_name', 'full_text', 'like_count']
df = pd.DataFrame(all_comments, columns=comment_columns)

## Import to Database & Check

In [16]:
# Normalise column dtypes: like counts become (downcast) integers where
# possible, with unparseable values coerced to NaN; timestamps become datetimes
df = df.assign(
    like_count=pd.to_numeric(df['like_count'], errors='coerce', downcast='integer'),
    created_at=pd.to_datetime(df['created_at']),
)

# Write a CSV snapshot of the comments (without the index column)
df.to_csv('youtube_comments.csv', index=False)
print(f"Data komentar disimpan ke 'youtube_comments.csv'. Jumlah komentar: {len(df)}")

# Open the local SQLite database through SQLAlchemy
DATABASE_URI = 'sqlite:///youtube_comments.db'
engine = sqlalchemy.create_engine(DATABASE_URI)

# Store the frame in the 'comments' table, replacing any existing table
df.to_sql(name='comments', con=engine, index=False, if_exists='replace')
print("Data komentar ditambahkan ke database.")

Data komentar disimpan ke 'youtube_comments.csv'. Jumlah komentar: 500
Data komentar ditambahkan ke database.
