# **1. Imports**

In [1]:
from googleapiclient.discovery import build
from transformers import pipeline
sentiment_pipeline = pipeline('sentiment-analysis')
import nltk
nltk.download('punkt')
import torch

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# **2. Youtube comment crawling**

In [3]:
def create_youtube_client(api_key):
    """YouTube API 클라이언트 생성합니다."""
    return build('youtube', 'v3', developerKey=api_key)

def get_video_id(video_address):
    """비디오 주소에서 비디오 ID를 추출합니다."""
    return video_address.split('=')[-1]

def get_video_comments(youtube, video_id, max_results=100):
    """비디오의 댓글을 가져옵니다."""
    comments = []
    next_page_token = None

    while True:
        response = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            maxResults=max_results,
            pageToken=next_page_token,
            textFormat='plainText'
        ).execute()
        
        for item in response['items']:
            comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
            comments.append(comment)
        
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return comments

def print_comments(comments):
    """댓글을 출력합니다."""
    for i, comment in enumerate(comments, 1):
        print(f"{i}. {comment}")

## **3. Sentiment Analysis**

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sentiment_pipeline = pipeline('sentiment-analysis')

# 감정 분석 함수
def analyze_sentiment(text):
    """주어진 텍스트의 감정을 분석합니다."""
    result = sentiment_pipeline(text)
    sentiment = result[0]
    label = sentiment['label']
    score = sentiment['score']
    return {'label': label, 'score': score}

def split_long_text(text, max_length=64):
    """긴 텍스트를 잘라서 반환합니다."""
    tokens = nltk.word_tokenize(text)
    for i in range(0, len(tokens), max_length):
        yield ' '.join(tokens[i:i + max_length])

def classify_comments(comments):
    """댓글을 분석하고 긍정적 또는 부정적으로 분류합니다."""
    good_comments = []
    bad_comments = []
    
    for comment in comments:
        for piece in split_long_text(comment):
            sentiment = analyze_sentiment(piece)
            
            if sentiment['label'] == 'POSITIVE' and sentiment['score'] > 0.7:
                good_comments.append(comment)
            elif sentiment['label'] == 'NEGATIVE' and sentiment['score'] > 0.7:
                bad_comments.append(comment)

    return good_comments, bad_comments

def print_results(comments, good_comments, bad_comments):
    """결과를 출력합니다."""
    good_count = len(good_comments)
    bad_count = len(bad_comments)
    
    print(f"Total comments: {len(comments)}")
    print(f"Good comments: {good_count}")
    print(f"Bad comments: {bad_count}")

    if good_count > bad_count:
        print("Overall sentiment is positive.")
    elif bad_count > good_count:
        print("Overall sentiment is negative.")
    else:
        print("Overall sentiment is neutral.")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


# **4. 실행**

In [None]:
def main(api_key, video_address):
    """메인 함수: 댓글을 크롤링하고 감정 분석을 수행합니다."""
    youtube = create_youtube_client(api_key)
    video_id = get_video_id(video_address)
    comments = get_video_comments(youtube, video_id)
    
    print_comments(comments)
    
    good_comments, bad_comments = classify_comments(comments)
    print_results(comments, good_comments, bad_comments)

if __name__ == "__main__":
    api_key = 'my api'
    video_address = 'youtube_video_address'
    
    main(api_key, video_address)