In [None]:
import csv
import io
import json
import os
from typing import Optional, List

import googleapiclient.discovery

In [None]:
# Service name and version handed to the API client builder.
API_SERVICE_NAME = 'youtube'
API_VERSION = 'v3'

# The key is read from a local file. Surrounding whitespace is stripped because
# a trailing newline left in the file would otherwise become part of the key.
with open('api-key.txt') as f:
    API_KEY = f.read().strip()

# Prefix prepended to a channel ID to build that channel's URL.
CHANNEL_URL_PREFIX = 'http://www.youtube.com/channel/'

def id2channel_url(id):
    """Return the channel URL for a channel ID.

    The URL is CHANNEL_URL_PREFIX with the given ID appended.
    """
    channel_url = CHANNEL_URL_PREFIX + id
    return channel_url

In [None]:
# NOTE(review): this flag is read by oauthlib (presumably to permit OAuth over
# plain HTTP). The client below is built with a developer key only, so confirm
# whether the flag is still needed.
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'

# API client used by every request in this notebook; API_KEY is passed as the
# developer key.
youtube = googleapiclient.discovery.build(
    serviceName=API_SERVICE_NAME,
    version=API_VERSION,
    developerKey=API_KEY,
)

In [None]:
class YoutubersComment:
    """A comment paired with the tracked youtuber who wrote it.

    Attributes:
        author: youtuber entry; ``format_for_csv`` reads its 'display_name' key.
        comment: comment resource dict; ``format_for_csv`` reads its 'snippet'
            mapping.
    """

    def __init__(self, author, comment):
        self.author = author
        self.comment = comment

    def format_for_csv(self):
        """Return the comment as a single CSV record without a line terminator.

        Columns, in order: publishedAt, videoId, parentId, author display name,
        textDisplay. 'videoId' and 'parentId' become empty fields when the
        snippet does not contain them.

        Fields containing commas, double quotes or line breaks are quoted by
        the csv module, so free-form comment text cannot shift columns or split
        a record. Fields without such characters are written unchanged.

        Raises:
            KeyError: if 'publishedAt', 'textDisplay' or 'display_name' is
                missing.
        """
        snippet = self.comment['snippet']
        # Both CR and LF are part of the terminator so that a field containing
        # either character is quoted; the terminator itself is cut off below
        # because the caller appends its own newline.
        line_end = '\r\n'
        buffer = io.StringIO(newline='')
        writer = csv.writer(buffer, lineterminator=line_end)
        writer.writerow([
            snippet['publishedAt'],
            snippet.get('videoId', ''),
            snippet.get('parentId', ''),
            self.author['display_name'],
            snippet['textDisplay'],
        ])
        return buffer.getvalue()[:-len(line_end)]

In [None]:
# Load the tracked youtubers. The file is read with an explicit UTF-8 encoding
# so that non-ASCII text in it does not depend on the platform default.
with open('youtubers.json', encoding='utf-8') as f:
    content = json.load(f)

# 'youtubers' maps a name to that youtuber's entry; 'main' holds the name of
# the entry whose channel is queried for comment threads.
youtubers = content['youtubers']
main_youtuber = youtubers[content['main']]

MAIN_YOUTUBER_CHANNEL_ID = main_youtuber['id']

In [None]:
# File in which the token of the next page to fetch is kept between runs.
next_page_token_filename = 'output/next-page-token.txt'

def get_next_page_token() -> Optional[str]:
    """Return the saved page token, or None when there is none.

    None is returned when the token file does not exist yet (for example on
    the very first run) or when it holds only whitespace. Surrounding
    whitespace is removed so a trailing newline never becomes part of the
    token.
    """
    try:
        with open(next_page_token_filename, 'r') as f:
            token = f.read().strip()
    except FileNotFoundError:
        # Nothing has been saved so far.
        return None
    return token or None

def set_next_page_token(token: Optional[str]) -> None:
    """Save ``token`` to the token file; None or '' leaves the file empty.

    The parent directory of the token file is created when it is missing.
    """
    directory = os.path.dirname(next_page_token_filename)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(next_page_token_filename, 'w') as f:
        f.write(token if token else '')

In [None]:
def fetch_comment_threads(page_token=None):
    """Fetch one page of comment threads related to the main youtuber's channel.

    Args:
        page_token: value sent as the request's pageToken (None by default).

    Returns:
        A ``(threads, next_page_token)`` tuple: the response's 'items', and its
        'nextPageToken' or None when the response has no such key.
    """
    query = {
        'part': 'replies,snippet',
        'allThreadsRelatedToChannelId': MAIN_YOUTUBER_CHANNEL_ID,
        'textFormat': 'plainText',
        'pageToken': page_token,
    }
    result = youtube.commentThreads().list(**query).execute()
    threads = result['items']
    following_token = result.get('nextPageToken')
    return threads, following_token

def fetch_replies(thread):
    """Return the reply comments of a comment thread.

    Args:
        thread: comment thread resource dict. Reads
            ``snippet.totalReplyCount``, the optional ``replies.comments`` list
            and ``snippet.topLevelComment.id``.

    Returns:
        An empty list when the thread has no replies; the replies embedded in
        the thread when their number equals the reply count; otherwise the
        first page of replies requested separately.
    """
    reply_count = thread['snippet']['totalReplyCount']
    if reply_count == 0:
        return []

    if 'replies' in thread:
        embedded = thread['replies']['comments']
        if len(embedded) == reply_count:
            return embedded

    request = youtube.comments().list(
        part='snippet',
        parentId=thread['snippet']['topLevelComment']['id'],
        # Largest page size documented for this endpoint, so the single page
        # read below covers as many replies as one request can.
        maxResults=100,
        # Thread requests in this notebook use plain text; the API reference
        # lists HTML as this endpoint's default, so ask for plain text here too.
        textFormat='plainText',
    )
    response = request.execute()
    # Only the first page is read, assuming there are not many long threads.
    return response['items']

def filter_youtubers_comments(comments) -> List['YoutubersComment']:
    """Keep only the comments written by one of the tracked youtubers.

    A comment matches a youtuber when its ``snippet.authorChannelUrl`` equals
    the channel URL built from that youtuber's 'id'. Input order is preserved.
    If several youtubers share an ID, the first one in ``youtubers`` wins. A
    comment whose snippet has no 'authorChannelUrl' is skipped.

    Args:
        comments: iterable of comment resource dicts.

    Returns:
        One YoutubersComment per matching comment, pairing the youtuber entry
        with the comment.
    """
    # Build the URL -> youtuber lookup once per call instead of rebuilding
    # every channel URL for every comment.
    youtuber_by_url = {}
    for data in youtubers.values():
        youtuber_by_url.setdefault(id2channel_url(data['id']), data)

    youtubers_comments = []
    for comment in comments:
        author = youtuber_by_url.get(comment['snippet'].get('authorChannelUrl'))
        if author is not None:
            youtubers_comments.append(YoutubersComment(author, comment))
    return youtubers_comments

def output_youtubers_comments(comments: List['YoutubersComment']):
    """Append one CSV line per comment to output/comments.csv.

    Each line is the comment's ``format_for_csv()`` result followed by a
    newline. The file is written as UTF-8 so comment text outside the platform
    default encoding (emoji, non-Latin scripts) cannot abort the run. The
    output directory is created when it is missing.

    Args:
        comments: objects exposing ``format_for_csv()``.
    """
    output_path = 'output/comments.csv'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'a', encoding='utf-8') as f:
        for comment in comments:
            f.write(comment.format_for_csv() + "\n")

In [None]:
def handle_comment_threads_on_single_page(page_token=None) -> Optional[str]:
    """Process one page of comment threads and report where to continue.

    Fetches the page of threads for ``page_token``, gathers every thread's
    top-level comment followed by its replies, filters that list down to the
    tracked youtubers' comments and hands the result to the CSV output step.

    Returns:
        The next page token delivered with the fetched page, or None if there
        was none.
    """
    threads, next_page_token = fetch_comment_threads(page_token)

    page_comments = []
    for thread in threads:
        # Top-level comment first, then the replies that belong to it.
        page_comments += [thread['snippet']['topLevelComment'], *fetch_replies(thread)]

    matched = filter_youtubers_comments(page_comments)
    output_youtubers_comments(matched)
    return next_page_token

In [None]:
# Start from the token saved by an earlier run (None when nothing is stored).
next_page_token = get_next_page_token()

# Handle one page per iteration. The token for the following page is saved
# right after each page has been handled, so the next run starts from the last
# saved token.
# Stops when the daily quota limit is reached or no nextPageToken exists.
# NOTE(review): there is no explicit quota check here; presumably the quota
# case ends the loop through an exception raised by the API request - confirm.
while True:
    next_page_token = handle_comment_threads_on_single_page(next_page_token)
    set_next_page_token(next_page_token)
    if not next_page_token:
        break