# Scrape Youtube Comments

![](https://i.postimg.cc/x1Xv7xzY/ytb-scrape.jpghttps://i.postimg.cc/x1Xv7xzY/ytb-scrape.jpg)

#### This project will demonstrate how to scrape youtube comment for free without using Google API.

### GitHub Repo : [https://github.com/ahmedshahriar/youtube-comment-scraper](https://github.com/ahmedshahriar/youtube-comment-scraper) 

# Libraries & Configuration

In [44]:
"""
By default the below script will download most recent 100 comments of a youtube video
You can change the default filter (line 30 onwards)

Variables :
COMMENT_LIMIT : How many comments you want to download 
SORT_BY_POPULAR : filter comments by popularity (0 for True , 1 for false)
SORT_BY_RECENT : filter comments by recently posted (0 for True , 1 for false)
"""

import pandas as pd
import json
import os
import sys
import re
import time

import requests

# pandas dataframe display configuration
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

YOUTUBE_COMMENTS_AJAX_URL = 'https://www.youtube.com/comment_service_ajax'

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
# csv file name
FILE_NAME = 'ytb_comments.csv'

# set parameters
# filter comments by popularity or recent, 0:False, 1:True
SORT_BY_POPULAR = 0
# default recent False, change to 1 to download latest comments
SORT_BY_RECENT = 0
# set comment limit
COMMENT_LIMIT = 100000

YT_CFG_RE = r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;'
YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;\s*(?:var\s+meta|</script|\n)'

## Utility Function

In [45]:
def regex_search(text, pattern, group=1, default=None):
    match = re.search(pattern, text)
    return match.group(group) if match else default


def ajax_request(session, endpoint, ytcfg, retries=5, sleep=20):
    url = 'https://www.youtube.com' + endpoint['commandMetadata']['webCommandMetadata']['apiUrl']
    
    data = {'context': ytcfg['INNERTUBE_CONTEXT'],
            'continuation': endpoint['continuationCommand']['token']}

    for _ in range(retries):
        response = session.post(url, params={'key': ytcfg['INNERTUBE_API_KEY']}, json=data)
        if response.status_code == 200:
            return response.json()
        if response.status_code in [403, 413]:
            return {}
        else:
            time.sleep(sleep)

def download_comments(YOUTUBE_VIDEO_URL, sort_by=SORT_BY_RECENT, language=None, sleep=0.1):
    session = requests.Session()
    session.headers['User-Agent'] = USER_AGENT
    response = session.get(YOUTUBE_VIDEO_URL)

    if 'uxe=' in response.request.url:
        session.cookies.set('CONSENT', 'YES+cb', domain='.youtube.com')
        response = session.get(YOUTUBE_VIDEO_URL)

    html = response.text
    ytcfg = json.loads(regex_search(html, YT_CFG_RE, default=''))
    if not ytcfg:
        return # Unable to extract configuration
    if language:
        ytcfg['INNERTUBE_CONTEXT']['client']['hl'] = language

    data = json.loads(regex_search(html, YT_INITIAL_DATA_RE, default=''))

    section = next(search_dict(data, 'itemSectionRenderer'), None)
    renderer = next(search_dict(section, 'continuationItemRenderer'), None) if section else None
    if not renderer:
        # Comments disabled?
        return

    needs_sorting = sort_by != SORT_BY_POPULAR
    continuations = [renderer['continuationEndpoint']]
    while continuations:
        continuation = continuations.pop()
        response = ajax_request(session, continuation, ytcfg)

        if not response:
            break
        if list(search_dict(response, 'externalErrorMessage')):
            raise RuntimeError('Error returned from server: ' + next(search_dict(response, 'externalErrorMessage')))

        if needs_sorting:
            sort_menu = next(search_dict(response, 'sortFilterSubMenuRenderer'), {}).get('subMenuItems', [])
            if sort_by < len(sort_menu):
                continuations = [sort_menu[sort_by]['serviceEndpoint']]
                needs_sorting = False
                continue
            raise RuntimeError('Failed to set sorting')

        actions = list(search_dict(response, 'reloadContinuationItemsCommand')) + \
                  list(search_dict(response, 'appendContinuationItemsAction'))
        for action in actions:
            for item in action.get('continuationItems', []):
                if action['targetId'] == 'comments-section':
                    # Process continuations for comments and replies.
                    continuations[:0] = [ep for ep in search_dict(item, 'continuationEndpoint')]
                if action['targetId'].startswith('comment-replies-item') and 'continuationItemRenderer' in item:
                    # Process the 'Show more replies' button
                    continuations.append(next(search_dict(item, 'buttonRenderer'))['command'])

        for comment in reversed(list(search_dict(response, 'commentRenderer'))):
            yield {'cid': comment['commentId'],
                   'text': ''.join([c['text'] for c in comment['contentText'].get('runs', [])]),
                   'time': comment['publishedTimeText']['runs'][0]['text'],
                   'author': comment.get('authorText', {}).get('simpleText', ''),
                   'channel': comment['authorEndpoint']['browseEndpoint'].get('browseId', ''),
                   'votes': comment.get('voteCount', {}).get('simpleText', '0'),
                   'photo': comment['authorThumbnail']['thumbnails'][-1]['url'],
                   'heart': next(search_dict(comment, 'isHearted'), False)}

        time.sleep(sleep)

def search_dict(partial, search_key):
    stack = [partial]
    while stack:
        current_item = stack.pop()
        if isinstance(current_item, dict):
            for key, value in current_item.items():
                if key == search_key:
                    yield value
                else:
                    stack.append(value)
        elif isinstance(current_item, list):
            for value in current_item:
                stack.append(value)

In [46]:
def main(url):
    """
    This function will save the comments into a dataframe and output a csv file
    By default, it will append the comments in csv, not overwriting them, change it in line 34
    To preview the comments in json, uncomment the lines - 22 and 23
    """
    df_comment = pd.DataFrame()
    try:
        youtube_url = url
        limit = COMMENT_LIMIT

        print('Downloading Youtube comments for video:', youtube_url)

        count = 0

        start_time = time.time()

        for comment in download_comments(youtube_url):

            df_comment = df_comment.append(comment, ignore_index=True)
            print(df_comment)

            # comments overview in json
            # comment_json = json.dumps(comment, ensure_ascii=False)
            # print(comment_json)

            count += 1

#             if limit and count >= limit:
#                 break

#         print("DataFrame Shape: ",df_comment.shape,"\nComment DataFrame: ")
#         display(df_comment)

#         if not os.path.isfile(FILE_NAME):
#             df_comment.to_csv(FILE_NAME, encoding='utf-8', index=False)
#         else:  # else it exists so append without writing the header
#             df_comment.to_csv(FILE_NAME, mode='a', encoding='utf-8', index=False, header=False)

        print('\n[{:.2f} seconds] Done!'.format(time.time() - start_time))

    except Exception as e:
        print('Error:', str(e))
        sys.exit(1)

# Save Comments

## Single Video Link

In [48]:
# dumping youtube comments

""" 
Dump comments to a csv  from a single video

"""
# youtube_URL = 'https://youtu.be/A7yHrPoZ2mE'
youtube_URL = 'https://www.youtube.com/watch?v=VmEU22NYkEs'
main(youtube_URL)

Downloading Youtube comments for video: https://www.youtube.com/watch?v=VmEU22NYkEs


  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)


                          cid                                 text        time        author                   channel     votes                                              photo  heart
0  UgyBuCZcWIAOIfwiVkJ4AaABAg  2.5 million views, say MashALLAH :)  3 سال پہلے  ITHADchannel  UCEegFU8HQJsTugHLV49NVzA  4.3 ہزار  https://yt3.ggpht.com/ytc/AMLnZu-Ues3VHkaaoIIe...  False
                          cid                                               text        time        author                   channel     votes                                              photo  heart
0  UgyBuCZcWIAOIfwiVkJ4AaABAg                2.5 million views, say MashALLAH :)  3 سال پہلے  ITHADchannel  UCEegFU8HQJsTugHLV49NVzA  4.3 ہزار  https://yt3.ggpht.com/ytc/AMLnZu-Ues3VHkaaoIIe...  False
1  UgzIumBWRs3PcyEWRvN4AaABAg  Plz aap sb mery lye dua kry k Allah g mujy ola...  3 سال پہلے  Nazia Masood  UCcRJfEZKRXoKLg78ssZCBrg  2.6 ہزار  https://yt3.ggpht.com/ytc/AMLnZu-GRpM2pfNjEvyF...  False
                       

  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)


                           cid                                               text                       time               author                   channel     votes                                              photo  heart
0   UgyBuCZcWIAOIfwiVkJ4AaABAg                2.5 million views, say MashALLAH :)                 3 سال پہلے         ITHADchannel  UCEegFU8HQJsTugHLV49NVzA  4.3 ہزار  https://yt3.ggpht.com/ytc/AMLnZu-Ues3VHkaaoIIe...  False
1   UgzIumBWRs3PcyEWRvN4AaABAg  Plz aap sb mery lye dua kry k Allah g mujy ola...                 3 سال پہلے         Nazia Masood  UCcRJfEZKRXoKLg78ssZCBrg  2.6 ہزار  https://yt3.ggpht.com/ytc/AMLnZu-GRpM2pfNjEvyF...  False
2   Ugxh30zHqLC6OrZClIV4AaABAg          Allah is medicine of every pain 🕌🕌🕌😍😍😍😍☺☺    2 سال پہلے (ترمیم کردہ)        Anjali Kumari  UCdqWwfOtpwaQvbSgVO5TjfA  2.4 ہزار  https://yt3.ggpht.com/DyJq9izW38gPW3pEsz6mXfoN...  False
3   Ugyptc_RteC1Lw5dQtF4AaABAg  Very peaceful song....love from india,Allah!!!...    2 سال پہلے (ترمیم ک

17  UgwbpEf65tjbf0UTWdx4AaABAg  This songs are not only song they are a means ...               3 مہینے پہلے         Hemanta Nath  UCRQrMc89q_XOH63GAbA9K9w        11  https://yt3.ggpht.com/ytc/AMLnZu85QjxzI0XgZ5a2...  False
                           cid                                               text                       time               author                   channel     votes                                              photo  heart
0   UgyBuCZcWIAOIfwiVkJ4AaABAg                2.5 million views, say MashALLAH :)                 3 سال پہلے         ITHADchannel  UCEegFU8HQJsTugHLV49NVzA  4.3 ہزار  https://yt3.ggpht.com/ytc/AMLnZu-Ues3VHkaaoIIe...  False
1   UgzIumBWRs3PcyEWRvN4AaABAg  Plz aap sb mery lye dua kry k Allah g mujy ola...                 3 سال پہلے         Nazia Masood  UCcRJfEZKRXoKLg78ssZCBrg  2.6 ہزار  https://yt3.ggpht.com/ytc/AMLnZu-GRpM2pfNjEvyF...  False
2   Ugxh30zHqLC6OrZClIV4AaABAg          Allah is medicine of every pain 🕌🕌🕌😍😍😍😍☺☺    2 سال پہلے (ترمیم ک

  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)


In [33]:
df_comment = pd.read_csv('ytb_comments.csv')
df_comment.shape

(20, 8)

## List of Links

In [34]:
"""
Dump to a csv from a a list with video links
"""
ytb_video_list = ['https://youtu.be/0sOvCWFmrtA',
                  'https://www.youtube.com/watch?v=TuIgtitqJho',
                  'https://www.youtube.com/watch?v=hinZO--TEk4',
                  'https://youtu.be/q6EoRBvdVPQ?list=PLFsQleAWXsj_4yDeebiIADdH5FMayBiJo']

for video_link in ytb_video_list:
    main(video_link)

Downloading Youtube comments for video: https://youtu.be/0sOvCWFmrtA


  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_commen

DataFrame Shape:  (20, 8) 
Comment DataFrame: 


Unnamed: 0,cid,text,time,author,channel,votes,photo,heart
0,UgwxW_mKXXacgMFEkfl4AaABAg,I can't believe they've put this one youtube f...,1 سال پہلے,Human,UClAmpEDjU4dzJjOqZb_-iXA,2 ہزار,https://yt3.ggpht.com/ytc/AMLnZu_YiKWfH1r6I1sX...,False
1,UgzWFv9E-sFY9zuyi954AaABAg,### Section 1: Introduction\r\n1. Course Proje...,1 مہینہ پہلے (ترمیم کردہ),Thành Juffy,UCFEzt3BbkkonlMobjco7yiA,96,https://yt3.ggpht.com/ytc/AMLnZu8pgsnf0rkMVCua...,False
2,UgzyvsB5n-y4JlahYcp4AaABAg,I'm going to watch this whole course 2 hours a...,1 سال پہلے,Nord Warrior,UCzDyyUmBb1GAMmX1eVndTpg,360,https://yt3.ggpht.com/ytc/AMLnZu-ei1h8uQNc2QIh...,False
3,Ugyq5RWPOnLokfPwcQd4AaABAg,People pay more than $2000 for Python lectures...,11 مہینے پہلے,Mantorp,UCWaWYE-R6nKPQ9hyerQf0mA,255,https://yt3.ggpht.com/nE6CnCFm3mUpB8WuQfbj--7H...,False
4,UgwkUAyXD5KXL5iGuPZ4AaABAg,Timestamps for entire course(this is copied fr...,10 مہینے پہلے (ترمیم کردہ),Maher Turifi,UCOS2GDYYqPp3DTrnDGUWMoQ,715,https://yt3.ggpht.com/ytc/AMLnZu9KfysHY__TBLAd...,False
5,UgxWGINoGyNS59eN3BF4AaABAg,Truly incredible content. I'm currently a CS s...,9 مہینے پہلے,CHITUS💖⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻⸻,UCzw86ClXJvK97Nw1BDe2-BQ,0,https://yt3.ggpht.com/wOaYb3tmddwDd4PQZgjsOZ-D...,False
6,UgzWjXmrHlgiZrhh0G14AaABAg,Finally finished it. Amazing experience. So th...,9 مہینے پہلے,itarukmakto,UCkAKp822rlKdwj8Op2JxdRA,20,https://yt3.ggpht.com/ytc/AMLnZu_8v0HNjUpsrdDu...,False
7,UgxmWp1q5EKz6kyDweR4AaABAg,"Bro, ya'll make me have faith in humanity. So ...",10 دنوں پہلے,Alon Poudel,UC25JVjWhfV_ZO4hp-CmhDCA,1,https://yt3.ggpht.com/ytc/AMLnZu9hEZ_goq_gwkWe...,False
8,UgzDQgxPCuJzylyh_hR4AaABAg,I am still around 11 hrs into this course but ...,10 مہینے پہلے,Arsive parkour,UCqcPIN6zoULZx-W6atXNGTg,24,https://yt3.ggpht.com/ytc/AMLnZu8LrKye0BtTDX0V...,False
9,UgxiorjbVNnmWQfEevV4AaABAg,"For those wondering, at 10:15:39, the bug was ...",10 مہینے پہلے,Luca Dalla Valle,UCUPsnJoFfiGx1dImOM_HsIg,77,https://yt3.ggpht.com/ytc/AMLnZu-JLvgrCiOs0jRX...,False



[3.14 seconds] Done!
Downloading Youtube comments for video: https://www.youtube.com/watch?v=TuIgtitqJho


  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_commen

DataFrame Shape:  (20, 8) 
Comment DataFrame: 


  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)


Unnamed: 0,cid,text,time,author,channel,votes,photo,heart
0,UgwSDga4oAx1wOVzqxJ4AaABAg,If you want more stable coefficients when find...,2 سال پہلے,Abhishek Thakur,UCBPRJjIWfyNG4X-CRbnv78A,15,https://yt3.ggpht.com/kXhp0q_y7W_KDSjaTruJDdAX...,False
1,UgxzOs7oiXYqn_7En4h4AaABAg,I pre-booked the Kindle version India. The boo...,2 سال پہلے,Razin Tailor,UCbkDVNsvzABoYQQOoBzZ42A,1,https://yt3.ggpht.com/ytc/AMLnZu-YTSxf_SMQJlpO...,True
2,UgyOAij13VggUi6_TgF4AaABAg,The next video on reinforcement learning? you...,2 سال پہلے,Mriganka Nath,UCkDwzYJbqC2krTwK7SpxWMw,8,https://yt3.ggpht.com/ytc/AMLnZu-2SouDLn2QdZ2R...,True
3,Ugw1EHjhdiA2HWuRJ2N4AaABAg,Glad ...to see this sort of contents ...Believ...,2 سال پہلے (ترمیم کردہ),_AnanD_ ML_,UCd76XdJyTiiQVigypxm9Ksg,15,https://yt3.ggpht.com/N3RU69lPmccgjEPpsPlwG5UW...,False
4,UgxOYfgYdcUbPF7UsUd4AaABAg,Thank you for the video. As usual amazing high...,2 سال پہلے,Abi K,UCu6hYINoiCOSwGPV24CurJA,0,https://yt3.ggpht.com/ytc/AMLnZu-xcuk38DVPTQUS...,True
5,UgxC5JHJ1zNuHr25UUZ4AaABAg,Hey Abhishek ! Is it a standard practice to us...,2 سال پہلے,Abhijit Gairola,UCeTk9Y8ND5pQJmqxWclY_gg,0,https://yt3.ggpht.com/ytc/AMLnZu9Pjh0WwIISO_T-...,False
6,UgzIpQBt3o988yVBy4V4AaABAg,Really cool video as always Abhishek!\nI actua...,1 سال پہلے (ترمیم کردہ),Manabendra Rout,UCFDdXd9Xgd07dUbRkM4Gp8A,0,https://yt3.ggpht.com/ytc/AMLnZu_wC6R5Z8nLWGj0...,False
7,UgxgM8ZiGs_3WKH1a0R4AaABAg,"Hi Abhishek, thanks for the great content you ...",2 سال پہلے,Ambarish Kapil,UCFlNYOjiTQ3KHZBKE-YlYRQ,0,https://yt3.ggpht.com/ytc/AMLnZu_pGsMLCW6Lc7xU...,False
8,UgxZgGTUMMUDS9CCccN4AaABAg,What's your go to EDA techniques and do you ha...,2 سال پہلے,figity funk,UCz3CexrpDOPhzp4U8qAQIuA,0,https://yt3.ggpht.com/ytc/AMLnZu8214ujuYJ0sz54...,False
9,UgxyN1ljiOE4wtmkREN4AaABAg,How do you do blending with time series split ...,1 سال پہلے (ترمیم کردہ),zubin mehta,UCdro_yGem44nA7hYwX0IIxw,0,https://yt3.ggpht.com/ytc/AMLnZu9FL607XtMKvfUI...,False



[2.37 seconds] Done!
Downloading Youtube comments for video: https://www.youtube.com/watch?v=hinZO--TEk4


  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)


DataFrame Shape:  (20, 8) 
Comment DataFrame: 


  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)


Unnamed: 0,cid,text,time,author,channel,votes,photo,heart
0,UgyIDQFDtULyH1UJTeZ4AaABAg,Full code is available here: https://github.co...,2 سال پہلے,Abhishek Thakur,UCBPRJjIWfyNG4X-CRbnv78A,26,https://yt3.ggpht.com/kXhp0q_y7W_KDSjaTruJDdAX...,False
1,UgwK10Wgag_JSoqNCxt4AaABAg,"Great video as always, good to see you are cov...",2 سال پہلے,Mankaran Singh,UCYVS_RjnfJhou8SLZNvRaGA,5,https://yt3.ggpht.com/ytc/AMLnZu_Ft9Lhg1K9G2TB...,True
2,UgyI40uufIZdq8rTSAp4AaABAg,Great stuff as usual. Complexity simplified. T...,2 سال پہلے,shaheer zaman,UC_j9VcGDw01LjQoP-z9_jWw,3,https://yt3.ggpht.com/ytc/AMLnZu8ln4uasnYnLH9D...,True
3,UgwBCXQ03uvPReTxAzR4AaABAg,This is the most organized and neat implementa...,1 سال پہلے,Chiranshu Adik,UCi81GqwjWEfrN-jni7Sqgzg,11,https://yt3.ggpht.com/ytc/AMLnZu9fOOXxpjpkMTHv...,True
4,Ugw3QIobjGVOwtsJDNl4AaABAg,I like the way you code. I started following e...,2 سال پہلے (ترمیم کردہ),Amila Pathirana,UCOVir41Y9sUlEwu6arsi7rg,0,https://yt3.ggpht.com/ytc/AMLnZu_TU4YTt6eZXWsF...,False
5,UgynJEWRWwF2eHqrd-x4AaABAg,Thank you for your extremely helpful and infor...,2 سال پہلے,HIEU NGO TRUNG,UCIzydXGh-HtiDg8AEf3HsXw,0,https://yt3.ggpht.com/ytc/AMLnZu-kMCn5SqswxRhB...,True
6,UgxcT1OXZZAXQvoxWVN4AaABAg,Great video!!\n\nIs it possible for you to do ...,2 سال پہلے,Vidya P,UCTLF3h10hyKeYlAh53Ukong,0,https://yt3.ggpht.com/ytc/AMLnZu8ioJ8Vm3jH-hk7...,False
7,UgzK3EULuWCPweQyNmF4AaABAg,"Hi, thank you for a very helpful video! I have...",2 سال پہلے,Michelle Belgrave,UCb59PEXQcInh6jGk9ublW3g,0,https://yt3.ggpht.com/ytc/AMLnZu89YjqQ4he4QG6j...,False
8,UgyuAUW4URTYivEfQkt4AaABAg,This part about copy/pasting 20 lines of code ...,2 سال پہلے,Sergii Makarevych,UCAeptTeh39j7HrUkoTXF8tA,1,https://yt3.ggpht.com/ytc/AMLnZu-0lnTtOrb2WaNd...,True
9,UgwLGDbkySbUlThl04x4AaABAg,"Instead of padding the ids, attention_masks, a...",2 سال پہلے,Matthias L,UCK-yPP0olQNGDWilkPAHDBA,3,https://yt3.ggpht.com/ytc/AMLnZu9AkWnRA_2zqsq2...,True



[2.42 seconds] Done!
Downloading Youtube comments for video: https://youtu.be/q6EoRBvdVPQ?list=PLFsQleAWXsj_4yDeebiIADdH5FMayBiJo


  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_comment.append(comment, ignore_index=True)
  df_comment = df_commen

DataFrame Shape:  (20, 8) 
Comment DataFrame: 


Unnamed: 0,cid,text,time,author,channel,votes,photo,heart
0,UgxlFdVwjdlAs6QC_0V4AaABAg,this is perhaps the youtube version of a histo...,2 سال پہلے,amber,UCtNVFepaJn4ERzSjNjti9AQ,26 ہزار,https://yt3.ggpht.com/lZjtJFO-hn2HJXRabCGVcgp7...,False
1,UgwrrqPVIL-MN2MSHsp4AaABAg,Theres no evidence that dinosaurs didn’t actua...,2 سال پہلے,R SS,UCxVKkYRnsOSdYKEaNUwFG-g,47 ہزار,https://yt3.ggpht.com/ytc/AMLnZu-zu7sE3oGqw_N1...,False
2,Ugy44QaOxgV4w8UmhNF4AaABAg,My favorite part about this video is how when ...,9 مہینے پہلے,Dylan,UCyPZ2aq7WrLdOrRtvI3FiDw,12 ہزار,https://yt3.ggpht.com/ytc/AMLnZu_ajYpnzAUTJUWc...,False
3,UgzjYqysZKRT3yMN-sN4AaABAg,"To date, the only meme I never fully understoo...",4 مہینے پہلے,PuzzLEGO,UC4GaWcWGhh5n1WmJvb1oBkA,8.4 ہزار,https://yt3.ggpht.com/qMc36j22XaxCPe58-4jzWA86...,False
4,UgyR8XpF7lahMshNm5N4AaABAg,This video was:\n✅Useful ✅Calming ✅Inspiring ✅...,2 مہینے پہلے,Dont Stare,UCnGRb9A-OCO2YnkUE8ewApg,783,https://yt3.ggpht.com/ytc/AMLnZu9B7RAxgSwJbXve...,False
5,Ugyntn0vft-ybnC5CKR4AaABAg,"Fun fact, This is an elegant meme, from a more...",7 مہینے پہلے,General_Cheese6,UCqau0K0yGXmpps7-MsJLluw,2.6 ہزار,https://yt3.ggpht.com/9Ohed-a3BMZUwwmSZDZVngS3...,False
6,UgyuUFLuSHhlutgd4Wp4AaABAg,I loved the part when the T-rex-looking dinosa...,3 مہینے پہلے,Paotato,UC7HS0wDoN0duvLbJjo9lWCA,724,https://yt3.ggpht.com/kqZ4eQ9Z-N_LpJXEUv6G7Mqj...,False
7,UgxnmExmA0UtgaQR9Lh4AaABAg,I want this played at my funeral. So everyone ...,4 مہینے پہلے,Rule 34,UC5zDKVXPjZq-kbpC2qtkOcw,1.8 ہزار,https://yt3.ggpht.com/ytc/AMLnZu-wF2eVDnt_4PdI...,False
8,UgyXnbVx3fmqI_MGdTN4AaABAg,"The way the strange-looking dinosaur sings ""Ba...",4 مہینے پہلے,Brull,UCPQIWoFViojcVcQSm7v-aFA,1.5 ہزار,https://yt3.ggpht.com/h15oJhIVIiBatq2fD2vuvudH...,False
9,UgytDD4IDFAhqawQSch4AaABAg,"Even after ten whole years, this remains one o...",3 مہینے پہلے,Kalemaree,UC5l0jXZmVj3c9BQEmeuMuIA,143,https://yt3.ggpht.com/ytc/AMLnZu-dQCBZIOiy50aC...,False



[3.60 seconds] Done!


In [35]:
df_comment = pd.read_csv('ytb_comments.csv')
df_comment.shape

(20, 8)

## Links From DataFrame

In [36]:
"""
Dump comments to a csv by parsing links from a csv with video links

Example -
Create a csv with one column titled 'link'
a sample is given below

'ytb_video_list.csv'

link
https://www.youtube.com/watch?v=-t_uhBBDbA4
https://www.youtube.com/watch?v=75vjjRza7IU
https://www.youtube.com/watch?v=j6dmaPzOBHY
https://www.youtube.com/watch?v=Yj2efyQV1RI
https://www.youtube.com/watch?v=HV652F7U6Qs
https://www.youtube.com/watch?v=47iXEucg3eo
https://www.youtube.com/watch?v=ofHXBLEE3TQ
https://www.youtube.com/watch?v=X6lGqSfVRT8
https://www.youtube.com/watch?v=a_-z9FhGBrE
https://www.youtube.com/watch?v=wTUM_4cVlE4


"""

youtube_data = pd.read_csv("YouTube_data.csv")
youtube_data.url[:3].map(lambda x: main(x))

FileNotFoundError: [Errno 2] No such file or directory: 'YouTube_data.csv'

## Preview Output CSV

In [37]:
df_comment = pd.read_csv('./ytb_comments.csv')

print(f"{df_comment.shape[0]} rows, {df_comment.shape[1]} columns")
df_comment.head()

20 rows, 8 columns


Unnamed: 0,cid,text,time,author,channel,votes,photo,heart
0,UgyQFWUAwflZw8dgkUl4AaABAg,Shot horizontally😀👍,1 سال پہلے,Dave Ellis,UC2vxO8ZCkPanvHF5L4IQ79Q,263,https://yt3.ggpht.com/ytc/AMLnZu_80GFCyeIQRDI_...,True
1,UgwtAC15ZpDsqRFscuB4AaABAg,I have a feeling we aren't too far from Disney...,10 مہینے پہلے,Stefan,UCn_cd_kyCntJs4yu249FayQ,195,https://yt3.ggpht.com/ytc/AMLnZu9dxMjVLZVsbgtJ...,True
2,Ugxjl5CzAJ928JZ6pdt4AaABAg,I love stuff like this because I feel like art...,1 سال پہلے,Jack Middleton,UCHNIRmyVDh52gm60BDEzyCw,102,https://yt3.ggpht.com/ytc/AMLnZu92mQwpJfkOX2Qh...,True
3,Ugxl-2CmqdAsscaV9FJ4AaABAg,That’s incredible. I can’t believe how much wo...,10 مہینے پہلے,Tim Moeller,UCLjV7s8SwS3STCvWCZ_Znqw,155,https://yt3.ggpht.com/qA5LGkIhqbY3mRa0PEM01uui...,True
4,UgxI1JttyQHviJWc0ep4AaABAg,"Thank you math, and the people who understand ...",3 ہفتے پہلے,OnHoldAt50,UCuguVTGL9PUn6A9Xw3S58KA,4,https://yt3.ggpht.com/ytc/AMLnZu8tCAtai24OOYYA...,False


## Reference

1. [https://github.com/egbertbouman/youtube-comment-downloader](https://github.com/egbertbouman/youtube-comment-downloader)

In [57]:
from selenium import webdriver
import time
import os
import csv
import pandas as pd
from math import ceil
from selenium.webdriver.common.by import By
from webdriver_manager.firefox import GeckoDriverManager
from selenium import webdriver
from webdriver_manager.firefox import GeckoDriverManager
from bs4 import BeautifulSoup
import time

In [65]:
comment_list = []
comment_reply = []
def ScrapComment(url):
    option = webdriver.FirefoxOptions()
    option.add_argument("--headless")
    driver = webdriver.Chrome(executable_path=GeckoDriverManager().install(), options=option)
    driver.get(url)
    prev_h = 0
    while True:
        height = driver.execute_script("""
                function getActualHeight() {
                    return Math.max(
                        Math.max(document.body.scrollHeight, document.documentElement.scrollHeight),
                        Math.max(document.body.offsetHeight, document.documentElement.offsetHeight),
                        Math.max(document.body.clientHeight, document.documentElement.clientHeight)
                    );
                }
                return getActualHeight();
            """)
        driver.execute_script(f"window.scrollTo({prev_h},{prev_h + 200})")
        # fix the time sleep value according to your network connection
        time.sleep(1)
        prev_h +=200  
        if prev_h >= height:
            break
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    driver.quit()
    title_text_div = soup.select_one('#container h1')
    title = title_text_div and title_text_div.text
    comment_div = soup.select("#content #content-text")
    comment_r = soup.select('style-scope yt-formatted-string')
    global comment_list
    global comment_reply
    comment_list = [x.text for x in comment_div]
    comment_reply = [x for x in comment_div]
    print(f'comment_list {comment_list}')
    print(f'title {title}')


if __name__ == "__main__":

    urls = [
        "https://www.youtube.com/watch?v=VmEU22NYkEs"
    ]
    ScrapComment(urls[0])

[WDM] - Downloading: 19.0kB [00:00, 20.3MB/s]                                                                          
  driver = webdriver.Chrome(executable_path=GeckoDriverManager().install(), options=option)


comment_list ['2.5 million views, say MashALLAH :)', 'Allah is medicine of every pain ', 'Very peaceful song....love from india,Allah!!!mere sabhi Muslim bhaiyo ko Salamt rakkhe....', 'I am hindu but this song gives peace to my heart ', "I am facing a problem!\nI can not sleep without listening to these 3 soothing songs-\n1. Noor-e-ilahi\n2. Noor-e-azal\n3. Pardadari.\n️ Watching from Bangladesh\nEdit- now added 'faryaad'", "I m hindu but every time I listen this song it gives me a heavenly peace my god I can't even explain in words ....my love for allah is limitless like a sky coz I know mera.khuda allah mera ishwar ye to ek hi hai sirf alag alag logon ne.naam.alag rkh diye ....love this song", 'Am not A Muslim but I Listen it everyday when I get time ️.\nI love It ️', "Purest form of music..... I'm hindu but I can feel the holy presence of God everywhere...", 'I am hindu but love to hear this song....And Atif is just awesome', "Today I was fasting Rosa and after iftar I started feeli

In [67]:
len(comment_list)

1835

In [96]:
new_comment_list = []
for i in comment_list:
    if i != '':
        new_comment_list.append(i)

In [97]:
df = pd.DataFrame(new_comment_list,columns=['comments'])

In [98]:
df.to_csv('ytb_comments.csv',index=False)

In [99]:
# df['comments'] = comment_list

In [100]:
df.head()

Unnamed: 0,comments
0,"2.5 million views, say MashALLAH :)"
1,Allah is medicine of every pain
2,"Very peaceful song....love from india,Allah!!!..."
3,I am hindu but this song gives peace to my heart
4,I am facing a problem!\nI can not sleep withou...


In [103]:
new_comment_list

['2.5 million views, say MashALLAH :)',
 'Allah is medicine of every pain ',
 'Very peaceful song....love from india,Allah!!!mere sabhi Muslim bhaiyo ko Salamt rakkhe....',
 'I am hindu but this song gives peace to my heart ',
 "I am facing a problem!\nI can not sleep without listening to these 3 soothing songs-\n1. Noor-e-ilahi\n2. Noor-e-azal\n3. Pardadari.\n️ Watching from Bangladesh\nEdit- now added 'faryaad'",
 "I m hindu but every time I listen this song it gives me a heavenly peace my god I can't even explain in words ....my love for allah is limitless like a sky coz I know mera.khuda allah mera ishwar ye to ek hi hai sirf alag alag logon ne.naam.alag rkh diye ....love this song",
 'Am not A Muslim but I Listen it everyday when I get time ️.\nI love It ️',
 "Purest form of music..... I'm hindu but I can feel the holy presence of God everywhere...",
 'I am hindu but love to hear this song....And Atif is just awesome',
 "Today I was fasting Rosa and after iftar I started feeling v

In [104]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [128]:
from google_trans_new import google_translator
translator = google_translator()
translate_comments = []
# for i in new_comment_list:
#     translate_text = translator.translate('this is',lang_tgt='en') 
#     translate_comments.append(translate_text)
# translate_comments
translate_text = translator.translate('this is',lang_tgt='en') 
translate_text

google_new_transError: 404 (Not Found) from TTS API. Probable cause: Unknown