In [1]:
# Import the necessary libraries
import pandas as pd
from selenium import webdriver
from bs4 import BeautifulSoup

In [2]:
# Objective: Scrape comments from a given video.

# Start a Selenium-driven Chrome session.
driver = webdriver.Chrome()

# Configure the page-load timeout BEFORE navigating: the limit only applies to
# navigations issued after it is set, so setting it after get() would leave the
# first page load unbounded.
driver.set_page_load_timeout(10)

# Open the target video page.
driver.get('https://www.youtube.com/watch?v=0JE5LrPBSb0')

In [34]:
# Navigate the already-open browser session to the video that is scraped in the cells below.
driver.get('https://www.youtube.com/watch?v=NOpmy7Kqgok')

In [35]:
# Capture the HTML of the page as it currently stands in the browser.
# Scroll the page as much as needed before executing this cell
# (presumably so that more comments are present in the page source - confirm).
page_content = driver.page_source

In [6]:
# Define a function that takes page content as input and outputs dataframe with scraped data.
def scrape_video(page_content, URL):
    """Extract comment data from the HTML of a video page.

    Every ``ytd-comment-renderer`` element whose class attribute is
    ``style-scope ytd-comment-thread-renderer`` becomes one row of the result.

    Parameters
    ----------
    page_content : str
        HTML source of the page (for example ``driver.page_source``).
    URL : str
        Stored unchanged in the ``URL`` column of every row. It is not
        fetched or validated here.

    Returns
    -------
    pandas.DataFrame
        Columns ``Author``, ``Text``, ``Timestamp``, ``No. of Likes`` and
        ``URL``, in that order. A field whose element is not found in a
        comment is filled with the string ``'N/A'``. When no comment matches,
        an empty DataFrame with the same columns is returned.
    """
    columns = ['Author', 'Text', 'Timestamp', 'No. of Likes', 'URL']

    def _field_text(parent, tag, css_class, strip=False):
        # Return the text of the first matching child, or 'N/A' when there is none.
        element = parent.find(tag, class_=css_class)
        if element is None:
            return 'N/A'
        return element.text.strip() if strip else element.text

    # Parsing page content as BeautifulSoup object
    soup = BeautifulSoup(page_content, 'html.parser')

    # ResultSet with one element per comment to be scraped
    comments_raw = soup.find_all('ytd-comment-renderer', class_='style-scope ytd-comment-thread-renderer')

    comments = []
    for comment in comments_raw:
        comments.append({
            # Author and like count carry surrounding whitespace, so they are stripped;
            # text and timestamp are kept exactly as found.
            'Author': _field_text(comment, 'h3', 'style-scope ytd-comment-renderer', strip=True),
            'Text': _field_text(comment, 'yt-formatted-string', 'style-scope ytd-comment-renderer'),
            'Timestamp': _field_text(
                comment, 'yt-formatted-string', 'published-time-text style-scope ytd-comment-renderer'
            ),
            'No. of Likes': _field_text(
                comment, 'span', 'style-scope ytd-comment-action-buttons-renderer', strip=True
            ),
            'URL': URL,
        })

    # Passing the columns explicitly keeps the schema stable even when no comment was found.
    return pd.DataFrame(comments, columns=columns)

In [36]:
# Scrape the captured page source. The URL argument is only stored in the 'URL' column,
# so it has to be kept in sync by hand with the page that was loaded in the driver.
df_temp = scrape_video(page_content, 'https://www.youtube.com/watch?v=NOpmy7Kqgok')

In [37]:
# Append this video's rows to the accumulated DataFrame `df`.
# NOTE(review): `df` is not created in any cell visible above, so on a fresh kernel this
# raises NameError; re-running the cell also appends the same rows a second time.
df = pd.concat([df, df_temp], ignore_index=True)

In [38]:
# Display the accumulated comments.
df

Unnamed: 0,Author,Text,Timestamp,No. of Likes,URL
0,@Maria_Nette,Thanks for the walkthrough!,1 month ago,1,https://www.youtube.com/watch?v=2wYcJEcKVPk
1,@OZtwo,Cool thanks for the video! But also you do nee...,1 month ago (edited),2,https://www.youtube.com/watch?v=2wYcJEcKVPk
2,@Lucastoxico308,"I second that, always good to have private opt...",4 weeks ago,0,https://www.youtube.com/watch?v=2wYcJEcKVPk
3,@user-ew4jb2eu8r,The Mistral model on enterprise is perfect for...,4 weeks ago,0,https://www.youtube.com/watch?v=2wYcJEcKVPk
4,@CM-zl2jw,I still don’t have the store. How do I get it...,1 month ago,1,https://www.youtube.com/watch?v=2wYcJEcKVPk
...,...,...,...,...,...
94,@marcus3of5,When they figure out how to compensate for pri...,1 month ago,0,https://www.youtube.com/watch?v=NOpmy7Kqgok
95,@Sluggernaut,Meh. I might as well use regular chat gpt. Not...,1 month ago,3,https://www.youtube.com/watch?v=NOpmy7Kqgok
96,@shaunralston,The GPT store is amazing. Great video.,1 month ago,0,https://www.youtube.com/watch?v=NOpmy7Kqgok
97,@Dylanareads,This is a good video!!,1 month ago,1,https://www.youtube.com/watch?v=NOpmy7Kqgok


In [40]:
# List the distinct video URLs present in the accumulated data.
df['URL'].unique()

array(['https://www.youtube.com/watch?v=2wYcJEcKVPk',
       'https://www.youtube.com/watch?v=0q3veW5esJ0',
       'https://www.youtube.com/watch?v=qUuEUTd9v04',
       'https://www.youtube.com/watch?v=ZbVjyQyVaWw',
       'https://www.youtube.com/watch?v=NOpmy7Kqgok'], dtype=object)

In [41]:
# Summarise the columns, their dtypes and their non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Author        99 non-null     object
 1   Text          99 non-null     object
 2   Timestamp     99 non-null     object
 3   No. of Likes  99 non-null     object
 4   URL           99 non-null     object
dtypes: object(5)
memory usage: 4.0+ KB


In [42]:
# Shut down the WebDriver session now that the page source has been captured.
driver.quit()

In [43]:
# Define a function that takes the dataframe and filename as input and writes an excel file to disk
def write_to_disk(df, filename):
    """Write ``df`` to an Excel workbook and print where it was saved.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to export. It is written to a sheet named ``Sheet1``, index included.
    filename : str
        File name without extension; ``.xlsx`` is appended.

    Notes
    -----
    The destination directory is hard-coded to a user-specific Windows path.
    """
    out_path = f'C:\\Users\\BINARY COMPUTERS\\Documents\\{filename}.xlsx'

    # The context manager saves and closes the workbook on exit, even when to_excel raises.
    # It replaces the explicit writer.save() call, which no longer exists in pandas 2.x.
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Sheet1')

    print(f"Write Complete. You can access the file at {out_path}")

In [44]:
# Export the accumulated comments; write_to_disk appends the '.xlsx' extension.
write_to_disk(df, 'GPT Store-YouTube')

Write Complete. You can access the file at C:\Users\BINARY COMPUTERS\Documents\GPT Store-YouTube.xlsx


In [4]:
# Parse the captured page source so the comment markup can be explored interactively.
soup = BeautifulSoup(page_content, 'html.parser')

In [10]:
# Inspect the raw elements matched by the comment selector.
soup.find_all('ytd-comment-renderer', class_='style-scope ytd-comment-thread-renderer')

[<ytd-comment-renderer class="style-scope ytd-comment-thread-renderer" comment-style="unknown" id="comment" style="--ytd-comment-paid-background-color: initial;"><!--css-build:shady--><!--css-build:shady--><div class="style-scope ytd-comment-renderer" id="paid-comment-background"></div>
 <div class="style-scope ytd-comment-renderer" hidden="" id="linked-comment-badge"></div>
 <div class="style-scope ytd-comment-renderer" id="body">
 <div class="style-scope ytd-comment-renderer" id="author-thumbnail">
 <a class="yt-simple-endpoint style-scope ytd-comment-renderer" href="/@animals-world">
 <yt-img-shadow class="style-scope ytd-comment-renderer no-transition" fit="" height="40" loaded="" style="background-color: transparent;" width="40"><!--css-build:shady--><!--css-build:shady--><img alt="@animals-world" class="style-scope yt-img-shadow" draggable="false" height="40" id="img" src="https://yt3.ggpht.com/5ImjqW3QeipmEyM8sCtOXJa-Oo-clfsQj8T1OWmU6JOlJ0kwRddYJeeGlf3dFDqPOTV3U5G4Ug=s48-c-k-c0x

In [29]:
# Probe the like-count span of the first matched comment.
# Raises AttributeError if either find() call matches nothing.
soup.find('ytd-comment-renderer', class_='style-scope ytd-comment-thread-renderer').find('span', class_='style-scope ytd-comment-action-buttons-renderer').text.strip()

'70'

In [49]:
# Collect every matched comment element for the extraction loop below.
comments_list = soup.find_all('ytd-comment-renderer', class_='style-scope ytd-comment-thread-renderer')

In [50]:
# Probe the text of the first matched comment's `channel-owner` element
# (the recorded output shows an author handle).
soup.find('ytd-comment-renderer', class_='style-scope ytd-comment-thread-renderer').find('yt-formatted-string', class_='channel-owner style-scope ytd-comment-renderer style-scope ytd-comment-renderer').text

'@animals-world'

In [60]:
# Build one dict per matched comment. A field whose element is not found is recorded
# as 'N/A' (as scrape_video intends) instead of aborting the loop with AttributeError.
comments = []
for comment in comments_list:
    # find() returns None when nothing matches, so look every element up before reading .text
    author_el = comment.find('h3', class_='style-scope ytd-comment-renderer')
    text_el = comment.find('yt-formatted-string', class_='style-scope ytd-comment-renderer')
    time_el = comment.find('yt-formatted-string', class_='published-time-text style-scope ytd-comment-renderer')
    likes_el = comment.find('span', class_='style-scope ytd-comment-action-buttons-renderer')

    comment_data = {
        'author': author_el.text.strip() if author_el is not None else 'N/A',
        'text': text_el.text if text_el is not None else 'N/A',
        'time': time_el.text if time_el is not None else 'N/A',
        'No. of Likes': likes_el.text.strip() if likes_el is not None else 'N/A',
    }
    comments.append(comment_data)

In [62]:
# Turn the list of per-comment dicts into a DataFrame (one row per comment).
comments_df = pd.DataFrame(comments)

In [63]:
# Display the resulting DataFrame.
comments_df

Unnamed: 0,author,text,time,No. of Likes
0,@animals-world,4K African Wildlife : The World's Greatest Mig...,4 days ago,44
1,@TranquilMelody351,Whoever reads this wish all your dream can be ...,4 days ago,13
2,@user-adoyle123,It lifts your heart and Spirits to see such be...,1 day ago,3
3,@SiphamandlaNgubane-lw3vw,In nature we witness Almighty power.,2 days ago,9
4,@user-iq8us2jt1u,God created everything according to his timing,3 days ago,7
5,@user-mm1df4kg8x,This is nice to sit back a ND relax with the m...,2 days ago,9
6,@agnettakamugisha4984,@animals-world @Relaxing Nature 4K\nThank you ...,2 days ago,8
7,@shaunbarry4325,Beautiful Africa,1 day ago,4
8,@mwendavet,Awesomely sultry and enchanting simply amazing,4 hours ago,1
9,@user-te9xc1kc6k,That's awesome natural beauty in the Africa,18 hours ago,1
