### Scrape the Association of Banks Malaysia
We will now need to get a list of the Malaysian Banks. Thus scouring the internet for a bit, we find a website that list all the Banks in Malaysia with a **Commercial Banking** license. <br>
Line 1: URL of the Association of banks Malaysia <br>
Lines 3 - 6 : Creating the soup object to be parsed <br>
Lines 8 : All the data we are intereseted in is nested in the `<div class="d-none au-am-data">` tag <br>

In [1]:
from fake_useragent import UserAgent
ua = UserAgent()
random_header = ua.chrome
# print(random_header)

random_user_agent = {'User-Agent' : str(random_header)}

ModuleNotFoundError: No module named 'fake_useragent'

In [2]:
import requests
from bs4 import BeautifulSoup

In [3]:
my_url = 'https://www.abm.org.my/about-us/abm-members/'

abm_source = requests.get(url=f'{my_url}',\
               stream=True,\
               headers=random_user_agent).text  # => "<!DOCTYPE html><html><head>..."
abm_soup = BeautifulSoup(abm_source, 'html.parser')
#print(abm_soup.prettify())
malaysian_banks = abm_soup.find_all(name='div', attrs={'class': "d-none au-am-data"})
print(f'We have {len(malaysian_banks)} banks in Malaysia with a commercial banking license')

We have 26 banks in Malaysia with a commercial banking license


In [29]:
chime.themes

<function chime.themes() -> List[str]>

In [4]:
bank_names = []
hashtag_names = []
admirals_of_the_banks = []
top_titles = []
bank_websites = []


for each_bank in malaysian_banks:
    each_bank_name = each_bank.h5.text    
    each_hashtag = each_bank.get('data-bank')
    each_admiral = each_bank.find(name='div', attrs={'class':"au-am-value"}).text
    each_title = each_bank.find(name='div', attrs={'class':"au-am-label"}).text
    each_website = each_bank.find(name='a', attrs={'class':"au-am-web"}).get('href')                    
        
    bank_names.append(each_bank_name)
    hashtag_names.append(each_hashtag)
    admirals_of_the_banks.append(each_admiral)
    top_titles.append(each_title)
    bank_websites.append(each_website)

import pandas as pd
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 1000)
import numpy as np
import matplotlib.pyplot as plt

robust_zip = list(zip(bank_names, hashtag_names, admirals_of_the_banks, top_titles, bank_websites))
banks_malaysia= pd.DataFrame(robust_zip, columns=['Bank_Names', 'Hashtag_Name', 'Top_Person', 'Top_Title', 'Bank_Website'])
banks_malaysia.shape

(26, 5)

In [5]:
from collections import defaultdict
hash_to_handle_map = {}
for idx, value in banks_malaysia['Hashtag_Name'].items():
    hash_to_handle_map.update({value : str()})


hash_to_handle_map['#affin_bank_berhad'] = '@AFFIN'
hash_to_handle_map.update({'#alliance_bank_malaysia_berhad' : '@AllianceBankMY',
                           '#ambank_(m)_berhad' : '@AmBankTV',
                           '#bangkok_bank_berhad' : '@bangkokbankchannel',
                           '#bank_of_america_malaysia_berhad' : '@BankofAmerica',
                           '#bank_of_china_(malaysia)_berhad' : '@BankofChinaManila',
                           '#bnp_paribas_malaysia_berhad' : '@labanquedunmondequichange',
                           '#boost_bank_berhad' : '@MyBoostApp',
                           '#china_construction_bank_(malaysia)_berhad': None,
                           '#cimb_bank_berhad' : '@CIMBGroupHoldingsBhd',
                           '#citibank_berhad' : '@Citi',
                           '#deutsche_bank_(malaysia)_berhad' : '@DeutscheBank',
                           '#gx_bank_berhad' : '@GXBank',
                           '#hong_leong_bank_berhad' : '@hongleongbankmy',
                           '#hsbc_bank_malaysia_berhad' : '@HSBC_MY',
                           '#industrial_and_commercial_bank_of_china_(malaysia)_berhad' : None,
                           '#j.p._morgan_chase_bank_berhad' : '@jpmorgan',
                           '#malayan_banking_berhad_(maybank)' : '@maybankvideos',
                           '#mizuho_bank_(malaysia)_berhad' : '@MizuhoAmericas',
                           '#mufg_bank_(malaysia)_berhad': '@MUFGBankChannel',
                           '#ocbc_bank_(malaysia)_berhad': '@channelocbc',
                           '#public_bank_berhad': '@PublicBankGroup',
                           '#rhb_bank_berhad': '@rhbgroup',
                           '#standard_chartered_bank_malaysia_berhad': '@standardchartered',
                           '#sumitomo_mitsui_banking_corporation_malaysia_berhad': '@smbcgroup',
                           '#united_overseas_bank_(malaysia)_bhd': '@uob'  
                          })

# Add the dict to the dataframe
banks_malaysia['Youtube_Handle'] = banks_malaysia['Hashtag_Name'].apply(lambda x: hash_to_handle_map.get(x, 'not found'))

In [6]:
# View the data
display(pd.concat([banks_malaysia.head(), banks_malaysia.tail()]))

Unnamed: 0,Bank_Names,Hashtag_Name,Top_Person,Top_Title,Bank_Website,Youtube_Handle
0,Affin Bank Berhad,#affin_bank_berhad,Datuk Wan Razly Abdullah Wan Ali,Group CEO,https://www.affingroup.com/affin-bank-berhad,@AFFIN
1,Alliance Bank Malaysia Berhad,#alliance_bank_malaysia_berhad,Kellee Kam Chee Khiong,Group CEO,https://www.alliancebank.com.my/,@AllianceBankMY
2,AmBank (M) Berhad,#ambank_(m)_berhad,Jamie Ling,Group CEO,https://www.ambankgroup.com/eng/Pages/home.aspx,@AmBankTV
3,Bangkok Bank Berhad,#bangkok_bank_berhad,Kanet Buranasin,CEO,http://www.bangkokbank.com.my/,@bangkokbankchannel
4,Bank of America Malaysia Berhad,#bank_of_america_malaysia_berhad,Gautam Padmakar Puntambekar,MD/Malaysia Country Head,https://www.bofaml.com/en-us/content/apac-malaysia.html,@BankofAmerica
21,Public Bank Berhad,#public_bank_berhad,Tan Sri Dato' Sri Dr Tay Ah Lek,MD/CEO,http://www.publicbankgroup.com/,@PublicBankGroup
22,RHB Bank Berhad,#rhb_bank_berhad,Dato' Mohd Rashid Mohamad,Group Managing Director / Group Chief Executive Officer,https://www.rhbgroup.com/,@rhbgroup
23,Standard Chartered Bank Malaysia Berhad,#standard_chartered_bank_malaysia_berhad,Mak Joon Nien,Managing Director & Chief Executive Officer,http://www.sc.com/my,@standardchartered
24,Sumitomo Mitsui Banking Corporation Malaysia Berhad,#sumitomo_mitsui_banking_corporation_malaysia_berhad,Atsuhide Shiojiri,President/CEO,https://www.smbc.co.jp/asia/malaysia/,@smbcgroup
25,United Overseas Bank (Malaysia) Bhd,#united_overseas_bank_(malaysia)_bhd,Ng Wei Wei,CEO,http://www.uob.com.my/,@uob


In [7]:
import pickle

In [8]:
## Write table 
with open('./Data/abm_banks_malaysia.pkl', 'wb') as f:
    pickle.dump(banks_malaysia, f)

In [9]:
## Read back data
# Takes a while to read the pickle
import pickle
# Reading the file - unpickling
with open('./Data/abm_banks_malaysia.pkl', 'rb') as fp:
    abm_banks = pickle.load(fp)

In [10]:
pd.concat([abm_banks.head(), abm_banks.tail()])

Unnamed: 0,Bank_Names,Hashtag_Name,Top_Person,Top_Title,Bank_Website,Youtube_Handle
0,Affin Bank Berhad,#affin_bank_berhad,Datuk Wan Razly Abdullah Wan Ali,Group CEO,https://www.affingroup.com/affin-bank-berhad,@AFFIN
1,Alliance Bank Malaysia Berhad,#alliance_bank_malaysia_berhad,Kellee Kam Chee Khiong,Group CEO,https://www.alliancebank.com.my/,@AllianceBankMY
2,AmBank (M) Berhad,#ambank_(m)_berhad,Jamie Ling,Group CEO,https://www.ambankgroup.com/eng/Pages/home.aspx,@AmBankTV
3,Bangkok Bank Berhad,#bangkok_bank_berhad,Kanet Buranasin,CEO,http://www.bangkokbank.com.my/,@bangkokbankchannel
4,Bank of America Malaysia Berhad,#bank_of_america_malaysia_berhad,Gautam Padmakar Puntambekar,MD/Malaysia Country Head,https://www.bofaml.com/en-us/content/apac-malaysia.html,@BankofAmerica
21,Public Bank Berhad,#public_bank_berhad,Tan Sri Dato' Sri Dr Tay Ah Lek,MD/CEO,http://www.publicbankgroup.com/,@PublicBankGroup
22,RHB Bank Berhad,#rhb_bank_berhad,Dato' Mohd Rashid Mohamad,Group Managing Director / Group Chief Executive Officer,https://www.rhbgroup.com/,@rhbgroup
23,Standard Chartered Bank Malaysia Berhad,#standard_chartered_bank_malaysia_berhad,Mak Joon Nien,Managing Director & Chief Executive Officer,http://www.sc.com/my,@standardchartered
24,Sumitomo Mitsui Banking Corporation Malaysia Berhad,#sumitomo_mitsui_banking_corporation_malaysia_berhad,Atsuhide Shiojiri,President/CEO,https://www.smbc.co.jp/asia/malaysia/,@smbcgroup
25,United Overseas Bank (Malaysia) Bhd,#united_overseas_bank_(malaysia)_bhd,Ng Wei Wei,CEO,http://www.uob.com.my/,@uob


# Test connection

## Requests and bs4

In [1]:
from fake_useragent import UserAgent
ua = UserAgent()
random_header = ua.chrome
# print(random_header)

random_user_agent = {'User-Agent' : str(random_header)}

import cloudscraper
import requests
from bs4 import BeautifulSoup

my_query='@bluestar9000'

my_url = f'https://www.youtube.com/{my_query}'
cloud_source = requests.get(url=f'{my_url}',\
               stream=True,\
               headers=random_user_agent).text  # => "<!DOCTYPE html><html><head>..."
cloud_soup = BeautifulSoup(cloud_source, 'html.parser')

my_channel_ID = cloud_soup.find(name='meta', attrs={'itemprop' : "identifier"})
print(f'The channel_name {my_query} has channel_ID : ', my_channel_ID.get('content'))

my_channel_ID = cloud_soup.find(name='meta', attrs={'itemprop' : "identifier"})

The channel_name @bluestar9000 has channel_ID :  UCkdWGOjnGGSY1PoDmKqiwSQ


## Selenium

In [84]:
import os
import time 
from selenium import webdriver 
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import time 
from datetime import datetime

In [99]:
now = datetime.now()
now.strftime("%m-%d-%Y  %H:%M:%S")

'10-24-2025  16:47:22'

In [168]:
BIG_DICT = {}
service = Service("/usr/local/bin/chromedriver")
driver = webdriver.Chrome(service = service)
my_query='@bluestar9000'
my_url = f'https://www.youtube.com/{my_query}'
# Now that we have service, let's get the first url in our list
driver.get(url=my_url)
try:
    # Next we have to wait for the page to load
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CLASS_NAME, "yt-truncated-text__absolute-button"))
    )
    # Still a bit of lag though
    time.sleep(3)
    
    # Now that we know our element exists, we can click it
    driver.find_element(By.CLASS_NAME, "yt-truncated-text__absolute-button").click()
    
    # Next we have to explicitly wait for the new content to render
    time.sleep(3)


    # Next we have to wait for the page to load
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CLASS_NAME, "style-scope ytd-about-channel-renderer"))
    )

    # Still a bit of lag though
    time.sleep(3)

        
    # Now that we know our element exists, we can click it
    elements = driver.find_elements(By.XPATH, "//td[contains(@class, 'style-scope ytd-about-channel-renderer')]")
    for e_td in elements:
        # print(f"Here is data :{e_td.text}\n")
        pass
    
    elements_2 = driver.find_elements(By.XPATH, "//tr[contains(@class, 'description-item style-scope ytd-about-channel-renderer')]")
    small_dict = {}
    for e_tr in elements_2:
        yt_icon = e_tr.find_element(By.TAG_NAME, 'yt-icon')
        attribute_value = yt_icon.get_attribute("icon")
        # print(attribute_value)
        text_value = e_tr.find_elements(By.TAG_NAME, 'td')[-1]
        # print(text_value.text)

        ## Now ready to add info into small_dict
        small_dict.update({**{'ID':my_query},
                           **{attribute_value : text_value.text},
                           **dict(Scraped_DateTime=datetime.now())
                          })
        
    # Now switch from the mainpage to pdfpage
    # driver.switch_to.window(driver.window_handles[1])
    
except TimeoutException: 
    print('Redirected straight to the pdf')
    print(driver.current_url)

print(small_dict)
# finally:
#     # df = camelot_extract(pdf_url=driver.current_url)
#     df = ultimate_parser(pdf_source=driver.current_url)
#     display(df)
#     data_dict = df.to_dict(orient='index')
#     print(data_dict)
#     driver.quit()

{'ID': '@bluestar9000', 'mail': '', 'Scraped_DateTime': datetime.datetime(2025, 10, 25, 22, 43, 29, 99021), 'phone': '', 'language': 'www.youtube.com/@bluestar9000', 'privacy_public': '', 'info_outline': 'Joined 28 Jan 2007', 'person_radar': '73.4K subscribers', 'my_videos': '25 videos', 'trending_up': '51,996,777 views'}


In [174]:
from pprint import pprint
pprint(small_dict)

{'ID': '@bluestar9000',
 'Scraped_DateTime': datetime.datetime(2025, 10, 25, 22, 43, 29, 99021),
 'info_outline': 'Joined 28 Jan 2007',
 'language': 'www.youtube.com/@bluestar9000',
 'mail': '',
 'my_videos': '25 videos',
 'person_radar': '73.4K subscribers',
 'phone': '',
 'privacy_public': '',
 'trending_up': '51,996,777 views'}


# Test Iteration

In [140]:
import sys
sys.path.append('/Users/Malcolm/Documents/Helper Functions/')
import Read_n_Write as RW # We won't be using the write_list_into_txt

In [18]:
import os
import time 
from selenium import webdriver 
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import subprocess
import pyautogui
# from tabula import read_pdf
# import camelot
import pickle # Save the scraped list of dataframes df into a pick
import time 
import chime
from datetime import datetime
now = datetime.now()

# On start 
tab_counter = 0
dfs = []
total_start = time.time()
# Start caffeinate to prevent computer from falling asleep.
caffeinate_process = subprocess.Popen('caffeinate')
print(bool(caffeinate_process))

# Convert the text data into Python list
memoized_links = RW.read_txt_into_list('Scrape_ABM') # touch this file beforehand
counter = len(memoized_links)

# Get the youtube_handles/IDs from the dataframe
youtube_handles = abm_banks['Youtube_Handle'].to_list()

True


In [19]:
total_start = time.time()
# Establish driver and service through each iteration
service = Service("/usr/local/bin/chromedriver")
driver = webdriver.Chrome(service = service)
BIG_LIST = []
with open("./Data/Scrape_ABM.txt", "a") as writer: # touch this file beforehand
    for e_youtube_handle in youtube_handles:
        my_url = f'https://www.youtube.com/{e_youtube_handle}'
        if e_youtube_handle not in memoized_links:
            start = time.time()
            if tab_counter == 0:
                # Now that we have service, let's get the first url in our list
                driver.get(my_url)
                time.sleep(2)
                # Use pyautogui to simulate the Control-Option-RightArrow
                # pyautogui.hotkey('ctrl', 'alt', 'u')    
                try:
                    # Next we have to wait for the page to load
                    WebDriverWait(driver, 15).until(
                        EC.presence_of_element_located((By.CLASS_NAME, "yt-truncated-text__absolute-button"))
                    )
                    # Still a bit of lag though
                    time.sleep(5)
                    # Now that we know our element exists, we can click it
                    driver.find_element(By.CLASS_NAME, "yt-truncated-text__absolute-button").click()
                    # Next we have to explicitly wait for the new content to render
                    time.sleep(5)
                    # Next we have to wait for the page to load
                    WebDriverWait(driver, 15).until(
                        EC.presence_of_element_located((By.CLASS_NAME, "style-scope ytd-about-channel-renderer"))
                    )
                    # Still a bit of lag though
                    time.sleep(3)

                    elements_2 = driver.find_elements(By.XPATH, "//tr[contains(@class, 'description-item style-scope ytd-about-channel-renderer')]")
                    small_dict = {}
                    for e_tr in elements_2:
                        yt_icon = e_tr.find_element(By.TAG_NAME, 'yt-icon')
                        attribute_value = yt_icon.get_attribute("icon")
                        # print(attribute_value)
                        text_value = e_tr.find_elements(By.TAG_NAME, 'td')[-1]
                        # print(text_value.text)
                
                        ## Now ready to add info into small_dict
                        small_dict.update({**{'ID':e_youtube_handle},
                                           **{attribute_value : text_value.text},
                                           **dict(Scraped_DateTime=datetime.now())
                                          })
                        chime.success()
                    BIG_LIST.append(small_dict)
                except (TimeoutException, KeyboardInterrupt): 
                    print('Timeout error occured')
                    chime.warning()
                finally:
                
                    # Write in-disk, across sessions
                    writer.write(f"{e_youtube_handle}\n")
                    writer.flush()
                    # Write in-memory, within the same session
                    memoized_links.append(e_youtube_handle)
                    counter += 1
                    print(f"The youtube channel {e_youtube_handle}, on {driver.current_url} has its table scraped.")
                    end = time.time()
                    print(f"This {counter}-th iteration took {round(end-start, 0)} seconds ")
                    
        else:
            print(f"Skiiping {e_youtube_handle}, already memoized")
            pass

## Epilogue
chime.theme('zelda')
chime.success()

# Write to dataframe 
overview_yt = pd.DataFrame(BIG_LIST)

## Write table 
with open('./Data/overview_youtube.pkl', 'wb') as f:
    pickle.dump(overview_yt, f)


The youtube channel @AFFIN, on https://www.youtube.com/@AFFIN has its table scraped.
This 1-th iteration took 17.0 seconds 
The youtube channel @AllianceBankMY, on https://www.youtube.com/@AllianceBankMY has its table scraped.
This 2-th iteration took 16.0 seconds 
The youtube channel @AmBankTV, on https://www.youtube.com/AmBankTV has its table scraped.
This 3-th iteration took 16.0 seconds 
The youtube channel @bangkokbankchannel, on https://www.youtube.com/@bangkokbankchannel has its table scraped.
This 4-th iteration took 16.0 seconds 
The youtube channel @BankofAmerica, on https://www.youtube.com/@BankofAmerica has its table scraped.
This 5-th iteration took 16.0 seconds 
The youtube channel @BankofChinaManila, on https://www.youtube.com/@BankofChinaManila has its table scraped.
This 6-th iteration took 16.0 seconds 
The youtube channel @labanquedunmondequichange, on https://www.youtube.com/@labanquedunmondequichange has its table scraped.
This 7-th iteration took 16.0 seconds 
The

TypeError: 'module' object is not callable

In [220]:
## Read back data
# Takes a while to read the pickle
import pickle
# Reading the file - unpickling
with open('./Data/overview_youtube.pkl', 'rb') as fp:
    overview_df = pickle.load(fp)

In [221]:
pd.concat([overview_df.head(), overview_df.tail()])

Unnamed: 0,ID,mail,Scraped_DateTime,phone,language,privacy_public,info_outline,person_radar,my_videos,trending_up
0,@AFFIN,,2025-10-23 19:19:33.602173,,www.youtube.com/@AFFIN,Malaysia,Joined 18 Jun 2014,4.9K subscribers,325 videos,"15,050,294 views"
1,@AllianceBankMY,,2025-10-23 19:19:49.725717,,www.youtube.com/@AllianceBankMY,Malaysia,Joined 15 Aug 2013,8.89K subscribers,398 videos,"55,406,213 views"
2,@AmBankTV,,2025-10-23 19:20:05.871238,,www.youtube.com/@AmBankTV,Malaysia,Joined 1 Jul 2013,7.35K subscribers,432 videos,"27,976,034 views"
3,@bangkokbankchannel,,2025-10-23 19:20:22.260778,,www.youtube.com/@bangkokbankchannel,Thailand,Joined 17 Apr 2012,28.8K subscribers,124 videos,"8,970,431 views"
4,@BankofAmerica,,2025-10-23 19:20:38.180865,,www.youtube.com/@BankofAmerica,United States,Joined 18 Mar 2006,122K subscribers,169 videos,"788,249 views"
19,@PublicBankGroup,,2025-10-23 19:24:59.020736,,www.youtube.com/@PublicBankGroup,Malaysia,Joined 5 Jun 2015,17.9K subscribers,217 videos,"4,743,003 views"
20,@rhbgroup,,2025-10-23 19:25:15.063088,,www.youtube.com/@rhbgroup,Malaysia,Joined 30 Jan 2010,34.8K subscribers,620 videos,"309,416,389 views"
21,@standardchartered,,2025-10-23 19:25:30.991864,,www.youtube.com/@standardchartered,United Kingdom,Joined 25 May 2009,56.9K subscribers,"1,711 videos","158,301,441 views"
22,@smbcgroup,,2025-10-23 19:25:46.879627,,www.youtube.com/@smbcgroup,Japan,Joined 3 Oct 2023,371 subscribers,22 videos,"14,889 views"
23,@uob,,2025-10-23 19:26:02.841470,,www.youtube.com/@uob,Singapore,Joined 24 Nov 2012,29.2K subscribers,"1,286 videos","86,088,593 views"


# Scraping all the video ID's
> Testing one channel

In [76]:
import os
import time 
from selenium import webdriver 
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import subprocess
import pyautogui

In [32]:
def scrape_all_videoIDs_given_YT_handle(url, css_selector, pause_time=2, max_attempts=20):
    """
    Scrolls through a dynamically loading page and returns all matching elements.

    Use case : Get all the video_IDS of a given channel via the infinite scroll method.
    
    
    Args:
        url (str): Target webpage URL like "https://www.youtube.com/@AmBankTV/videos"
        css_selector (str): CSS selector for elements you want to extract
        pause_time (float): Time (seconds) to wait between scrolls
        max_attempts (int): Max consecutive scrolls without new content before stopping

    Returns:
        list of WebElement: All elements matching the selector
    """
    # Establish driver and service through each iteration
    service = Service("/usr/local/bin/chromedriver")
    driver = webdriver.Chrome(service = service)
    driver.get(url)
    # Use pyautogui to simulate the Control-Option-RightArrow
    pyautogui.hotkey('ctrl', 'alt', 'enter')   
    wait = WebDriverWait(driver, 10)
    
    # Wait until at least one element appears
    wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "yt-truncated-text__absolute-button")))
    # Still a bit of lag though
    time.sleep(3)
    last_height = driver.execute_script("return document.documentElement.scrollHeight")
    same_height_attempts = 0
    
    print("üîÑ Starting infinite scroll...")

    while same_height_attempts < max_attempts:
        # Scroll to the bottom
        # driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
        
        # Wait for page to load new items
        time.sleep(pause_time)
        
        new_height = driver.execute_script("return document.documentElement.scrollHeight")
        
        # Wait for page to load new items
        time.sleep(pause_time)
        
        if new_height == last_height:
            same_height_attempts += 1
            print(f"‚ö†Ô∏è No new content... ({same_height_attempts}/{max_attempts})")
        else:
            same_height_attempts = 0
            last_height = new_height
            print("‚úÖ New content loaded.")
    
    print("üõë Finished scrolling.")
    
    # Collect all loaded elements
    elements = driver.find_elements(By.XPATH, css_selector)
    
    print(f"üì¶ Total elements found: {len(elements)}")
    
    return elements, driver  # return driver if you want to extract text/attributes later


## Invocation
my_youtube_handle = f"@AmBankTV"
my_selector="//ytd-rich-item-renderer[contains(@class, 'style-scope ytd-rich-grid-renderer')]"
my_elements, my_driver = scrape_all_videoIDs_given_YT_handle(url=fr"https://www.youtube.com/{my_youtube_handle}/videos",
                                                             css_selector=my_selector,
                                                             pause_time=5,
                                                             max_attempts=3)

# Appending to a list 
video_links = []
for el in my_elements:
    yt_icon = el.find_element(By.TAG_NAME, 'a')
    attribute_value = yt_icon.get_attribute("href")
    print(attribute_value)
    video_links.append(attribute_value)

## Write the list into a txt file
timestamp = datetime.now().strftime("%m/%d/%Y, %H:%M:%S)
RW.write_list_into_txt(senarai=video_links, filename=f"{my_youtube_handle}_videoIDs_{timestamp}") 


üîÑ Starting infinite scroll...
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚ö†Ô∏è No new content... (1/3)
‚ö†Ô∏è No new content... (2/3)
‚ö†Ô∏è No new content... (3/3)
üõë Finished scrolling.
üì¶ Total elements found: 362
https://www.youtube.com/watch?v=p7bPO1SfHJc&pp=0gcJCQYKAYcqIYzv
https://www.youtube.com/watch?v=xceiS_r8WlQ
https://www.youtube.com/watch?v=s9pNJYzRvxs
https://www.youtube.com/watch?v=FPGhDYBIlVw
https://www.youtube.com/watch?v=obl5r-aOqy4
https://www.youtube.com/watch?v=sh0XVQDBBsY
https://www.youtube.com/watch?v=-EmdeyJqKs0
https://www.youtube.com/watch?v=lyUjdro80JU
https://www.youtube.com/watch?v=Og6vcBivg9s
https://www.youtube.com/watch?v=iWlD8iSavE8&pp=0gcJCQYKAYcqIYzv
https://www.youtube.com/watch?v=Gxqo6ATyNRk
https:

# Big Scrape 1
> Scraping all video IDs

In [150]:
## Scrape all video_IDs
youtube_handles = overview_df['ID'].to_list()

## Invocation
my_selector="//ytd-rich-item-renderer[contains(@class, 'style-scope ytd-rich-grid-renderer')]"

for e_yt_handle in youtube_handles:
    my_elements, my_driver = scrape_all_videoIDs_given_YT_handle(url=fr"https://www.youtube.com/{e_yt_handle}/videos",
                                                                 css_selector=my_selector,
                                                                 pause_time=5,
                                                                 max_attempts=3)
    # Appending to a list 
    video_links = []
    for el in my_elements:
        yt_icon = el.find_element(By.TAG_NAME, 'a')
        attribute_value = yt_icon.get_attribute("href")
        print(attribute_value)
        video_links.append(attribute_value)
    
    ## Write the list into a txt file
    RW.write_list_into_txt(senarai=video_links,
                           filename=f"./Video_IDs_per_channel/{e_yt_handle}_{datetime.now().strftime('%m-%d-%Y_%H:%M:%S')}") # touch this file

üîÑ Starting infinite scroll...
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚úÖ New content loaded.
‚ö†Ô∏è No new content... (1/3)
‚ö†Ô∏è No new content... (2/3)
‚ö†Ô∏è No new content... (3/3)
üõë Finished scrolling.
üì¶ Total elements found: 221
https://www.youtube.com/watch?v=RMZdBov0FrQ
https://www.youtube.com/watch?v=Hhi_ERnrv1g
https://www.youtube.com/watch?v=t8UCJJafTd0
https://www.youtube.com/watch?v=PyuTTP4C12c
https://www.youtube.com/watch?v=SPZ4q1CnxnE
https://www.youtube.com/watch?v=ENrC21on7Gw
https://www.youtube.com/watch?v=PXGs538p-rQ
https://www.youtube.com/watch?v=5Is2nERAA34
https://www.youtube.com/watch?v=fI6u0JPz8ME&pp=0gcJCQYKAYcqIYzv
https://www.youtube.com/watch?v=wNDPH4Fawao
https://www.youtube.com/watch?v=56rQD9GJKA4
https://www.youtube.com/watch?v=izaCO4ob_ao
https://www.youtube.com/watch?v=zjmW3Uqgt8I
https://www.youtube.com/watch?v=PYdQF1odVKY
https://www.yo

# Validate videos
> Check that video count in txt files matches `overview['my_vidoes']`
> > It doesn't due to Youtube Shorts

In [208]:
import os
import re
directory = r"./Data/Video_IDs_per_channel/"
videos_not_shorts = {}
for e_file in os.listdir(directory):
    match = re.search(pattern=r"^((@[\w]+)_([\d\-]+)_([\d\:]+))\.txt$", string=e_file)
    if match:
        print(f"Match groups : {match.groups()}")
        e_list = RW.read_txt_into_list(filename=fr"./Video_IDs_per_channel/{match.group(1)}")
        videos_not_shorts[match.group(2)] = len(e_list)
    else:
        pass

Match groups : ('@standardchartered_10-24-2025_23:57:16', '@standardchartered', '10-24-2025', '23:57:16')
Match groups : ('@rhbgroup_10-24-2025_23:47:16', '@rhbgroup', '10-24-2025', '23:47:16')
Match groups : ('@GXBank_10-24-2025_23:15:04', '@GXBank', '10-24-2025', '23:15:04')
Match groups : ('@jpmorgan_10-24-2025_23:26:04', '@jpmorgan', '10-24-2025', '23:26:04')
Match groups : ('@AFFIN_10-24-2025_22:24:16', '@AFFIN', '10-24-2025', '22:24:16')
Match groups : ('@uob_10-25-2025_00:05:48', '@uob', '10-25-2025', '00:05:48')
Match groups : ('@MyBoostApp_10-24-2025_22:49:47', '@MyBoostApp', '10-24-2025', '22:49:47')
Match groups : ('@HSBC_MY_10-24-2025_23:18:18', '@HSBC_MY', '10-24-2025', '23:18:18')
Match groups : ('@smbcgroup_10-24-2025_23:57:52', '@smbcgroup', '10-24-2025', '23:57:52')
Match groups : ('@hongleongbankmy_10-24-2025_23:16:41', '@hongleongbankmy', '10-24-2025', '23:16:41')
Match groups : ('@BankofAmerica_10-24-2025_22:31:34', '@BankofAmerica', '10-24-2025', '22:31:34')
Match 

In [214]:
my_loc = overview_df.columns.get_loc('my_videos')
# Insert a new column 'new_col' after 'colB'
# The location will be loc_colB + 1
overview_df.insert(loc=my_loc+1, column='Videos_Not_Shorts', value=overview_df['ID'].apply(lambda x: videos_not_shorts.get(x, 'Not Found')))

In [217]:
## Write table 
with open('./Data/overview_youtube_2.pkl', 'wb') as f:
    pickle.dump(overview_df, f)

In [7]:
## Read back data
# Takes a while to read the pickle
import pickle
# Reading the file - unpickling
with open('./Data/overview_youtube_2.pkl', 'rb') as fp:
    overview_df = pickle.load(fp) # New and improved 

In [9]:
overview_df.head(2)

Unnamed: 0,ID,mail,Scraped_DateTime,phone,language,privacy_public,info_outline,person_radar,my_videos,Videos_Not_Shorts,trending_up
0,@AFFIN,,2025-10-23 19:19:33.602173,,www.youtube.com/@AFFIN,Malaysia,Joined 18 Jun 2014,4.9K subscribers,325 videos,221,"15,050,294 views"
1,@AllianceBankMY,,2025-10-23 19:19:49.725717,,www.youtube.com/@AllianceBankMY,Malaysia,Joined 15 Aug 2013,8.89K subscribers,398 videos,356,"55,406,213 views"


# Data cleaning overview_df

## Creating databases

In [339]:
import os
import re
directory = r"./Data/Video_IDs_per_channel/"
videos_not_shorts = {}
for e_file in os.listdir(directory):
    match = re.search(pattern=r"^((@[\w]+)_([\d\-]+)_([\d\:]+))\.txt$", string=e_file)
    if match:
        print(f"Match groups : {match.groups()}")
        e_list = RW.read_txt_into_list(filename=fr"./Video_IDs_per_channel/{match.group(1)}")
        videos_not_shorts[match.group(2)] = len(e_list)
        # Create empty dataframe
        df = pd.DataFrame()

        IDs = []
        for e_url in e_list:
            e_ID = re.search(pattern=r"\?v=(.*)$", string=e_url).group(1)
            IDs.append(e_ID)
        
        df['Youtube_Video_IDs'] = IDs
        df = df.assign(Youtube_Handle=str(match.group(2)))
        ## Write table 
        with open(f'./Data/Database/{match.group(2)}.pkl', 'wb') as f:
            pickle.dump(df, f)

Match groups : ('@standardchartered_10-24-2025_23:57:16', '@standardchartered', '10-24-2025', '23:57:16')
Match groups : ('@rhbgroup_10-24-2025_23:47:16', '@rhbgroup', '10-24-2025', '23:47:16')
Match groups : ('@GXBank_10-24-2025_23:15:04', '@GXBank', '10-24-2025', '23:15:04')
Match groups : ('@jpmorgan_10-24-2025_23:26:04', '@jpmorgan', '10-24-2025', '23:26:04')
Match groups : ('@AFFIN_10-24-2025_22:24:16', '@AFFIN', '10-24-2025', '22:24:16')
Match groups : ('@uob_10-25-2025_00:05:48', '@uob', '10-25-2025', '00:05:48')
Match groups : ('@MyBoostApp_10-24-2025_22:49:47', '@MyBoostApp', '10-24-2025', '22:49:47')
Match groups : ('@HSBC_MY_10-24-2025_23:18:18', '@HSBC_MY', '10-24-2025', '23:18:18')
Match groups : ('@smbcgroup_10-24-2025_23:57:52', '@smbcgroup', '10-24-2025', '23:57:52')
Match groups : ('@hongleongbankmy_10-24-2025_23:16:41', '@hongleongbankmy', '10-24-2025', '23:16:41')
Match groups : ('@BankofAmerica_10-24-2025_22:31:34', '@BankofAmerica', '10-24-2025', '22:31:34')
Match 

In [393]:
overview_df
for idx, row_dict in overview_df.to_dict(orient='index').items():
    print(row_dict['ID'])

@AFFIN
@AllianceBankMY
@AmBankTV
@bangkokbankchannel
@BankofAmerica
@BankofChinaManila
@labanquedunmondequichange
@MyBoostApp
@CIMBGroupHoldingsBhd
@Citi
@DeutscheBank
@GXBank
@hongleongbankmy
@HSBC_MY
@jpmorgan
@maybankvideos
@MizuhoAmericas
@MUFGBankChannel
@channelocbc
@PublicBankGroup
@rhbgroup
@standardchartered
@smbcgroup
@uob


In [396]:
pd.concat([overview_df.head(), overview_df.tail()], axis='columns')

Unnamed: 0,ID,mail,Scraped_DateTime,phone,language,privacy_public,info_outline,person_radar,my_videos,trending_up,ID.1,mail.1,Scraped_DateTime.1,phone.1,language.1,privacy_public.1,info_outline.1,person_radar.1,my_videos.1,trending_up.1
0,@AFFIN,,2025-10-23 19:19:33.602173,,www.youtube.com/@AFFIN,Malaysia,Joined 18 Jun 2014,4.9K subscribers,325 videos,"15,050,294 views",,,NaT,,,,,,,
1,@AllianceBankMY,,2025-10-23 19:19:49.725717,,www.youtube.com/@AllianceBankMY,Malaysia,Joined 15 Aug 2013,8.89K subscribers,398 videos,"55,406,213 views",,,NaT,,,,,,,
2,@AmBankTV,,2025-10-23 19:20:05.871238,,www.youtube.com/@AmBankTV,Malaysia,Joined 1 Jul 2013,7.35K subscribers,432 videos,"27,976,034 views",,,NaT,,,,,,,
3,@bangkokbankchannel,,2025-10-23 19:20:22.260778,,www.youtube.com/@bangkokbankchannel,Thailand,Joined 17 Apr 2012,28.8K subscribers,124 videos,"8,970,431 views",,,NaT,,,,,,,
4,@BankofAmerica,,2025-10-23 19:20:38.180865,,www.youtube.com/@BankofAmerica,United States,Joined 18 Mar 2006,122K subscribers,169 videos,"788,249 views",,,NaT,,,,,,,
19,,,NaT,,,,,,,,@PublicBankGroup,,2025-10-23 19:24:59.020736,,www.youtube.com/@PublicBankGroup,Malaysia,Joined 5 Jun 2015,17.9K subscribers,217 videos,"4,743,003 views"
20,,,NaT,,,,,,,,@rhbgroup,,2025-10-23 19:25:15.063088,,www.youtube.com/@rhbgroup,Malaysia,Joined 30 Jan 2010,34.8K subscribers,620 videos,"309,416,389 views"
21,,,NaT,,,,,,,,@standardchartered,,2025-10-23 19:25:30.991864,,www.youtube.com/@standardchartered,United Kingdom,Joined 25 May 2009,56.9K subscribers,"1,711 videos","158,301,441 views"
22,,,NaT,,,,,,,,@smbcgroup,,2025-10-23 19:25:46.879627,,www.youtube.com/@smbcgroup,Japan,Joined 3 Oct 2023,371 subscribers,22 videos,"14,889 views"
23,,,NaT,,,,,,,,@uob,,2025-10-23 19:26:02.841470,,www.youtube.com/@uob,Singapore,Joined 24 Nov 2012,29.2K subscribers,"1,286 videos","86,088,593 views"


## Scraping a video for likes comments views 

In [2]:
import re
def correct_number(number_string):
    if re.search(pattern=r"^\d{1,3}(?:,\d{3})*(?:\.\d*)?$", string=number_string):
        temp = number_string.replace(',', '')
        good_float = int(temp)
    return good_float

correct_number('1,000')

1000

## Working 
> Only works for primary OP poster

In [3]:
## Disabled comments
url = "https://www.youtube.com/watch?v=3RKCWeOpip0"

## Normal
# url = r"https://www.youtube.com/watch?v=sIYNr_B1TEQ"

## Descriptionless video
# url = r"https://www.youtube.com/watch?v=xMBJOEU29Zw"

## Zero Comments
# url = r"https://www.youtube.com/watch?v=QjXMxlR8zKo"



service = Service("/usr/local/bin/chromedriver")
driver = webdriver.Chrome(service=service)
driver.get(url)

# (Optional) move browser window using pyautogui
pyautogui.hotkey('ctrl', 'alt', 'left')

wait = WebDriverWait(driver, 30)

more_info = (By.XPATH, '//tp-yt-paper-button[contains(@id, "expand")]')
class1 = (By.CLASS_NAME, "button style-scope ytd-text-inline-expander")
xpath_2 = (By.XPATH, "//yt-formatted-string[contains(@id, 'info')]")
comments_path = (By.XPATH, "//ytd-comment-view-model[contains(@id, 'comment')]")
reply_comments_path = (By.XPATH, ".//div[contains(@id, 'collapsed-threads')]")
like_button_path = (By.XPATH, "//button[contains(@aria-label, 'like')]")
replies_button = (By.XPATH, "//button[contains(@aria-label, 'replies')]")


MM_collector = {}

# Next we have to wait for the page to load
WebDriverWait(driver, 15).until(
    EC.presence_of_element_located(more_info)
)
# Still a bit of lag though
time.sleep(3)

## ---------------VIEWS_AND_DATE----------------------------
# Now that we know our element exists, we can click it -THIS DOES NOT WORK
# Check which elements are actually interactable
elements = driver.find_elements(*more_info) # use plural
print("---------------VIEWS_AND_DATE----------------------------")
for i, el in enumerate(elements):
    rect = el.rect
    displayed = el.is_displayed()
    print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
    if displayed:
        clickable_element = el

# From above we know the last element is the interactable element we can click to expand : driver.find_elements(*more_info)[-1].click()
# Check if we found any interactable element
if clickable_element:
    # Scroll into view before clicking (good practice)
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", clickable_element)
    clickable_element.click()
    print("‚úÖ Clicked visible 'expand' element.")

# Next we have to explicitly wait for the new content to render
time.sleep(5)
# Next we have to wait for the page to load
wait.until(
    EC.presence_of_element_located(more_info)
)

# Collect all loaded elements
views_and_date = driver.find_elements(*xpath_2)[-1].find_elements(By.XPATH, ".//span[contains(@class, 'style-scope yt-formatted-string')]")
print("---------------VIEWS_AND_DATE 2---------------------------")
if views_and_date:
    for idx, e_child_ele in enumerate(views_and_date, start=1):
        some_info = e_child_ele.text
        print(some_info)
        if idx == 1 and some_info:
            MM_collector.update({'views' : some_info})
        elif idx == 3 and some_info:
            MM_collector.update({'date' : some_info})
else: # For descriptionless videos
    try:
        metadata_block = wait.until(EC.presence_of_element_located(
            (By.XPATH, "//ytd-watch-metadata")
        ))
    
        # Search for views and date strings inside that block
        info_spans = metadata_block.find_elements(
            By.XPATH, ".//span[contains(@class, 'style-scope yt-formatted-string')]"
        )
    
        print("---------------VIEWS_AND_DATE----------------------------")
        for idx, span in enumerate(info_spans, start=1):
            text = span.text.strip()
            if "views" in text:
                MM_collector["views"] = text
            elif any(k in text.lower() for k in ["ago", "202", "201", "2025"]):
                MM_collector["date"] = text
            print(f"{idx:02d}: {text}")
    except Exception as e:
        print("‚ùå Could not extract metadata:", e)


## --------------- TITLE ---------------------------------
title = driver.find_elements(By.XPATH, "//yt-formatted-string[@title]")
print("--------------- TITLE ---------------------------------")
for i, el in enumerate(title):
    rect = el.rect
    displayed = el.is_displayed()
    print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
    if displayed:
        # Get the first visible element
        clickable_element = el
        break
MM_collector.update({'title' :  clickable_element.get_attribute('title')})

## --------------- LIKE-BUTTON ----------------------------
# First grab all buttons with aria-label attribute
like_button = driver.find_elements(*like_button_path)
print("--------------- LIKE-BUTTON ---------------------------------")
for i, el in enumerate(like_button):
    rect = el.rect
    displayed = el.is_displayed()
    print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
    if displayed:
        clickable_element = el
        break

# Then filter with regex
pattern = re.compile(r"^like.*", re.IGNORECASE)
like_info_match = pattern.search(string=clickable_element.get_attribute("aria-label"))
if like_info_match:
    like_string = clickable_element.get_attribute('aria-label')
    print(like_string)
    thousand_comma_separation_pattern = re.compile(r"\d{1,3}(?:,\d{3})*(?:\.\d*)?", re.IGNORECASE)
    number = thousand_comma_separation_pattern.search(string=like_string)
    print(f"Here is the like-count : {correct_number(number.group(0))}")
    MM_collector.update({'likes' : correct_number(number.group(0))})

##---------------DESCRIPTION ----------------------------
# Inspect which elements are interactable
description = driver.find_elements(By.XPATH, "//div[contains(@id, 'expanded')]") # First path
for i, el in enumerate(description):
    rect = el.rect
    displayed = el.is_displayed()
    print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
    if displayed:
        clickable_element = el

try:
    # Use relative ".//" to search within nested elements/descendants 'under' descripton[-1]
    many_spans = clickable_element.find_element(By.XPATH, ".//yt-attributed-string[contains(@class, 'ytd-text-inline-expander')]")\
                                  .find_elements(By.XPATH, ".//span[contains(@class, 'yt-core-attributed-string--link-inherit-color')]") # third path
    
    description_list = []
    count = 0
    for e_span in many_spans:
        clean_str = e_span.text.strip()
        description_list.append(clean_str)
        description_string = '\n'.join(description_list)
    
        count += 1
    
    print(f"This is the description:\n{description_string}")
except NoSuchElementException:
    description_string = 'No description has been added to this video'
    
MM_collector.update({'description' : description_string, 
                     'MM_timestamp' : datetime.now().strftime("%m/%d/%Y %H:%M:%S")
                    })

max_num_of_scrolls = None
num_of_scrolls = 0
pause_time=2
max_attempts=2

last_height = driver.execute_script("return document.documentElement.scrollHeight")
same_height_attempts = 0
print("üîÑ Starting infinite scroll...")

while same_height_attempts < max_attempts:
    if num_of_scrolls == max_num_of_scrolls:
        print(f"Max num of scrolls reached. Breaking out.")
        break
    # Scroll to the bottom
    # driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
    
    # Wait for page to load new items
    time.sleep(pause_time)
    
    # (Optional) move browser window using pyautogui
    pyautogui.hotkey('ctrl', 'alt', 'left')
    
    # Wait for page to load new items
    time.sleep(pause_time)
    
    new_height = driver.execute_script("return document.documentElement.scrollHeight")
    
    # Wait for page to load new items
    time.sleep(pause_time)
    
    if new_height == last_height:
        same_height_attempts += 1
        print(f"‚ö†Ô∏è No new content... ({same_height_attempts}/{max_attempts})")
    else:
        same_height_attempts = 0
        last_height = new_height
        print("‚úÖ New content loaded.")
        num_of_scrolls += 1


# Collect all loaded comments
COMMENT_DICT = []
comments = driver.find_elements(*comments_path)

for el in comments:
    emojies_string = []
    # Get any emojis
    emojies = el.find_elements(By.XPATH, ".//img[@alt]")
    for e_emoji in emojies:
        print(e_emoji.get_attribute('alt'))
        actual_emoji = e_emoji.get_attribute('alt')
        emojies_string.append(actual_emoji)
    # Get the replies 
    replies = el.find_elements(*reply_comments_path)
    # print(replies.text)
    small_comment_dict = {}
    # Process the text
    li = el.text.split('\n')
    lit = list(filter(lambda x : not re.search(pattern=r"^Pinned", string=x), li))

    # Deal with unliked comments
    if not re.search(pattern=r'^\d+$', string=lit[-2]):
        print("Unliked comment")
        lit.insert(-1, '0')
    else:
        pass
    print(lit)
        
        
    poster, comment_age, *comment_text_content, like_count, reply = lit
    full_comment = ' '.join(comment_text_content)
    emoji_comment = ' '.join(emojies_string)
    full_comment += emoji_comment
    small_comment_dict.update({'poster_OP' : poster,
                               'comment_age' : comment_age,
                               'full_comment' : full_comment,
                               'like_count' : like_count
                              })
    COMMENT_DICT.append(small_comment_dict)
    print("-----")

# Done can exit window
driver.quit()

# pprint(MM_collector)
# pprint(COMMENT_DICT)

NameError: name 'Service' is not defined

In [4]:
pd.DataFrame(data=COMMENT_DICT)

NameError: name 'pd' is not defined

In [5]:
pd.DataFrame.from_dict(data=pd.Series(MM_collector)).T

NameError: name 'pd' is not defined

In [674]:


import pandas as pd
import numpy as np

data = {'A': [1, 2, 3], 'B': [10, 20, 30]}
df = pd.DataFrame(data)
display(df)

# Apply a function (e.g., sum) along columns
column_sums = df.apply(np.sum, axis=0)
print("Column sums:\n")
display(column_sums)

Unnamed: 0,A,B
0,1,10
1,2,20
2,3,30


Column sums:



A     6
B    60
dtype: int64

In [867]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

driver = webdriver.Chrome()
driver.get("https://www.youtube.com/watch?v=dQw4w9WgXcQ")

# Wait briefly for the page to load
time.sleep(3)

try:
    # Works for watch pages too, sometimes visible in overlay
    duration_element = driver.find_element(
        By.CSS_SELECTOR, 'span.ytp-time-duration'
    )
    print("Video length:", duration_element.text)
except Exception as e:
    print("Could not find duration:", e)

driver.quit()


Video length: 3:33


## Function to create databases

In [1]:
import re
from dateutil.parser import parse
International_date = r"\d{1,2},?\s+[A-Za-z]{3,9},?\s+\d{4}"
US_date = r"[A-Za-z]{3,9},?\s+\d{1,2},?\s+\d{4}"

tc = ['12 Sept, 2023', 'Mar 14 2025', '14 September 2023', 'Sep 1, 2025', 'Premiered on Jan 13 2025', 'Premiered Mar 19, 2024']
for e_tc in tc:
    match = re.search(pattern=rf'\b(?:{International_date}|{US_date})\b$', string=e_tc) #[A-Z-a-z\s]*[^\w]*
    print(match.group(), parse(match.group()).date())
    

print(rf'\b(?:{International_date}|{US_date})\b')

12 Sept, 2023 2023-09-12
Mar 14 2025 2025-03-14
14 September 2023 2023-09-14
Sep 1, 2025 2025-09-01
Jan 13 2025 2025-01-13
Mar 19, 2024 2024-03-19
\b(?:\d{1,2},?\s+[A-Za-z]{3,9},?\s+\d{4}|[A-Za-z]{3,9},?\s+\d{1,2},?\s+\d{4})\b


In [2]:
import re
def correct_number(number_string):
    if re.search(pattern=r"^\d{1,3}(?:,\d{3})*(?:\.\d*)?$", string=number_string):
        temp = number_string.replace(',', '')
        good_float = int(temp)
    return good_float

correct_number('1,000')

1000

In [3]:
import sys
sys.path.append('/Users/Malcolm/Documents/Helper Functions/')
import Read_n_Write as RW # We won't be using the write_list_into_txt

In [4]:
import os
import time 
from selenium import webdriver 
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import subprocess
import pyautogui
# from tabula import read_pdf
# import camelot
import pickle # Save the scraped list of dataframes df into a pick
import time 
from datetime import datetime
import pandas as pd
now = datetime.now()


In [5]:
## Create function 
# Call 
from dateutil.parser import parse
from pytube import YouTube

def scrape_into_2_databases(url_ID, is_print=False):
    """
    """

    url = fr"https://www.youtube.com/watch?v={url_ID}"
    yt = YouTube(url)

    service = Service("/usr/local/bin/chromedriver")
    driver = webdriver.Chrome(service=service)
    driver.get(url)
    
    # (Optional) move browser window using pyautogui
    pyautogui.hotkey('ctrl', 'alt', 'left')
    
    wait = WebDriverWait(driver, 30)
    
    more_info = (By.XPATH, '//tp-yt-paper-button[contains(@id, "expand")]')
    class1 = (By.CLASS_NAME, "button style-scope ytd-text-inline-expander")
    xpath_2 = (By.XPATH, "//yt-formatted-string[contains(@id, 'info')]")
    comments_path = (By.XPATH, "//ytd-comment-view-model[contains(@id, 'comment')]")
    reply_comments_path = (By.XPATH, ".//div[contains(@id, 'collapsed-threads')]")
    like_button_path = (By.XPATH, "//button[contains(@aria-label, 'like')]")
    replies_button = (By.XPATH, "//button[contains(@aria-label, 'replies')]")
    
    
    MM_collector = {}
    COMMENT_DICT = []

    
    # Next we have to wait for the page to load
    try: # Firstly check if the video is NOT Private

        # Next we have to wait for the page to load
        wait.until(
            EC.presence_of_element_located(xpath_2)
        )

    except TimeoutException: # Handle if it is Private
        try:
            wait.until(
                EC.presence_of_element_located((By.XPATH, "//yt-button-renderer[contains(@id, 'button')]"))
            )
             # Still a bit of lag though
            time.sleep(3)
            sign_in_btn = driver.find_elements(By.XPATH, "//yt-button-renderer[contains(@id, 'button')]") 
            for i, el in enumerate(sign_in_btn):
                rect = el.rect
                displayed = el.is_displayed()
                if is_print:
                    print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
                if displayed:
                    clickable_element = el
                    
            if clickable_element:
                print('A private video!!')
                MM_collector.update({'video_length' : None,
                                     'date': pd.NaT,
                                     'views':pd.NA,	
                                     'title':None,	
                                     'likes':pd.NA,	
                                     'description':None,
                                     'MM_timestamp':pd.NaT,
                                     'is_commented':None
                                    })
            return pd.DataFrame.from_dict(data=pd.Series(MM_collector)).T, pd.DataFrame(data=COMMENT_DICT)
        except TimeoutException:
            driver.quit()
            time.sleep(60)
            scrape_into_2_databases(url_ID, is_print=False)
            
        
        
    # Still a bit of lag though
    time.sleep(3)

    ## ---------------VIDEO LENGTH----------------------------
    print('## ---------------VIDEO LENGTH----------------------------')
    try:
        # Works for watch pages too, sometimes visible in overlay
        duration_element = driver.find_element(
            By.CSS_SELECTOR, 'span.ytp-time-duration'
        )
        print("Video length:", duration_element.text)
        if duration_element.text:
            MM_collector.update({'video_length' : duration_element.text })
        else:
             MM_collector.update({'video_length' : yt.length})
    except Exception as e:
        print("Could not find duration:", e)
    
    ## ---------------VIEWS_AND_DATE----------------------------
    # Now that we know our element exists, we can click it -THIS DOES NOT WORK
    # Check which elements are actually interactable
    elements = driver.find_elements(*more_info) # use plural
    print("---------------VIEWS_AND_DATE----------------------------")
    
    for i, el in enumerate(elements):
        rect = el.rect
        displayed = el.is_displayed()
        if is_print:
            print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
        if displayed:
            clickable_element = el
    
    # From above we know the last element is the interactable element we can click to expand : driver.find_elements(*more_info)[-1].click()
    # Check if we found any interactable element
    if clickable_element:
        # Scroll into view before clicking (good practice)
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", clickable_element)
        clickable_element.click()
        print("‚úÖ Clicked visible 'expand' element.")
    
    # Next we have to explicitly wait for the new content to render
    time.sleep(5)
    # Next we have to wait for the page to load
    wait.until(
        EC.presence_of_element_located(more_info) 
    )
    # Next we have to explicitly wait for the new content to render
    time.sleep(3)
    
    # Collect all loaded elements
    views_and_date = driver.find_elements(*xpath_2)[-1].find_elements(By.XPATH, ".//span[contains(@class, 'style-scope yt-formatted-string')]")
    print("---------------VIEWS_AND_DATE 2---------------------------")
    International_date = r"\d{1,2},?\s+[A-Za-z]{3,9},?\s+\d{4}"
    US_date = r"[A-Za-z]{3,9},?\s+\d{1,2},?\s+\d{4}"
    if views_and_date:
        
        for idx, e_child_ele in enumerate(views_and_date, start=1):
            some_info = e_child_ele.text
            views_match = re.search(pattern=r"^\d{1,3}(?:,\d{3})*\s+(?:\w*)?$", string=some_info)
            date_match = re.search(pattern=rf'\b(?:{International_date}|{US_date})\b$', string=some_info) #[A-Z-a-z\s]*[^\w]*
            print(some_info)
            if views_match: # idx == 1 and some_info
                MM_collector.update({'views' : views_match.group()})
            elif date_match:
                MM_collector.update({'date' : date_match.group()})
    else: # For descriptionless videos
        try:
            metadata_block = wait.until(EC.presence_of_element_located(
                (By.XPATH, "//ytd-watch-metadata")
            ))
        
            # Search for views and date strings inside that block
            info_spans = metadata_block.find_elements(
                By.XPATH, ".//span[contains(@class, 'style-scope yt-formatted-string')]"
            )
        
            print("---------------VIEWS_AND_DATE for basic videos----------------------------")
            for idx, span in enumerate(info_spans, start=1):
                some_info = span.text.strip()
                views_match = re.search(pattern=r"^\d{1,3}(?:,\d{3})*\s+(?:\w*)?$", string=some_info)
                date_match = re.search(pattern=rf'\b(?:{International_date}|{US_date})\b$', string=some_info) #[A-Z-a-z\s]*[^\w]*
                
                if views_match: #"views" in text
                    MM_collector.update({'views' : views_match.group()})
                    print(f"{idx:02d}: {views_match.group(0)}")
                elif date_match: 
                    MM_collector.update({'date' : date_match.group()})
                    print(f"{idx:02d}: {date_match.group(0)}")
                elif any(k in some_info.lower() for k in ["ago", "202", "201", "2025"]):
                    MM_collector["date"] = some_info
                    pass
                    print(f"{idx:02d}: {some_info}")
                  
        except Exception as e:
            print("‚ùå Could not extract metadata:", e)

    ## --------------- TITLE ---------------------------------
    title = driver.find_elements(By.XPATH, "//yt-formatted-string[@title]")
    for i, el in enumerate(title):
        rect = el.rect
        displayed = el.is_displayed()
        if is_print:
            print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
        if displayed:
            # Get the first visible element
            clickable_element = el
            break
    MM_collector.update({'title' :  clickable_element.get_attribute('title')})
    ## ---------------LIKE-BUTTON ----------------------------
    # First grab all buttons with aria-label attribute
    like_button = driver.find_elements(*like_button_path)
    for i, el in enumerate(like_button):
        rect = el.rect
        displayed = el.is_displayed()
        if is_print:
            print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
        if displayed:
            clickable_element = el
            break
    
    # Then filter with regex
    pattern = re.compile(r"^like.*", re.IGNORECASE)
    like_info_match = pattern.search(string=clickable_element.get_attribute("aria-label"))
    if like_info_match:
        like_string = clickable_element.get_attribute('aria-label')
        thousand_comma_separation_pattern = re.compile(r"\d{1,3}(?:,\d{3})*(?:\.\d*)?", re.IGNORECASE)
        number = thousand_comma_separation_pattern.search(string=like_string)
        print(f"Here is the like-count : {correct_number(number.group(0))}")
        MM_collector.update({'likes' : correct_number(number.group(0))})
    else: # Need to deal with the case that the video has no likes
        MM_collector.update({'likes' : 0})
    
    ##---------------DESCRIPTION ----------------------------
    # Inspect which elements are interactable
    description = driver.find_elements(By.XPATH, "//div[contains(@id, 'expanded')]") # First path
    for i, el in enumerate(description):
        rect = el.rect
        displayed = el.is_displayed()
        if is_print:
            print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
        if displayed:
            clickable_element = el

    try:
        # Use relative ".//" to search within nested elements/descendants 'under' descripton[-1]
        many_spans = clickable_element.find_element(By.XPATH, ".//yt-attributed-string[contains(@class, 'ytd-text-inline-expander')]")\
                                      .find_elements(By.XPATH, ".//span[contains(@class, 'yt-core-attributed-string--link-inherit-color')]") # third path
        
        description_list = []
        count = 0
        for e_span in many_spans:
            clean_str = e_span.text.strip()
            description_list.append(clean_str)
            description_string = '\n'.join(description_list)
            count += 1
    except NoSuchElementException:
        description_string = 'No description has been added to this video'
    
    print(f"This is the description:\n{description_string}")
    MM_collector.update({'description' : description_string,
                         'MM_timestamp' : datetime.now().strftime("%m/%d/%Y %H:%M:%S")
                        })
    # ---------- SCROLLING -------------------------
    max_num_of_scrolls = None
    num_of_scrolls = 0 # Track the num of scrolls AND HEIGHT REFRESHES/CHANGE
    pause_time = 2
    max_attempts = 1
    
    last_height = driver.execute_script("return document.documentElement.scrollHeight")
    same_height_attempts = 0
    
    # Track how many comment elements are present
    last_comment_count = 0
    
    print("üîÑ Starting infinite scroll...")
    
    is_scrape_comments = True
    
    while same_height_attempts < max_attempts:
        if max_num_of_scrolls and num_of_scrolls >= max_num_of_scrolls:
            print("üõë Max num of scrolls reached. Breaking out.")
            break
    
        # Scroll to bottom
        driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
        time.sleep(pause_time)
    
        # Optional: move window back into view (if YouTube switches focus)
        try:
            pyautogui.hotkey('ctrl', 'alt', 'left')
            time.sleep(0.5)
        except Exception:
            pass
    
        # Check how many comments are currently loaded
        comment_elements = driver.find_elements(By.XPATH, "//ytd-comment-view-model[contains(@id, 'comment')]")
        current_comment_count = len(comment_elements)
    
        print(f"üßÆ Loaded comments so far: {current_comment_count}")
    
        if current_comment_count == last_comment_count:
            same_height_attempts += 1
            print(f"‚ö†Ô∏è No new comments loaded... ({same_height_attempts}/{max_attempts})")
        else:
            print(f"‚úÖ New comments loaded: +{current_comment_count - last_comment_count}")
            same_height_attempts = 0
            last_comment_count = current_comment_count
            num_of_scrolls += 1
    
        # Optional: fallback to scroll height comparison as backup
        new_height = driver.execute_script("return document.documentElement.scrollHeight")
        if new_height == last_height:
            same_height_attempts += 1
        else:
            last_height = new_height

        
        is_scrape_comments = True
        if is_scrape_comments : # WOrk around - Set True early 
            MM_collector.update({'is_commented': True})

        print(f"Current number of scrolls {num_of_scrolls}")
        if num_of_scrolls == 0:
            print('We have scrolled to the bottom, but the height did not change!')
        
            try:
                # ‚úÖ Wait for the comment section header to appear
                wait = WebDriverWait(driver, 10)
                clickable_element = wait.until(
                    EC.presence_of_element_located((By.XPATH, "//ytd-comments-header-renderer[contains(@class, 'style-scope')]"))
                )
                print("‚úÖ Comment header detected.")
            except TimeoutException:
                print("‚ö†Ô∏è No comment header found ‚Äî likely disabled comments or comments not loaded yet.")
                MM_collector.update({'is_commented': None})
                is_scrape_comments = False
                clickable_element = None
        
            if clickable_element:
                print("-----------COMMENTS-----------------------")
        
                # Print child elements to inspect structure
                all_yt_strings = clickable_element.find_elements(By.XPATH, ".//yt-formatted-string[contains(@class, 'count-text')]")
                for idx, e in enumerate(all_yt_strings):
                    print(f"[{idx}] -> '{e.text}' | class='{e.get_attribute('class')}' | id='{e.get_attribute('id')}'")
        
                # --- Detect comment count text ---
                hope_text_list = clickable_element.find_elements(
                    By.XPATH, ".//yt-formatted-string[contains(@class, 'count-text')]"
                )
        
                if hope_text_list:
                    for e_text in hope_text_list:
                        text_clean = e_text.text.strip().replace('\u202f', ' ').replace('\xa0', ' ')
                        print(f"üîç Comment count text: '{text_clean}'")
        
                        match = re.search(r'^(\d+)\s*[^\w]*[Cc]omments', text_clean)
                        if match:
                            count = match.group(1)
                            print(f"‚úÖ Matched comment count: {count}")
                            if count == '0':
                                print('üö´ Uncommented video')
                                MM_collector.update({'is_commented': False})
                                is_scrape_comments = False
                            else:
                                # Some reason not working 
                                print('üí¨ Commented video')
                                MM_collector.update({'is_commented': True})
                            break
                else:
                    print('‚ùå Disabled comments video')
                    MM_collector.update({'is_commented': None})
                    is_scrape_comments = False
                    
    
    COMMENT_DICT = []
    if is_scrape_comments:
        
        # Collect all loaded comments
        comments = driver.find_elements(*comments_path)
        
        for el in comments:
            emojies_string = []
            # Get any emojis
            emojies = el.find_elements(By.XPATH, ".//img[@alt]")
            for e_emoji in emojies:
                # print(e_emoji.get_attribute('alt'))
                actual_emoji = e_emoji.get_attribute('alt')
                emojies_string.append(actual_emoji)
            # Get the replies 
            replies = el.find_elements(*reply_comments_path)
            # print(replies.text)
            small_comment_dict = {}
            # Process the text
            li = el.text.split('\n')
            lit = list(filter(lambda x : not re.search(pattern=r"^Pinned", string=x), li))
        
            # Deal with unliked comments
            found_num = re.search(pattern=r'^\d{1,3}$', string=lit[-2]) # Checks if the second last element is a number
            if not found_num:
                print("Unliked comment")
                lit.insert(-1, '0')
            else:
                pass
            print(lit)
            poster, comment_age, *comment_text_content, like_count, reply = lit
            full_comment = ' '.join(comment_text_content)
            emoji_comment = ' '.join(emojies_string)
            full_comment += emoji_comment
            small_comment_dict.update({'poster_OP' : poster,
                                       'comment_age' : comment_age,
                                       'full_comment' : full_comment,
                                       'like_count' : like_count
                                      })
            COMMENT_DICT.append(small_comment_dict)
            # print("-----")
        # # Done can exit window
        driver.quit()
        return pd.DataFrame.from_dict(data=pd.Series(MM_collector)).T, pd.DataFrame(data=COMMENT_DICT)
    else:
        # # Done can exit window
        driver.quit()
        return pd.DataFrame.from_dict(data=pd.Series(MM_collector)).T, pd.DataFrame(data=COMMENT_DICT)
        
## Invoke 
# MM_df, comment_df = scrape_into_2_databases(url_ID='D_O6KXM7GwY') ## zOufUbKXjTY

## Data cleaning

In [6]:
from datetime import datetime, timedelta
import re
from datetime import timedelta
from dateutil import parser

def parse_relative_time(text):
    """
    Auxillary function 
    """
    # Match patterns like "25 minutes ago"
    match = re.match(r'(\d+)\s+(\w+)', text)
    if not match:
        return pd.NaT  # If format doesn't match, return missing
    
    value, unit = match.groups()
    value = int(value)
    unit = unit.lower()
    
    # Handle plural forms
    if unit.endswith('s'):
        unit = unit[:-1]
    
    # Convert to timedelta (approximate for months/years)
    if unit == 'year':
        return timedelta(days=value * 365)
    elif unit == 'month':
        return timedelta(days=value * 30)
    elif unit == 'week':
        return timedelta(weeks=value)
    elif unit == 'day':
        return timedelta(days=value)
    elif unit == 'hour':
        return timedelta(hours=value)
    elif unit == 'minute':
        return timedelta(minutes=value)
    elif unit == 'second':
        return timedelta(seconds=value)
    else:
        return pd.NaT

# invoke
parse_relative_time('25 minutes ago')

datetime.timedelta(seconds=1500)

In [7]:
## Data clea
def cast_datatypes(MM_df, comment_df):
    # print(f"is_commented_value : {is_commented}")
    try: # Outer try block: might encounters if video is private
        MM_df.replace({ 'views': { r'^([\d\,]+)\s(\w+)': r'\1' } }, regex=True, inplace=True)
        try: # Put app apply functions in try block to prevent errors on reruns
            # Views
            MM_df['views'] = MM_df['views'].apply(correct_number) 
            # Date
            MM_df['date'] =  MM_df['date'].apply(parse) #pd.to_datetime(MM_df['date'], format='%d %b %Y') 
            # MM_timestamp
            MM_df['MM_timestamp'] = MM_df['MM_timestamp'].apply(parse)
    
            if MM_df.loc[0, 'is_commented']:
                try: # There can be instances like 'rhPIKZgQVZo'
                    comment_df['like_count'] = comment_df['like_count'].astype(int)
                except KeyError:
                    pass
                pass
        except TypeError: # TypeError: expected string or bytes-like object
            pass
        finally:
            # Views
            MM_df['views'] = MM_df['views'].astype(int) # pd.to_numeric(MM_df['views'])
            # Likes
            MM_df['likes'] =  MM_df['likes'].astype(int)
        
        if MM_df.loc[0, 'is_commented']:
            try: # There can be instances 'rhPIKZgQVZo'
                comment_df['like_count'] = comment_df['like_count'].astype(int)
            except KeyError:
                MM_df.loc[0, 'is_commented'] = 'Type 1 Error'
                pass
        else:
            pass
    except ValueError:
        pass


    return MM_df, comment_df

In [8]:
# # # Invoke 
# # lOMrlMb611s
# a, b = scrape_into_2_databases(url_ID='D_O6KXM7GwY', is_print=True) #sIYNr_B1TEQ #QjXMxlR8zKo #3RKCWeOpip0 
# MM_df, comment_df = cast_datatypes(a, b)

## Attempt

In [9]:
# Reading the file - unpickling
import pickle
with open('./Data/overview_youtube_2.pkl', 'rb') as fp:
    overview_df = pickle.load(fp) # New and improved 
overview_df

Unnamed: 0,ID,mail,Scraped_DateTime,phone,language,privacy_public,info_outline,person_radar,my_videos,Videos_Not_Shorts,trending_up
0,@AFFIN,,2025-10-23 19:19:33.602173,,www.youtube.com/@AFFIN,Malaysia,Joined 18 Jun 2014,4.9K subscribers,325 videos,221,"15,050,294 views"
1,@AllianceBankMY,,2025-10-23 19:19:49.725717,,www.youtube.com/@AllianceBankMY,Malaysia,Joined 15 Aug 2013,8.89K subscribers,398 videos,356,"55,406,213 views"
2,@AmBankTV,,2025-10-23 19:20:05.871238,,www.youtube.com/@AmBankTV,Malaysia,Joined 1 Jul 2013,7.35K subscribers,432 videos,362,"27,976,034 views"
3,@bangkokbankchannel,,2025-10-23 19:20:22.260778,,www.youtube.com/@bangkokbankchannel,Thailand,Joined 17 Apr 2012,28.8K subscribers,124 videos,98,"8,970,431 views"
4,@BankofAmerica,,2025-10-23 19:20:38.180865,,www.youtube.com/@BankofAmerica,United States,Joined 18 Mar 2006,122K subscribers,169 videos,102,"788,249 views"
5,@BankofChinaManila,,2025-10-23 19:20:54.087877,,www.youtube.com/@BankofChinaManila,Philippines,Joined 19 Oct 2020,461 subscribers,8 videos,8,"38,112 views"
6,@labanquedunmondequichange,,2025-10-23 19:21:10.391375,,www.youtube.com/@labanquedunmondequichange,France,Joined 28 Jun 2006,57.9K subscribers,"2,815 videos",2579,"220,818,912 views"
7,@MyBoostApp,,2025-10-23 19:21:26.614858,,www.youtube.com/@MyBoostApp,Malaysia,Joined 15 Nov 2016,70.2K subscribers,611 videos,350,"106,143,812 views"
8,@CIMBGroupHoldingsBhd,,2025-10-23 19:22:00.576527,,www.youtube.com/@CIMBGroupHoldingsBhd,Malaysia,Joined 7 Sept 2010,67.6K subscribers,517 videos,468,"175,834,466 views"
9,@Citi,,2025-10-23 19:22:17.412780,,www.youtube.com/@Citi,,Joined 24 Oct 2005,94.2K subscribers,"1,680 videos",1663,"176,269,397 views"


In [10]:
# Convert the text data into Python list
# This data is a textfile of YT_Handle.pkl - if appeneded this YT_CHannel id done
memoized_channels = RW.read_txt_into_list('memo_database_scrape') # touch this file beforehand
memoized_IDs = RW.read_txt_into_list('memo_VideoID_scrape')
## Test one
import os
import re
from tqdm import tqdm
directory = r"./Data/Database/"
with open("./Data/memo_database_scrape.txt", "a") as writer: # touch this file beforehand
    for e_pickle in os.listdir(directory):
        if e_pickle not in memoized_channels:
            match = re.search(pattern=r"^(@\w+)\.pkl", string=e_pickle) # Match the Youtube_Handle; @AFFFIN, exclude hidden files
            if match:
                YT_handle = match.group(1)
                print(f'Starting the process for {YT_handle}')
                fullpath = os.path.normpath(os.path.join(directory, e_pickle))
                with open(fullpath, 'rb') as fp:
                    # Retrieve the dataframe - read it back
                    df = pickle.load(fp)
                    print(f"This {YT_handle}  has {df.shape[0]} video_IDs")


                    ## -------- Unpickle MEMO A ---------------
                    # Pick up where we left off 
                    if os.path.exists(fr'./Data/Lists/MM_temp/{YT_handle}.pkl'):
                        # Reading the file - unpickling
                        with open(fr'./Data/Lists/MM_temp/{YT_handle}.pkl', 'rb') as memo_1:
                            MM_BIG_LIST = pickle.load(memo_1)
                            print(f"Succesfully loaded pickle from : {f'./Data/Lists/MM_temp/{YT_handle}.pkl'}")
                    else:
                        MM_BIG_LIST = [] # To regather the data

                    if os.path.exists(fr'./Data/Lists/COMMENT_temp/{YT_handle}.pkl'):
                        # Reading the file - unpickling
                        with open(fr'./Data/Lists/COMMENT_temp/{YT_handle}.pkl', 'rb') as memo_2:
                            COMMENT_BIG_LIST = pickle.load(memo_2)
                            print(f"Succesfully loaded pickle from : {f'./Data/Lists/COMMENT_temp/{YT_handle}.pkl'}")
                    else:
                        COMMENT_BIG_LIST = [] # To regather the data
                    
                    ## -------- Unpickle MEMO A ---------------

                    progress_count = 0
                    with open("./Data/memo_VideoID_scrape.txt", "a") as small_writer:
                        for idx, row_dict in tqdm(df.to_dict(orient='index').items()):
                            print(f"Currently scraping for {row_dict['Youtube_Video_IDs']}")
                            if row_dict['Youtube_Video_IDs'] not in memoized_IDs:
                                ## Invoke the function to generate 2 dataframes
                                a, b = scrape_into_2_databases(url_ID=row_dict['Youtube_Video_IDs'])
                                MM_df_oneline, COMMENT_df = cast_datatypes(a, b)
                                # # Good so far, we will need to add the primary keys back
                                # display(MM_df_oneline)
                                # display(COMMENT_df)
                
                                # Insert the two series one after the other.
                                MM_df_oneline.insert(loc=0, column='Youtube_Handle', value=row_dict['Youtube_Handle'])
                                MM_df_oneline.insert(loc=0, column='Youtube_Video_IDs', value=row_dict['Youtube_Video_IDs'])
                
                                # Just the ID for the comment_df
                                COMMENT_df.insert(loc=0, column='Youtube_Handle', value=row_dict['Youtube_Handle'])
                                COMMENT_df.insert(loc=0, column='Youtube_Video_IDs', value=row_dict['Youtube_Video_IDs'])
                            
                
                                MM_dict = MM_df_oneline.to_dict(orient='index') # One solitary index
                                COMMENT_dict = COMMENT_df.to_dict(orient='index')
                
                                # Good so far, we will need to add the primary keys back
                                display(MM_df_oneline)
                                display(COMMENT_df)
                                
                                # Write in-disk, across sessions
                                small_writer.write(f"{row_dict['Youtube_Video_IDs']}\n")
                                small_writer.flush()
                                # Write in-memory, within the same session
                                memoized_IDs.append(row_dict['Youtube_Video_IDs'])
                                
                                MM_BIG_LIST.append(MM_dict)
                                COMMENT_BIG_LIST.append(COMMENT_dict)


                                ## -------- Pickle MEMO A ---------------
                                print(f"Saving...")
                                ## Save progrss
                                # Write into a file everytime
                                with open(fr'./Data/Lists/MM_temp/{YT_handle}.pkl', 'wb') as save_1:
                                    pickle.dump(MM_BIG_LIST, save_1)
                                with open(fr'./Data/Lists/COMMENT_temp/{YT_handle}.pkl', 'wb') as save_2:
                                    pickle.dump(COMMENT_BIG_LIST, save_2)
                                ## -------- Pickle MEMO A ---------------
                                 
                                progress_count += 1
                            else:
                                print(f"Skiiping {row_dict['Youtube_Video_IDs']}, already memoized")
                                progress_count += 1
                            print(f"Done {progress_count}/{df.shape[0]}")
        
                    ## Post-processing
                    print(f'Performing post-processing for {YT_handle}')
                    LIST = []
                    for subdict in MM_BIG_LIST:
                        for idx, subdict_2 in subdict.items():
                            LIST.append(subdict_2)
                    MM_dataframe = pd.DataFrame.from_dict(LIST)
        
                    LIST2 = []
                    for subdict in COMMENT_BIG_LIST:
                        for idx, subdict_2 in subdict.items():
                            LIST2.append(subdict_2)
                    COMMENT_dataframe = pd.DataFrame.from_dict(LIST2)
                    
                    ## Write MM_table 
                    with open(fr'./Data/MM_database/{match.group(1)}.pkl', 'wb') as file_1:
                        pickle.dump(MM_dataframe, file_1)
                    ## Write COmment_table 
                    with open(fr'./Data/COMMENT_database/{match.group(1)}.pkl', 'wb') as file_2:
                        pickle.dump(COMMENT_dataframe, file_2) 

                    # Write in-disk, across sessions
                    writer.write(f"{e_pickle}\n")
                    writer.flush()
                    # Write in-memory, within the same session
                    memoized_channels.append(e_pickle)
                    
        else:
            print(f"Skiiping {e_pickle}, already memoized")
            pass            

Skiiping @channelocbc.pkl, already memoized
Skiiping @AllianceBankMY.pkl, already memoized
Skiiping @labanquedunmondequichange.pkl, already memoized
Skiiping @PublicBankGroup.pkl, already memoized
Skiiping @CIMBGroupHoldingsBhd.pkl, already memoized
Skiiping @smbcgroup.pkl, already memoized
Skiiping @BankofChinaManila.pkl, already memoized
Skiiping @GXBank.pkl, already memoized
Skiiping @Citi.pkl, already memoized
Skiiping @HSBC_MY.pkl, already memoized
Skiiping @standardchartered.pkl, already memoized
Skiiping @rhbgroup.pkl, already memoized
Skiiping @jpmorgan.pkl, already memoized
Skiiping @MizuhoAmericas.pkl, already memoized
Skiiping @AmBankTV.pkl, already memoized
Skiiping @BankofAmerica.pkl, already memoized
Skiiping @MyBoostApp.pkl, already memoized
Skiiping @DeutscheBank.pkl, already memoized
Skiiping @AFFIN.pkl, already memoized
Skiiping @maybankvideos.pkl, already memoized
Skiiping @hongleongbankmy.pkl, already memoized
Skiiping @bangkokbankchannel.pkl, already memoized
Skiip

## Pickle Collate

In [11]:
## Read back data
# Takes a while to read the pickle
import pickle
# Reading the file - unpickling

# Pure MM and Contents
MM_dir = r'./Data/MM_database/'
MM_contents = os.listdir(r'./Data/MM_database/')
COMMENT_dir = r'./Data/COMMENT_database/'
COMMENT_contents = os.listdir(r'./Data/COMMENT_database/')

## Temp folders
MM_temp_dir = fr'./Data/Lists/MM_temp/'
MM_temp_contents = os.listdir(MM_temp_dir)
COMMENT_temp_dir = fr"./Data/Lists/COMMENT_temp/"
COMMENT_temp_contents = os.listdir(COMMENT_temp_dir)

print(MM_temp_contents)

COLLATE_MM = pd.DataFrame()
COLLATE_COMMENT = pd.DataFrame()
for file in MM_temp_contents:
    match = re.search(pattern=r"^[^\.]", string=file)
    if match:
        print('Metadata:\n')
        with open(os.path.normpath(os.path.join(MM_temp_dir, file)), 'rb') as fp:
            MM_BIG_LIST = pickle.load(fp) # e_MM_df or MM_BIG_LIST
            # display(e_MM_df) 
            LIST = []
            for subdict in MM_BIG_LIST:
                for idx, subdict_2 in subdict.items():
                    LIST.append(subdict_2)
            MM_dataframe = pd.DataFrame.from_dict(LIST)
            COLLATE_MM = pd.concat([COLLATE_MM, MM_dataframe], axis='index')
            display(pd.concat([MM_dataframe.head(), MM_dataframe.tail()]))

            
            
        print('Comments:\n')
        print(COMMENT_contents)
        with open(os.path.normpath(os.path.join(COMMENT_temp_dir, file)), 'rb') as fp_2:
            COMMENT_BIG_LIST = pickle.load(fp_2)  # e_COMMENT_df or COMMENT_BIG_LIST
            # display(e_COMMENT_df)
            LIST2 = []
            for subdict in COMMENT_BIG_LIST:
                for idx, subdict_2 in subdict.items():
                    LIST2.append(subdict_2)
            COMMENT_dataframe = pd.DataFrame.from_dict(LIST2)
            COLLATE_COMMENT = pd.concat([COLLATE_COMMENT, COMMENT_dataframe], axis='index')
            display(pd.concat([COMMENT_dataframe.head(), COMMENT_dataframe.tail()]))


COLLATE_MM.reset_index(inplace=True, drop=True)
COLLATE_COMMENT.reset_index(inplace=True, drop=True)

# Write into COLLATE folder
with open('./Data/COLLATE/MM.pkl', 'wb') as f:
    pickle.dump(COLLATE_MM, f)
with open('./Data/COLLATE/COMMENT.pkl', 'wb') as f:
    pickle.dump(COLLATE_COMMENT, f)

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']
Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,LiFxYCLSvb4&pp=0gcJCQYKAYcqIYzv,@channelocbc,1:07,90,2025-10-15,OCBC Structured Notes: Customise Your Risk and...,3,Enjoy the flexibility of attaining exposure to...,2025-11-02 22:59:13,False
1,N9SBRNEAnhI,@channelocbc,2:11,514,2025-10-10,OCBC - Banking on Us - Episode 1,16,"While the city sleeps, our teams are wide awak...",2025-11-02 22:59:33,False
2,PLqT8W6JNDY,@channelocbc,1:47,104,2025-10-03,OCBC One Connect - Integrated Ecosystem Suppor...,1,OCBC One Connect - Integrated Ecosystem Suppor...,2025-11-02 22:59:52,False
3,4UBrl92xXOU,@channelocbc,1:59,79932,2025-10-01,Behind the scenes of our 2025 brand campaign ‚Äì...,7,Watch how our latest brand campaign stories ge...,2025-11-02 23:00:12,False
4,cYtlA9Q7UAo,@channelocbc,4:18,178,2025-09-30,OCBCËèØÂÉëÈäÄË°åÂ∏ÇÂ†¥ÁÜ±Èªû: È†êÊúü‰ªäÂπ¥Ê∏õÊÅØ‰∏âÊ¨° (22-09-2025),2,OCBCËèØÂÉëÈäÄË°åË≤°ÂØåÁÆ°ÁêÜÂ∞àÂÆ∂ÂúòÈöäÈ†êÊúüÔºåÁæéÂúãÂ∞±Ê•≠Êï∏ÊìöÁñ≤Âº±ÔºåÂ∞á‰øÉ‰ΩøËÅØÂÑ≤Â±ÄÂä†Âø´Ê∏õÊÅØÔºåÈ†êÊúüËÅØÂÑ≤Â±ÄÂ∞áÊñº...,2025-11-02 23:00:32,False
819,x77sEC98AxI,@channelocbc,1:35,1073,2018-04-17,Gastronomic Adventures featuring Sushi Kimura ...,3,"At Sushi Kimura, expect to enjoy seasonally cu...",2025-11-07 02:15:41,False
820,30w8gUkZEoQ,@channelocbc,3:49,126,2018-04-10,Project Training Wheels S3 E1: On your bikes...,0,A new group of 16 non-cyclists attempt to lear...,2025-11-07 02:15:58,False
821,7gg3e7mlWcM,@channelocbc,1:50,689,2018-04-09,Campus Star Awards 2018: Recognising Our Inter...,0,"Each year, Campus At OCBC appreciates our inte...",2025-11-07 02:16:14,True
822,q81Zpm5oDNQ,@channelocbc,0:20,702,2018-04-09,Bank with your voice today!,1,OCBC Bank is the first in Singapore to let you...,2025-11-07 02:16:33,False
823,AqrZxrIqsJM,@channelocbc,1:14,1725,2018-04-03,Game Changers at OCBC | Group Operations & Tec...,4,"Within our team, it‚Äôs the people that make the...",2025-11-07 02:16:50,False


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,poster_OP,comment_age,full_comment,like_count,Youtube_Handle
0,FmQz15WJkWE,@tinkerlee-r9z,3 weeks ago,BECAREFUL WHAT YOU WISH FOR - CIRCUS ???!...,0,
1,zb3oKcLv1yA,@kayanemahberkah,1 month ago,kak cara pindahin ocbc mobile dari hp lama ke ...,0,
2,Y6GQJUyjDO4,@VikkiNesspara-f2u,1 month ago,happy 60th birthday Singapore üá∏üá¨üá∏üá¨üá∏üá¨Ô∏èüá∏üá¨Ô∏èÔ∏èüá∏üá¨Ô∏èüá∏üá¨...,0,
3,GGbcl2AcSBU,@RisNandar-c9h,2 months ago,OCBC ‚ù§ ‚ù§,0,
4,meamip7zfzI,@BozuBoyYT,11 days ago,The person doing the voice over is so good!,2,
323,mpbVv4swkzs&pp=0gcJCQYKAYcqIYzv,@jaketan5172,3 years ago,Most value-adding contrarian and yet not liste...,0,@channelocbc
324,avodcx-LQpo,@zueanzuean2330,4 years ago,After transfer why didn't have receipt,0,@channelocbc
325,-9TXIjL-hXo,@kokbontan206,2 years ago,ËØ∑ÈóÆÊàë‰ª•ÂâçÊîæÂú®Èì∂Ë°åÁöÑÂ≠òÊ¨æËøòËÉΩÊãøÂõûÊù•ÂêóÔºü,0,@channelocbc
326,NPq2Tr5J76E,@gladeglades6388,1 year ago,If OCBC considered presenting these 2 in publi...,0,@channelocbc
327,7gg3e7mlWcM,@ngankhayseng7905,7 years ago,0:33 (This is a brain) OCBC: This is a brain m...,0,@channelocbc


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,_lMssbgr7gE,@AllianceBankMY,0:39,80,2025-09-01,Young Executives Sustainability Summit 2025,1,We are proud to be the Young Executives Sustai...,2025-11-02 23:35:27,
1,V7Vd7Rs65Rw,@AllianceBankMY,0:15,594450,2025-08-17,THE ULTIMATE ELEVATOR PITCH‚Ñ¢,4,THE ULTIMATE ELEVATOR PITCH‚Ñ¢ sets the stage fo...,2025-11-02 23:36:08,
2,xFBxisokdQ8,@AllianceBankMY,0:15,708492,2025-08-17,THE ULTIMATE ELEVATOR PITCH‚Ñ¢,2,THE ULTIMATE ELEVATOR PITCH‚Ñ¢ sets the stage fo...,2025-11-02 23:36:49,
3,IzJxsEIvNr0,@AllianceBankMY,,4257361,2025-08-17,THE ULTIMATE ELEVATOR PITCH‚Ñ¢,3897,THE ULTIMATE ELEVATOR PITCH‚Ñ¢ sets the stage fo...,2025-11-02 23:37:29,
4,gom4PuhRcow,@AllianceBankMY,7:30,122,2025-07-30,FY25 Highlights Video,5,No description has been added to this video,2025-11-02 23:38:10,
351,lGsm4jqhpnI,@AllianceBankMY,2:05,1039,2013-10-01,Alliance Bank BizSmart Challenge 2013 | Finali...,7,Introducing the finalists:\n1) Bloc Forty Eigh...,2025-11-04 18:30:29,True
352,O3CCSjG0NBI,@AllianceBankMY,2:14,787,2013-10-01,Alliance Bank BizSmart Challenge 2013 | Finali...,6,Introducing the finalists:\n1) Smooshie Juice\...,2025-11-04 18:30:49,True
353,7pB_uwqYGoA,@AllianceBankMY,14:21,347,2013-09-10,"BFM Interviews Steve Miller, Head of Group Bus...",0,"Steve Miller, Head of Group Business Banking, ...",2025-11-04 18:31:08,False
354,Bai4FR7Oc_g,@AllianceBankMY,0:35,1111,2013-08-22,Alliance Bank BizSmart Challenge 2013 | Radio ...,1,Got a great business plan but have yet to actu...,2025-11-04 18:31:28,False
355,-BwwqLthVVg,@AllianceBankMY,1:02,257199,2013-08-15,How the Claypot Tycoon Did It | Alliance Bank ...,0,Got a great business plan but have yet to actu...,2025-11-04 18:31:48,False


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,P6R1IPsSu_o,@AllianceBankMY,@suhanjayalian5044,9 years ago,first,2
1,Eey978-FzNE,@AllianceBankMY,@pointcollect,9 years ago,That's very insightful !,1
2,xcmbKTn-Ipo&pp=0gcJCQYKAYcqIYzv,@AllianceBankMY,@ImMochiAlyyyyy,10 years ago,mingyue xD hahahahha TOO CUTE,0
3,xcmbKTn-Ipo&pp=0gcJCQYKAYcqIYzv,@AllianceBankMY,@useroosstt,10 years ago,Awwhh it's Ming Yue! <3,0
4,JnEjs5YowIg,@AllianceBankMY,@blackcookie1988,9 years ago,"Hi A.Bank, please let me know why I can't tran...",0
128,ejhYRD_UV1Q,@AllianceBankMY,@misserene9,12 years ago,Can't wait to see what Christy Ng Shoes does n...,0
129,ejhYRD_UV1Q,@AllianceBankMY,@MrChongkahtze,11 years ago,Gary Chong and his signature hat behind the sc...,0
130,lGsm4jqhpnI,@AllianceBankMY,@aaronwwsum,12 years ago,Congrats everyone!,0
131,O3CCSjG0NBI,@AllianceBankMY,@aaronwwsum,12 years ago,Congrats and all the best!,0
132,O3CCSjG0NBI,@AllianceBankMY,@elaine7an,12 years ago,Go Smooshieeeee!,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,3S-E702D-c4,@labanquedunmondequichange,0:15,1518580,2025-10-24,Multi √©pargne MA,2,campagne essentielle epargne,2025-11-03 00:37:52,True
1,xMBJOEU29Zw,@labanquedunmondequichange,0:30,652176,2025-10-13,"BNP Paribas - H√° 40 anos, uma gera√ß√£o em movim...",6,No description has been added to this video,2025-11-03 00:38:11,False
2,mdqMhRr_v3U,@labanquedunmondequichange,16:13,672,2025-10-10,UnexpectedPeople #5 : S'engager en tant qu' ac...,1,"Dans ce cinqui√®me √©pisode, rencontrez Juergen ...",2025-11-03 00:38:31,False
3,PptbLdnUaoM,@labanquedunmondequichange,19:18,222,2025-10-10,UnexpectedPeople #5: Getting involved as profe...,0,"In this fifth episode, meet Juergen Petrasch, ...",2025-11-03 00:38:51,False
4,duhODksusCA,@labanquedunmondequichange,0:10,374732,2025-09-22,Banque au quotidien - Wero,3,"R√©alisez des virements rapides et gratuits, sa...",2025-11-03 00:39:11,False
2574,qeUzVF824Yk,@labanquedunmondequichange,2:12,293,2011-08-11,PRECLIDE (english),0,M√©c√©nat climatique : programme de recherche PR...,2025-11-09 03:53:15,False
2575,zbNrhPBUlNg,@labanquedunmondequichange,1:57,342,2011-08-10,AAA : Access to climate Archives despite Asbestos,0,M√©c√©nat climatique : programme de recherche AA...,2025-11-09 03:53:31,False
2576,xv4wxAiu8CE&pp=0gcJCQYKAYcqIYzv,@labanquedunmondequichange,1:56,805,2011-08-10,"AAA, Climate Initiative (french)",1,M√©c√©nat climatique : programme de recherche AA...,2025-11-09 03:53:48,False
2577,H1Trutqy0kQ,@labanquedunmondequichange,3:39,31,2011-08-03,BlogTV: a humanitarian experience in Africa,0,"Asoociation ARU, Action, R√©ussite & Unit√©\nhtt...",2025-11-09 03:54:04,False
2578,pvMXbEBPpxA,@labanquedunmondequichange,3:38,83,2011-08-03,BlogTV : une exp√©rience humaine en Afrique,0,"ARU, Action, R√©ussite & Unit√©, une exp√©rience ...",2025-11-09 03:54:20,False


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,3S-E702D-c4,@labanquedunmondequichange,@mickaellamoureux5392,1 day ago,Accordez nous plut√¥t vos offres sp√©ciales gros...,0
1,oR2zsinBhFA,@labanquedunmondequichange,@BICLHQ1,1 month ago,üòä,1
2,oR2zsinBhFA,@labanquedunmondequichange,@BICLHQ1,1 month ago,üëç,0
3,oR2zsinBhFA,@labanquedunmondequichange,@BICLHQ1,1 month ago,üòä,0
4,i9fdbZ-L_hA,@labanquedunmondequichange,@Angelboy99,1 month ago,Mountain data centers is a good idea but any d...,0
1573,XvcOymspIj8,@labanquedunmondequichange,@83999,4 years ago,Merci √† tous ceux qui aident ces personnes en ...,0
1574,XvcOymspIj8,@labanquedunmondequichange,@ascensi,13 years ago,je dis bravau !,0
1575,7NN2hq53ZEk,@labanquedunmondequichange,@brunodegres3529,7 years ago,Nouvelles technologies >>> plus de 4 mois pou...,0
1576,4uMTFsNBf5g,@labanquedunmondequichange,@chriscastillo7549,10 years ago,Je participais au Jumbo - Run. Une grande sol...,0
1577,GsOYdqkITX0,@labanquedunmondequichange,@latifaelyakoubi1134,4 years ago,Bravo madame,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,7GJgJUQ42Y4,@PublicBankGroup,30:47,765,2025-09-11,PB Scam Rangers Webinar Series: Loan Scam,0,PB Scam Rangers Webinar Series: Loan Scam,2025-11-03 01:16:26,
1,TnqmRE2S_Qs&pp=0gcJCQYKAYcqIYzv,@PublicBankGroup,0:51,374,2025-07-24,PB Idaman: Your Takaful Protection Plan for Yo...,0,"Plan ahead today, so you're prepared for every...",2025-11-03 01:19:13,
2,zidZFWsoWnU&pp=0gcJCQYKAYcqIYzv,@PublicBankGroup,0:45,879,2025-06-04,MyPB App Guide - Card Payments (MyPB App),0,Getting started with MyPB App can seem challen...,2025-11-03 01:19:40,
3,zdE_EIWCG1c,@PublicBankGroup,1:03,256,2025-06-04,MyPB App Guide - Prepaid Top Up (MyPB App),0,Getting started with MyPB App can seem challen...,2025-11-03 01:20:20,
4,tpUXALvJU-k,@PublicBankGroup,1:12,1210,2025-06-04,MyPB App Guide - Step 2 : Getting Started on M...,0,Getting started with MyPB App can seem challen...,2025-11-03 01:21:01,
201,NAoquLRGc-s,@PublicBankGroup,0:35,2465,2016-02-22,Owning a car made easier,0,No description has been added to this video,2025-11-04 02:40:26,
202,akLn0Qvev3M,@PublicBankGroup,0:35,2247,2016-02-22,Enjoy priority banking services with Premier S...,0,No description has been added to this video,2025-11-04 02:41:07,
203,YtU2KRuBVTw,@PublicBankGroup,2:24,15976,2015-07-14,Public Bank Visa Commercial Card,0,The one card that helps your business operatio...,2025-11-04 02:41:47,
204,Kkgg1-kY4HE,@PublicBankGroup,9:20,594,2015-06-29,"The Asian Banker: William ""Bill"" Seidman Lifet...",0,No description has been added to this video,2025-11-04 02:42:27,
205,m2rnVUVCQgA,@PublicBankGroup,2:04,16756,2015-06-25,IBG and IBFT via ATM,0,IBG and IBFT via ATM,2025-11-04 02:43:07,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,YueA7snetLY,@CIMBGroupHoldingsBhd,0:32,95,2025-10-20,Kita Bagi Jadi | Jaga Duit | CIMB Malaysia,4,From learning to manage your money wisely with...,2025-11-03 02:25:25,True
1,VBiJ-hHYeo0,@CIMBGroupHoldingsBhd,5:26,338,2025-10-15,Making the Journey of our SMEs more Powerful w...,31,Not all heroes wear capes. Many are our brave ...,2025-11-03 02:25:45,True
2,rhPIKZgQVZo&pp=0gcJCQYKAYcqIYzv,@CIMBGroupHoldingsBhd,0:36,114,2025-10-13,Kita Bagi Jadi | SME | CIMB Malaysia,6,Whether you‚Äôre just starting out or scaling up...,2025-11-03 02:26:05,Type 1 Error
3,bhGsQRa3ESY,@CIMBGroupHoldingsBhd,1:32,236,2025-10-09,CIMB OCTO Biz | CIMB Malaysia,7,"Move your business forward with OCTO Biz, the ...",2025-11-03 02:26:25,False
4,M6knHi8fOyw,@CIMBGroupHoldingsBhd,5:25,189768,2025-10-07,CIMB Artober | Datuk Ibrahim Hussein Showcase ...,31,A tribute to one of Malaysia‚Äôs legendary artis...,2025-11-03 02:26:44,True
463,EkCY842PEtY,@CIMBGroupHoldingsBhd,0:31,9680,2014-06-13,CIMB Octo's Pic,27,No description has been added to this video,2025-11-04 19:23:32,False
464,J7OPAMPrCic,@CIMBGroupHoldingsBhd,1:54,19256,2014-04-16,CIMB Preferred Visa Infinite,51,Introducing the CIMB Preferred Visa INFINITE c...,2025-11-04 19:23:52,True
465,Zgb8agaWNGI,@CIMBGroupHoldingsBhd,0:42,1787,2013-07-04,CIMB Fusion,4,CIMB Fusion allows fresh graduates to join CIM...,2025-11-04 19:24:12,False
466,p53JpWvCOTE,@CIMBGroupHoldingsBhd,1:37,4265,2012-12-13,What is OctoPay?,12,An introduction to OctoPay.\nASEAN's first onl...,2025-11-04 19:24:31,True
467,_dnozEeK5KA,@CIMBGroupHoldingsBhd,0:20,937,2011-09-29,The CIMB Prime Plan TVC,3,No description has been added to this video,2025-11-04 19:24:52,True


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,YueA7snetLY,@CIMBGroupHoldingsBhd,@RisNandar-c9h,12 days ago,CIMB Malaysia ‚ù§ ‚ù§,0
1,VBiJ-hHYeo0,@CIMBGroupHoldingsBhd,@aisyahsharum8626,2 weeks ago,Fantastic job CIMB,1
2,VBiJ-hHYeo0,@CIMBGroupHoldingsBhd,@ceciliawong8535,2 weeks ago,Inspiring!!,1
3,VBiJ-hHYeo0,@CIMBGroupHoldingsBhd,@ridzefairuzahmad7849,2 weeks ago,Fantastic video CIMB!!! Kita Bagi Jadi!,1
4,VBiJ-hHYeo0,@CIMBGroupHoldingsBhd,@JDoubleU238,2 weeks ago,Awesome stuffs! Well done üëè üëè üëè,1
3649,J7OPAMPrCic,@CIMBGroupHoldingsBhd,@muhammadsatrian1275,10 years ago,BNI,0
3650,J7OPAMPrCic,@CIMBGroupHoldingsBhd,@alorsetarstyle,10 years ago,Sufficient.,0
3651,p53JpWvCOTE,@CIMBGroupHoldingsBhd,@rairamlan7746,12 years ago,terangkan dgn lanjut mengenai coin box,0
3652,p53JpWvCOTE,@CIMBGroupHoldingsBhd,@rairamlan7746,12 years ago,tidak boleh reload pn dr octopay?mo kena ada f...,0
3653,_dnozEeK5KA,@CIMBGroupHoldingsBhd,@AbyanZaynAlbani,4 months ago,1 Januari 2016,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,bT1Gjcx6RKo,@smbcgroup,4:04,575,2025-10-07,Partnership for the Goals,5,"Through strong partnerships, we aim to unlock ...",2025-11-03 03:02:10,
1,eOfTS3U4CXU&pp=0gcJCQYKAYcqIYzv,@smbcgroup,1:00,72,2025-10-07,[Teaser] Partnership for the Goals,2,"Through strong partnerships, we aim to unlock ...",2025-11-03 03:02:51,
2,lzXASjvBPHQ,@smbcgroup,1:02,1141,2025-09-17,SMBC Careers | Georgeana Somesan on finding th...,4,"Meet Georgeana Somesan, part of the Leveraged ...",2025-11-03 03:03:31,
3,mF8Pj9dSReQ,@smbcgroup,3:03,626,2025-07-08,SMBC Group Americas Division Analyst Pitch Cha...,2,SMBC Group Americas Division recently held its...,2025-11-03 03:04:12,True
4,lvFzbTdxFMU,@smbcgroup,19:52,637,2025-05-19,Building for Climate Resilience,5,"In decarbonizing Asia, collaboration and scale...",2025-11-03 03:04:31,False
16,Ayc4HcUzOdg,@smbcgroup,20:05,404,2024-05-03,Greening the Built Environment: Developing Change,8,With real estate contributing to 39% of global...,2025-11-03 03:08:30,True
17,VEGEIcyCE2I,@smbcgroup,18:45,614,2024-05-03,Blended Finance in Southeast Asia's Energy Tra...,9,SMBC invites IFC to explore the ground-breakin...,2025-11-03 03:08:49,True
18,1HuDjrAA86Q,@smbcgroup,5:42,2962,2023-10-25,SMBC Singapore‚Äôs 60th Anniversary,25,"In 2023, SMBC celebrates our 60th year milesto...",2025-11-03 03:09:09,
19,mmcslYJQbdo&pp=0gcJCQYKAYcqIYzv,@smbcgroup,1:01,320,2023-10-19,SFF 2022 video highlights,2,"From enhancing customer convenience, improving...",2025-11-03 03:09:49,
20,iEEKCOzAO0A,@smbcgroup,4:51,674,2023-10-19,SMBC JICA Sustainable Finance Framework,7,SMBC and the Japan International Cooperation A...,2025-11-03 03:10:29,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,mF8Pj9dSReQ,@smbcgroup,@lullapugh9466,1 day ago,Can anyone journey smbc,0
1,mF8Pj9dSReQ,@smbcgroup,@viktor.o.ledenyov5829,2 weeks ago,Super!!! üáØüáµüáØüáµüáØüáµ Did you read the Japanese-Swis...,1
2,2WKGyDI6pOc,@smbcgroup,@anandvasava5880,1 month ago,‚ù§,0
3,2WKGyDI6pOc,@smbcgroup,@anandvasava5880,1 month ago,Excellent,0
4,Zc1ITq__Nd8,@smbcgroup,@TriagusdinaAndriansyahdi-li6iy,4 months ago,Gak punya m banking kah,0
4,Zc1ITq__Nd8,@smbcgroup,@TriagusdinaAndriansyahdi-li6iy,4 months ago,Gak punya m banking kah,0
5,roP2Ur1BN_g,@smbcgroup,@darwisdaeng3105,7 months ago,Saya TDK merasa ada pinjaman atau tunggakan d...,0
6,Ayc4HcUzOdg,@smbcgroup,@HMm-nv7sh,1 year ago,I'm other country in Asia i work at bank in ou...,0
7,VEGEIcyCE2I,@smbcgroup,@ELIOSANFELIU,1 year ago,So intresting summit¬°¬°Energy transition needs ...,0
8,VEGEIcyCE2I,@smbcgroup,@judyl.7811,1 year ago (edited),provide capital and advisory across energy and...,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,7_N4eJHUL8o,@BankofChinaManila,5:02,128,2024-01-18,Bank of China Cultural Arts and Exchange Program,0,"Entitled In Harmony with Nature, BOC brought t...",2025-11-03 03:11:09,
1,rCdcnf12qKk,@BankofChinaManila,1:30,2138,2022-02-06,Bank of China‚Äôs Online Foreign Exchange,0,The Philippines‚Äô first real-time Renminbi fore...,2025-11-03 03:11:50,
2,sblTHxeSq1o,@BankofChinaManila,3:04,232,2022-01-22,Our promising Filipino winter athletes are exc...,0,After almost two years of having no place to s...,2025-11-03 03:12:30,
3,yh_Ffddf91c,@BankofChinaManila,0:34,503,2022-01-22,Asa Miller is ready to show Beijing what he ca...,0,"We wish you the best of luck, Asa!",2025-11-03 03:13:10,
4,acFjC8zSOAk,@BankofChinaManila,1:00,12375,2022-01-22,"Introducing Asa Miller, the Philippines‚Äô lone ...",0,Join us in wishing him the best of luck! We‚Äôre...,2025-11-03 03:13:50,
3,yh_Ffddf91c,@BankofChinaManila,0:34,503,2022-01-22,Asa Miller is ready to show Beijing what he ca...,0,"We wish you the best of luck, Asa!",2025-11-03 03:13:10,
4,acFjC8zSOAk,@BankofChinaManila,1:00,12375,2022-01-22,"Introducing Asa Miller, the Philippines‚Äô lone ...",0,Join us in wishing him the best of luck! We‚Äôre...,2025-11-03 03:13:50,
5,7lYfdJUZ-ZE,@BankofChinaManila,3:12,882,2020-10-28,Bank of China SME Cross-Border Matchmaking Act...,0,Our mission is to be the bridge between Chines...,2025-11-03 03:14:31,
6,HCD2h-lyvoc,@BankofChinaManila,7:07,1625,2020-10-27,Bank of China and the RMB Community,0,Know more about the local RMB initiatives and ...,2025-11-03 03:15:11,
7,2qNvymbihoU,@BankofChinaManila,6:39,20319,2020-10-18,What is RMB or renminbi?,0,Know more about RMB and the benefits of using ...,2025-11-03 03:15:51,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,5bu0SaFobcs,@GXBank,0:56,56,2025-10-20,GXBank - Product Experience,2,Everything else in life and business is instan...,2025-11-03 03:16:32,True
1,HmLEjPeomS0,@GXBank,2:24,208,2024-11-29,GX2.0 ICYMI Recap,3,This time last year we launched our bank - GXB...,2025-11-03 03:16:51,False
2,xSnNpsLTcks,@GXBank,42:19,667,2024-11-06,GX2.0 | Next Starts Now - Full Event Recording,8,Watch the full GX2.0 Event to learn about the ...,2025-11-03 03:17:11,True
3,g7Jzg7srT0U,@GXBank,0:36,1148,2024-11-05,GX Rewards,6,Why should banking be boring? Our upcoming GX ...,2025-11-03 03:17:31,True
4,YiSxB745GS4,@GXBank,1:01,361,2024-11-05,Thank you Malaysia for GX1.0,5,"Thank you, Malaysia, for a remarkable first ye...",2025-11-03 03:17:51,True
28,YQx1s-cd9S4&pp=0gcJCQYKAYcqIYzv,@GXBank,52:21,580,2023-12-25,GXBank Chillest How-To Guide: Stress Free Acco...,10,Skip the hassle of starting a regular bank acc...,2025-11-03 03:25:43,
29,4Iu3BJyPfnE,@GXBank,0:10,442,2023-12-06,GXBank: Security l We are safe and secure,2,No description has been added to this video,2025-11-03 03:26:24,False
30,qyOaQ37WaqQ,@GXBank,0:10,644,2023-12-06,GXBank: Grab Benefits l Unlock the best of Grab,5,No description has been added to this video,2025-11-03 03:26:43,False
31,H6ZOW3Z4g2o,@GXBank,0:15,5613126,2023-12-01,GXBank: Download Today!,5388,No secret handshakes required üòâ Just download\...,2025-11-03 03:27:03,False
32,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@GXBank,1:00,4781,2023-12-01,GXBank: Malaysia's First Digital Bank,49,"At GXBank, we believe all Malaysians deserve a...",2025-11-03 03:27:22,True


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,5bu0SaFobcs,@GXBank,@SieyyaOfficial,8 days ago,I reset my phone because full of memory... And...,0
1,5bu0SaFobcs,@GXBank,@SieyyaOfficial,8 days ago,"Hello GX Bank, I cant login to my account how ...",0
2,xSnNpsLTcks,@GXBank,@owhdanny5270,11 months ago,Xde yg menarik dah,0
3,xSnNpsLTcks,@GXBank,@14bqdonk,11 months ago,lolok üí®,0
4,g7Jzg7srT0U,@GXBank,@14bqdonk,11 months ago,When this start?,0
42,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@GXBank,@UchihaDestiny09,1 year ago,1 Thank You GX Bank,2
43,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@GXBank,@muhdfadillah2343,1 year ago,how to log in through PC,1
44,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@GXBank,@momopeace875,1 year ago,Halo spa2 tau tolong2 dlu phone aku hilng.. le...,1
45,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@GXBank,@kokhieng1772,1 year ago,gx bank can withdraw cash at atmÔºü,1
46,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@GXBank,@ykj131,1 year ago,Warga asing boleh buka akaun tak?,1


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,8HOTIQPfVmo,@Citi,2:21,235.0,2025-10-23,Citi: Treasury and Finance Conference Singapor...,7.0,Citi's Treasury and Finance Conference (TFC) 2...,2025-11-03 03:27:43,False
1,wPtt16HaQJI,@Citi,0:58,41.0,2025-10-23,Citi: Treasury and Finance Conference Singapor...,2.0,At our recent Treasury and Finance Conference ...,2025-11-03 03:28:03,False
2,eB4Ga22J9WQ,@Citi,0:57,46.0,2025-10-23,Citi: Treasury and Finance Conference Singapor...,2.0,Watch the video to hear industry leaders' pers...,2025-11-03 03:28:23,False
3,MvhedwHIJ-g,@Citi,0:59,90.0,2025-10-23,Citi: Treasury and Finance Conference Singapor...,1.0,"Join Johanna Chua, Citi's Head of Emerging Mar...",2025-11-03 03:28:43,False
4,x0rQ502qskc,@Citi,0:55,69.0,2025-10-23,Citi: Treasury and Finance Conference Singapor...,3.0,"As market dynamics rapidly evolve, companies a...",2025-11-03 03:29:02,False
1658,9tIRWNbIeTA,@Citi,0:39,4066.0,2010-10-14,Citi QuickTake Demo: How to View a Payee Spend...,0.0,http://www.citibank.com/\nView this Citibank Q...,2025-11-09 04:11:12,True
1659,_wX-5mIVVE4,@Citi,0:46,5528.0,2010-10-14,Citi QuickTake Demo: How to Make a One-Time Pa...,7.0,http://www.citibank.com/\nView this Citibank Q...,2025-11-09 04:11:32,False
1660,gK8mFG5ELlI,@Citi,0:43,44074.0,2010-10-14,Citi QuickTake Demo: How to Link Accounts usin...,26.0,http://www.citibank.com/\nView this Citibank Q...,2025-11-09 04:11:49,True
1661,LUa_DW2B2m0,@Citi,0:59,71536.0,2010-10-14,Citi QuickTake Demo: How to View your Account ...,47.0,http://www.citibank.com/\nView this Citibank Q...,2025-11-09 04:12:09,True
1662,KrxyoIVokrs,@Citi,2:54,376.0,2010-09-20,Citi & New York Mets Alumni Association Team U...,1.0,Citi volunteers pitched in on construction of ...,2025-11-09 04:12:28,False


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,5PWcyptUyBQ,@Citi,@bobbybrown221-vh1,2 weeks ago,government shutdown is rigged long enough for ...,1
1,_nK_24F9sfM,@Citi,@AbdulRehman-o3g8o,2 weeks ago,"Hello sir, I need to talk to you about somethi...",0
2,_nK_24F9sfM,@Citi,@AbdulRehman-o3g8o,2 weeks ago,sir please help me,0
3,_nK_24F9sfM,@Citi,@AbdulRehman-o3g8o,2 weeks ago,"Sir, I just need to talk to you for 5 minutes.",0
4,_nK_24F9sfM,@Citi,@AbdulRehman-o3g8o,2 weeks ago,hlo,0
1976,LUa_DW2B2m0,@Citi,@JoshuaSutto,10 years ago,But you don't have all the account details.. h...,6
1977,LUa_DW2B2m0,@Citi,@isidoromarkus6625,9 years ago,I wanted to check the numbers on my account by...,2
1978,LUa_DW2B2m0,@Citi,@krazi3csSS,5 years ago,"Update your front-end, cant find anything on y...",1
1979,LUa_DW2B2m0,@Citi,@aerohk,6 years ago,"No account number, no routing number.",0
1980,LUa_DW2B2m0,@Citi,@ManishKumar-pr2zx,5 years ago (edited),worst bank ever...horrible. You can't get pass...,1


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,m3aIQVfe53I,@HSBC_MY,5:28,37,2025-10-02,HSBC | The Design Exchange,1,Chinese craftsmanship has a rich heritage and ...,2025-11-03 04:00:51,False
1,iDKbYSvixE8,@HSBC_MY,0:35,556308,2025-08-14,"HSBC Premier, redefined",32,From award-winning wealth and protection solut...,2025-11-03 04:01:10,False
2,kTcYf6uTnTU,@HSBC_MY,4:17,86,2025-08-07,HSBC | The Culinary Exchange,1,Can blending cultures help boost local food ec...,2025-11-03 04:01:30,False
3,ZM9CollqVig,@HSBC_MY,1:27,172,2025-07-25,HSBC Malaysia | Premier Market Outlook 2H2025,3,"From Kuala Lumpur to Penang to Johor, HSBC Mal...",2025-11-03 04:01:50,False
4,iBm1EQ1GmmI,@HSBC_MY,5:08,134,2025-06-25,HSBC | The Sound Exchange,4,Emirati artist Maitha Hamdan and American orch...,2025-11-03 04:02:10,False
183,mD6gzw7f-PI,@HSBC_MY,0:57,22266,2020-05-15,How to transfer money on the go | HSBC Malaysi...,64,It‚Äôs now easier to transfer money to your save...,2025-11-04 04:49:33,
184,oGYZ-QzVYDo,@HSBC_MY,1:14,3554,2020-05-15,How to register your DuitNow ID | HSBC Malaysi...,14,Register to receive funds with DuitNow via the...,2025-11-04 04:50:12,
185,SA7bBUWBntQ,@HSBC_MY,1:15,15726,2020-05-15,How to transfer money using DuitNow | HSBC Mal...,42,It's now easier to transfer fund by entering t...,2025-11-04 04:50:52,
186,Ok82jOqMrn8,@HSBC_MY,1:32,16190,2020-05-13,How to log on | HSBC Malaysia Mobile Banking App,54,A faster and simpler way to log on with biomet...,2025-11-04 04:51:32,
187,aYVk7ngRmeY,@HSBC_MY,1:38,2386,2020-05-07,Together for 155 years | Together We Thrive,215,"Together, we‚Äôve weathered storms and helped in...",2025-11-04 04:52:12,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,6i9yQv0wKuc&pp=0gcJCQYKAYcqIYzv,@HSBC_MY,@andycheong794,10 months ago,WOW I am not crying üò¢ üò¢ üò¢ üò¢ üò¢,3
1,6i9yQv0wKuc&pp=0gcJCQYKAYcqIYzv,@HSBC_MY,@tjloveprincess235,11 months ago,Well.... This ad is such a beautiful message. ...,8
2,6i9yQv0wKuc&pp=0gcJCQYKAYcqIYzv,@HSBC_MY,@SeanLow1709,1 year ago,Very touching üò¢,10
3,6i9yQv0wKuc&pp=0gcJCQYKAYcqIYzv,@HSBC_MY,@SyntheticCharmVA,1 year ago,lets gooo Ali,6
4,YuFcEehccaM,@HSBC_MY,@lazizkhalil2629,11 months ago,I love this spirit. I want to work with Keling...,0
11,BX4qLlzSags,@HSBC_MY,@SaadonAksah,1 year ago,How does that work ü§î,0
12,MZutPvxZ-Xs,@HSBC_MY,@bokken5107,1 year ago,Thank you HSBC ‚ù§,0
13,MZutPvxZ-Xs,@HSBC_MY,@summersnowchia3619,8 months ago,came here after scanned the angpow. Happy Year...,0
14,MZutPvxZ-Xs,@HSBC_MY,@rajanwajee276,9 months ago,From lorong setiabistari 1,0
15,GPTPq9EQCXM,@HSBC_MY,@DanielToday,3 years ago,"Ppl rarely do this for CNY nowadays, nice one!",0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,QV08nJWwly4,@standardchartered,0:38,130,2025-10-17,Africa Public Sector Institutions Summit ‚Äì Fai...,2,"On 1 July 2025, we invited our African Public ...",2025-11-03 04:54:12,False
1,T2WEOqovnBk,@standardchartered,0:31,31,2025-10-17,Africa Public Sector Institutions Summit ‚Äì Dhi...,0,"On 1 July 2025, we invited our African Public ...",2025-11-03 04:54:32,False
2,rcgkVvXEpVQ,@standardchartered,0:39,35,2025-10-17,Africa Public Sector Institutions Summit ‚Äì Cha...,0,"On 1 July 2025, we invited our African Public ...",2025-11-03 04:54:52,False
3,3uMX74AleW4,@standardchartered,0:20,1331760,2025-10-16,Now's Your Time For Wealth - Signature CIO Funds,6,It‚Äôs the final quarter of the year and the per...,2025-11-03 04:55:12,False
4,QRsBKphWOOE,@standardchartered,1:49,67,2025-10-16,Global trade solutions to unlock growth,1,With our comprehensive suite of global trade s...,2025-11-03 04:55:31,False
1658,If6ukc8_qV0&pp=0gcJCQYKAYcqIYzv,@standardchartered,2:18,2472,2009-12-29,New World Order,13,17 April 2009 (SCTV) -- The G20 Summit was a f...,2025-11-09 04:30:37,Type 1 Error
1659,-AgRppTon7E,@standardchartered,3:37,36,2009-12-29,Asia braving crisis,1,17 April 2009 (SCTV) -- No one doubts 2009 wil...,2025-11-09 04:30:53,False
1660,ramBC36El3k,@standardchartered,4:29,163,2009-12-29,Story of two cities - Hong Kong and Shanghai,2,17 April 2009 (SCTV) -- China aims to build Sh...,2025-11-09 04:31:10,False
1661,RxQdloRojVw,@standardchartered,6:16,562,2009-12-27,Transformation of Standard Chartered,1,17 July 2009 (SCTV) -- Where does Standard Cha...,2025-11-09 04:31:27,False
1662,Knhm30I9QHo,@standardchartered,6:40,290,2009-12-27,Building a sustainable business,5,17 July 2009 (SCTV) -- Do companies have a rol...,2025-11-09 04:31:43,True


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,DRSiZZ90pIE,@standardchartered,@AsjarAli-o8y,3 weeks ago,Mashallha subhanallha,1
1,DRSiZZ90pIE,@standardchartered,@muhammadjamal4252,3 weeks ago,It‚Äôs not ‚Ä¶ why is the product linked to LIBOR,0
2,DRSiZZ90pIE,@standardchartered,@TA-kz1jc,2 weeks ago,Sponsored by Standard Charted ?,0
3,DRSiZZ90pIE,@standardchartered,@althea_is_smokin_hot,3 weeks ago,"Sir,islam is a heap of deceptions. Every day, ...",0
4,N8fFrxl9_aA,@standardchartered,@artimall2714,4 weeks ago,Ohhh good üòØ ü§Ø üòä üëç,8
1588,QFeCh68Dx04,@standardchartered,@keungkuenlai5638,3 years ago,2008 09 08,0
1589,mlWgA0nf-g4,@standardchartered,@keungkuenlai5638,3 years ago,2008 09 08,0
1590,Dc7MhnlSmAI,@standardchartered,@keungkuenlai5638,3 years ago,2008 06 08,0
1591,Knhm30I9QHo,@standardchartered,@hikikomori7757,6 years ago,Hello!,0
1592,Knhm30I9QHo,@standardchartered,@hikikomori7757,6 years ago,–Ø –ø–æ—Å–º–æ—Ç—Ä–µ–ª —ç—Ç–æ –≤–∏–¥–µ–æ!,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,tpc7cx78S1M,@rhbgroup,14:37,48614,2025-10-14,MERGE by RHB: Budget Announcement 2026,14,"In this episode, we delve into Malaysia's Budg...",2025-11-03 05:27:09,False
1,DkjlVuSsZYg,@rhbgroup,18:48,79393,2025-10-09,MERGE by RHB: China Equities Outpacing Develop...,14,From PBoC‚Äôs policy signals to sector-specific ...,2025-11-03 05:27:29,False
2,m4qipvOn5Ro,@rhbgroup,7:38,63155,2025-09-24,MERGE by RHB: July Monthly Market Insights 2025,15,In this episode of the Monthly Investment Insi...,2025-11-03 05:27:48,True
3,zUvlFzCHtbE,@rhbgroup,46:28,312896,2025-09-23,MERGE by RHB - Navigating Uncertainty: 2H 2025...,116,"In the second half of 2025, as escalating geop...",2025-11-03 05:28:08,False
4,K1h_yRWEMOE,@rhbgroup,0:54,298,2025-09-23,Around the world with RHB Multi Currency Visa ...,4,https://www.rhbgroup.com/mcv/index.ht...\n\n#T...,2025-11-03 05:28:28,False
556,oKW2zaf6Wfg,@rhbgroup,11:09,475,2013-05-01,RHB Corporate Responsibility Initiatives (2010),4,RHB's corporate responsibility initiatives inc...,2025-11-05 15:25:11,False
557,GOgpzKc-ykU,@rhbgroup,0:30,1502,2013-05-01,Reuniting Families -- Child Safety Commercial,6,Reuniting Families - Child Safety programme is...,2025-11-05 15:25:29,False
558,iE9MYApZua4,@rhbgroup,0:30,1545,2013-05-01,Reuniting Families -- Child Safety Commercial ...,3,Reuniting Families - Child Safety programme is...,2025-11-05 15:25:47,True
559,-nw-WO7sSIw,@rhbgroup,16:24,1746,2013-05-01,Official Launch of RHB 100 Years Celebration,8,The RHB Banking Group will be celebrating its ...,2025-11-05 15:26:08,True
560,hAFPYIha3vQ&pp=0gcJCQYKAYcqIYzv,@rhbgroup,1:22:25,1352,2013-04-02,Message to Japan: Road to Recovery in the Glob...,12,"YABhg Tun Dr Mahathir Mohamad, former Prime Mi...",2025-11-05 15:26:28,True


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,m4qipvOn5Ro,@rhbgroup,@ZuirRahman77,1 month ago,Ziurrahman,1
1,m4qipvOn5Ro,@rhbgroup,@mohammadrajibhossain8802,1 month ago,How much money can be deposited into RHB Bank ...,0
2,N6BlA6KPt3Q,@rhbgroup,@RisNandar-c9h,1 month ago,RHB Group ‚ù§ ‚ù§,0
3,LYHOKLNj4Sk&pp=0gcJCQYKAYcqIYzv,@rhbgroup,@RisNandar-c9h,2 months ago,RHB Group ‚ù§ ‚ù§,0
4,0jJdYrsEl7E,@rhbgroup,@RisNandar-c9h,2 months ago,RHB Group ‚ù§ ‚ù§,0
2858,-nw-WO7sSIw,@rhbgroup,@ahmadazharbinatalib7303,3 years ago,Yes Sir..i look2 info. Thankyou very2 much all...,1
2859,-nw-WO7sSIw,@rhbgroup,@ahmadazharbinatalib7303,3 years ago,Ok.. good evening all friend company..,1
2860,-nw-WO7sSIw,@rhbgroup,@alorsetarstyle,6 years ago,Pretty steady RHB behavioral..,1
2861,hAFPYIha3vQ&pp=0gcJCQYKAYcqIYzv,@rhbgroup,@syedadeelhussain2691,7 years ago,Look East policy was followed by all SE ASIAN ...,0
2862,hAFPYIha3vQ&pp=0gcJCQYKAYcqIYzv,@rhbgroup,@alorsetarstyle,6 years ago,all in but me..,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,wxaej_sK_FI,@jpmorgan,1:37,551.0,2025-10-23,Why the World‚Äôs Top Institutional Investors Ch...,0.0,J.P. Morgan Securities Services delivers best-...,2025-11-03 06:02:22,
1,P0O7lbIsQQk,@jpmorgan,10:45,2867.0,2025-10-21,Investment Opportunities in Security & Tech Am...,0.0,How can investors navigate the potential chall...,2025-11-03 06:03:02,
2,uK3QAfCg8kk,@jpmorgan,17:41,523.0,2025-10-20,Trading Insights: Exploring trend-following st...,0.0,"In this episode, Martin Kallstr√∂m, CEO of Swed...",2025-11-03 06:03:43,
3,cij_EDnZ_K0,@jpmorgan,19:26,448.0,2025-10-17,Inside Tech Stars 2025: From IPOs to defense tech,0.0,What‚Äôs driving record venture capital flows in...,2025-11-03 06:04:23,
4,SqN7D4tHoDY,@jpmorgan,14:08,631.0,2025-10-06,Trading Insights: US policy and the impact of ...,0.0,"In this episode, Eloise Goulder sits down with...",2025-11-03 06:05:04,
1277,DME_gvWWyY4,@jpmorgan,1:57,2001.0,2013-06-18,Global Technology Infrastructure: Steve | Hear...,0.0,SUBSCRIBE:\nhttp://jpm.com/x/i/NFPWfK0\nHear f...,2025-11-07 15:40:50,
1278,ahWyPgabCYQ,@jpmorgan,1:29,383.0,2013-06-18,Equity Technology: Robert | Hear From Our Team...,0.0,SUBSCRIBE:\nhttp://jpm.com/x/i/NFPWfK0\nHear f...,2025-11-07 15:41:18,
1279,jhk2Xe_P8QI&pp=0gcJCQYKAYcqIYzv,@jpmorgan,1:56,525.0,2013-06-18,Technology in Asia Pacific: Sofia | Hear From ...,0.0,SUBSCRIBE:\nhttp://jpm.com/x/i/NFPWfK0\nHear f...,2025-11-07 15:41:44,
1280,d3ooP0BjXns,@jpmorgan,2:21,622.0,2013-06-18,Technology in Asia Pacific: Yvonne | Hear From...,0.0,SUBSCRIBE:\nhttp://jpm.com/x/i/NFPWfK0\nHear f...,2025-11-07 15:42:12,
1281,9_vDc_O1sFU,@jpmorgan,1:50,1178.0,2013-06-18,Technology in Asia Pacific: Atul | Hear From O...,0.0,SUBSCRIBE:\nhttp://jpm.com/x/i/NFPWfK0\nHear f...,2025-11-07 15:42:39,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,2zdhfXjpmyg,@MizuhoAmericas,16:41,60,2025-10-21,European Dealmaking Landscape,1,From shifting political environments to macroe...,2025-11-03 07:09:33,False
1,6tZ-JvQAXc8,@MizuhoAmericas,7:23,198,2025-08-12,Q3 2025 Equity Capital Markets: Mid-Year Snapshot,6,Is the long-awaited return of the IPO market f...,2025-11-03 07:09:53,False
2,N4CQkUvz7Ns&pp=0gcJCQYKAYcqIYzv,@MizuhoAmericas,9:53,394,2025-07-25,Q3 2025 Debt Capital Markets: Mid-Summer Update,13,Investment Grade Debt Capital Markets are seei...,2025-11-03 07:10:13,False
3,2DMHEx5FSgE,@MizuhoAmericas,1:11,47,2025-07-24,Mizuho‚Äôs ApprenTECH Program with Marcy Lab School,2,"At Mizuho, we know community investment is goo...",2025-11-03 07:10:32,False
4,S9dPCscz9NI,@MizuhoAmericas,9:19,94,2025-07-15,The Business of Sports,0,In the current era of must-see live sporting e...,2025-11-03 07:10:51,False
149,UKSZhnUQ_bQ,@MizuhoAmericas,3:13,47659,2017-03-20,How One Bank's Unique Approach is Working on W...,159,Hear our leaders discuss our recent growth and...,2025-11-04 17:31:48,True
150,lZIfmkw7-3k,@MizuhoAmericas,3:24,44414,2017-03-08,Community Partner Feature: Hot Bread Kitchen,23,"Our partner, Hot Bread Kitchen, has a unique s...",2025-11-04 17:32:07,False
151,r0m46N0SDxM,@MizuhoAmericas,0:55,394,2016-11-29,Mizuho Volunteer Day 2016 - NYC,3,"In 2006, we launched Mizuho Volunteer Day in N...",2025-11-04 17:32:27,False
152,my-V4BAXJcE,@MizuhoAmericas,4:35,1603,2016-11-09,Career Path Success Stories - Mizuho Women's N...,11,Empowering women to be their best - it's in ou...,2025-11-04 17:32:46,False
153,ooUJtzDnMaY,@MizuhoAmericas,1:39,97482,2016-08-19,The Mizuho Americas‚Äô Brand,202,Today‚Äôs world is ruled by hybrid thinkers and ...,2025-11-04 17:33:07,False


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,vwM-LePWC6w,@MizuhoAmericas,@Md_Masum_Mia,8 months ago,"I saw that your video quality is excellent, bu...",0
1,eR4vdSZbLC0,@MizuhoAmericas,@ganesh12353,9 months ago,Good client,0
2,0k0czcW-rao,@MizuhoAmericas,@samuelaurelianoo,10 months ago,"Interesting video, looking forward for the Deb...",0
3,0k0czcW-rao,@MizuhoAmericas,@chonky8067,9 months ago,Great video!,0
4,7bR8RHdyEL8,@MizuhoAmericas,@Franklin-pc3xd,11 months ago,Financial Yenta Hour,0
84,9smCg2tXd6c,@MizuhoAmericas,@michaelfiguly1654,4 years ago,"Great not grey, spell CK, lol!",0
85,EjYh8X1XSJ4,@MizuhoAmericas,@kalpeshstationery6125,7 years ago,HII,1
86,xx-DP6rop98,@MizuhoAmericas,@orangeflip10,7 years ago,AMAZING,0
87,xx-DP6rop98,@MizuhoAmericas,@shamekebrooks9303,3 years ago,Wow Dislike,0
88,UKSZhnUQ_bQ,@MizuhoAmericas,@27986234,8 months ago,incredible video,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,p7bPO1SfHJc,@AmBankTV,13:03,178,2025-10-23,AmBank Group | Mount Conquerors | Full Hike Do...,8,In celebration of AmBank Group‚Äôs 50th Annivers...,2025-11-03 07:42:24,True
1,xceiS_r8WlQ,@AmBankTV,1:33,103,2025-10-22,Capturing the essence of Festival of Lights wi...,2,"Thank You for the 10 million views, in support...",2025-11-03 07:42:43,False
2,s9pNJYzRvxs,@AmBankTV,1:33,2647957,2025-10-13,AmBank Group | Rhythm of Deepavali | Deepavali...,355,"Watch AmBank Group‚Äôs 2025 Deepavali film, Rhyt...",2025-11-03 07:43:04,True
3,FPGhDYBIlVw,@AmBankTV,6:59,142,2025-09-27,AmBank Group | The Light Beyond,3,"Izz Zara and her friends, students from SK Sil...",2025-11-03 07:43:24,False
4,obl5r-aOqy4,@AmBankTV,1:08,115,2025-09-23,Spot Aman and win Aman Plushies or Rm100 Cash ...,2,How to spot Aman? Here‚Äôs a tip!\nOn 18 Sept 20...,2025-11-03 07:43:44,True
357,mzUNM7FFv_A,@AmBankTV,1:00,1326537,2014-08-11,Are you ready to Live Ready?,0,"Starting 8 August, 2014 a new life insurance r...",2025-11-05 03:37:43,
358,ak9zHJ1H5rg,@AmBankTV,1:00,713866,2014-06-24,Raya Bergaya Bersama AmBank (Music Video),156,What's a celebration without music and loads o...,2025-11-05 03:38:23,
359,YrdmuJ9xtUo,@AmBankTV,3:12,10003,2014-03-12,IBG Transfer Guide by ABM,0,The Association of Banks in Malaysia video gui...,2025-11-05 03:39:03,
360,geZ4C_F4-NA,@AmBankTV,0:58,33879,2014-03-04,AmOnline IBG Transfer Guide,75,AmBank video guide on how to perform IBG trans...,2025-11-05 03:39:44,
361,B73s5Khyu1g,@AmBankTV,3:14,87869,2014-01-14,I AmBanking on‚Ñ¢ a Better Journey!,13,I AmBanking on a Better Journey: AmBank Unveil...,2025-11-05 03:40:23,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,p7bPO1SfHJc,@AmBankTV,@ronimia7630,5 days ago,Hello,0
1,s9pNJYzRvxs,@AmBankTV,@kaminimanikam,2 weeks ago (edited),Thank you Ambank for celebrating Bharatanatyam...,18
2,s9pNJYzRvxs,@AmBankTV,@RajesvarySamymalai,4 days ago,"Well done, AmBank ü•∞",0
3,s9pNJYzRvxs,@AmBankTV,@RoseEsrosy,2 weeks ago,So beautiful well done Ambank üòç ‚ù§,4
4,s9pNJYzRvxs,@AmBankTV,@sivaparathy7806,6 days ago,beautiful ‚ù§,0
316,povDvSsCC6s,@AmBankTV,@tophero6468,6 years ago,Cara memulakan permohonan adalah kunci kepada.,2
317,povDvSsCC6s,@AmBankTV,@allynisantonius2759,3 years ago,Macam mana nak bankin duit dari ambank ke asnb,2
318,povDvSsCC6s,@AmBankTV,@robiulsunny9961,5 years ago,Tolong bagi tahu aku macam mana boleh deposit,2
319,povDvSsCC6s,@AmBankTV,@tophero6468,6 years ago,Jika saya membuat video atau memberitahu saya ...,2
320,povDvSsCC6s,@AmBankTV,@joharijohari9075,9 years ago,kenapa sekarang org luar yg py suami org mal...,2


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,xkGvTGF8iJ4,@BankofAmerica,2:40,319,2025-10-10,A Competitive Approach to Winning Talent,5,"In today‚Äôs hiring landscape, candidates are as...",2025-11-03 08:15:21,
1,Va-9TQFZTjI,@BankofAmerica,2:57,179,2025-10-09,Supporting Women in the Workplace,3,Are employer benefits really as comprehensive ...,2025-11-03 08:16:01,
2,X9_ZAaMIVxI,@BankofAmerica,1:31,446,2025-10-08,Welcome to Bank of America Workplace Benefits‚Ñ¢,3,"The future is changing, and so are employee be...",2025-11-03 08:16:42,
3,KceZziVoytk,@BankofAmerica,1:21,264,2025-10-01,Does saving money make you feel intimidated? W...,4,Take the pressure off yourself and start small...,2025-11-03 08:17:22,
4,mrgsvmnXl0I,@BankofAmerica,1:25,203,2025-09-29,Understanding Credit and How it Can Help You,8,Get the basics about how credit works and how ...,2025-11-03 08:18:02,
97,2Ut7ASj2GUM,@BankofAmerica,1:48,4493,2014-09-19,Bank of America Consumer MBA Associate,22,Jerry Decembre talks about his experience in t...,2025-11-03 10:33:10,
98,bjEI1fmwXzs,@BankofAmerica,3:06,2567,2014-06-26,Spring 2014 Bank of America Small Business Own...,14,We are pleased to share the results of the spr...,2025-11-03 10:33:38,
99,hohd63syjZI,@BankofAmerica,5:04,9801,2013-12-12,Bank of America Ally program sends message of ...,84,"At Bank of America, we strive to help all empl...",2025-11-03 10:34:07,
100,3SQirS6MjiU,@BankofAmerica,4:55,2794,2012-09-07,Big Idea for Small Businesses: Key Business In...,11,"Small business expert and columnist, Steve Str...",2025-11-03 17:34:45,
101,gHeAsNpIcn8,@BankofAmerica,8:30,3001,2012-08-11,Bank of America tech exec talks to Institution...,5,Bank of America technology executive Cathy Bes...,2025-11-03 17:35:13,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,JjMcghmK9ng,@MyBoostApp,0:12,7927,2025-10-20,Why No One Tell Me I Can Get RM1.95/L for RON95?,1,"Many people ask, how is it possible to get RM1...",2025-11-03 10:34:35,False
1,uiEe-l1LfJM,@MyBoostApp,0:15,20357,2025-10-15,Harga RON95 jadi RM1.95 jer seliter?,1,Harga RON95 jadi RM1.95 jer seliter?\n\nMemang...,2025-11-03 10:34:57,False
2,Fq7W7ytCMz0&pp=0gcJCQYKAYcqIYzv,@MyBoostApp,0:20,65539,2025-10-10,Boost Epic Unstoppable Birthday,0,"Spend a minimum of RM30 using Boost, Boost Ban...",2025-11-03 10:35:22,False
3,gMbiTUi0dgw,@MyBoostApp,0:14,94552,2025-10-09,Win Gold Dinar Boost Epic Unstoppable,2,üéâ More epic surprises are dropping for Boost‚Äôs...,2025-11-03 10:35:42,False
4,BwfBq2jzAyA,@MyBoostApp,0:17,111637,2025-10-08,Boost Epic Unstoppable Birthday 2025,2,It‚Äôs an epic spending spree with Boost & Boost...,2025-11-03 10:36:01,False
345,5q-HssXdZJo,@MyBoostApp,0:32,1422593,2017-01-11,Kamal Sees Boost Has Taken Over Town!,57,Hey all prepaid users! You can\n#DoTheBoost\nn...,2025-11-05 04:02:55,True
346,wk_9SGIxiaE,@MyBoostApp,1:32,337409,2017-01-11,How To Buy Boost Credits & Top up Prepaid,241,"To kickstart your Boost experience, you will n...",2025-11-05 04:03:14,True
347,9eZyOqwe5_8,@MyBoostApp,0:32,1766673,2017-01-06,How Elizabeth Caught the #DoTheBoost Fever!,63,All you prepaid users can now stay connected w...,2025-11-05 04:03:40,True
348,REL-JmnoynU,@MyBoostApp,0:15,332631,2016-12-22,#DoTheBoost with Kamal Adli,50,Watch Kamal Adli\n#DoTheBoost\n. He is excited...,2025-11-05 04:04:00,Type 1 Error
349,0rir35vDnWk,@MyBoostApp,0:15,226425,2016-12-22,#DoTheBoost with Elizabeth Tan,39,Watch Elizabeth Tan\n#DoTheBoost\n. She is exc...,2025-11-05 04:04:19,True


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,oiVhAT47iDs,@MyBoostApp,@topethidayat1620,2 months ago,Merdeka kewangan! Merdeka kewangan! üéâ üéâ üéâ,3
1,oiVhAT47iDs,@MyBoostApp,@mz6228,2 months ago,Selamat Hari Kebangsaan Malaysia üá≤üáæ Merdeka!! ...,0
2,oiVhAT47iDs,@MyBoostApp,@muhammadhazim1581,2 months ago,vfx need a raise,1
3,oiVhAT47iDs,@MyBoostApp,@eappieflag,2 months ago,bayar bill guna boost -bill tidak berjaya dib...,2
4,oiVhAT47iDs,@MyBoostApp,@mdshaidik9275,2 months ago,What's with that fever patch? Hinting somethin...,0
864,0rir35vDnWk,@MyBoostApp,@afyakatsukihafiy5539,8 years ago,hahaha kelakar tp nmpk comel...,0
865,0rir35vDnWk,@MyBoostApp,@azarudinyusofmansor9482,8 years ago,üëç üëç,1
866,0rir35vDnWk,@MyBoostApp,@Wzeta2103,7 years ago,Camne nak buat passwordnya aku day nap Kali ke...,0
867,0rir35vDnWk,@MyBoostApp,@amirulmusicsound7844,7 years ago,Elizabeth tan is boost & cimb Eva Dislike,0
868,0rir35vDnWk,@MyBoostApp,@Funnyvideos-fc1yn,8 years ago,guna kod ni ro7b5v2,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,E_xLZrttDVM,@DeutscheBank,4:39,139,2025-10-24,Phishing. Gesch√§ftsrisiko und Kostentreiber #E...,6,Entdecker J√ºrgen Schmitt trifft sich zum Thema...,2025-11-03 11:17:44,Type 1 Error
1,HSSp8POMUHY,@DeutscheBank,12:08,175,2025-10-23,PERSPEKTIVEN To Go ‚Äì der BoÃàrsenpodcast: Digit...,8,"Steigende Staatsverschuldung, Unsicherheit an ...",2025-11-03 11:18:05,False
2,EP7SYyVqdDs,@DeutscheBank,16:33,176,2025-10-21,We present Art:LIVE from Frieze London & Friez...,6,Catch all the highlights from this year's fair...,2025-11-03 11:18:26,False
3,-02nefEBp5s,@DeutscheBank,5:37,115,2025-10-21,Hier ist der Euro Stablecoin #ExpeditionFinance,7,Alles rund um den ersten von der BaFin zugelas...,2025-11-03 11:18:47,True
4,yPBxR0MnKXQ,@DeutscheBank,3:30,160,2025-10-17,Deutsche Bank Art & Culture presents - No√©mie ...,9,"üìπ Watch French visual artist, No√©mie Goudal (b...",2025-11-03 11:19:13,True
1914,9-etvyJFiaU,@DeutscheBank,3:46,983,2010-08-06,Sir Simon Rattle about Deutsche Bank and the D...,3,In addition to the direct sponsorship of the p...,2025-11-09 06:01:11,False
1915,KBx8hfSET8s,@DeutscheBank,0:34,751,2010-08-06,Deutsche Bank and the Digital Concert Hall,1,"The Berliner Philharmoniker, one of the world'...",2025-11-09 06:01:27,False
1916,or0a79fgwgQ,@DeutscheBank,3:02,2821,2010-06-02,Deutsche Bank - Corporate Social Responsibilit...,5,More than money: Deutsche Bank and Corporate S...,2025-11-09 06:01:44,False
1917,3V2-CJwKfDY,@DeutscheBank,9:14,1271,2009-05-22,Anish Kapoors Memory on display at Deutsche Gu...,4,A new installation by celebrated British-India...,2025-11-09 06:02:00,False
1918,Nf2DsaSEP_w,@DeutscheBank,7:15,3284,2009-04-01,Picturing Americas - American Photorealism in ...,17,"Vernissage video of ""Picturing Americas"", an a...",2025-11-09 06:02:17,False


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,-02nefEBp5s,@DeutscheBank,@cybi2684,10 days ago (edited),"Haha... Stablecoin, ja genau. Der EURO wird se...",0
1,yPBxR0MnKXQ,@DeutscheBank,@tormagnuslarsen9870,2 weeks ago,WTM World Trade Marketing WTM ‚Ñ¢,1
2,a7pwVINtKV8,@DeutscheBank,@‰øäÈúñÈô≥-k7x,2 weeks ago (edited),ÊàëÊÑõÊàëÊòØÂæ∑ÂúãÈäÄË°åÁî∑Â•≥ËÉΩÊ∫ê‰πãÊòü‰Ω†‰∫∫ÁúüÂ•Ω ‚ù§,1
3,a7pwVINtKV8,@DeutscheBank,@MayurMisra,2 weeks ago,"""Introducing dbX - The Future of Corpoarte Tre...",0
4,NgdCIqcCdRc,@DeutscheBank,@D19Oekonomie,2 days ago,1.5% 2026 ... No way,0
403,O3nwMx8Sus8,@DeutscheBank,@timothyhill1149,8 years ago,Were you briefed?,0
404,4g6PqItHhII,@DeutscheBank,@thelinke,13 years ago,"i love this piece, but don't know the name of ...",0
405,mrxuU7EEUdU,@DeutscheBank,@sobertents,12 years ago,wonderful,1
406,LWE9jUPtM5s,@DeutscheBank,@wantanmien,12 years ago,This service looks very interesting. Is it onl...,0
407,TgFhxqtU1DQ,@DeutscheBank,@Volodka7000,1 year ago,"13 years ago, it was 2011, HMMM, I dreamt work...",0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,RMZdBov0FrQ,@AFFIN,1:00,225,2025-08-29,"AFFIN Borneo Credit Card/-i - Unlimited, the B...",5,"üå¥‚ú® Hello Sabah & Sarawak!\nYour card, your per...",2025-11-03 12:07:46,False
1,Hhi_ERnrv1g,@AFFIN,1:17,129,2025-08-29,Detik Niaga TV3 : Kerjasama Strategik antara A...,3,"A memorable event with Marriott International,...",2025-11-03 12:08:07,False
2,t8UCJJafTd0,@AFFIN,0:52,113,2025-08-26,ASTON MARTIN X AFFIN,1,The road is yours üöò‚ú®\n\nEnjoy exclusive owners...,2025-11-03 12:08:29,False
3,PyuTTP4C12c,@AFFIN,2:22,87,2025-08-13,Event Highlight AFFIN BizChat KL 2025,3,AFFIN SME BizChat 2025 in Kuala Lumpur was an ...,2025-11-03 12:08:47,False
4,SPZ4q1CnxnE,@AFFIN,1:14,98,2025-07-17,AFFIN SME BizChat KL 2025 - Teaser,10,"The highly anticipated flagship business talk,...",2025-11-03 12:09:10,True
216,raaESaRta4k,@AFFIN,0:31,126,2017-06-02,Photo Challenge Contest,1,Don‚Äôt miss this opportunity! Campaign starts n...,2025-11-04 11:24:41,False
217,UyWEmdaX2EI,@AFFIN,0:30,444,2017-01-12,AFFINBANK Group - Karnival Kewangan 2017,3,Karnival Kewangan 2017 at PWTC,2025-11-04 11:25:01,False
218,PjBmn4J4z4E,@AFFIN,0:30,95,2017-01-10,FINAL KARNIVAL KEWANGAN BM,0,Karnival Kewangan 2017\n\nKarnival yang bertem...,2025-11-04 11:25:20,False
219,P2d9kJ8pBLE,@AFFIN,0:30,229,2017-01-09,KARNIVAL KEWANGAN 2017,2,Karnival yang bertemakan ‚ÄúUtamakan Keperluan K...,2025-11-04 11:25:40,False
220,mVr-5QEAlKg,@AFFIN,0:15,226,2016-12-08,OMG RETURNS! BIGGER THAN EVER CAMPAIGN,2,From 1 December 2016 till 30 June 2017,2025-11-04 11:26:00,True


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,SPZ4q1CnxnE,@AFFIN,@RisNandar-c9h,3 months ago,AFFIN ‚ù§ ‚ù§,0
1,PXGs538p-rQ,@AFFIN,@fitxgtgg4158,2 months ago,Mslhnya klau nak tunjuk qr untuk pay cmna,0
2,PXGs538p-rQ,@AFFIN,@nadiahamizah6119,4 months ago,affin dh ada qr pay ke sekarang?,0
3,PXGs538p-rQ,@AFFIN,@fadhalphard788,22 hours ago,Menyusahkan..dh la nk kn upgrade telefon plak....,0
4,wNDPH4Fawao,@AFFIN,@kysuperfunchannel6725,6 months ago,Gempak raya,0
575,xfZhmmGzdcM,@AFFIN,@tommeltom480,5 years ago,Payalah nak transfer ke akaun lain,0
576,xfZhmmGzdcM,@AFFIN,@alangzentong1146,6 years ago,Makin menyusahkan la sistem baru.....terus tak...,0
577,H_RyYb-eAbg,@AFFIN,@izzatothman4532,6 months ago,"Affin logo really really look baphomet symbol,...",0
578,sjxElAhdzek,@AFFIN,@norhisyam785,3 years ago,Macam mana nak tukar nombor telefon debit card...,1
579,mVr-5QEAlKg,@AFFIN,@dewiiiffqwff4herPC,7 months ago,"[‚Ä¢,,,,,,,‚Ä¢] ‚ù§",0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,tFnzm8vhnbY,@maybankvideos,23:28,120,2025-10-21,Rich Conversations ‚Äì Episode 4: Going All-in ‚Äì...,3,In Episode 4 of the Rich Conversations podcast...,2025-11-03 12:46:33,False
1,-Jk6A2w2n4I,@maybankvideos,2:38,4841027,2025-10-16,"Maybank ""Light of Laughter"" (Deepavali 2025)",365,The true magic of Deepavali isn‚Äôt just in the ...,2025-11-03 12:46:51,True
2,oMYeO1P7eOw,@maybankvideos,2:39,169,2025-10-15,Maybank Investment Bank's Daily Technical Anal...,3,Broader market participation was seen as both ...,2025-11-03 12:47:14,
3,qnUyxwENgKY,@maybankvideos,2:53,178,2025-10-14,Maybank Investment Bank's Daily Technical Anal...,3,"Despite the weakness, the consumer sector stoo...",2025-11-03 12:47:45,
4,8CmxafBZ1EI,@maybankvideos,29:44,133,2025-10-13,Market Mondays | Trade War Redux?,2,00:00\n----- Intro\n00:07\n----- Report Links\...,2025-11-03 12:48:17,
1343,T4hGpN0uX-I&pp=0gcJCQYKAYcqIYzv,@maybankvideos,33:18,180,2022-01-09,ASEAN Speaks: Morning Briefing - 2022 Actionab...,3,The first Monday call of 2022 with our economi...,2025-11-08 04:04:58,
1344,RviPh0ysqLU,@maybankvideos,1:36,1464,2022-01-09,Maybank #SamaSamaLokal Stands Together with Lo...,34,We at Maybank are grateful to be a part of thi...,2025-11-08 04:05:25,
1345,fz_qcsnUhDI,@maybankvideos,0:16,32604,2022-01-06,Mohon kad MAE sekarang,15,Nak menikmati kadar pertukaran yang lebih baik...,2025-11-08 04:05:51,
1346,zFKNMlocphw,@maybankvideos,0:16,46626,2022-01-06,Apply for a MAE card now,14,Spending abroad or on international sites? Jus...,2025-11-08 04:06:19,
1347,IxAW4rJ5dKc,@maybankvideos,20:24,712,2022-01-05,2022 Year of the Tiger Hong Kong Market Outloo...,19,00:30\nHang Seng Index: Year 2022 forecast\n08...,2025-11-08 04:06:45,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,-Jk6A2w2n4I,@maybankvideos,@ChocdcRao,12 days ago,"‡ÆÖ‡Æµ‡Æ∞‡ØÅ ‡Æú‡Øã‡Æï‡Øç ‡Æö‡Øä‡Æ≤‡Øç‡Æ≤‡Æø, ‡Æ™‡Ææ‡Æ∞‡Øç‡Æµ‡Øà‡ÆØ‡Ææ‡Æ≥‡Æ∞‡Øç‡Æï‡Æ≥‡Øã‡Æü ‡Æµ‡Ææ‡Æ¥‡Øç‡Æï‡Øç‡Æï‡Øà ‡Æï‡Æ§‡Øà...",1
1,-Jk6A2w2n4I,@maybankvideos,@syameeraothman3891,2 weeks ago,i wish i can attend there to share all loves w...,1
2,-Jk6A2w2n4I,@maybankvideos,@ajeerahmonir389,2 weeks ago,Haha feels like I‚Äôm at the open house myself ....,2
3,-Jk6A2w2n4I,@maybankvideos,@karithiyannaidu504,2 weeks ago,"Fantastic slogan: Where there‚Äôs laughter, ther...",2
4,-Jk6A2w2n4I,@maybankvideos,@KavinashSankar,11 days ago,"Thank you Maybank,Murty brother and video crea...",0
443,_8anXt02Qck,@maybankvideos,@UchihaDestiny09,1 year ago,1,1
444,-4CgJAIdkMQ,@maybankvideos,@zakwanrahman9656,1 year ago,"Hye, I have apply for the unit trust. However,...",2
445,-4CgJAIdkMQ,@maybankvideos,@Discrete-trill9,1 year ago (edited),"already subscribed it, but,why they said there...",0
446,-4CgJAIdkMQ,@maybankvideos,@Ibnbakr93,1 year ago,This fund only one time subcription eh? Meanin...,1
447,RvDQf-LozPM,@maybankvideos,@muhammadfitri5679,1 year ago,Saya punya tak ada unit trust. Dah register ta...,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,tTDpKPTzlF8&pp=0gcJCQYKAYcqIYzv,@hongleongbankmy,26:14,105,2025-10-02,The Fed's Verdict: Navigating The Next Chapter,0,"In a world of constant market speculation, the...",2025-11-03 13:36:30,
1,qwHGks88pfA,@hongleongbankmy,0:30,224,2025-09-30,HLB Private Bank - Generations Ahead (30s),0,There is more wealth being generated now than ...,2025-11-03 13:37:00,
2,mX0YYhMF9_Q,@hongleongbankmy,1:15,2348373,2025-09-30,HLB Private Bank - Generations Ahead,0,There is more wealth being generated now than ...,2025-11-03 13:37:29,
3,KjN0bNrWS7A&pp=0gcJCQYKAYcqIYzv,@hongleongbankmy,0:49,222,2025-09-23,How to activate your HLB Card using HLB Connect,0,Just got your HLB Credit Card approved? This v...,2025-11-03 13:38:03,
4,PaRDMbh0vHI&pp=0gcJCQYKAYcqIYzv,@hongleongbankmy,0:45,2095757,2025-09-17,Akaun HLB Meezani-i,0,Buka Akaun HLB Meezani-i dan nikmati kehidupan...,2025-11-03 13:38:35,
192,s5H0u27N0_E,@hongleongbankmy,2:12,8881,2017-03-24,Hong Leong Bank CEO/GMD Domenic Fuda,0,"Interview with Domenic Fuda, Group Managing Di...",2025-11-04 13:22:12,
193,JBlEc5-spms,@hongleongbankmy,4:08,3665,2016-06-30,Sentiasa di Hati,0,"Sempena Hari Raya Aidilfitri yang mulia ini, g...",2025-11-04 13:22:52,
194,ey_HMSyeyUs,@hongleongbankmy,4:09,216628,2016-05-05,Si Jantung Hati,0,There's no love greater than mother‚Äôs love in ...,2025-11-04 13:23:33,
195,2ffpRrC1WSc,@hongleongbankmy,0:50,217007,2016-01-15,Huat The Fish with GSC Hong Leong Credit Card,20,Catch ' Huat The Fish' in cinemas this coming...,2025-11-04 13:24:13,False
196,BEqhK4SboRg,@hongleongbankmy,6:07,905383,2015-11-26,Amin Eh Mano By HLISB,0,The bliss and rewards of marriage in Islam are...,2025-11-04 13:24:33,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,BQWdCqcg7n0,@bangkokbankchannel,0:30,73082,2025-10-21,Bangkok Bank Merchant Pro,6,Bangkok Bank Merchant Pro \n‡πÅ‡∏≠‡∏õ‡∏£‡∏±‡∏ö‡∏ä‡∏≥‡∏£‡∏∞‡πÄ‡∏á‡∏¥‡∏ô‡∏™‡∏≥‡∏´‡∏£...,2025-11-03 14:26:50,
1,7S4W1OeKTlo,@bangkokbankchannel,0:45,254,2025-09-16,‡πÄ‡∏Å‡∏ô‡πÄ‡∏ü‡∏¥‡∏™‡∏ï‡πå ‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ó‡∏µ‡πà‡πÄ‡∏Ç‡πâ‡∏≤‡πÉ‡∏à‡∏ó‡∏∏‡∏Å‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï | ‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ä‡∏µ‡∏ß‡∏¥...,5,‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï‡∏™‡∏∞‡∏™‡∏°‡∏ó‡∏£‡∏±‡∏û‡∏¢‡πå ‡πÄ‡∏Å‡∏ô‡πÄ‡∏ü‡∏¥‡∏™‡∏ï‡πå ‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ó‡∏µ‡πà‡πÄ‡∏Ç‡πâ‡∏≤‡πÉ...,2025-11-03 14:27:24,
2,rQuDMLr5PKc,@bangkokbankchannel,0:30,270,2025-09-16,‡∏ä‡πà‡∏ß‡∏¢‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô‡∏Å‡∏≤‡∏£‡πÄ‡∏á‡∏¥‡∏ô ‡πÅ‡∏•‡∏∞‡∏Ñ‡∏∏‡πâ‡∏°‡∏Ñ‡∏£‡∏≠‡∏á 5 ‡πÇ‡∏£‡∏Ñ‡∏£‡πâ‡∏≤‡∏¢‡πÅ‡∏£‡∏á | ‡∏õ...,5,‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï‡∏™‡∏∞‡∏™‡∏°‡∏ó‡∏£‡∏±‡∏û‡∏¢‡πå ‡πÄ‡∏Å‡∏ô‡πÄ‡∏ü‡∏¥‡∏™‡∏ï‡πå ‡πÄ‡∏ã‡∏ü‡∏ß‡∏¥‡πà‡∏á‡∏™‡πå ‡πÅ‡∏≠‡∏ô‡∏î...,2025-11-03 14:27:55,
3,qGTU-un5puM,@bangkokbankchannel,1:00,592,2025-09-15,‡∏ä‡πà‡∏ß‡∏¢‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô‡∏Å‡∏≤‡∏£‡πÄ‡∏á‡∏¥‡∏ô ‡πÅ‡∏•‡∏∞‡∏Ñ‡∏∏‡πâ‡∏°‡∏Ñ‡∏£‡∏≠‡∏á 5 ‡πÇ‡∏£‡∏Ñ‡∏£‡πâ‡∏≤‡∏¢‡πÅ‡∏£‡∏á | ‡∏õ...,4,‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï‡∏™‡∏∞‡∏™‡∏°‡∏ó‡∏£‡∏±‡∏û‡∏¢‡πå ‡πÄ‡∏Å‡∏ô‡πÄ‡∏ü‡∏¥‡∏™‡∏ï‡πå ‡πÄ‡∏ã‡∏ü‡∏ß‡∏¥‡πà‡∏á‡∏™‡πå ‡πÅ‡∏≠‡∏ô‡∏î...,2025-11-03 14:28:25,
4,E8npH3K99DE&pp=0gcJCQYKAYcqIYzv,@bangkokbankchannel,0:30,663,2025-09-15,‡∏ã‡∏±‡∏õ‡∏û‡∏≠‡∏£‡πå‡∏ï‡∏ó‡∏∏‡∏Å‡∏Ñ‡∏ß‡∏≤‡∏°‡∏ù‡∏±‡∏ô ‡πÅ‡∏•‡∏∞‡∏Å‡∏≤‡∏£‡πÄ‡∏ï‡∏¥‡∏ö‡πÇ‡∏ï‡∏Ç‡∏≠‡∏á‡∏•‡∏π‡∏Å | ‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô...,6,‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï‡∏™‡∏∞‡∏™‡∏°‡∏ó‡∏£‡∏±‡∏û‡∏¢‡πå ‡πÄ‡∏Å‡∏ô‡πÄ‡∏ü‡∏¥‡∏™‡∏ï‡πå ‡πÄ‡∏ã‡∏ü‡∏ß‡∏¥‡πà‡∏á‡∏™‡πå & ‡πÅ‡∏Ñ...,2025-11-03 14:28:54,
93,x6TsChMFmH8,@bangkokbankchannel,6:18,550,2018-04-27,BBL Trade Expert Knowledge Sharing EP02 ‚Äì 6 ‡∏Ç‡πâ...,7,‡πÉ‡∏ô‡∏Å‡∏≤‡∏£‡∏ó‡∏≥‡∏ò‡∏∏‡∏£‡∏Å‡∏¥‡∏à‡∏™‡πà‡∏á‡∏≠‡∏≠‡∏Å‡∏™‡∏¥‡∏ô‡∏Ñ‡πâ‡∏≤‡πÑ‡∏õ‡∏¢‡∏±‡∏á‡∏ï‡πà‡∏≤‡∏á‡∏õ‡∏£‡∏∞‡πÄ‡∏ó‡∏®‡∏ô‡∏±‡πâ‡∏ô ‡∏Ñ...,2025-11-03 15:13:59,False
94,dzdj_ObwRhY,@bangkokbankchannel,2:05:05,7084,2018-01-29,"‡∏Ñ‡∏•‡∏¥‡∏õ‡∏á‡∏≤‡∏ô‡∏™‡∏±‡∏°‡∏°‡∏ô‡∏≤ ""‡∏Å‡∏≤‡∏£‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô‡∏à‡∏±‡∏î‡∏Å‡∏≤‡∏£‡∏ó‡∏£‡∏±‡∏û‡∏¢‡πå‡∏™‡∏¥‡∏ô‡πÉ‡∏ô‡∏ò‡∏∏‡∏£‡∏Å‡∏¥...",117,‡∏ö‡∏£‡∏£‡∏¢‡∏≤‡∏¢‡πÇ‡∏î‡∏¢ ‡∏≠‡∏≤‡∏à‡∏≤‡∏£‡∏¢‡πå‡∏ä‡∏¥‡∏ô‡∏†‡∏±‡∏ó‡∏£ ‡∏ß‡∏¥‡∏™‡∏∏‡∏ó‡∏ò‡∏¥‡πÅ‡∏û‡∏ó‡∏¢‡πå ‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∂‡∏Å‡∏©...,2025-11-03 15:14:18,
95,u6IyCJw-mQM&pp=0gcJCQYKAYcqIYzv,@bangkokbankchannel,5:05,10543,2017-11-10,BBL Trade Expert Knowledge Sharing - ‡∏û.‡∏£.‡∏ö. ‡∏®‡∏∏...,0,‡πÄ‡∏õ‡πá‡∏ô‡πÄ‡∏ß‡∏•‡∏≤‡∏Å‡∏ß‡πà‡∏≤ 90 ‡∏õ‡∏µ ‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∞‡πÄ‡∏ó‡∏®‡πÑ‡∏ó‡∏¢‡πÉ‡∏ä‡πâ‡∏û‡∏£‡∏∞‡∏£‡∏≤‡∏ä‡∏ö‡∏±‡∏ç‡∏ç‡∏±‡∏ï...,2025-11-03 15:15:01,
96,Vqh3svmw5h4,@bangkokbankchannel,1:20,5585,2016-10-20,BIZ iBanking - ‡∏ï‡∏≠‡∏ô‡∏ó‡∏µ‡πà 3 ‡∏î‡∏π‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡∏£‡∏π‡∏î‡∏ö‡∏±‡∏ï‡∏£...,0,No description has been added to this video,2025-11-03 15:15:33,
97,ZYsREViKRoc,@bangkokbankchannel,1:45,18635,2016-10-20,BIZ iBanking - ‡∏ï‡∏≠‡∏ô‡∏ó‡∏µ‡πà 2 ‡πÇ‡∏≠‡∏ô‡∏ó‡∏±‡∏ô‡πÉ‡∏à ‡πÑ‡∏õ‡∏ó‡∏±‡πà‡∏ß‡πÇ‡∏•‡∏Å ‡πÇ‡∏≠‡∏ô...,0,No description has been added to this video,2025-11-03 15:16:03,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,hcf_6_g-t1c&pp=0gcJCQYKAYcqIYzv,@bangkokbankchannel,@theinw1,3 years ago,‡∏ï‡∏≠‡∏ô‡∏ô‡∏µ‡πâ Update ‡πÄ‡∏õ‡πá‡∏ô Customs Trader Portal ‡πÅ‡∏•‡πâ‡∏ß‡∏£...,0
0,hcf_6_g-t1c&pp=0gcJCQYKAYcqIYzv,@bangkokbankchannel,@theinw1,3 years ago,‡∏ï‡∏≠‡∏ô‡∏ô‡∏µ‡πâ Update ‡πÄ‡∏õ‡πá‡∏ô Customs Trader Portal ‡πÅ‡∏•‡πâ‡∏ß‡∏£...,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,D-ovCsB7Y50,@uob,3:42,166870,2025-10-23,Tilly Birds - White Pills | UOB Afterhours,18,"In celebration of our 90th anniversary, UOB Af...",2025-11-03 15:16:38,True
1,JICJTROmB9M&pp=0gcJCQYKAYcqIYzv,@uob,2:13,91,2025-10-23,90 Years and Beyond | Right By Our Customers f...,2,Supporting our customers through different lif...,2025-11-03 15:17:05,False
2,cDMfGtU6ROc&pp=0gcJCQYKAYcqIYzv,@uob,5:17,703,2025-10-16,Tilly Birds - Until Then (‡∏ñ‡πâ‡∏≤‡πÄ‡∏£‡∏≤‡πÄ‡∏à‡∏≠‡∏Å‡∏±‡∏ô‡∏≠‡∏µ‡∏Å) | U...,43,"In celebration of our 90th anniversary, UOB Af...",2025-11-03 15:17:27,True
3,dLbUmPCSnlo,@uob,2:00,160,2025-10-13,Fred Chin on Global Supply Chain Shifts | Gate...,1,"Fred Chin, our Head of Group Wholesale Banking...",2025-11-03 15:17:51,False
4,sFUnaalcnNc&pp=0gcJCQYKAYcqIYzv,@uob,3:53,499846,2025-10-09,Tilly Birds - Never a Waste of Time | UOB Afte...,108,"In celebration of our 90th anniversary, UOB Af...",2025-11-03 15:18:14,True
1205,ZhRH84MCtLk,@uob,1:50,1271,2015-08-03,BlackRock‚Äôs views on the Implications of Risin...,0,BlackRock shares the potential implications of...,2025-11-08 05:04:31,False
1206,CDbE5GhhqWU,@uob,1:00,773,2015-07-12,UOB Income Builder - What is your Dream? (Recap),0,What's your dream?\n\nDo you have a plan?\n\nL...,2025-11-08 05:04:47,False
1207,iezgyN2pWmw,@uob,1:00,869,2015-07-12,UOB Income Builder - What is your Dream? (Swee...,0,What's your dream?\n\nDo you have a plan?\n\nL...,2025-11-08 05:05:03,False
1208,A3Dm7UFcx9s,@uob,1:00,1015,2015-07-12,UOB Income Builder - What is your Dream? (Stroll),0,What's your dream?\n\nDo you have a plan?\n\nL...,2025-11-08 05:05:20,False
1209,gA0hQtE6TL4,@uob,1:00,4616,2015-03-04,UOB Business App,0,An app for small businesses providing one-stop...,2025-11-08 05:05:36,True


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,poster_OP,comment_age,full_comment,like_count
0,D-ovCsB7Y50,@uob,@Moon-iv1xy,7 days ago,so excited for the new album!!,0
1,D-ovCsB7Y50,@uob,@Misnomer_alan,9 days ago,‚ù§ ‚ù§ ‚ù§ ‚ù§,0
2,cDMfGtU6ROc&pp=0gcJCQYKAYcqIYzv,@uob,@linarrow1275,2 weeks ago,A song that will never get tiresome,0
3,cDMfGtU6ROc&pp=0gcJCQYKAYcqIYzv,@uob,@JunyaoZhu-g1d,2 weeks ago,So healing,0
4,cDMfGtU6ROc&pp=0gcJCQYKAYcqIYzv,@uob,@Misnomer_alan,2 weeks ago,Love this ‚ù§ ‚ù§ ‚ù§,0
525,4tA8wtLaui4,@uob,@lamanblog,9 years ago (edited),Mind anyone can share what song is this (Remem...,0
526,4tA8wtLaui4,@uob,@blakesmith7151,7 years ago,Fiiirrrsstt,0
527,4tA8wtLaui4,@uob,@jonaerakua1,8 years ago,"Thing is, you can inherit dad's skin AND mom's...",0
528,4tA8wtLaui4,@uob,@JokeRQuas,7 years ago,.,0
529,gA0hQtE6TL4,@uob,@floristdahlins8520,9 years ago,",",0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,JQ4mfx8jFQ4,@MUFGBankChannel,2:18,426.0,2025-10-23,„Äå‰∏âËè±ÔºµÔº¶Ôº™ÈäÄË°å„Äç„Ç¢„Éó„É™Êìç‰ΩúÊñπÊ≥ï„ÄÄ„Ç´„Éº„Éâ„ÉªÈÄöÂ∏≥„ÉªÂç∞Èëë„ÅÆÁ¥õÂ§±„ÇÑÂÜçÁô∫Ë°åÊâãÁ∂ö„Åç„ÅØ„Ç¢„Éó„É™„ÅßÁ∞°Âçò„Äê‰∏âËè±...,0.0,„Çπ„Éû„Éº„Éà„Éï„Ç©„É≥„Ç¢„Éó„É™„Äå‰∏âËè±ÔºµÔº¶Ôº™ÈäÄË°å„Äç„Åß„Ç≠„É£„ÉÉ„Ç∑„É•„Ç´„Éº„Éâ„ÇÑÈÄöÂ∏≥„ÄÅÂç∞Èëë„Å´Èñ¢„Åô„ÇãÁ¥õÂ§±„ÅÆ„ÅäÂ±ä„Åë„ÇÑÂÜç...,2025-11-03 15:57:15,
1,dBFpxf79fs0,@MUFGBankChannel,1:12,1641.0,2025-07-06,ÊÆãÈ´òË®ºÊòéÊõ∏„ÅØPDF„Å™„Çâ„Ç¢„Éó„É™„ÅßÂç≥ÊôÇÁô∫Ë°åÔºÜÊâãÊï∞ÊñôÁÑ°ÊñôÔºÅ„Äê‰∏âËè±UFJÈäÄË°åÂÖ¨Âºè„Äë,0.0,‚òÜ„Åì„ÅÆÂïÜÂìÅ„ÅÆHP„ÇÑ„Ç≠„É£„É≥„Éö„Éº„É≥ÊÉÖÂ†±‚òÜ\n„Çπ„Éû„Éº„Éà„Éï„Ç©„É≥„Ç¢„Éó„É™„Äå„Åã„Çì„Åü„ÇìÊâãÁ∂ö„Ç¢„Éó„É™„Äç \nÊÆãÈ´ò...,2025-11-03 15:57:44,
2,XQspWed5lsY,@MUFGBankChannel,2:00,3439.0,2025-06-15,„Äê„Ç®„É†„ÉÉ„Éà„Äë„Ç≥„É≥„Çª„Éó„ÉàMOVIE „Äê‰∏âËè±UFJÈäÄË°åÂÖ¨Âºè„Äë,0.0,„ÅäÈáë„ÅÆ„ÅÇ„Çå„Åì„Çå„ÄÅ„Åæ„Çã„Å£„Å®„ÄÇ„Äå„Ç®„É†„ÉÉ„Éà„Äç\n\n„ÅäÈáë„Çí„Å§„Åã„ÅÜ„ÄÅ„Åü„ÇÅ„Çã„ÄÅ„Åµ„ÇÑ„Åô„ÄÅ„Åù„Åó„Å¶Ê¨°‰∏ñ‰ª£„Å´Áπã...,2025-11-03 15:58:15,
3,HMWHmNmM0TI,@MUFGBankChannel,0:15,12733.0,2025-06-15,„Äå„Ç¢„Éó„É™1„Å§„Åß„Ç¢„ÇØ„Çª„Çπ„ÄçÁØá„Äê‰∏âËè±UFJÈäÄË°åÂÖ¨Âºè„Äë,0.0,‰∏âËè±UFJÈäÄË°å„Å™„Çâ\n„Ç¢„Éó„É™1„Å§„Åß„ÅÑ„Çç„Çì„Å™ÈáëËûç„Çµ„Éº„Éì„Çπ„Å´ÔºÅ\n\n‚ÄªÊú¨Á∑®ÂæåÂçä„Å´Ë°®Á§∫„ÅÆ„ÄåÊñ∞Ë¶èÂè£...,2025-11-03 15:58:45,
4,3PeHEunv8Ts,@MUFGBankChannel,2:41,2035.0,2025-06-02,NISAÂà∂Â∫¶„ÅÆ„Éù„Ç§„É≥„Éà„Äê‰∏âËè±UFJÈäÄË°åÂÖ¨Âºè„Äë,0.0,2024Âπ¥1Êúà„Çà„Çä„ÄÅNISAÂà∂Â∫¶„ÅØ„Çà„ÇäÈ≠ÖÂäõÁöÑ„Å™Âà∂Â∫¶„Å´Â§â„Çè„Çä„Åæ„Åó„Åü„ÄÇ\nÊú¨ÂãïÁîª„Åß„ÅØ„ÄÅNISAÂà∂...,2025-11-03 15:59:16,
91,3MmwCVxXWks&pp=0gcJCQYKAYcqIYzv,@MUFGBankChannel,1:26,3658.0,2020-12-27,„Äé„ÇÑ„Çä„Åü„ÅÑ„Åì„Å®„Åå„ÅÇ„Çä„Åæ„Åô„Äè„Çµ„Çπ„ÉÜ„Ç§„Éä„Éñ„É´„Éï„Ç°„Ç§„Éä„É≥„ÇπÁ∑®„Äê‰∏âËè±UFJÈäÄË°åÂÖ¨Âºè„Äë,0.0,‚Üì‚ÜìË°åÂì°ÈÅî„ÅÆ„ÄåÁßÅ„Åü„Å°„ÅÆ„ÇÑ„Çä„Åü„ÅÑ„Åì„Å®„Äç„ÇíÊñ∞ÂçíÊé°Áî®„Éõ„Éº„É†„Éö„Éº„Ç∏„Å´Ë®òËºâ„Åó„Å¶„Åä„Çä„Åæ„Åô„ÅÆ„ÅßÊòØÈùû„ÅîË¶ß„Åè„Å†...,2025-11-03 16:46:58,
92,xhX81v6p64k,@MUFGBankChannel,1:37,8666.0,2020-12-27,„Äé„ÇÑ„Çä„Åü„ÅÑ„Åì„Å®„Åå„ÅÇ„Çä„Åæ„Åô„Äè„Éó„É≠„Ç∏„Çß„ÇØ„Éà„Éï„Ç°„Ç§„Éä„É≥„ÇπÁ∑®„Äê‰∏âËè±UFJÈäÄË°åÂÖ¨Âºè„Äë,0.0,‚Üì‚ÜìË°åÂì°ÈÅî„ÅÆ„ÄåÁßÅ„Åü„Å°„ÅÆ„ÇÑ„Çä„Åü„ÅÑ„Åì„Å®„Äç„ÇíÊñ∞ÂçíÊé°Áî®„Éõ„Éº„É†„Éö„Éº„Ç∏„Å´Ë®òËºâ„Åó„Å¶„Åä„Çä„Åæ„Åô„ÅÆ„ÅßÊòØÈùû„ÅîË¶ß„Åè„Å†...,2025-11-03 16:47:31,
93,SPUo_4hOa5w,@MUFGBankChannel,1:33,3271.0,2020-12-27,„Äé„ÇÑ„Çä„Åü„ÅÑ„Åì„Å®„Åå„ÅÇ„Çä„Åæ„Åô„ÄèÊàêÈï∑Áî£Ê•≠ÊîØÊè¥ÂÆ§Á∑®„Äê‰∏âËè±UFJÈäÄË°åÂÖ¨Âºè„Äë,0.0,‚Üì‚ÜìË°åÂì°ÈÅî„ÅÆ„ÄåÁßÅ„Åü„Å°„ÅÆ„ÇÑ„Çä„Åü„ÅÑ„Åì„Å®„Äç„ÇíÊñ∞ÂçíÊé°Áî®„Éõ„Éº„É†„Éö„Éº„Ç∏„Å´Ë®òËºâ„Åó„Å¶„Åä„Çä„Åæ„Åô„ÅÆ„ÅßÊòØÈùû„ÅîË¶ß„Åè„Å†...,2025-11-03 16:48:02,
94,Fll6MebIf8I,@MUFGBankChannel,2:30,6420.0,2020-03-30,„ÄåÈÅ∫Áî£Êï¥ÁêÜÊ•≠Âãô„Äê„Çè„Åã„Å°ÊÑõ„Äë„ÄÄ„ÅîÁ¥π‰ªã„ÄçÁØá,0.0,No description has been added to this video,2025-11-03 16:48:31,
95,tew6Ly8NDcc,@MUFGBankChannel,4:50,3859.0,2019-08-12,ÂãïÁîª„ÅßÂ≠¶„Å∂Ë≥áÁî£ÈÅãÁî®„Äå„Å§„Åø„Åü„Å¶ÊäïË≥á„Å´„Çà„Çç„Åó„Åè„Äç,0.0,No description has been added to this video,2025-11-03 16:49:01,


Comments:

['@channelocbc.pkl', '@AllianceBankMY.pkl', '@labanquedunmondequichange.pkl', '@PublicBankGroup.pkl', '@CIMBGroupHoldingsBhd.pkl', '@smbcgroup.pkl', '@BankofChinaManila.pkl', '@GXBank.pkl', '@Citi.pkl', '@HSBC_MY.pkl', '@standardchartered.pkl', '@rhbgroup.pkl', '@jpmorgan.pkl', '@MizuhoAmericas.pkl', '@AmBankTV.pkl', '@BankofAmerica.pkl', '@MyBoostApp.pkl', '@DeutscheBank.pkl', '@AFFIN.pkl', '@maybankvideos.pkl', '@hongleongbankmy.pkl', '.ipynb_checkpoints', '@bangkokbankchannel.pkl', '@uob.pkl', '@MUFGBankChannel.pkl']


In [12]:
COLLATE_MM.groupby('Youtube_Handle')['Youtube_Handle'].count()

Youtube_Handle
@AFFIN                         221
@AllianceBankMY                356
@AmBankTV                      362
@BankofAmerica                 102
@BankofChinaManila               8
@CIMBGroupHoldingsBhd          468
@Citi                         1663
@DeutscheBank                 1919
@GXBank                         33
@HSBC_MY                       188
@MUFGBankChannel                96
@MizuhoAmericas                154
@MyBoostApp                    350
@PublicBankGroup               206
@bangkokbankchannel             98
@channelocbc                   824
@hongleongbankmy               197
@jpmorgan                     1282
@labanquedunmondequichange    2579
@maybankvideos                1348
@rhbgroup                      561
@smbcgroup                      21
@standardchartered            1663
@uob                          1210
Name: Youtube_Handle, dtype: int64

In [13]:
COLLATE_COMMENT[COLLATE_COMMENT.Youtube_Handle.isna()]

Unnamed: 0,Youtube_Video_IDs,poster_OP,comment_age,full_comment,like_count,Youtube_Handle
0,FmQz15WJkWE,@tinkerlee-r9z,3 weeks ago,BECAREFUL WHAT YOU WISH FOR - CIRCUS ???!...,0,
1,zb3oKcLv1yA,@kayanemahberkah,1 month ago,kak cara pindahin ocbc mobile dari hp lama ke ...,0,
2,Y6GQJUyjDO4,@VikkiNesspara-f2u,1 month ago,happy 60th birthday Singapore üá∏üá¨üá∏üá¨üá∏üá¨Ô∏èüá∏üá¨Ô∏èÔ∏èüá∏üá¨Ô∏èüá∏üá¨...,0,
3,GGbcl2AcSBU,@RisNandar-c9h,2 months ago,OCBC ‚ù§ ‚ù§,0,
4,meamip7zfzI,@BozuBoyYT,11 days ago,The person doing the voice over is so good!,2,
5,j-UzkGGiAGg,@RisNandar-c9h,2 months ago,OCBC ‚ù§ ‚ù§,0,
6,KsrYGzbc2GA,@RisNandar-c9h,3 months ago,OCBC ‚ù§ ‚ù§,0,
7,gwrlQkXp7qk,@dc1211,3 months ago,This is touching and inspiring.,0,
8,j7JD5Qp-c5c,@RisNandar-c9h,4 months ago,OCBC ‚ù§ ‚ù§,1,
9,aqpc1usDDsY,@RoyShouriMustango,4 months ago,No one cares,0,


In [14]:
with open('./Data/COLLATE/COMMENT.pkl', 'wb') as f:
    pickle.dump(COLLATE_COMMENT, f)

```python
import os
import re
import pickle
import pandas as pd
from tqdm import tqdm
import pyarrow as pa
import pyarrow.parquet as pq

# --- Your memoization text-based files ---
memoized_channels = RW.read_txt_into_list('memo_database_scrape')
memoized_IDs = RW.read_txt_into_list('memo_VideoID_scrape')

directory = r"./Data/Database/"

def append_to_parquet(df, path):
    """Append or create parquet file efficiently."""
    table = pa.Table.from_pandas(df)
    if os.path.exists(path):
        with pq.ParquetWriter(path, table.schema, compression='snappy', use_dictionary=True) as writer:
            writer.write_table(table)
    else:
        pq.write_table(table, path, compression='snappy', use_dictionary=True)


with open("./Data/memo_database_scrape.txt", "a") as writer:
    for e_pickle in os.listdir(directory):
        if e_pickle not in memoized_channels:
            match = re.search(r"^(@\w+)\.parquet", e_pickle)
            if not match:
                continue

            YT_handle = match.group(1)
            print(f"Starting the process for {YT_handle}")
            fullpath = os.path.join(directory, match.group(0))

            df = pd.read_parquet(fullpath)
            print(f"{YT_handle} has {df.shape[0]} video_IDs")

            # --- Define parquet paths ---
            MM_temp_path = f'./Data/Lists/MM_temp/{YT_handle}.parquet'
            COMMENT_temp_path = f'./Data/Lists/COMMENT_temp/{YT_handle}.parquet'

            with open("./Data/memo_VideoID_scrape.txt", "a") as small_writer:
                for idx, row_dict in tqdm(df[:240].to_dict(orient='index').items()):
                    vid_id = row_dict['Youtube_Video_IDs']
                    if vid_id in memoized_IDs:
                        print(f"Skipping {vid_id}, already memoized")
                        continue

                    print(f"Currently scraping for {vid_id}")
                    a, b = scrape_into_2_databases(url_ID=vid_id)
                    MM_df_oneline, COMMENT_df = cast_datatypes(a, b)
                    
                    # Add identifying columns
                    MM_df_oneline.insert(0, 'Youtube_Handle', YT_handle)
                    MM_df_oneline.insert(0, 'Youtube_Video_IDs', vid_id)
                    COMMENT_df.insert(0, 'Youtube_Video_IDs', vid_id)
                
                    display(MM_df_oneline)
                    display(COMMENT_df)

                    # --- Append to temp parquet ---
                    append_to_parquet(MM_df_oneline, MM_temp_path)
                    append_to_parquet(COMMENT_df, COMMENT_temp_path)

                    # Update memo
                    small_writer.write(f"{vid_id}\n")
                    small_writer.flush()
                    memoized_IDs.append(vid_id)

            # --- Post-processing final merge ---
            print(f"Performing post-processing for {YT_handle}")

            # Concatenate temp parquet into final database folder
            final_MM_path = f'./Data/MM_database/{YT_handle}.parquet'
            final_COMMENT_path = f'./Data/COMMENT_database/{YT_handle}.parquet'

            # Merge existing temp parquet files into a clean final version
            if os.path.exists(MM_temp_path):
                temp_MM_df = pd.read_parquet(MM_temp_path)
                # temp_MM_df['is_commented'] = temp_MM_df['is_commented'].astype(str)
                temp_MM_df.to_parquet(final_MM_path, compression='snappy', index=False)
            if os.path.exists(COMMENT_temp_path):
                temp_COMMENT_df = pd.read_parquet(COMMENT_temp_path)
                temp_COMMENT_df.to_parquet(final_COMMENT_path, compression='snappy', index=False)

            # Update channel memo
            writer.write(f"{e_pickle}\n")
            writer.flush()
            memoized_channels.append(e_pickle)

        else:
            print(f"Skipping {e_pickle}, already memoized")
```

## Migration script

In [122]:
import pandas as pd
import pickle
from pathlib import Path
def safe_type_coercion(df):
    for col in df.columns:
        series = df[col]
        # Skip columns that are already datetimes
        if pd.api.types.is_datetime64_any_dtype(series):
            continue
        
        # Try detecting datetime-like object columns (as strings)
        if series.dtype == object:
            try:
                parsed = pd.to_datetime(series, errors='raise', utc=False)
                # Only treat as datetime if a large majority parsed successfully
                if parsed.notna().mean() > 0.9:
                    df[col] = parsed
                    continue
            except Exception:
                pass

        # Try numeric conversion
        try:
            df[col] = pd.to_numeric(series)
            continue
        except Exception:
            pass

        # Fallback: convert to string
        try:
            df[col] = series.astype(str)
        except Exception:
            pass
    
    return df

def convert_pickle_to_parquet(pkl_path):
    try:
        with open(pkl_path, "rb") as f:
            data = pickle.load(f)
    except Exception as e:
        print(f"‚ùå Failed to load {pkl_path}: {e}")
        return

    parquet_path = Path(pkl_path).with_suffix(".parquet")

    # --- Normalize into DataFrame ---
    if isinstance(data, pd.DataFrame):
        df = data
    elif isinstance(data, list):
        if len(data) > 0 and isinstance(data[0], dict):
            df = pd.DataFrame(data)
        else:
            df = pd.DataFrame({"values": data})
    elif isinstance(data, dict):
        df = pd.DataFrame.from_dict(data, orient="index")
    else:
        print(f"‚ö†Ô∏è Skipping {pkl_path} ‚Äî not a tabular structure.")
        return

    # --- Coerce all columns to strings to avoid pyarrow dtype issues ---
    df = safe_type_coercion(df)

    # --- Write Parquet safely ---
    try:
        df.to_parquet(parquet_path, compression="snappy", index=False)
        print(f"‚úÖ Converted {pkl_path} ‚Üí {parquet_path}")
    except Exception as e:
        print(f"‚ùå Failed to write parquet for {pkl_path}: {e}")

# --- MAIN LOOP ---
for root in root_dirs:
    for file in os.listdir(root):
        if file.endswith(".pkl"):
            fullpath = os.path.join(root, file)
            convert_pickle_to_parquet(fullpath)

  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetim

‚úÖ Converted ./Data/Database/@channelocbc.pkl ‚Üí Data/Database/@channelocbc.parquet
‚úÖ Converted ./Data/Database/@AllianceBankMY.pkl ‚Üí Data/Database/@AllianceBankMY.parquet
‚úÖ Converted ./Data/Database/@labanquedunmondequichange.pkl ‚Üí Data/Database/@labanquedunmondequichange.parquet
‚úÖ Converted ./Data/Database/@PublicBankGroup.pkl ‚Üí Data/Database/@PublicBankGroup.parquet
‚úÖ Converted ./Data/Database/@CIMBGroupHoldingsBhd.pkl ‚Üí Data/Database/@CIMBGroupHoldingsBhd.parquet
‚úÖ Converted ./Data/Database/@smbcgroup.pkl ‚Üí Data/Database/@smbcgroup.parquet
‚úÖ Converted ./Data/Database/@BankofChinaManila.pkl ‚Üí Data/Database/@BankofChinaManila.parquet
‚úÖ Converted ./Data/Database/@GXBank.pkl ‚Üí Data/Database/@GXBank.parquet
‚úÖ Converted ./Data/Database/@Citi.pkl ‚Üí Data/Database/@Citi.parquet
‚úÖ Converted ./Data/Database/@HSBC_MY.pkl ‚Üí Data/Database/@HSBC_MY.parquet
‚úÖ Converted ./Data/Database/@standardchartered.pkl ‚Üí Data/Database/@standardchartered.parquet
‚úÖ Co

  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetime(series, errors='raise', utc=False)
  parsed = pd.to_datetim

## Sanity check - migration successful

In [72]:
import pandas as pd

# df_old = pickle.load(open("./Data/MM_database/@AFFIN.pkl", "rb"))
df_new = pd.read_parquet("./Data/MM_database/@AFFIN.parquet")

list_temp_MM_df = pd.read_parquet(r"./Data/Lists/MM_temp/@AFFIN.parquet")
list_temp_MM_df
# print(df_.columns.symmetric_difference(df_new.columns))

0      {'Youtube_Video_IDs': 'RMZdBov0FrQ', 'Youtube_...
1      {'Youtube_Video_IDs': 'Hhi_ERnrv1g', 'Youtube_...
2      {'Youtube_Video_IDs': 't8UCJJafTd0', 'Youtube_...
3      {'Youtube_Video_IDs': 'PyuTTP4C12c', 'Youtube_...
4      {'Youtube_Video_IDs': 'SPZ4q1CnxnE', 'Youtube_...
                             ...                        
216    {'Youtube_Video_IDs': 'raaESaRta4k', 'Youtube_...
217    {'Youtube_Video_IDs': 'UyWEmdaX2EI', 'Youtube_...
218    {'Youtube_Video_IDs': 'PjBmn4J4z4E', 'Youtube_...
219    {'Youtube_Video_IDs': 'P2d9kJ8pBLE', 'Youtube_...
220    {'Youtube_Video_IDs': 'mVr-5QEAlKg', 'Youtube_...
Name: 0, Length: 221, dtype: object

In [54]:
df_new

Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,RMZdBov0FrQ,@AFFIN,1:00,220,2025-08-29,"AFFIN Borneo Credit Card/-i - Unlimited, the B...",5,"üå¥‚ú® Hello Sabah & Sarawak!\nYour card, your per...",2025-10-30 18:08:36,False
1,Hhi_ERnrv1g,@AFFIN,1:17,127,2025-08-29,Detik Niaga TV3 : Kerjasama Strategik antara A...,3,"A memorable event with Marriott International,...",2025-10-30 18:08:54,False
2,t8UCJJafTd0,@AFFIN,0:52,111,2025-08-26,ASTON MARTIN X AFFIN,1,The road is yours üöò‚ú®\n\nEnjoy exclusive owners...,2025-10-30 18:09:10,False
3,PyuTTP4C12c,@AFFIN,2:22,85,2025-08-13,Event Highlight AFFIN BizChat KL 2025,3,AFFIN SME BizChat 2025 in Kuala Lumpur was an ...,2025-10-30 18:09:28,False
4,SPZ4q1CnxnE,@AFFIN,1:14,97,2025-07-17,AFFIN SME BizChat KL 2025 - Teaser,10,"The highly anticipated flagship business talk,...",2025-10-30 18:09:47,True
...,...,...,...,...,...,...,...,...,...,...
216,raaESaRta4k,@AFFIN,0:31,126,2021-02-26,Photo Challenge Contest,1,Don‚Äôt miss this opportunity! Campaign starts n...,2025-11-01 01:26:26,False
217,UyWEmdaX2EI,@AFFIN,0:30,444,2017-01-12,AFFINBANK Group - Karnival Kewangan 2017,3,Karnival Kewangan 2017 at PWTC,2025-11-01 01:26:42,False
218,PjBmn4J4z4E,@AFFIN,0:30,95,2017-01-10,FINAL KARNIVAL KEWANGAN BM,0,Karnival Kewangan 2017\n\nKarnival yang bertem...,2025-11-01 01:26:59,False
219,P2d9kJ8pBLE,@AFFIN,0:30,229,2017-01-09,KARNIVAL KEWANGAN 2017,2,Karnival yang bertemakan ‚ÄúUtamakan Keperluan K...,2025-11-01 01:27:15,False


## Parquet Collate

In [15]:
## Temp folders
MM_temp_dir = fr'./Data/Lists/MM_temp/'
MM_temp_contents = os.listdir(MM_temp_dir)
COMMENT_temp_dir = fr"./Data/Lists/COMMENT_temp/"
COMMENT_temp_contents = os.listdir(COMMENT_temp_dir)

# Pure MM and Contents
MM_dir = r'./Data/MM_database/'
MM_contents = os.listdir(r'./Data/MM_database/')
COMMENT_dir = r'./Data/COMMENT_database/'
COMMENT_contents = os.listdir(r'./Data/COMMENT_database/')


print(MM_contents)

COLLATE_MM = pd.DataFrame()
COLLATE_COMMENT = pd.DataFrame()
for file in MM_contents:
    match = re.search(pattern=r"^[^\.].*\.parquet$", string=file)
   
    if match:
        print('Metadata:\n')
        # Read the Parquet file into a pandas DataFrame
        MM_df = pd.read_parquet(os.path.normpath(os.path.join(MM_dir, match.group(0))))
        COLLATE_MM = pd.concat([COLLATE_MM, MM_df], axis='index')
        display(pd.concat([MM_df.head(), MM_df.tail()]))

        print('Comments:\n')
        COMMENT_df = pd.read_parquet(os.path.normpath(os.path.join(COMMENT_dir, match.group(0))))
        COLLATE_COMMENT = pd.concat([COLLATE_COMMENT, COMMENT_df], axis='index')
        display(pd.concat([COMMENT_df.head(), COMMENT_df.tail()]))


COLLATE_MM.reset_index(inplace=True, drop=True)
COLLATE_MM['is_commented'] = COLLATE_MM['is_commented'].astype(str)
COLLATE_MM['video_length'] = COLLATE_MM['video_length'].astype(str)
COLLATE_COMMENT.reset_index(inplace=True, drop=True)

# Write into COLLATE folder
COLLATE_MM.to_parquet(r"./Data/COLLATE/MM.parquet", compression="snappy", index=False)
COLLATE_COMMENT.to_parquet(r"./Data/COLLATE/COMMENT.parquet", compression="snappy", index=False)

['@uob.parquet', '@AFFIN.parquet', '@smbcgroup.parquet', '@channelocbc.parquet', '@CIMBGroupHoldingsBhd.parquet', '@PublicBankGroup.parquet', '@MizuhoAmericas.parquet', '@HSBC_MY.parquet', '@AmBankTV.parquet', '@rhbgroup.parquet', '@BankofChinaManila.parquet', '@MUFGBankChannel.parquet', '@AllianceBankMY.parquet', '@maybankvideos.parquet', '@BankofAmerica.parquet', '.ipynb_checkpoints', '@standardchartered.parquet', '@GXBank.parquet', '@bangkokbankchannel.parquet', '@hongleongbankmy.parquet', '@labanquedunmondequichange.parquet', '@Citi.parquet', '@jpmorgan.parquet', '@DeutscheBank.parquet', '@MyBoostApp.parquet']
Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,Y0b3KoxDmc8,@uob,2:05,61177,2023-10-02,Be Cyber Savvy: The Interceptor,29,Saw a great deal online? A tempting ad may jus...,2025-11-02 19:33:31,False
0,Y0b3KoxDmc8,@uob,2:05,61177,2023-10-02,Be Cyber Savvy: The Interceptor,29,Saw a great deal online? A tempting ad may jus...,2025-11-02 19:33:31,False


Comments:



Unnamed: 0,Youtube_Video_IDs


Metadata:



Unnamed: 0,0
0,"{'Youtube_Video_IDs': 'RMZdBov0FrQ', 'Youtube_..."
1,"{'Youtube_Video_IDs': 'Hhi_ERnrv1g', 'Youtube_..."
2,"{'Youtube_Video_IDs': 't8UCJJafTd0', 'Youtube_..."
3,"{'Youtube_Video_IDs': 'PyuTTP4C12c', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'SPZ4q1CnxnE', 'Youtube_..."
216,"{'Youtube_Video_IDs': 'raaESaRta4k', 'Youtube_..."
217,"{'Youtube_Video_IDs': 'UyWEmdaX2EI', 'Youtube_..."
218,"{'Youtube_Video_IDs': 'PjBmn4J4z4E', 'Youtube_..."
219,"{'Youtube_Video_IDs': 'P2d9kJ8pBLE', 'Youtube_..."
220,"{'Youtube_Video_IDs': 'mVr-5QEAlKg', 'Youtube_..."


Comments:



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,94,95,96,97,98,99,100,101,102,103
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,"{'Youtube_Video_IDs': 'SPZ4q1CnxnE', 'poster_O...",,,,,,,,,,...,,,,,,,,,,
216,,,,,,,,,,,...,,,,,,,,,,
217,,,,,,,,,,,...,,,,,,,,,,
218,,,,,,,,,,,...,,,,,,,,,,
219,,,,,,,,,,,...,,,,,,,,,,
220,"{'Youtube_Video_IDs': 'mVr-5QEAlKg', 'poster_O...",,,,,,,,,,...,,,,,,,,,,


Metadata:



Unnamed: 0,0
0,"{'Youtube_Video_IDs': 'bT1Gjcx6RKo', 'Youtube_..."
1,{'Youtube_Video_IDs': 'eOfTS3U4CXU&pp=0gcJCQYK...
2,"{'Youtube_Video_IDs': 'lzXASjvBPHQ', 'Youtube_..."
3,"{'Youtube_Video_IDs': 'mF8Pj9dSReQ', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'lvFzbTdxFMU', 'Youtube_..."
16,"{'Youtube_Video_IDs': 'Ayc4HcUzOdg', 'Youtube_..."
17,"{'Youtube_Video_IDs': 'VEGEIcyCE2I', 'Youtube_..."
18,"{'Youtube_Video_IDs': '1HuDjrAA86Q', 'Youtube_..."
19,{'Youtube_Video_IDs': 'mmcslYJQbdo&pp=0gcJCQYK...
20,"{'Youtube_Video_IDs': 'iEEKCOzAO0A', 'Youtube_..."


Comments:



Unnamed: 0,0,1
0,,
1,,
2,,
3,"{'Youtube_Video_IDs': 'mF8Pj9dSReQ', 'poster_O...",
4,,
16,"{'Youtube_Video_IDs': 'Ayc4HcUzOdg', 'poster_O...",
17,"{'Youtube_Video_IDs': 'VEGEIcyCE2I', 'poster_O...","{'Youtube_Video_IDs': 'VEGEIcyCE2I', 'poster_O..."
18,,
19,,
20,,


Metadata:



Unnamed: 0,0
0,{'Youtube_Video_IDs': 'LiFxYCLSvb4&pp=0gcJCQYK...
1,"{'Youtube_Video_IDs': 'N9SBRNEAnhI', 'Youtube_..."
2,"{'Youtube_Video_IDs': 'PLqT8W6JNDY', 'Youtube_..."
3,"{'Youtube_Video_IDs': '4UBrl92xXOU', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'cYtlA9Q7UAo', 'Youtube_..."
819,{'Youtube_Video_IDs': 'D9i2yjiz4Bg&pp=0gcJCQYK...
820,"{'Youtube_Video_IDs': 'ReOUO3ue-QU', 'Youtube_..."
821,{'Youtube_Video_IDs': 'gu0POYcmhS4&pp=0gcJCQYK...
822,"{'Youtube_Video_IDs': 'FdrMOpdw9Xk', 'Youtube_..."
823,"{'Youtube_Video_IDs': '8BrHpAFnEGM', 'Youtube_..."


Comments:



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,33,34,35,36,37,38,39,40,41,42
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
819,,,,,,,,,,,...,,,,,,,,,,
820,,,,,,,,,,,...,,,,,,,,,,
821,,,,,,,,,,,...,,,,,,,,,,
822,,,,,,,,,,,...,,,,,,,,,,
823,,,,,,,,,,,...,,,,,,,,,,


Metadata:



Unnamed: 0,0
0,"{'Youtube_Video_IDs': 'YueA7snetLY', 'Youtube_..."
1,"{'Youtube_Video_IDs': 'VBiJ-hHYeo0', 'Youtube_..."
2,{'Youtube_Video_IDs': 'rhPIKZgQVZo&pp=0gcJCQYK...
3,"{'Youtube_Video_IDs': 'bhGsQRa3ESY', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'M6knHi8fOyw', 'Youtube_..."
463,"{'Youtube_Video_IDs': 'EkCY842PEtY', 'Youtube_..."
464,"{'Youtube_Video_IDs': 'J7OPAMPrCic', 'Youtube_..."
465,"{'Youtube_Video_IDs': 'Zgb8agaWNGI', 'Youtube_..."
466,"{'Youtube_Video_IDs': 'p53JpWvCOTE', 'Youtube_..."
467,"{'Youtube_Video_IDs': '_dnozEeK5KA', 'Youtube_..."


Comments:



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,600,601,602,603,604,605,606,607,608,609
0,"{'Youtube_Video_IDs': 'YueA7snetLY', 'poster_O...",,,,,,,,,,...,,,,,,,,,,
1,"{'Youtube_Video_IDs': 'VBiJ-hHYeo0', 'poster_O...","{'Youtube_Video_IDs': 'VBiJ-hHYeo0', 'poster_O...","{'Youtube_Video_IDs': 'VBiJ-hHYeo0', 'poster_O...","{'Youtube_Video_IDs': 'VBiJ-hHYeo0', 'poster_O...","{'Youtube_Video_IDs': 'VBiJ-hHYeo0', 'poster_O...","{'Youtube_Video_IDs': 'VBiJ-hHYeo0', 'poster_O...","{'Youtube_Video_IDs': 'VBiJ-hHYeo0', 'poster_O...","{'Youtube_Video_IDs': 'VBiJ-hHYeo0', 'poster_O...","{'Youtube_Video_IDs': 'VBiJ-hHYeo0', 'poster_O...",,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,"{'Youtube_Video_IDs': 'M6knHi8fOyw', 'poster_O...","{'Youtube_Video_IDs': 'M6knHi8fOyw', 'poster_O...","{'Youtube_Video_IDs': 'M6knHi8fOyw', 'poster_O...",,,,,,,,...,,,,,,,,,,
463,,,,,,,,,,,...,,,,,,,,,,
464,"{'Youtube_Video_IDs': 'J7OPAMPrCic', 'poster_O...","{'Youtube_Video_IDs': 'J7OPAMPrCic', 'poster_O...","{'Youtube_Video_IDs': 'J7OPAMPrCic', 'poster_O...","{'Youtube_Video_IDs': 'J7OPAMPrCic', 'poster_O...","{'Youtube_Video_IDs': 'J7OPAMPrCic', 'poster_O...",,,,,,...,,,,,,,,,,
465,,,,,,,,,,,...,,,,,,,,,,
466,"{'Youtube_Video_IDs': 'p53JpWvCOTE', 'poster_O...","{'Youtube_Video_IDs': 'p53JpWvCOTE', 'poster_O...",,,,,,,,,...,,,,,,,,,,
467,"{'Youtube_Video_IDs': '_dnozEeK5KA', 'poster_O...",,,,,,,,,,...,,,,,,,,,,


Metadata:



Unnamed: 0,0
0,"{'Youtube_Video_IDs': '7GJgJUQ42Y4', 'Youtube_..."
1,{'Youtube_Video_IDs': 'TnqmRE2S_Qs&pp=0gcJCQYK...
2,{'Youtube_Video_IDs': 'zidZFWsoWnU&pp=0gcJCQYK...
3,"{'Youtube_Video_IDs': 'zdE_EIWCG1c', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'tpUXALvJU-k', 'Youtube_..."
201,"{'Youtube_Video_IDs': 'NAoquLRGc-s', 'Youtube_..."
202,"{'Youtube_Video_IDs': 'akLn0Qvev3M', 'Youtube_..."
203,"{'Youtube_Video_IDs': 'YtU2KRuBVTw', 'Youtube_..."
204,"{'Youtube_Video_IDs': 'Kkgg1-kY4HE', 'Youtube_..."
205,"{'Youtube_Video_IDs': 'm2rnVUVCQgA', 'Youtube_..."


Comments:



Metadata:



Unnamed: 0,0
0,"{'Youtube_Video_IDs': '2zdhfXjpmyg', 'Youtube_..."
1,"{'Youtube_Video_IDs': '6tZ-JvQAXc8', 'Youtube_..."
2,{'Youtube_Video_IDs': 'N4CQkUvz7Ns&pp=0gcJCQYK...
3,"{'Youtube_Video_IDs': '2DMHEx5FSgE', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'S9dPCscz9NI', 'Youtube_..."
149,"{'Youtube_Video_IDs': 'UKSZhnUQ_bQ', 'Youtube_..."
150,"{'Youtube_Video_IDs': 'lZIfmkw7-3k', 'Youtube_..."
151,"{'Youtube_Video_IDs': 'r0m46N0SDxM', 'Youtube_..."
152,"{'Youtube_Video_IDs': 'my-V4BAXJcE', 'Youtube_..."
153,"{'Youtube_Video_IDs': 'ooUJtzDnMaY', 'Youtube_..."


Comments:



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,,,,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,
149,"{'Youtube_Video_IDs': 'UKSZhnUQ_bQ', 'poster_O...",,,,,,,,,,,,,,,,
150,,,,,,,,,,,,,,,,,
151,,,,,,,,,,,,,,,,,
152,,,,,,,,,,,,,,,,,
153,,,,,,,,,,,,,,,,,


Metadata:



Unnamed: 0,0
0,"{'Youtube_Video_IDs': 'm3aIQVfe53I', 'Youtube_..."
1,"{'Youtube_Video_IDs': 'iDKbYSvixE8', 'Youtube_..."
2,"{'Youtube_Video_IDs': 'kTcYf6uTnTU', 'Youtube_..."
3,"{'Youtube_Video_IDs': 'ZM9CollqVig', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'iBm1EQ1GmmI', 'Youtube_..."
183,"{'Youtube_Video_IDs': 'mD6gzw7f-PI', 'Youtube_..."
184,"{'Youtube_Video_IDs': 'oGYZ-QzVYDo', 'Youtube_..."
185,"{'Youtube_Video_IDs': 'SA7bBUWBntQ', 'Youtube_..."
186,"{'Youtube_Video_IDs': 'Ok82jOqMrn8', 'Youtube_..."
187,"{'Youtube_Video_IDs': 'aYVk7ngRmeY', 'Youtube_..."


Comments:



Unnamed: 0,0,1,2,3
0,,,,
1,,,,
2,,,,
3,,,,
4,,,,
183,,,,
184,,,,
185,,,,
186,,,,
187,,,,


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,2R3sBSehfbk,@AmBankTV,0:43,97,2022-06-14,Father's Day Promo - AmBank Cards Deals!,2,Experience the best AmBank Cards deals just in...,2025-11-02 19:49:23,False
0,2R3sBSehfbk,@AmBankTV,0:43,97,2022-06-14,Father's Day Promo - AmBank Cards Deals!,2,Experience the best AmBank Cards deals just in...,2025-11-02 19:49:23,False


Comments:



Unnamed: 0,Youtube_Video_IDs


Metadata:



Unnamed: 0,0
0,"{'Youtube_Video_IDs': 'tpc7cx78S1M', 'Youtube_..."
1,"{'Youtube_Video_IDs': 'DkjlVuSsZYg', 'Youtube_..."
2,"{'Youtube_Video_IDs': 'm4qipvOn5Ro', 'Youtube_..."
3,"{'Youtube_Video_IDs': 'zUvlFzCHtbE', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'K1h_yRWEMOE', 'Youtube_..."
494,{'Youtube_Video_IDs': 'vJ1Ct1KBqhc&pp=0gcJCQYK...
495,"{'Youtube_Video_IDs': '46h7sv8bcfY', 'Youtube_..."
496,"{'Youtube_Video_IDs': 'J3RixGAqcxw', 'Youtube_..."
497,"{'Youtube_Video_IDs': 'DP1IL2uLGHo', 'Youtube_..."
498,"{'Youtube_Video_IDs': '51xgorUeyUM', 'Youtube_..."


Comments:



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,212,213,214,215,216,217,218,219,220,221
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,"{'Youtube_Video_IDs': 'm4qipvOn5Ro', 'poster_O...","{'Youtube_Video_IDs': 'm4qipvOn5Ro', 'poster_O...",,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
494,,,,,,,,,,,...,,,,,,,,,,
495,,,,,,,,,,,...,,,,,,,,,,
496,,,,,,,,,,,...,,,,,,,,,,
497,,,,,,,,,,,...,,,,,,,,,,
498,,,,,,,,,,,...,,,,,,,,,,


Metadata:



Unnamed: 0,0
0,"{'Youtube_Video_IDs': '7_N4eJHUL8o', 'Youtube_..."
1,"{'Youtube_Video_IDs': 'rCdcnf12qKk', 'Youtube_..."
2,"{'Youtube_Video_IDs': 'sblTHxeSq1o', 'Youtube_..."
3,"{'Youtube_Video_IDs': 'yh_Ffddf91c', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'acFjC8zSOAk', 'Youtube_..."
3,"{'Youtube_Video_IDs': 'yh_Ffddf91c', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'acFjC8zSOAk', 'Youtube_..."
5,"{'Youtube_Video_IDs': '7lYfdJUZ-ZE', 'Youtube_..."
6,"{'Youtube_Video_IDs': 'HCD2h-lyvoc', 'Youtube_..."
7,"{'Youtube_Video_IDs': '2qNvymbihoU', 'Youtube_..."


Comments:



Metadata:



Unnamed: 0,0
0,"{'Youtube_Video_IDs': 'JQ4mfx8jFQ4', 'Youtube_..."
1,"{'Youtube_Video_IDs': 'dBFpxf79fs0', 'Youtube_..."
2,"{'Youtube_Video_IDs': 'XQspWed5lsY', 'Youtube_..."
3,"{'Youtube_Video_IDs': 'HMWHmNmM0TI', 'Youtube_..."
4,"{'Youtube_Video_IDs': '3PeHEunv8Ts', 'Youtube_..."
91,{'Youtube_Video_IDs': '3MmwCVxXWks&pp=0gcJCQYK...
92,"{'Youtube_Video_IDs': 'xhX81v6p64k', 'Youtube_..."
93,"{'Youtube_Video_IDs': 'SPUo_4hOa5w', 'Youtube_..."
94,"{'Youtube_Video_IDs': 'Fll6MebIf8I', 'Youtube_..."
95,"{'Youtube_Video_IDs': 'tew6Ly8NDcc', 'Youtube_..."


Comments:



Metadata:



Unnamed: 0,0
0,"{'Youtube_Video_IDs': '_lMssbgr7gE', 'Youtube_..."
1,"{'Youtube_Video_IDs': 'V7Vd7Rs65Rw', 'Youtube_..."
2,"{'Youtube_Video_IDs': 'xFBxisokdQ8', 'Youtube_..."
3,"{'Youtube_Video_IDs': 'IzJxsEIvNr0', 'Youtube_..."
4,"{'Youtube_Video_IDs': 'gom4PuhRcow', 'Youtube_..."
351,"{'Youtube_Video_IDs': 'lGsm4jqhpnI', 'Youtube_..."
352,"{'Youtube_Video_IDs': 'O3CCSjG0NBI', 'Youtube_..."
353,"{'Youtube_Video_IDs': '7pB_uwqYGoA', 'Youtube_..."
354,"{'Youtube_Video_IDs': 'Bai4FR7Oc_g', 'Youtube_..."
355,"{'Youtube_Video_IDs': '-BwwqLthVVg', 'Youtube_..."


Comments:



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,72,73,74,75,76,77,78,79,80,81
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
351,"{'Youtube_Video_IDs': 'lGsm4jqhpnI', 'poster_O...",,,,,,,,,,...,,,,,,,,,,
352,"{'Youtube_Video_IDs': 'O3CCSjG0NBI', 'poster_O...","{'Youtube_Video_IDs': 'O3CCSjG0NBI', 'poster_O...",,,,,,,,,...,,,,,,,,,,
353,,,,,,,,,,,...,,,,,,,,,,
354,,,,,,,,,,,...,,,,,,,,,,
355,,,,,,,,,,,...,,,,,,,,,,


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,tFnzm8vhnbY,@maybankvideos,23:28,119,2025-10-21,Rich Conversations ‚Äì Episode 4: Going All-in ‚Äì...,3,In Episode 4 of the Rich Conversations podcast...,2025-10-30 20:26:51,0.0
1,-Jk6A2w2n4I,@maybankvideos,2:38,4841420,2025-10-16,"Maybank ""Light of Laughter"" (Deepavali 2025)",365,The true magic of Deepavali isn‚Äôt just in the ...,2025-10-30 20:27:08,1.0
2,oMYeO1P7eOw,@maybankvideos,2:39,168,2025-10-15,Maybank Investment Bank's Daily Technical Anal...,3,Broader market participation was seen as both ...,2025-10-30 20:27:31,
3,qnUyxwENgKY,@maybankvideos,2:53,177,2021-02-26,Maybank Investment Bank's Daily Technical Anal...,3,"Despite the weakness, the consumer sector stoo...",2025-10-30 20:28:09,
4,8CmxafBZ1EI,@maybankvideos,29:44,132,2025-10-13,Market Mondays | Trade War Redux?,2,00:00\n----- Intro\n00:07\n----- Report Links\...,2025-10-30 20:28:46,
225,R1blJ72RRvc,@maybankvideos,4:01,101,2025-03-06,Maybank Investment Bank's Daily Technical Anal...,2,Market participants now await the US Non-Farm ...,2025-11-01 01:37:09,
226,10yZ8xLVvEU,@maybankvideos,3:36,81,2021-02-26,Maybank Investment Bank's Daily Technical Anal...,4,Market sentiment remains cautious as the trade...,2025-11-01 01:37:46,
227,_rQQSwRJerI,@maybankvideos,3:43,82,2025-03-04,Maybank Investment Bank's Daily Technical Anal...,3,"While many sectors remain in a downtrend, the ...",2025-11-01 01:38:23,
228,8WmPwaCNTAY,@maybankvideos,3:55,90,2025-03-03,Maybank Investment Bank's Daily Technical Anal...,2,The financial sector stands out as one of the ...,2025-11-01 01:38:59,
229,aMEHTPhJO80,@maybankvideos,,1030492,2021-02-26,Maybank Islamic Ramadan 2025 | Luangkan Masa U...,2,Ramadan ini marilah kita meluangkan masa untuk...,2025-11-01 01:39:37,


Comments:



Unnamed: 0,Youtube_Video_IDs,poster_OP,comment_age,full_comment,like_count
0,-Jk6A2w2n4I,@ChocdcRao,8 days ago,"‡ÆÖ‡Æµ‡Æ∞‡ØÅ ‡Æú‡Øã‡Æï‡Øç ‡Æö‡Øä‡Æ≤‡Øç‡Æ≤‡Æø, ‡Æ™‡Ææ‡Æ∞‡Øç‡Æµ‡Øà‡ÆØ‡Ææ‡Æ≥‡Æ∞‡Øç‡Æï‡Æ≥‡Øã‡Æü ‡Æµ‡Ææ‡Æ¥‡Øç‡Æï‡Øç‡Æï‡Øà ‡Æï‡Æ§‡Øà...",1
1,-Jk6A2w2n4I,@devitr9512,3 days ago,Great Happy Deepavali and you have done a grea...,0
2,-Jk6A2w2n4I,@KavinashSankar,8 days ago,"Thank you Maybank,Murty brother and video crea...",0
3,-Jk6A2w2n4I,@syameeraothman3891,10 days ago,i wish i can attend there to share all loves w...,1
4,-Jk6A2w2n4I,@listahir,3 days ago,Simple yet enjoying and fulfilling content ‚ù§,0
138,YwwRH5U2w38,@MUHAMMADFAQIFPUTRABINFAIRULFIZ,6 months ago,temberang,0
139,YwwRH5U2w38,@YgffhhBbbhh,7 months ago,Sibuk je buat iklan raya ..duit str lambat nk ...,1
140,BGCbgbDpL60,@mbut666,7 months ago,top up dana gagal mulu anjir,0
141,OS5yfjVD0z8,@MohdnorhishamEzuha,7 months ago,Mr mohd norhisham bin ezuha presiden,0
142,MqUx-SIwc_I,@MohdnorhishamEzuha,7 months ago,Vfg masuk mybank,1


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,xkGvTGF8iJ4,@BankofAmerica,2:40,278,2025-10-10,A Competitive Approach to Winning Talent,5,"In today‚Äôs hiring landscape, candidates are as...",2025-10-30 02:52:23,
1,Va-9TQFZTjI,@BankofAmerica,2:57,156,2025-10-09,Supporting Women in the Workplace,3,Are employer benefits really as comprehensive ...,2025-10-30 02:53:00,
2,X9_ZAaMIVxI,@BankofAmerica,1:31,388,2025-10-08,Welcome to Bank of America Workplace Benefits‚Ñ¢,3,"The future is changing, and so are employee be...",2025-10-30 02:53:37,
3,KceZziVoytk,@BankofAmerica,1:21,260,2025-10-01,Does saving money make you feel intimidated? W...,4,Take the pressure off yourself and start small...,2025-10-30 02:54:14,
4,mrgsvmnXl0I,@BankofAmerica,1:25,188,2025-09-29,Understanding Credit and How it Can Help You,8,Get the basics about how credit works and how ...,2025-10-30 02:54:51,
97,2Ut7ASj2GUM,@BankofAmerica,1:48,4492,2014-09-19,Bank of America Consumer MBA Associate,22,Jerry Decembre talks about his experience in t...,2025-10-30 03:51:41,
98,bjEI1fmwXzs,@BankofAmerica,3:06,2565,2014-06-26,Spring 2014 Bank of America Small Business Own...,14,We are pleased to share the results of the spr...,2025-10-30 03:52:18,
99,hohd63syjZI,@BankofAmerica,5:04,9800,2013-12-12,Bank of America Ally program sends message of ...,84,"At Bank of America, we strive to help all empl...",2025-10-30 03:52:55,
100,3SQirS6MjiU,@BankofAmerica,4:55,2793,2012-09-07,Big Idea for Small Businesses: Key Business In...,11,"Small business expert and columnist, Steve Str...",2025-10-31 06:04:48,
101,gHeAsNpIcn8,@BankofAmerica,8:30,2999,2012-08-11,Bank of America tech exec talks to Institution...,5,Bank of America technology executive Cathy Bes...,2025-10-31 06:05:25,


Comments:



Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,QV08nJWwly4,@standardchartered,0:38,123,2025-10-17,Africa Public Sector Institutions Summit ‚Äì Fai...,2,"On 1 July 2025, we invited our African Public ...",2025-10-29 23:58:32,False
1,T2WEOqovnBk,@standardchartered,0:31,29,2025-10-17,Africa Public Sector Institutions Summit ‚Äì Dhi...,0,"On 1 July 2025, we invited our African Public ...",2025-10-29 23:58:49,False
2,rcgkVvXEpVQ,@standardchartered,0:39,32,2025-10-17,Africa Public Sector Institutions Summit ‚Äì Cha...,0,"On 1 July 2025, we invited our African Public ...",2025-10-29 23:59:06,False
3,3uMX74AleW4,@standardchartered,0:20,1209563,2025-10-16,Now's Your Time For Wealth - Signature CIO Funds,6,It‚Äôs the final quarter of the year and the per...,2025-10-29 23:59:23,False
4,QRsBKphWOOE,@standardchartered,1:49,56,2025-10-16,Global trade solutions to unlock growth,1,With our comprehensive suite of global trade s...,2025-10-29 23:59:40,False
495,3psHFvkcoNI,@standardchartered,0:13,909,2022-05-25,Standard Chartered Pakistan - Spend PKR 2000 a...,11,"Download the SC Mobile App today, spend PKR 20...",2025-11-01 05:53:40,True
496,4RVht-pV2yw,@standardchartered,50:36,460,2022-05-19,Riding transformational waves: how GCC's diver...,5,To reduce their over-reliance on oil and adapt...,2025-11-01 05:53:57,True
497,db9yuapNnMA,@standardchartered,50:49,395,2022-05-18,On the crest of a digital wave: how digital tr...,2,Many GCC countries have become early adopters ...,2025-11-01 05:54:13,False
498,a-G5Jy0DVQk,@standardchartered,50:24,463,2022-05-18,Watershed moments: will 2022 be the end of an ...,6,"Geopolitics, soaring gas prices, and supply ch...",2025-11-01 05:54:29,False
499,A9DBwJRrzqg,@standardchartered,1:00:26,508,2022-05-18,Oil and gas: how geo-political events are impa...,8,The global energy narrative has shifted signif...,2025-11-01 05:54:46,False


Comments:



Unnamed: 0,Youtube_Video_IDs,poster_OP,comment_age,full_comment,like_count
0,DRSiZZ90pIE,@AsjarAli-o8y,2 weeks ago,Mashallha subhanallha,1
1,DRSiZZ90pIE,@muhammadjamal4252,2 weeks ago,It‚Äôs not ‚Ä¶ why is the product linked to LIBOR,0
2,DRSiZZ90pIE,@TA-kz1jc,2 weeks ago,Sponsored by Standard Charted ?,0
3,DRSiZZ90pIE,@althea_is_smokin_hot,2 weeks ago,"Sir,islam is a heap of deceptions. Every day, ...",0
4,N8fFrxl9_aA,@artimall2714,3 weeks ago,Ohhh good üòØ ü§Ø üòä üëç,8
457,YK4C9Qc-bLg&pp=0gcJCQYKAYcqIYzv,@tramhonghong4244,3 years ago,So nice from Pakistan,0
458,3psHFvkcoNI,@habofficial6562,3 years ago,Terms and conditions kiya hain???,0
459,3psHFvkcoNI,@oppomobile2917,3 years ago,Sir abhi new account pe offer avil hai,0
460,4RVht-pV2yw,@786GAJ,3 years ago,Very informative some key points from Gautam i...,0
461,4RVht-pV2yw,@6rantwon9,3 years ago,you better get rid of those statues infront of...,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,5bu0SaFobcs,@GXBank,0:56,51,2025-10-20,GXBank - Product Experience,2,Everything else in life and business is instan...,2025-10-29 21:45:49,True
1,HmLEjPeomS0,@GXBank,2:24,207,2024-11-29,GX2.0 ICYMI Recap,3,This time last year we launched our bank - GXB...,2025-10-29 21:46:07,False
2,xSnNpsLTcks,@GXBank,42:19,666,2024-11-06,GX2.0 | Next Starts Now - Full Event Recording,8,Watch the full GX2.0 Event to learn about the ...,2025-10-29 21:46:24,True
3,g7Jzg7srT0U,@GXBank,0:36,1143,2024-11-05,GX Rewards,6,Why should banking be boring? Our upcoming GX ...,2025-10-29 21:46:41,True
4,YiSxB745GS4,@GXBank,1:01,361,2024-11-05,Thank you Malaysia for GX1.0,5,"Thank you, Malaysia, for a remarkable first ye...",2025-10-29 21:46:59,True
28,YQx1s-cd9S4&pp=0gcJCQYKAYcqIYzv,@GXBank,52:21,579,2023-12-25,GXBank Chillest How-To Guide: Stress Free Acco...,10,Skip the hassle of starting a regular bank acc...,2025-10-29 22:36:17,
29,4Iu3BJyPfnE,@GXBank,0:10,442,2023-12-06,GXBank: Security l We are safe and secure,2,No description has been added to this video,2025-10-29 22:36:54,False
30,qyOaQ37WaqQ,@GXBank,0:10,643,2023-12-06,GXBank: Grab Benefits l Unlock the best of Grab,5,No description has been added to this video,2025-10-29 22:37:11,False
31,H6ZOW3Z4g2o,@GXBank,0:15,5613126,2023-12-01,GXBank: Download Today!,5389,No secret handshakes required üòâ Just download\...,2025-10-29 22:37:27,False
32,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@GXBank,1:00,4779,2023-12-01,GXBank: Malaysia's First Digital Bank,49,"At GXBank, we believe all Malaysians deserve a...",2025-10-29 22:37:44,True


Comments:



Unnamed: 0,Youtube_Video_IDs,poster_OP,comment_age,full_comment,like_count
0,5bu0SaFobcs,@SieyyaOfficial,4 days ago,I reset my phone because full of memory... And...,0
1,5bu0SaFobcs,@SieyyaOfficial,4 days ago,"Hello GX Bank, I cant login to my account how ...",0
2,xSnNpsLTcks,@owhdanny5270,11 months ago,Xde yg menarik dah Dislike,0
3,xSnNpsLTcks,@14bqdonk,11 months ago,lolok üí®,0
4,g7Jzg7srT0U,@14bqdonk,11 months ago,When this start?,0
42,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@UchihaDestiny09,1 year ago,1 Thank You GX Bank,2
43,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@momopeace875,1 year ago,Halo spa2 tau tolong2 dlu phone aku hilng.. le...,1
44,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@muhdfadillah2343,1 year ago,how to log in through PC,1
45,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@kokhieng1772,1 year ago,gx bank can withdraw cash at atmÔºü,1
46,lkdLW3sKJtY&pp=0gcJCQYKAYcqIYzv,@ykj131,1 year ago,Warga asing boleh buka akaun tak?,1


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,BQWdCqcg7n0,@bangkokbankchannel,0:30,334,2025-10-21,Bangkok Bank Merchant Pro,3,Bangkok Bank Merchant Pro \n‡πÅ‡∏≠‡∏õ‡∏£‡∏±‡∏ö‡∏ä‡∏≥‡∏£‡∏∞‡πÄ‡∏á‡∏¥‡∏ô‡∏™‡∏≥‡∏´‡∏£...,2025-10-31 00:17:47,
1,7S4W1OeKTlo,@bangkokbankchannel,0:45,246,2025-09-16,‡πÄ‡∏Å‡∏ô‡πÄ‡∏ü‡∏¥‡∏™‡∏ï‡πå ‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ó‡∏µ‡πà‡πÄ‡∏Ç‡πâ‡∏≤‡πÉ‡∏à‡∏ó‡∏∏‡∏Å‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï | ‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ä‡∏µ‡∏ß‡∏¥...,5,‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï‡∏™‡∏∞‡∏™‡∏°‡∏ó‡∏£‡∏±‡∏û‡∏¢‡πå ‡πÄ‡∏Å‡∏ô‡πÄ‡∏ü‡∏¥‡∏™‡∏ï‡πå ‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ó‡∏µ‡πà‡πÄ‡∏Ç‡πâ‡∏≤‡πÉ...,2025-10-31 00:18:25,
2,rQuDMLr5PKc,@bangkokbankchannel,0:30,255,2025-09-16,‡∏ä‡πà‡∏ß‡∏¢‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô‡∏Å‡∏≤‡∏£‡πÄ‡∏á‡∏¥‡∏ô ‡πÅ‡∏•‡∏∞‡∏Ñ‡∏∏‡πâ‡∏°‡∏Ñ‡∏£‡∏≠‡∏á 5 ‡πÇ‡∏£‡∏Ñ‡∏£‡πâ‡∏≤‡∏¢‡πÅ‡∏£‡∏á | ‡∏õ...,5,‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï‡∏™‡∏∞‡∏™‡∏°‡∏ó‡∏£‡∏±‡∏û‡∏¢‡πå ‡πÄ‡∏Å‡∏ô‡πÄ‡∏ü‡∏¥‡∏™‡∏ï‡πå ‡πÄ‡∏ã‡∏ü‡∏ß‡∏¥‡πà‡∏á‡∏™‡πå ‡πÅ‡∏≠‡∏ô‡∏î...,2025-10-31 00:19:02,
3,qGTU-un5puM,@bangkokbankchannel,1:00,585,2021-02-26,‡∏ä‡πà‡∏ß‡∏¢‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô‡∏Å‡∏≤‡∏£‡πÄ‡∏á‡∏¥‡∏ô ‡πÅ‡∏•‡∏∞‡∏Ñ‡∏∏‡πâ‡∏°‡∏Ñ‡∏£‡∏≠‡∏á 5 ‡πÇ‡∏£‡∏Ñ‡∏£‡πâ‡∏≤‡∏¢‡πÅ‡∏£‡∏á | ‡∏õ...,4,‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï‡∏™‡∏∞‡∏™‡∏°‡∏ó‡∏£‡∏±‡∏û‡∏¢‡πå ‡πÄ‡∏Å‡∏ô‡πÄ‡∏ü‡∏¥‡∏™‡∏ï‡πå ‡πÄ‡∏ã‡∏ü‡∏ß‡∏¥‡πà‡∏á‡∏™‡πå ‡πÅ‡∏≠‡∏ô‡∏î...,2025-10-31 00:19:40,
4,E8npH3K99DE&pp=0gcJCQYKAYcqIYzv,@bangkokbankchannel,0:30,655,2025-09-15,‡∏ã‡∏±‡∏õ‡∏û‡∏≠‡∏£‡πå‡∏ï‡∏ó‡∏∏‡∏Å‡∏Ñ‡∏ß‡∏≤‡∏°‡∏ù‡∏±‡∏ô ‡πÅ‡∏•‡∏∞‡∏Å‡∏≤‡∏£‡πÄ‡∏ï‡∏¥‡∏ö‡πÇ‡∏ï‡∏Ç‡∏≠‡∏á‡∏•‡∏π‡∏Å | ‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô...,6,‡∏õ‡∏£‡∏∞‡∏Å‡∏±‡∏ô‡∏ä‡∏µ‡∏ß‡∏¥‡∏ï‡∏™‡∏∞‡∏™‡∏°‡∏ó‡∏£‡∏±‡∏û‡∏¢‡πå ‡πÄ‡∏Å‡∏ô‡πÄ‡∏ü‡∏¥‡∏™‡∏ï‡πå ‡πÄ‡∏ã‡∏ü‡∏ß‡∏¥‡πà‡∏á‡∏™‡πå & ‡πÅ‡∏Ñ...,2025-10-31 00:20:18,
93,x6TsChMFmH8,@bangkokbankchannel,6:18,549,2021-02-26,BBL Trade Expert Knowledge Sharing EP02 ‚Äì 6 ‡∏Ç‡πâ...,7,‡πÉ‡∏ô‡∏Å‡∏≤‡∏£‡∏ó‡∏≥‡∏ò‡∏∏‡∏£‡∏Å‡∏¥‡∏à‡∏™‡πà‡∏á‡∏≠‡∏≠‡∏Å‡∏™‡∏¥‡∏ô‡∏Ñ‡πâ‡∏≤‡πÑ‡∏õ‡∏¢‡∏±‡∏á‡∏ï‡πà‡∏≤‡∏á‡∏õ‡∏£‡∏∞‡πÄ‡∏ó‡∏®‡∏ô‡∏±‡πâ‡∏ô ‡∏Ñ...,2025-10-31 08:16:20,0.0
94,dzdj_ObwRhY,@bangkokbankchannel,2:05:05,7082,2018-01-29,"‡∏Ñ‡∏•‡∏¥‡∏õ‡∏á‡∏≤‡∏ô‡∏™‡∏±‡∏°‡∏°‡∏ô‡∏≤ ""‡∏Å‡∏≤‡∏£‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô‡∏à‡∏±‡∏î‡∏Å‡∏≤‡∏£‡∏ó‡∏£‡∏±‡∏û‡∏¢‡πå‡∏™‡∏¥‡∏ô‡πÉ‡∏ô‡∏ò‡∏∏‡∏£‡∏Å‡∏¥...",117,‡∏ö‡∏£‡∏£‡∏¢‡∏≤‡∏¢‡πÇ‡∏î‡∏¢ ‡∏≠‡∏≤‡∏à‡∏≤‡∏£‡∏¢‡πå‡∏ä‡∏¥‡∏ô‡∏†‡∏±‡∏ó‡∏£ ‡∏ß‡∏¥‡∏™‡∏∏‡∏ó‡∏ò‡∏¥‡πÅ‡∏û‡∏ó‡∏¢‡πå ‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∂‡∏Å‡∏©...,2025-10-31 08:16:37,
95,u6IyCJw-mQM&pp=0gcJCQYKAYcqIYzv,@bangkokbankchannel,5:05,10542,2017-11-10,BBL Trade Expert Knowledge Sharing - ‡∏û.‡∏£.‡∏ö. ‡∏®‡∏∏...,0,‡πÄ‡∏õ‡πá‡∏ô‡πÄ‡∏ß‡∏•‡∏≤‡∏Å‡∏ß‡πà‡∏≤ 90 ‡∏õ‡∏µ ‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∞‡πÄ‡∏ó‡∏®‡πÑ‡∏ó‡∏¢‡πÉ‡∏ä‡πâ‡∏û‡∏£‡∏∞‡∏£‡∏≤‡∏ä‡∏ö‡∏±‡∏ç‡∏ç‡∏±‡∏ï...,2025-10-31 08:17:15,
96,Vqh3svmw5h4,@bangkokbankchannel,1:20,5585,2016-10-20,BIZ iBanking - ‡∏ï‡∏≠‡∏ô‡∏ó‡∏µ‡πà 3 ‡∏î‡∏π‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡∏£‡∏π‡∏î‡∏ö‡∏±‡∏ï‡∏£...,0,No description has been added to this video,2025-10-31 08:17:52,
97,ZYsREViKRoc,@bangkokbankchannel,1:45,18630,2016-10-20,BIZ iBanking - ‡∏ï‡∏≠‡∏ô‡∏ó‡∏µ‡πà 2 ‡πÇ‡∏≠‡∏ô‡∏ó‡∏±‡∏ô‡πÉ‡∏à ‡πÑ‡∏õ‡∏ó‡∏±‡πà‡∏ß‡πÇ‡∏•‡∏Å ‡πÇ‡∏≠‡∏ô...,0,No description has been added to this video,2025-10-31 08:18:29,


Comments:



Unnamed: 0,Youtube_Video_IDs,poster_OP,comment_age,full_comment,like_count
0,hcf_6_g-t1c&pp=0gcJCQYKAYcqIYzv,@theinw1,3 years ago,‡∏ï‡∏≠‡∏ô‡∏ô‡∏µ‡πâ Update ‡πÄ‡∏õ‡πá‡∏ô Customs Trader Portal ‡πÅ‡∏•‡πâ‡∏ß‡∏£...,0
0,hcf_6_g-t1c&pp=0gcJCQYKAYcqIYzv,@theinw1,3 years ago,‡∏ï‡∏≠‡∏ô‡∏ô‡∏µ‡πâ Update ‡πÄ‡∏õ‡πá‡∏ô Customs Trader Portal ‡πÅ‡∏•‡πâ‡∏ß‡∏£...,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,tTDpKPTzlF8&pp=0gcJCQYKAYcqIYzv,@hongleongbankmy,26:14,100,2021-02-26,The Fed's Verdict: Navigating The Next Chapter,0,"In a world of constant market speculation, the...",2025-10-30 22:22:06,
1,qwHGks88pfA,@hongleongbankmy,0:30,212,2025-09-30,HLB Private Bank - Generations Ahead (30s),0,There is more wealth being generated now than ...,2025-10-30 22:22:43,
2,mX0YYhMF9_Q,@hongleongbankmy,,2113807,2021-02-26,HLB Private Bank - Generations Ahead,0,There is more wealth being generated now than ...,2025-10-30 22:23:08,
3,KjN0bNrWS7A&pp=0gcJCQYKAYcqIYzv,@hongleongbankmy,0:49,212,2025-09-23,How to activate your HLB Card using HLB Connect,0,Just got your HLB Credit Card approved? This v...,2025-10-30 22:23:46,
4,PaRDMbh0vHI&pp=0gcJCQYKAYcqIYzv,@hongleongbankmy,,2035447,2021-02-26,Akaun HLB Meezani-i,0,Buka Akaun HLB Meezani-i dan nikmati kehidupan...,2025-10-30 22:24:24,
192,s5H0u27N0_E,@hongleongbankmy,2:12,8881,2017-03-24,Hong Leong Bank CEO/GMD Domenic Fuda,0,"Interview with Domenic Fuda, Group Managing Di...",2025-10-31 00:15:02,
193,JBlEc5-spms,@hongleongbankmy,4:08,3665,2016-06-30,Sentiasa di Hati,0,"Sempena Hari Raya Aidilfitri yang mulia ini, g...",2025-10-31 00:15:39,
194,ey_HMSyeyUs,@hongleongbankmy,4:09,216627,2016-05-05,Si Jantung Hati,0,There's no love greater than mother‚Äôs love in ...,2025-10-31 00:16:16,
195,2ffpRrC1WSc,@hongleongbankmy,0:50,217007,2016-01-15,Huat The Fish with GSC Hong Leong Credit Card,20,Catch ' Huat The Fish' in cinemas this coming...,2025-10-31 00:16:53,0.0
196,BEqhK4SboRg,@hongleongbankmy,6:07,905383,2015-11-26,Amin Eh Mano By HLISB,0,The bliss and rewards of marriage in Islam are...,2025-10-31 00:17:10,


Comments:



Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,3S-E702D-c4,@labanquedunmondequichange,2025-11-02 00:15:00,712113,2025-10-24,Multi √©pargne MA,2,campagne essentielle epargne,2025-10-29 07:27:46,False
1,xMBJOEU29Zw,@labanquedunmondequichange,2025-11-02 00:30:00,652274,2025-10-13,"BNP Paribas - H√° 40 anos, uma gera√ß√£o em movim...",6,No description has been added to this video,2025-10-29 07:28:03,False
2,mdqMhRr_v3U,@labanquedunmondequichange,2025-11-02 16:13:00,593,2025-10-10,UnexpectedPeople #5 : S'engager en tant qu' ac...,1,"Dans ce cinqui√®me √©pisode, rencontrez Juergen ...",2025-10-29 07:28:19,False
3,PptbLdnUaoM,@labanquedunmondequichange,2025-11-02 19:18:00,197,2025-10-10,UnexpectedPeople #5: Getting involved as profe...,0,"In this fifth episode, meet Juergen Petrasch, ...",2025-10-29 07:28:36,False
4,duhODksusCA,@labanquedunmondequichange,2025-11-02 00:10:00,341316,2025-09-22,Banque au quotidien - Wero,3,"R√©alisez des virements rapides et gratuits, sa...",2025-10-29 07:28:52,False
95,rJ_Gdq4u5MA,@labanquedunmondequichange,2025-11-02 00:06:00,1083,2024-11-06,"Mon chat s√ªr x BNP Paribas, des virements inte...",1,"Avec mon chat s√ªr, mes virements sans frais m√™...",2025-10-29 19:57:56,True
96,rglNcTlUehM,@labanquedunmondequichange,2025-11-02 07:01:00,1535,2024-10-31,BNP Paribas au/in Canada | Driving Meaningful ...,5,üåü We are excited to share Episode 3 of our Dri...,2025-10-29 19:58:15,True
97,OLjB8-voojw,@labanquedunmondequichange,2025-11-02 05:28:00,2169,2024-10-30,BNP Paribas au/in Canada | Driving Meaningful ...,10,"üåü We are thrilled to share Episode 2, Part 2 o...",2025-10-29 19:58:34,False
98,l4A4Q_npdnE,@labanquedunmondequichange,2025-11-02 06:03:00,1016,2024-10-30,BNP Paribas au/in Canada | Driving Meaningful ...,1,"üåü Excited to launch Episode 2, Part 1 of our\n...",2025-10-29 19:58:51,Type 1 Error
99,LIWeW0q0lJo,@labanquedunmondequichange,2025-11-02 04:24:00,805,2024-10-30,BNP Paribas au/in Canada | Driving Meaningful ...,2,Excited to launch the first episode of our\n#l...,2025-10-29 19:59:08,False


Comments:



Unnamed: 0,Youtube_Video_IDs,poster_OP,comment_age,full_comment,like_count
0,oR2zsinBhFA,@BICLHQ1,1 month ago,üòä,1
1,oR2zsinBhFA,@BICLHQ1,1 month ago,üëç,0
2,oR2zsinBhFA,@BICLHQ1,1 month ago,üòä,0
3,i9fdbZ-L_hA,@Angelboy99,1 month ago,Mountain data centers is a good idea but any d...,0
4,aqpdI98wGBE,@Globieyt,1 month ago,Vous pouvvez m'ouvrir un compte svp,0
61,c8OS-SRJcVw,@GaetanBrunet-r2f,9 months ago,?????,0
62,0pRpmWQdKBQ,@leevivian1576,11 months ago,de cr ap ar üòÇ ‚ù§ ‚ù§ ‚ù§,0
63,GogQ5hINSjY&pp=0gcJCQYKAYcqIYzv,@gracielahuert,11 months ago,Buenas tardes disculpe por favor necesito de s...,0
64,rJ_Gdq4u5MA,@gracielahuert,11 months ago,Por favor ay√∫dame a contactar al Sr jean Laur...,0
65,rglNcTlUehM,@AbrahamAbrahamyan-u7q,11 months ago,Bnp Bonjour,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,8HOTIQPfVmo,@Citi,2:21,204.0,2025-10-23,Citi: Treasury and Finance Conference Singapor...,6.0,Citi's Treasury and Finance Conference (TFC) 2...,2025-10-29 22:38:01,False
1,wPtt16HaQJI,@Citi,0:58,35.0,2025-10-23,Citi: Treasury and Finance Conference Singapor...,2.0,At our recent Treasury and Finance Conference ...,2025-10-29 22:43:47,False
2,eB4Ga22J9WQ,@Citi,0:57,37.0,2025-10-23,Citi: Treasury and Finance Conference Singapor...,2.0,Watch the video to hear industry leaders' pers...,2025-10-29 22:44:04,False
3,MvhedwHIJ-g,@Citi,,78.0,2025-10-23,Citi: Treasury and Finance Conference Singapor...,1.0,"Join Johanna Chua, Citi's Head of Emerging Mar...",2025-10-29 22:44:21,False
4,x0rQ502qskc,@Citi,,65.0,2025-10-23,Citi: Treasury and Finance Conference Singapor...,3.0,"As market dynamics rapidly evolve, companies a...",2025-10-29 22:44:39,False
497,DFwLgjSMdoU,@Citi,25:03,3348.0,2022-03-25,Citi's 2022 Investor Day: Consumer Payments an...,21.0,"Anand Selvakesari, CEO of Personal Banking and...",2025-11-01 04:34:34,True
498,BIYFkBDpNVI,@Citi,25:34,6683.0,2022-03-25,Citi's 2022 Investor Day: Transformation,36.0,"Citi's Chief Administrative Officer, Karen Pee...",2025-11-01 04:34:50,Type 1 Error
499,oyoB3l-5dXE,@Citi,18:49,5040.0,2022-03-25,Citi's 2022 Investor Day: Institutional Client...,32.0,Paco Ybarra presented a deep dive into Citi's ...,2025-11-01 04:35:06,True
500,HbUHS1Kzz9I,@Citi,1:22,58.0,2021-02-26,Welcome to Good Things Happen,0.0,How do you make good things happen? We think a...,2025-11-01 04:35:22,False
501,id__zw_DwGI&pp=0gcJCQYKAYcqIYzv,@Citi,9:16,907.0,2022-03-15,Citi: Leading the Charge: Dayu Dara Permata,13.0,Leading the Charge is a series featuring women...,2025-11-01 04:35:39,True


Comments:



Unnamed: 0,Youtube_Video_IDs,poster_OP,comment_age,full_comment,like_count
0,5PWcyptUyBQ,@bobbybrown221-vh1,12 days ago,government shutdown is rigged long enough for ...,1
1,_nK_24F9sfM,@AbdulRehman-o3g8o,12 days ago,"Hello sir, I need to talk to you about somethi...",0
2,_nK_24F9sfM,@AbdulRehman-o3g8o,12 days ago,"Sir, I just need to talk to you for 5 minutes.",0
3,_nK_24F9sfM,@AbdulRehman-o3g8o,12 days ago,sir please help me,0
4,_nK_24F9sfM,@AbdulRehman-o3g8o,12 days ago,hlo,0
605,DFwLgjSMdoU,@byhisgrace_,1 year ago,Does Anand Selva run any WhatsApp group for st...,0
606,DFwLgjSMdoU,@janakiraman4824,3 years ago,C janakiraman Chief executive officer Dislike,0
607,oyoB3l-5dXE,@susanlandry7020,10 months ago,I believe I am the girl boy twins of the Smith...,0
608,id__zw_DwGI&pp=0gcJCQYKAYcqIYzv,@DeeplyConcerne,3 years ago,Stop cooperating with the aggressor country! I...,1
609,id__zw_DwGI&pp=0gcJCQYKAYcqIYzv,@Frostydoesthings2b,3 years ago,ü§≠ ü§≠ ü§≠ ü§≠ ü§≠ ü§≠ ü§≠ ü§≠ ü§≠,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,wxaej_sK_FI,@jpmorgan,1:37,487,2025-10-23,Why the World‚Äôs Top Institutional Investors Ch...,0,J.P. Morgan Securities Services delivers best-...,2025-10-30 00:56:57,
1,P0O7lbIsQQk,@jpmorgan,10:45,2717,2025-10-21,Investment Opportunities in Security & Tech Am...,0,How can investors navigate the potential chall...,2025-10-30 00:57:35,
2,uK3QAfCg8kk,@jpmorgan,17:41,492,2025-10-20,Trading Insights: Exploring trend-following st...,0,"In this episode, Martin Kallstr√∂m, CEO of Swed...",2025-10-30 00:57:59,
3,cij_EDnZ_K0,@jpmorgan,,422,2025-10-17,Inside Tech Stars 2025: From IPOs to defense tech,0,What‚Äôs driving record venture capital flows in...,2025-10-30 00:58:37,
4,SqN7D4tHoDY,@jpmorgan,14:08,615,2025-10-06,Trading Insights: US policy and the impact of ...,0,"In this episode, Eloise Goulder sits down with...",2025-10-30 00:59:14,
225,G3EREc8ZTrk,@jpmorgan,18:00,562,2024-10-29,How bond ETFs are shaping the trading landscape,0,As innovation in the fixed income landscape co...,2025-11-01 00:42:47,
226,DyDC7WHXLJg,@jpmorgan,2:08,2646,2024-10-25,Acquired Live at Chase Center: Presented by J....,0,Acquired hosts Ben Gilbert and David Rosenthal...,2025-11-01 00:43:25,
227,mviwMDbDWGA,@jpmorgan,13:57,393,2024-10-25,The surge in ETFs: A great growth story,0,What‚Äôs behind the ETF market‚Äôs dramatic growth...,2025-11-01 00:44:03,
228,2gXF3Yt20ao,@jpmorgan,24:27,281,2024-10-24,Tech Stars Conference: What's next for EMEA tech?,0,Get insights from the J.P. Morgan Tech Stars C...,2025-11-01 00:44:41,
229,QjnDnOv5a74,@jpmorgan,44:45,919,2021-02-26,How Bobbie CEO Laura Modi is Reducing Guilt an...,0,In this episode of the Women on the Move Podca...,2025-11-01 00:45:19,


Comments:



Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,E_xLZrttDVM,@DeutscheBank,4:39,128,2025-10-24,Phishing. Gesch√§ftsrisiko und Kostentreiber #E...,6,Entdecker J√ºrgen Schmitt trifft sich zum Thema...,2025-10-30 04:39:41,Type 1 Error
1,HSSp8POMUHY,@DeutscheBank,12:08,127,2025-10-23,PERSPEKTIVEN To Go ‚Äì der BoÃàrsenpodcast: Digit...,8,"Steigende Staatsverschuldung, Unsicherheit an ...",2025-10-30 04:39:58,False
2,EP7SYyVqdDs,@DeutscheBank,16:33,167,2025-10-21,We present Art:LIVE from Frieze London & Friez...,6,Catch all the highlights from this year's fair...,2025-10-30 04:40:14,False
3,-02nefEBp5s,@DeutscheBank,5:37,102,2025-10-21,Hier ist der Euro Stablecoin #ExpeditionFinance,7,Alles rund um den ersten von der BaFin zugelas...,2025-10-30 04:40:30,True
4,yPBxR0MnKXQ,@DeutscheBank,3:30,145,2025-10-17,Deutsche Bank Art & Culture presents - No√©mie ...,9,"üìπ Watch French visual artist, No√©mie Goudal (b...",2025-10-30 04:40:48,True
200,8zte-J2yxpE&pp=0gcJCQYKAYcqIYzv,@DeutscheBank,15:41,369,2021-02-26,Deutsche Immobilien nach dem Boom: Wohin geht ...,5,Jochen M√∂bert ist in seinem aktuellen Ausblick...,2025-10-31 21:27:04,
201,3_K1RvaMUHI,@DeutscheBank,20:30,264,2021-02-26,PERSPEKTIVEN To Go ‚Äì der BoÃàrsenpodcast: Ambit...,5,Die Berichtssaison beginnt: In den kommenden T...,2025-10-31 21:27:42,True
202,tJnKkIPRW8g,@DeutscheBank,12:26,195,2021-02-26,Endlich wieder mehr in der Tasche? Die Deutsch...,3,Kr√§ftige Gehaltssteigerungen und nachlassende ...,2025-10-31 21:27:58,
203,IyQpBAViyqU,@DeutscheBank,30:43,323,2021-02-26,Making Sense of Space: Role of AI in Space (Pa...,7,"In our series Making Sense of Space, we discus...",2025-10-31 21:28:23,
204,RC-wy3Cp9so,@DeutscheBank,13:04,427,2021-02-26,Hochspannung ums Stromnetz: Wer zahlt die 500-...,4,Deutschlands Strominfrastruktur rasch und effi...,2025-10-31 21:28:46,


Comments:



Unnamed: 0,Youtube_Video_IDs,poster_OP,comment_age,full_comment,like_count
0,-02nefEBp5s,@cybi2684,6 days ago (edited),"Haha... Stablecoin, ja genau. Der EURO wird se...",0
1,yPBxR0MnKXQ,@tormagnuslarsen9870,12 days ago,WTM World Trade Marketing WTM ‚Ñ¢,1
2,a7pwVINtKV8,@‰øäÈúñÈô≥-k7x,12 days ago (edited),ÊàëÊÑõÊàëÊòØÂæ∑ÂúãÈäÄË°åÁî∑Â•≥ËÉΩÊ∫ê‰πãÊòü‰Ω†‰∫∫ÁúüÂ•Ω ‚ù§,1
3,a7pwVINtKV8,@MayurMisra,11 days ago,"""Introducing dbX - The Future of Corpoarte Tre...",0
4,NgdCIqcCdRc,@Belisar505,12 days ago,Anfang der 80er Jahre war die letzte zyklische...,0
84,3Y_apyNmUs0,@michi2525,1 year ago,I am lucky tow hav you üòä,1
85,zUyVd36XiFs,@stefanbarnikow7069,1 year ago,Sch√∂n das es diesen Podcast wieder gibt. Das W...,1
86,3_K1RvaMUHI,@TheGabrielbraga,1 year ago,Mclovin i love tu gato ‚ù§,0
87,3_K1RvaMUHI,@trggrt225,1 year ago,My ASSETS I WANT TO START UP MY INDEPENDENT PR...,0
88,3_K1RvaMUHI,@trggrt225,1 year ago,You need to know like I not ask any minister t...,0


Metadata:



Unnamed: 0,Youtube_Video_IDs,Youtube_Handle,video_length,views,date,title,likes,description,MM_timestamp,is_commented
0,JjMcghmK9ng,@MyBoostApp,2025-11-02 00:12:00,3740,2025-10-20,Why No One Tell Me I Can Get RM1.95/L for RON95?,1,"Many people ask, how is it possible to get RM1...",2025-10-30 03:53:32,False
1,uiEe-l1LfJM,@MyBoostApp,2025-11-02 00:15:00,15093,2025-10-15,Harga RON95 jadi RM1.95 jer seliter?,1,Harga RON95 jadi RM1.95 jer seliter?\n\nMemang...,2025-10-30 03:53:49,False
2,Fq7W7ytCMz0&pp=0gcJCQYKAYcqIYzv,@MyBoostApp,2025-11-02 00:20:00,52349,2025-10-10,Boost Epic Unstoppable Birthday,0,"Spend a minimum of RM30 using Boost, Boost Ban...",2025-10-30 03:54:06,False
3,gMbiTUi0dgw,@MyBoostApp,2025-11-02 00:14:00,81193,2025-10-09,Win Gold Dinar Boost Epic Unstoppable,2,üéâ More epic surprises are dropping for Boost‚Äôs...,2025-10-30 03:54:23,False
4,BwfBq2jzAyA,@MyBoostApp,2025-11-02 00:17:00,94484,2025-10-08,Boost Epic Unstoppable Birthday 2025,2,It‚Äôs an epic spending spree with Boost & Boost...,2025-10-30 03:54:40,False
225,C2Ikb1u3pSU,@MyBoostApp,2025-11-02 01:00:00,737,2021-02-26,#ShopMemangOnz with up to RM150 Boost Cashback...,11,2021 Shop Malaysia Online (SMO) Campaign is ba...,2025-11-01 01:07:20,
226,tIyV7Hf6RVY,@MyBoostApp,2025-11-02 01:00:00,74858,2021-08-06,8.8 Online Boost Day 2021 | #ShopMemangOnz wit...,6,Boost eWallet is bringing you the biggest Onli...,2025-11-01 01:07:43,
227,HjR8Am-CNhc,@MyBoostApp,2025-11-02 00:40:00,1627,2021-02-26,Boost Business ‚Äì Accept Boost Payment via your...,10,It‚Äôs important to stay safe and do your part t...,2025-11-01 01:08:08,
228,GaJ49C_iKYw,@MyBoostApp,2025-11-02 02:03:00,1038,2021-07-28,Boost Business - How to Setup Your Very Own Wh...,6,Are you facing the challenge of having to open...,2025-11-01 01:08:45,
229,61q1a3TCO-4,@MyBoostApp,2025-11-02 03:47:00,1206717,2021-05-10,Iklan Raya Boost 2021: Hadiah Buat Mama,251,Pandemik ini bukan mudah bagi semua. Tetapi de...,2025-11-01 01:09:09,


Comments:



Unnamed: 0,Youtube_Video_IDs,poster_OP,comment_age,full_comment,like_count
0,oiVhAT47iDs,@topethidayat1620,2 months ago,Merdeka kewangan! Merdeka kewangan! üéâ üéâ üéâ,3
1,oiVhAT47iDs,@eappieflag,2 months ago,bayar bill guna boost -bill tidak berjaya dib...,2
2,oiVhAT47iDs,@mz6228,2 months ago,Selamat Hari Kebangsaan Malaysia üá≤üáæ Merdeka!! ...,0
3,oiVhAT47iDs,@muhammadhazim1581,2 months ago,vfx need a raise,1
4,oiVhAT47iDs,@mdshaidik9275,1 month ago,What's with that fever patch? Hinting somethin...,0
87,W4DnaPnso8s&pp=0gcJCQYKAYcqIYzv,@masterdarkfender8272,1 year ago,My boost is broken. It's still loading even th...,0
88,W4DnaPnso8s&pp=0gcJCQYKAYcqIYzv,@OfficialP29,1 year ago,Nama ni,0
89,W4DnaPnso8s&pp=0gcJCQYKAYcqIYzv,@YukjingLim,1 year ago,Your boost credit team banyak masalah sudah sa...,0
90,W4DnaPnso8s&pp=0gcJCQYKAYcqIYzv,@chongweanne7818,1 year ago,"hello, your credit team banyak masalah dan tak...",0
91,W4DnaPnso8s&pp=0gcJCQYKAYcqIYzv,@wanchareesrinual7613,10 months ago,I open already 1 account and try to put money ...,0


## Delete statements for pickle

In [156]:
root_dirs = [
    "./Data/Database/",
    "./Data/Lists/MM_temp/",
    "./Data/Lists/COMMENT_temp/",
    "./Data/MM_database/",
    "./Data/COMMENT_database/"
]
delete_template = """find {placeholder} -name "*.pkl" -type f -delete"""

for e_directory in root_dirs[1:]:
    del_command = delete_template.format(placeholder=e_directory)
    print(del_command)

find ./Data/Lists/MM_temp/ -name "*.pkl" -type f -delete
find ./Data/Lists/COMMENT_temp/ -name "*.pkl" -type f -delete
find ./Data/MM_database/ -name "*.pkl" -type f -delete
find ./Data/COMMENT_database/ -name "*.pkl" -type f -delete


## Data Cleaning video_length/duration ?

In [177]:
COLLATE_MM.groupby(pd.Grouper(key='MM_timestamp', axis=0, freq='D'))['MM_timestamp'].count()

MM_timestamp
2025-10-30    2400
2025-10-31    2520
2025-11-01    7056
Freq: D, Name: MM_timestamp, dtype: int64

In [None]:
# Convert to nullable integer and boolean dtypes
df['col_a'] = df['col_a'].astype('Int64')
df['col_b'] = df['col_b'].astype('BooleanDtype')

In [768]:
nun = None
if nun:
    print('A')
else:
    print('Yes')

Yes


In [767]:
print(MM_df.dtypes)
print()
print(comment_df.dtypes)
display(MM_df.head())

views                    int64
date            datetime64[ns]
title                   object
likes                    int64
description             object
MM_timestamp    datetime64[ns]
is_commented            object
dtype: object

Series([], dtype: object)


Unnamed: 0,views,date,title,likes,description,MM_timestamp,is_commented
0,276,2024-11-22,SMBC Careers | Oge Udensi on securing the future of finance,4,"Meet Oge Udensi, the EMEA lead for Cyber Governance, Risk & Compliance. Learn about her journey and commitment to empowering women in technology. Discover how SMBC is prioritising innovation and transformation.\n\nSMBC is a global banking leader operating across three core divisions: Corporate & Investment Banking, Structured Finance and Global Markets. We empower our employees to think big on an international scale, while fostering a collaborative spirit that keeps everyone connected. We're constantly searching for curious and ambitious minds to join our team and drive our ongoing success. Discover what a future at SMBC looks like at:\nhttps://www.smbcgroup.com/emea/careers",2025-10-28 17:06:53,False


In [283]:
for el in comments:
    # Get any emojis
    emojies = el.find_elements(By.XPATH, ".//img[@alt]")
    for e_emoji in emojies:
        pass
        print(e_emoji.get_attribute('alt'))
    # Get the replies 
    replies = el.find_elements(*reply_comments_path)
    # print(replies.text)
    small_comment_dict = {}
    # Process the text
    li = el.text.split('\n')
    lit = list(filter(lambda x : not re.search(pattern=r"^Pinned", string=x), li))
    print(lit[-2])
    poster, comment_age, *comment_text_content, like_count, reply = lit
    full_comment =' '.join(comment_text_content)
    small_comment_dict.update({'poster_OP' : poster,
                               'comment_age' : comment_age,
                               'full_comment' : full_comment,
                               'like_count' : like_count,
                               'MM_timestamp' : datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
                              })
    COMMENT_DICT.append(small_comment_dict)
    print("-----")


üôè
‚ù§
17
-----

üòç
‚ù§
4
-----

üéâ
‚ù§
2
-----

‚ù§
üéâ
1
-----

Syabas for the tot
-----

üëç
üëå
ü•∞
ü•∞
ü•∞
1
-----

üòä
1 hour ago


ValueError: not enough values to unpack (expected at least 4, got 3)

In [298]:
for el in comments:
    small_comment_dict = {}

    # Get the HTML of the comment text section
    try:
        content_element = el.find_elements(By.XPATH, ".//yt-attributed-string[@id='content-text']")
        html_content = content_element.get_attribute("innerHTML")
    except Exception:
        html_content = ""
    print(html_content)

    # Replace emoji <img> tags with their alt text (like üòçüî•)
    # Example: <img ... alt="üî•"> ‚Üí "üî•"
    html_content = re.sub(r'<img[^>]+alt="([^"]+)"[^>]*>', r'\1', html_content)

    # Remove any HTML tags (like <a>, <b>, etc.)
    full_comment = re.sub(r'<[^>]+>', '', html_content).strip()

    # Now continue as before
    li = el.text.split('\n')
    lit = list(filter(lambda x : not re.search(pattern=r"^Pinned", string=x), li))

    try:
        poster, comment_age, *comment_text_content, like_count, reply = lit
    except ValueError:
        # fallback in case structure is inconsistent
        continue

    small_comment_dict.update({
        'poster_OP': poster,
        'comment_age': comment_age,
        'full_comment': full_comment,  # ‚úÖ includes emojis
        'like_count': like_count,
        'MM_timestamp': datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    })

    COMMENT_DICT.append(small_comment_dict)
    print(full_comment)
    print("-----")




-----


-----


-----


-----


-----


-----



-----


-----


-----


## Appendix

In [None]:
 ## --------- SCROLLING -------------------------
    max_num_of_scrolls = None
    num_of_scrolls = 0
    pause_time=2
    max_attempts=3
    
    last_height = driver.execute_script("return document.documentElement.scrollHeight")
    same_height_attempts = 0

    
    print("üîÑ Starting infinite scroll...")

    is_scrape_comments = True
    
    while same_height_attempts < max_attempts:
        if num_of_scrolls == max_num_of_scrolls:
            print(f"Max num of scrolls reached. Breaking out.")
            break
        # Scroll to the bottom
        # driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
        
        # Wait for page to load new items
        time.sleep(pause_time)
        
        new_height = driver.execute_script("return document.documentElement.scrollHeight")
        
        # Wait for page to load new items
        time.sleep(pause_time)
        # (Optional) move browser window using pyautogui
        pyautogui.hotkey('ctrl', 'alt', 'left')
        # Wait for page to load new items
        time.sleep(pause_time)

        if num_of_scrolls == 0:
            print('First scroll, but not increment num_of_scrolls')
            
            # Wait for page to load new items
            time.sleep(pause_time)
            # (Optional) move browser window using pyautogui
            pyautogui.hotkey('ctrl', 'alt', 'left')
            # Wait for page to load new items
            time.sleep(pause_time)
            
            # First make a quick check that there are comments
            is_comments_path = (By.XPATH, "//ytd-comments-header-renderer[contains(@class, 'style-scope')]")
            is_comments = driver.find_elements(*is_comments_path)
            print("-----------COMMENTS-----------------------")
            for i, el in enumerate(is_comments):
                rect = el.rect
                displayed = el.is_displayed()
                print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
                if displayed:
                    clickable_element = el
            hope_text_list = clickable_element.find_elements(By.XPATH, ".//yt-formatted-string[contains(@class, 'count-text')]")
            if hope_text_list:
                for e_text in hope_text_list:
                    print(e_text.text)
                    match = re.search(pattern=r'^(\d+)\b\s+\bComments', string=e_text.text)
                    # print(match.group(0))
                    if match :
                        if match.group(1) == '0':
                            print('Uncommented video')
                            MM_collector.update({'is_commented' : False})
                            is_scrape_comments = False
                            break
                        else:
                            MM_collector.update({'is_commented' : True})
                            pass # We can scrape_comments
            else: # This means comments are diasbled
                print('Disabled_comment_video')
                MM_collector.update({'is_commented' : None})
                is_scrape_comments = False

        if new_height == last_height:
            same_height_attempts += 1
            print(f"‚ö†Ô∏è No new content... ({same_height_attempts}/{max_attempts})")
        else:
            same_height_attempts = 0
            last_height = new_height
            print("‚úÖ New content loaded.")
            num_of_scrolls += 1

In [325]:
url = r"https://www.youtube.com/watch?v=s9pNJYzRvxs&list=RDs9pNJYzRvxs&start_radio=1"

service = Service("/usr/local/bin/chromedriver")
driver = webdriver.Chrome(service=service)
driver.get(url)

# (Optional) move browser window using pyautogui
pyautogui.hotkey('ctrl', 'alt', 'left')

wait = WebDriverWait(driver, 30)

more_info = (By.XPATH, '//tp-yt-paper-button[contains(@id, "expand")]')
class1 = (By.CLASS_NAME, "button style-scope ytd-text-inline-expander")
xpath_2 = (By.XPATH, "//yt-formatted-string[contains(@id, 'info')]")
comments_path = (By.XPATH, "//ytd-comment-view-model[contains(@id, 'comment')]")
reply_comments_path = (By.XPATH, ".//div[contains(@id, 'collapsed-threads')]")
like_button_path = (By.XPATH, "//button[contains(@aria-label, 'like')]")
replies_button = (By.XPATH, "//button[contains(@aria-label, 'replies')]")


MM_collector = {}

# Next we have to wait for the page to load
WebDriverWait(driver, 15).until(
    EC.presence_of_element_located(xpath_2)
)
# Still a bit of lag though
time.sleep(3)


## ---------------VIEWS_AND_DATE----------------------------
# Now that we know our element exists, we can click it -THIS DOES NOT WORK
# Check which elements are actually interactable
elements = driver.find_elements(*more_info) # use plural
for i, el in enumerate(elements):
    rect = el.rect
    displayed = el.is_displayed()
    print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
    if displayed:
        clickable_element = el

# From above we know the last element is the interactable element we can click to expand : driver.find_elements(*more_info)[-1].click()
# Check if we found any interactable element
if clickable_element:
    # Scroll into view before clicking (good practice)
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", clickable_element)
    clickable_element.click()
    print("‚úÖ Clicked visible 'expand' element.")

# Next we have to explicitly wait for the new content to render
time.sleep(3)
# Next we have to wait for the page to load
WebDriverWait(driver, 15).until(
    EC.presence_of_element_located(xpath_2)
)

# Collect all loaded elements
views_and_date = driver.find_elements(*xpath_2)[-1].find_elements(By.TAG_NAME, 'span')

for idx, e_child_ele in enumerate(views_and_date, start=1):
    some_info = e_child_ele.text
    if idx == 1:
        MM_collector.update({'views' : some_info})
    elif idx == 2:
        MM_collector.update({'date' : some_info})


## ---------------LIKE-BUTTON ----------------------------
# First grab all buttons with aria-label attribute
like_button = driver.find_elements(*like_button_path)
for i, el in enumerate(like_button):
    rect = el.rect
    displayed = el.is_displayed()
    print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
    if displayed:
        clickable_element = el
        break

# Then filter with regex
pattern = re.compile(r"^like.*", re.IGNORECASE)
like_info_match = pattern.search(string=clickable_element.get_attribute("aria-label"))
if like_info_match:
    like_string = clickable_element.get_attribute('aria-label')
    print(like_string)
    thousand_comma_separation_pattern = re.compile(r"\d{1,3}(?:,\d{3})*(?:\.\d*)?", re.IGNORECASE)
    number = thousand_comma_separation_pattern.search(string=like_string)
    print(f"Here is the like-count : {correct_number(number.group(0))}")
    MM_collector.update({'likes' : correct_number(number.group(0))})

##---------------DESCRIPTION ----------------------------
# Inspect which elements are interactable
description = driver.find_elements(By.XPATH, "//div[contains(@id, 'expanded')]") # First path
for i, el in enumerate(description):
    rect = el.rect
    displayed = el.is_displayed()
    print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
    if displayed:
        clickable_element = el

    
# Use relative ".//" to search within nested elements/descendants 'under' descripton[-1]
many_spans = clickable_element.find_element(By.XPATH, ".//yt-attributed-string[contains(@class, 'ytd-text-inline-expander')]")\
                              .find_elements(By.XPATH, ".//span[contains(@class, 'yt-core-attributed-string--link-inherit-color')]") # third path

description_list = []
count = 0
for e_span in many_spans:
    clean_str = e_span.text.strip()
    description_list.append(clean_str)
    # try:
    #     if re.search(pattern=r':\s*$', string=clean_str):
    #         description_string.append
    #     elif re.search(pattern=r'[\.\s]\s*$', string=clean_str):
    #         description_string +=  e_span.text + '\n'
    #     else:
    #         description_string += e_span.text
    # except NoSuchElementException:
    #     external_webisite_link = e_span.find_element(By.XPATH, "//a[@href]")
    #     description_string += e_span.text

    description_string = '\n'.join(description_list)

    count += 1

print(f"This is the description:\n{description_string}")
MM_collector.update({'description' : description_string})

max_num_of_scrolls = 2
num_of_scrolls = 0
pause_time=2
max_attempts=2

last_height = driver.execute_script("return document.documentElement.scrollHeight")
same_height_attempts = 0
print("üîÑ Starting infinite scroll...")

while same_height_attempts < max_attempts:
    if num_of_scrolls == max_num_of_scrolls:
        print(f"Max num of scrolls reached. Breaking out.")
        break
    # Scroll to the bottom
    # driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
    
    # Wait for page to load new items
    time.sleep(pause_time)
    
    new_height = driver.execute_script("return document.documentElement.scrollHeight")
    
    # Wait for page to load new items
    time.sleep(pause_time)
    
    if new_height == last_height:
        same_height_attempts += 1
        print(f"‚ö†Ô∏è No new content... ({same_height_attempts}/{max_attempts})")
    else:
        same_height_attempts = 0
        last_height = new_height
        print("‚úÖ New content loaded.")
        num_of_scrolls += 1

    #     # Grab all buttons with aria-label
    #     buttons = driver.find_elements(By.XPATH, "//button[@aria-label]")
        
    #     pattern = re.compile(r"repl(y|ies)", re.IGNORECASE)
        
    #     for btn in buttons:
    #         label = btn.get_attribute("aria-label") or ""
    #         if pattern.search(label):
    #             try:
    #                 # Scroll into view first
    #                 driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", btn)
    #                 time.sleep(0.5)
    #                 # Fallback: JS click
    #                 driver.execute_script("arguments[0].click();", btn)
    #                 print(f"‚úÖ JS clicked button: {label}")
                    
                    
        
    #             except Exception as e:
    #                 # Try normal click
    #                 btn.click()
    #                 print(f"Clicked button: {label}")
    #                 print(f"‚ö†Ô∏è Normal click failed on '{label}': {e}")
                    
    #             num_of_scrolls += 1

        



        # buttons = driver.find_elements(By.XPATH, "//button[@aria-label]")
        # for i, el in enumerate(buttons):
        #     print('Reply buttons')
        #     rect = el.rect
        #     displayed = el.is_displayed()
        #     print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
        #     if displayed:
        #         clickable_element = el
        #         # Click the clickable reply button 
        #         if clickable_element:
        #             pattern = re.compile(r"reply|replies", re.IGNORECASE)
        #             match = pattern.search(string=clickable_element.get_attribute("aria-label"))
        #             if match:
        #                 driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", clickable_element)
        #                 clickable_element.click()
        #                 print("‚úÖ Clicked visible 'reply' element.")
                        
        #             time.sleep(2)
        #     else:
        #         continue
            
                
            # reply_string = clickable_element.get_attribute('aria-label')
            # # Then filter with regex
            # pattern = re.compile(r"repl(y|ies)", re.IGNORECASE)
        

print("üõë Finished scrolling.")
print("üîç Expanding first-level replies...")

reply_pattern = re.compile(r"\b\d+\s*repl(y|ies)\b", re.IGNORECASE)
max_scrolls = 6
expanded_labels = set()
total_clicked = 0

expanded_threads = set()

for scroll_round in range(max_scrolls):
    print(f"üåÄ Pass {scroll_round+1}: scanning for reply buttons...")

    buttons = driver.find_elements(By.XPATH, "//button[@aria-label]")
    click_candidates = []

    for btn in buttons:
        label = btn.get_attribute("aria-label") or ""
        if reply_pattern.search(label):
            # Identify unique ancestor comment container
            ancestor = btn.find_element(By.XPATH, "./ancestor::ytd-comment-thread-renderer")
            comment_id = ancestor.get_attribute("id") or str(ancestor.id)

            if comment_id not in expanded_threads:
                click_candidates.append((btn, comment_id))

    if not click_candidates:
        print("‚ö†Ô∏è No new reply buttons found.")
        break
    else:
        print('CLICK CANDIDATES')
        print(click_candidates)

    for btn, comment_id in click_candidates:
        try:
            if comment_id not in expanded_threads:
                label = btn.get_attribute("aria-label")
                driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", btn)
                time.sleep(0.7)
                driver.execute_script("arguments[0].click();", btn)
                expanded_threads.add(comment_id)
                total_clicked += 1
                print(f"‚úÖ Expanded replies: {label}")
                time.sleep(2)
            else:
                continue
        except Exception as e:
            print(f"‚ö†Ô∏è Failed to click '{label}': {e}")
            continue

    # Scroll a bit further to load more comment threads
    driver.execute_script("window.scrollBy(0, 800);")
    time.sleep(2)

print(f"üéØ Finished expanding {total_clicked} first-level reply threads.")

# --- Step 2: Collect comments + first-level replies ---
print("üß© Extracting comments and replies...")
COMMENT_DICT = []


comments = driver.find_elements(By.XPATH, "//ytd-comment-view-model[contains(@id, 'comment')]")
for i, el in enumerate(comments):
    print("COMMENT DIV")
    rect = el.rect
    displayed = el.is_displayed()
    print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")


for idx, el in enumerate(comments):
    try:
        li = el.text.split('\n')
        lit = list(filter(lambda x : not re.search(pattern=r"^Pinned", string=x), li))
        # Handle variable-length lists gracefully
        if len(lit) < 4:
            continue
        poster, comment_age, *comment_content = lit[:-2]
        like_count, reply_text = lit[-2:]
        full_comment = ' '.join(comment_content).strip()

        small_comment_dict = {
            'poster_OP': poster,
            'comment_age': comment_age,
            'full_comment': full_comment,
            'like_count': like_count,
            'replies': [],
            'MM_timestamp': datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
        }

        # Step 3: Find replies under this comment (relative path)
        # reply_divs = el.find_elements(By.XPATH, ".//div[@id='expander-contents']")
        # reply_divs = el.find_elements(By.XPATH, ".//ytd-comment-view-model[contains(@class, 'style-scope yrd-comment-replies-renderer')]")
        # reply_divs = el.find_elements(By.XPATH, ".//span[contains(@class, 'yt-core-attributed-string yt-core-attributed-string--white-space-pre-wrap')]")
        # # Run Diagnostic failes to produce reply_divs
        # for i, el in enumerate(reply_divs):
        #     print("REPLY DIVS")
        #     rect = el.rect
        #     displayed = el.is_displayed()
        #     print(f"[{i}] Tag: {el.tag_name}, Visible: {displayed}, Rect: {rect}")
        #     if displayed:
        #         clickable_element = el
        
        #     hope_text = clickable_element.find_element(By.XPATH, ".//yt-attributed-string[contains(@id, 'content-text')]") 
        #     try:
        #         reply_lines = hope_text.text #split('\n')
        #         if len(reply_lines) >= 4:
        #             reply_poster = reply_lines[0]
        #             reply_age = reply_lines[1]
        #             reply_content = ' '.join(reply_lines[2:-2]).strip()
        #             reply_likes = reply_lines[-2]
        #             small_comment_dict['replies'].append({
        #                 'poster_reply': reply_poster,
        #                 'comment_age': reply_age,
        #                 'full_comment': reply_content,
        #                 'like_count': reply_likes
        #             })
        #     except Exception as e:
        #         print(f"‚ö†Ô∏è Failed to parse reply: {e}")

        # Find reply containers within the comment
        reply_divs = el.find_elements(
            By.XPATH,
            ".//ytd-comment-replies-renderer//yt-attributed-string[@id='content-text']"
        )
        
        print("üîç Found", len(reply_divs), "reply_divs")
        for i, reply_el in enumerate(reply_divs):
            rect = reply_el.rect
            displayed = reply_el.is_displayed()
            print(f"[{i}] Tag: {reply_el.tag_name}, Visible: {displayed}, Rect: {rect}")
        
            if not displayed:
                continue  # skip hidden ones
        
            # Extract text and emojis in DOM order
            children = reply_el.find_elements(By.XPATH, "./*")
            reconstructed = []
            for node in children:
                tag = node.tag_name.lower()
                if tag == "span":
                    reconstructed.append(node.text.strip())
                elif tag == "img":
                    reconstructed.append(node.get_attribute("alt") or "")
            text = "".join(reconstructed)
        
            print(f"üí¨ Reply #{i}: {text}")

        COMMENT_DICT.append(small_comment_dict)
        print(f"üó®Ô∏è [{idx}] Collected comment by {poster} with {len(small_comment_dict['replies'])} replies.")
    except Exception as e:
        print(f"‚ö†Ô∏è Failed to parse comment #{idx}: {e}")

print(f"\n‚úÖ Total top-level comments collected: {len(COMMENT_DICT)}")


## Add function to click replies
# First grab all buttons with aria-label attribute


    
#     target_buttons = [b for b in buttons if pattern.search(b.get_attribute("aria-label"))]
# # # Click them
# for btn in target_buttons:
#     driver.find_element(btn).click()
#     time.sleep(2)
#     print(f'Clicked the reply button')

# for btn in buttons:
#     label = btn.get_attribute("aria-label") or ""
#     if pattern.search(label):
#         try:
#             # Scroll into view first
#             driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", btn)
#             time.sleep(0.5)
            
#             # # Try normal click
#             # btn.click()
#             # print(f"Clicked button: {label}")
#             driver.execute_script("arguments[0].click();", btn)
#             print(f"‚úÖ JS clicked button: {label}")

#         except Exception as e:
#             print(e)
#             # print(f"‚ö†Ô∏è Normal click failed on '{label}': {e}")
#             # Fallback: JS click
            


# Collect all loaded comments


# COMMENT_DICT = []
# comments = driver.find_elements(*comments_path)

# for el in comments:
#     # Get the replies 
#     replies = el.find_elements(*reply_comments_path)
#     # print(replies.text)
#     small_comment_dict = {}
#     # Process the text
#     li = el.text.split('\n')
#     lit = list(filter(lambda x : not re.search(pattern=r"^Pinned", string=x), li))
#     # print(lit)
#     poster, comment_age, *comment_content, like_count, reply = lit
#     full_comment =' '.join(comment_content)
#     small_comment_dict.update({'poster_OP' : poster,
#                                'comment_age' : comment_age,
#                                'full_comment' : full_comment,
#                                'like_count' : like_count,
#                                'MM_timestamp' : datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
#                               })
#     COMMENT_DICT.append(small_comment_dict)
#     print("-----")



    # my_string = el.find_element(By.XPATH, '//yt-attributed-string[contains(@id, "content-text")]')
    # attribute_value = my_string.find_element(By.XPATH, '//span[contains(@role, "text")]')
    # print(attribute_value.text)


    # return views_and_date, description_string, driver  # return driver if you want to extract text/attributes later
        

[0] Tag: tp-yt-paper-button, Visible: False, Rect: {'height': 0, 'width': 0, 'x': 0, 'y': 0}
[1] Tag: tp-yt-paper-button, Visible: False, Rect: {'height': 0, 'width': 0, 'x': 0, 'y': 0}
[2] Tag: tp-yt-paper-button, Visible: False, Rect: {'height': 20, 'width': 44, 'x': 188.1796875, 'y': 1163.75}
[3] Tag: tp-yt-paper-button, Visible: True, Rect: {'height': 20, 'width': 44, 'x': 188, 'y': 1163.75}
‚úÖ Clicked visible 'expand' element.
[0] Tag: button, Visible: False, Rect: {'height': 0, 'width': 0, 'x': 0, 'y': 768.5}
[1] Tag: button, Visible: False, Rect: {'height': 0, 'width': 0, 'x': 0, 'y': 768.5}
[2] Tag: button, Visible: False, Rect: {'height': 0, 'width': 0, 'x': 0, 'y': 768.5}
[3] Tag: button, Visible: False, Rect: {'height': 0, 'width': 0, 'x': 0, 'y': 768.5}
[4] Tag: button, Visible: True, Rect: {'height': 36, 'width': 80, 'x': 329.765625, 'y': 1040.5}
like this video along with 353 other people
Here is the like-count : 353
[0] Tag: div, Visible: False, Rect: {'height': 0, 'wid

In [326]:
COMMENT_DICT

[{'poster_OP': '@kaminimanikam', 'comment_age': '9 days ago (edited)', 'full_comment': 'Thank you Ambank for celebrating Bharatanatyam ‚Äî and for choosing me to embody its spirit in this meaningful tapestry of light, rhythm, and story. I enjoyed the filming of this advertisement. Happy Deepavali everyone! -Kamini Manikam-', 'like_count': '17', 'replies': [], 'MM_timestamp': '10/27/2025, 01:43:32'}, {'poster_OP': '@RoseEsrosy', 'comment_age': '11 days ago', 'full_comment': 'So beautiful   well done Ambank', 'like_count': '4', 'replies': [], 'MM_timestamp': '10/27/2025, 01:43:32'}, {'poster_OP': '@keshavbaralofficial', 'comment_age': '11 days ago', 'full_comment': 'Wow  love from Nepal üá≥üáµÔ∏è', 'like_count': '3', 'replies': [], 'MM_timestamp': '10/27/2025, 01:43:32'}, {'poster_OP': '@lalithaolaal6879', 'comment_age': '9 days ago', 'full_comment': 'Vauuuuuu,beautiful  Am bank', 'like_count': '1', 'replies': [], 'MM_timestamp': '10/27/2025, 01:43:32'}, {'poster_OP': '@premagopalan227

In [246]:
for el in comments:
    # Get the replies 
    replies = el.find_elements(*reply_comments_path)
    print(replies)
    small_comment_dict = {}
    # Process the text
    li = el.text.split('\n')
    lit = list(filter(lambda x : not re.search(pattern=r"^Pinned", string=x), li))
    # print(lit)
    poster, comment_age, *comment_content, like_count, reply = lit
    full_comment =' '.join(comment_content)
    small_comment_dict.update({'poster_OP' : poster,
                               'comment_age' : comment_age,
                               'full_comment' : full_comment,
                               'like_count' : like_count,
                               'MM_timestamp' : datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
                              })
    COMMENT_DICT.append(small_comment_dict)
    print("-----")

[]
-----
[]
-----
[]
-----
[]
-----
[]
-----
[]
-----
[]
-----
[]
-----
[]
-----


In [230]:
clickable_element.get_attribute('aria-label')

In [None]:
from 

In [117]:
my_string = "This is a sentence"
word_list = my_string.split()
print(word_list)

# word_list_2 = comments[0].text.split('\n')
# print(word_list_2)

lengths = []
COMMENT_DICT = []

for el in comments:
    small_comment_dict = {}
    li = el.text.split('\n')
    lit = list(filter(lambda x : not re.search(pattern=r"^Pinned", string=x), li))
    # if len(lit) > 5:
    print(lit)
    poster, comment_age, *comment_content, like_count, reply = lit
    full_comment =' '.join(comment_content)
    small_comment_dict.update({'poster' : poster,
                       'comment-age' : comment_age,
                       'full_comment' : full_comment,
                       'like_count' : like_count,
                       'MM_timestamp' : datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
                      })
    COMMENT_DICT.append(small_comment_dict)
    
        
    lengths.append(len(li))

['This', 'is', 'a', 'sentence']
['@PrisonPlanetLive', '4 months ago', 'Go to https://ground.news/pjw or download the app and use my code P J W to access all perspectives and see through biased media. Subscribe through my link for 40% off unlimited access.', '194', 'Reply']
['@therealjoemullin', '4 months ago', 'In South africa, the blacks ransacked an entire mall and left one shop utterly pristine and untouched. A book store.', '4K', 'Reply']
['@mrthewubbie', '4 months ago', '"During the Soviet period, you could take someone that believes in the party, place them in the middle of a gulag, show them the truth, and they still would refuse to believe you". Yuri Besmenov', '3.1K', 'Reply']
['@AryBoy1998', '4 months ago', '‚ÄúI can‚Äôt speak English‚Äù', '', 'Right so you‚Äôre living in England but can‚Äôt speak English? Well done.', '1.4K', 'Reply']
['@bluestone476', '4 months ago', "It doesn't suck if they get what they voted for.... what really sucks is that WE get what THEY voted for.",

In [118]:
COMMENT_DICT

[{'poster': '@PrisonPlanetLive',
  'comment-age': '4 months ago',
  'full_comment': 'Go to https://ground.news/pjw or download the app and use my code P J W to access all perspectives and see through biased media. Subscribe through my link for 40% off unlimited access.',
  'like_count': '194',
  'MM_timestamp': '10/24/2025, 17:12:50'},
 {'poster': '@therealjoemullin',
  'comment-age': '4 months ago',
  'full_comment': 'In South africa, the blacks ransacked an entire mall and left one shop utterly pristine and untouched. A book store.',
  'like_count': '4K',
  'MM_timestamp': '10/24/2025, 17:12:50'},
 {'poster': '@mrthewubbie',
  'comment-age': '4 months ago',
  'full_comment': '"During the Soviet period, you could take someone that believes in the party, place them in the middle of a gulag, show them the truth, and they still would refuse to believe you". Yuri Besmenov',
  'like_count': '3.1K',
  'MM_timestamp': '10/24/2025, 17:12:50'},
 {'poster': '@AryBoy1998',
  'comment-age': '4 mo

In [155]:
example = ['Pinned by @PrisonPlanetLive', '@PrisonPlanetLive', '1 year ago', 'Go to https://sheathunderwear.com/PRISONPLANET or use the code PRISONPLANET to get 20% off your Sheath underwear today.', '247', 'Reply']

ex = list(filter(lambda x : not re.search(pattern=r'^@', string=x), example))
poster, comment_age, *comment_content, lie_count, reply = lit


['Imagine a country without Kimmel.']

In [85]:
from bs4 import BeautifulSoup
my_html="""<button class="yt-spec-button-shape-next yt-spec-button-shape-next--text yt-spec-button-shape-next--call-to-action yt-spec-button-shape-next--size-m yt-spec-button-shape-next--icon-leading yt-spec-button-shape-next--align-by-text yt-spec-button-shape-next--enable-backdrop-filter-experiment" title="" aria-label="32 replies"><div aria-hidden="true" class="yt-spec-button-shape-next__icon"><span class="ytIconWrapperHost" style="width: 24px; height: 24px;"><span class="yt-icon-shape ytSpecIconShapeHost"><div style="width: 100%; height: 100%; display: block; fill: currentcolor;"><svg xmlns="http://www.w3.org/2000/svg" height="24" viewBox="0 0 24 24" width="24" focusable="false" aria-hidden="true" style="pointer-events: none; display: inherit; width: 100%; height: 100%;"><path d="m18 9.28-6.35 6.35-6.37-6.35.72-.71 5.64 5.65 5.65-5.65z"></path></svg></div></span></span></div><div class="yt-spec-button-shape-next__button-text-content"><span class="yt-core-attributed-string yt-core-attributed-string--white-space-no-wrap" role="text">32 replies</span></div><yt-touch-feedback-shape aria-hidden="true" class="yt-spec-touch-feedback-shape yt-spec-touch-feedback-shape--touch-response"><div class="yt-spec-touch-feedback-shape__stroke"></div><div class="yt-spec-touch-feedback-shape__fill"></div></yt-touch-feedback-shape></button>"""
soup = BeautifulSoup(my_html, 'html.parser')

# Get the prettified HTML
prettified_html = soup.prettify()
print(prettified_html)

<button aria-label="32 replies" class="yt-spec-button-shape-next yt-spec-button-shape-next--text yt-spec-button-shape-next--call-to-action yt-spec-button-shape-next--size-m yt-spec-button-shape-next--icon-leading yt-spec-button-shape-next--align-by-text yt-spec-button-shape-next--enable-backdrop-filter-experiment" title="">
 <div aria-hidden="true" class="yt-spec-button-shape-next__icon">
  <span class="ytIconWrapperHost" style="width: 24px; height: 24px;">
   <span class="yt-icon-shape ytSpecIconShapeHost">
    <div style="width: 100%; height: 100%; display: block; fill: currentcolor;">
     <svg aria-hidden="true" focusable="false" height="24" style="pointer-events: none; display: inherit; width: 100%; height: 100%;" viewbox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
      <path d="m18 9.28-6.35 6.35-6.37-6.35.72-.71 5.64 5.65 5.65-5.65z">
      </path>
     </svg>
    </div>
   </span>
  </span>
 </div>
 <div class="yt-spec-button-shape-next__button-text-content">
  

In [None]:
<button class="yt-spec-button-shape-next yt-spec-button-shape-next--text yt-spec-button-shape-next--call-to-action yt-spec-button-shape-next--size-m yt-spec-button-shape-next--icon-leading yt-spec-button-shape-next--align-by-text yt-spec-button-shape-next--enable-backdrop-filter-experiment" title="" aria-label="32 replies"><div aria-hidden="true" class="yt-spec-button-shape-next__icon"><span class="ytIconWrapperHost" style="width: 24px; height: 24px;"><span class="yt-icon-shape ytSpecIconShapeHost"><div style="width: 100%; height: 100%; display: block; fill: currentcolor;"><svg xmlns="http://www.w3.org/2000/svg" fill="currentColor" height="24" viewBox="0 0 24 24" width="24" focusable="false" aria-hidden="true" style="pointer-events: none; display: inherit; width: 100%; height: 100%;"><path d="M18.707 8.793a1 1 0 00-1.414 0L12 14.086 6.707 8.793a1 1 0 10-1.414 1.414L12 16.914l6.707-6.707a1 1 0 000-1.414Z"></path></svg></div></span></span></div><div class="yt-spec-button-shape-next__button-text-content"><span class="yt-core-attributed-string yt-core-attributed-string--white-space-no-wrap" role="text">32 replies</span></div><yt-touch-feedback-shape aria-hidden="true" class="yt-spec-touch-feedback-shape yt-spec-touch-feedback-shape--touch-response"><div class="yt-spec-touch-feedback-shape__stroke"></div><div class="yt-spec-touch-feedback-shape__fill"></div></yt-touch-feedback-shape>
</button>

In [78]:
for el in comments:
    print(el.text)
    print('-------------------------------------')
    # my_string = el.find_elements(By.XPATH, '//yt-attributed-string[contains(@id, "content-text")]')[0]
    # print(my_string.text)
    # attribute_value = my_string.find_elements(By.XPATH, '//span[contains(@role, "text")]')[-1]
    # print(attribute_value.text)

Pinned by @PrisonPlanetLive
@PrisonPlanetLive
1 year ago
Go to https://sheathunderwear.com/PRISONPLANET or use the code PRISONPLANET to get 20% off your Sheath underwear today.
247
Reply
-------------------------------------
@Chicken_Wing91
1 year ago
Jimmy Kimmel has never been accused of being the smartest guy in the room to start with
1.2K
Reply
-------------------------------------
@Patrick_919
1 year ago (edited)
I feel sorry for the Japanese people. Having Jimmy Kimmel in their country must've been awful.
3.5K
Reply
-------------------------------------
@OneMeanArtist
1 year ago
Aside from Japan, I love how smoothbrain Kimmel assumes the entire US is just as filthy and third world as CA.
651
Reply
-------------------------------------
@cannonball666
1 year ago
Japan and Russia don't use criminals and drug addicts as voting blocks.
1.2K
Reply
-------------------------------------
@beckvitt2997
1 year ago
"Japan was the future, but it's stuck in the past."

Being stuck in the past 

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

url = "https://www.youtube.com/watch?v=kunuFjyvwoE"

service = Service("/usr/local/bin/chromedriver")
driver = webdriver.Chrome(service=service)
driver.get(url)

wait = WebDriverWait(driver, 15)
wait.until(EC.presence_of_element_located((By.TAG_NAME, "ytd-text-inline-expander")))

# Give YouTube time to hydrate the shadow DOM
time.sleep(3)

expand_button = driver.execute_script("""
    return document
      .querySelector('ytd-text-inline-expander')
      .shadowRoot
      .querySelector('tp-yt-paper-button#expand');
""")

if expand_button:
    expand_button.click()
    print("Clicked expand button successfully!")
else:
    print("Expand button not found inside shadow DOM.")


## Hello

In [42]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

url = "https://www.youtube.com/watch?v=kunuFjyvwoE"

service = Service("/usr/local/bin/chromedriver")
driver = webdriver.Chrome(service=service)
driver.get(url)

# (Optional) move browser window using pyautogui
pyautogui.hotkey('ctrl', 'alt', 'left')

wait = WebDriverWait(driver, 20)

# Wait until at least one expander exists
wait.until(EC.presence_of_element_located((By.TAG_NAME, "ytd-text-inline-expander")))
time.sleep(3)

expand_button = None
for attempt in range(5):
    try:
        expand_button = driver.execute_script("""
            const expander = document.querySelector('ytd-text-inline-expander');
            if (!expander) return null;
            const root = expander.shadowRoot;
            if (!root) return null;
            return root.querySelector('tp-yt-paper-button#expand');
        """)
        if expand_button:
            print(f"Found expand button on attempt {attempt+1}")
            expand_button.click()
            print("Clicked expand button successfully.")
            break
        else:
            print(f"Attempt {attempt+1}: expand button not found yet.")
            time.sleep(2)
    except Exception as e:
        print(f"Attempt {attempt+1}: JS error -> {e}")
        time.sleep(2)

if not expand_button:
    print("Expand button not found after retries.")


Attempt 1: expand button not found yet.
Attempt 2: expand button not found yet.
Attempt 3: expand button not found yet.
Attempt 4: expand button not found yet.
Attempt 5: expand button not found yet.
Expand button not found after retries.


In [None]:
<tp-yt-paper-button id="expand" class="button style-scope ytd-text-inline-expander" style-target="host" role="button" tabindex="0" animated="" elevation="0" aria-disabled="false" style="left: 436px;"><!--css-build:shady--><!--css_build_scope:tp-yt-paper-button--><!--css_build_styles:video.youtube.src.web.polymer.shared.ui.styles.yt_base_styles.yt.base.styles.css.js,third_party.javascript.youtube_components.tp_yt_paper_button.tp.yt.paper.button.css.js-->...more
<tp-yt-paper-ripple class="style-scope tp-yt-paper-button"><!--css-build:shady--><!--css_build_scope:tp-yt-paper-ripple--><!--css_build_styles:video.youtube.src.web.polymer.shared.ui.styles.yt_base_styles.yt.base.styles.css.js,third_party.javascript.youtube_components.tp_yt_paper_ripple.tp.yt.paper.ripple.css.js--><div id="background" class="style-scope tp-yt-paper-ripple"></div>
<div id="waves" class="style-scope tp-yt-paper-ripple"></div>
</tp-yt-paper-ripple></tp-yt-paper-button>

In [19]:
## 
empty = ""
empty + 'a'

'a'

In [None]:
<tr class="description-item style-scope ytd-about-channel-renderer">
        <td class="style-scope ytd-about-channel-renderer">
          <yt-icon icon="info_outline" class="style-scope ytd-about-channel-renderer"><!--css-build:shady--><!--css_build_scope:yt-icon--><!--css_build_styles:video.youtube.src.web.polymer.shared.ui.styles.yt_base_styles.yt.base.styles.css.js,video.youtube.src.web.polymer.shared.core.yt_icon.yt.icon.css.js--><span class="yt-icon-shape style-scope yt-icon ytSpecIconShapeHost"><div style="width: 100%; height: 100%; display: block; fill: currentcolor;"><svg xmlns="http://www.w3.org/2000/svg" height="24" viewBox="0 0 24 24" width="24" focusable="false" aria-hidden="true" style="pointer-events: none; display: inherit; width: 100%; height: 100%;"><path d="M13 17h-2v-6h2v6zm0-10h-2v2h2V7zm-1-4c-4.96 0-9 4.04-9 9s4.04 9 9 9 9-4.04 9-9-4.04-9-9-9m0-1c5.52 0 10 4.48 10 10s-4.48 10-10 10S2 17.52 2 12 6.48 2 12 2z"></path></svg></div></span></yt-icon>
        </td>
        <td class="style-scope ytd-about-channel-renderer">
          <yt-attributed-string class="style-scope ytd-about-channel-renderer"><span class="yt-core-attributed-string yt-core-attributed-string--white-space-pre-wrap" role="text"><span class="" style="">Joined Jan 28, 2007</span></span></yt-attributed-string>
        </td>
      </tr>

In [8]:
import os 
print(os.getcwd())

/Users/Malcolm/Desktop/Malaysian_Banking_Youtube_Wars/CAIE_PROJECT


In [None]:
<yt-touch-feedback-shape aria-hidden="true" class="yt-spec-touch-feedback-shape yt-spec-touch-feedback-shape--touch-response"><div class="yt-spec-touch-feedback-shape__stroke"></div><div class="yt-spec-touch-feedback-shape__fill"></div></yt-touch-feedback-shape>