In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
import time


# An 11-character YouTube video ID that stands as a whole token. The
# look-arounds stop the pattern from slicing 11 characters out of a longer
# run of ID characters (for example a long slug in an unrelated link).
_VIDEO_ID_RE = re.compile(
    r'(?<![a-zA-Z0-9_-])([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])'
)


def _absolute_youtube_url(href):
    """Return *href* as an absolute URL, adding an https scheme if missing."""
    if href.startswith('http'):
        return href
    if href.startswith('//'):
        # Protocol-relative link: only the scheme is missing.
        return 'https:' + href
    return 'https://' + href


def _parse_view_count(views):
    """Turn a comma-grouped count such as '1,234' into an int.

    Returns the original text unchanged when it is not numeric, so the
    caller always has something to store.
    """
    views_clean = views.replace(',', '')
    try:
        # Exact for arbitrarily large counts (no float round-trip).
        return int(views_clean)
    except ValueError:
        pass
    try:
        # Still accept decimal-looking text such as '123.0'.
        return int(float(views_clean))
    except (ValueError, OverflowError):
        return views


def scrape_kworb_with_direct_links():
    """Scrape the kworb.net top-videos table into a list of dicts.

    Fetches ``https://kworb.net/youtube/topvideos.html``, takes the first
    ``<table>`` on the page and reads every row that has exactly three
    ``<td>`` cells: the first cell is the title (and link), the second the
    view count.

    Returns:
        A list of dicts with the keys ``rank`` (1-based, counting only the
        rows that were kept), ``title``, ``views`` (text as shown on the
        page), ``views_num`` (int, or the raw text when it cannot be
        parsed) and ``youtube_url`` (``None`` when no link could be
        derived). On a request/parsing error the message is printed and
        whatever was collected so far is returned, so the result may be
        empty.
    """
    videos = []
    url = 'https://kworb.net/youtube/topvideos.html'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }

    try:
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        table = soup.find('table')
        if not table:
            print("Could not find table")
            return videos

        rank = 1
        for row in table.find_all('tr'):
            cells = row.find_all('td')

            # Only three-cell rows are treated as video entries.
            if len(cells) != 3:
                continue

            try:
                title_cell = cells[0]
                title = title_cell.get_text(strip=True)
                views = cells[1].get_text(strip=True)

                # Empty or very short titles are not real entries.
                if len(title) < 3:
                    continue

                youtube_link = None
                link_elem = title_cell.find('a')

                if link_elem:
                    href = link_elem.get('href', '')

                    # Debug output for the first few rows only.
                    if rank <= 3:
                        print(f"Row {rank}: Title='{title[:50]}'")
                        print(f"  href='{href}'")

                    if 'youtube.com' in href or 'youtu.be' in href:
                        youtube_link = _absolute_youtube_url(href)
                    elif href:
                        # Site-internal link: recover the video ID from it.
                        vid_match = _VIDEO_ID_RE.search(href)
                        if vid_match:
                            video_id = vid_match.group(1)
                            youtube_link = f"https://www.youtube.com/watch?v={video_id}"
                            if rank <= 3:
                                print(f"  Extracted video ID: {video_id}")

                videos.append({
                    'rank': rank,
                    'title': title,
                    'views': views,
                    'views_num': _parse_view_count(views),
                    'youtube_url': youtube_link,
                })
                rank += 1

            except Exception as e:
                # Best effort: one malformed row must not abort the scrape,
                # but say so instead of dropping it silently.
                print(f"Skipping row: {e}")
                continue

        return videos

    except Exception as e:
        print(f"Error: {e}")
        return videos


def save_to_csv(videos, csv_filename='top_videos_kworb.csv'):
    """Write the scraped video records to a CSV file.

    Args:
        videos: Sequence of per-video dicts; each dict becomes one row.
        csv_filename: Destination path of the CSV file.

    Prints a notice and writes nothing when ``videos`` is empty.
    """
    if videos:
        # index=False keeps the DataFrame's row index out of the file.
        pd.DataFrame(videos).to_csv(csv_filename, index=False)
    else:
        print("No videos to save")


def extract_urls_to_file(csv_filename='top_videos_kworb.csv', output_filename='video_urls.txt'):
    """Copy the usable URLs from a CSV's ``youtube_url`` column to a text file.

    Args:
        csv_filename: CSV file to read; it must have a ``youtube_url`` column.
        output_filename: Text file to (over)write, one URL per line.

    Missing values and entries that are not strings starting with ``http``
    are left out. Any error is printed as ``Error: ...`` and not re-raised.
    """
    try:
        frame = pd.read_csv(csv_filename)
        candidates = frame['youtube_url'].dropna().tolist()

        lines = [
            link + '\n'
            for link in candidates
            if isinstance(link, str) and link != 'nan' and link.startswith('http')
        ]

        with open(output_filename, 'w') as out:
            out.writelines(lines)

    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    # Scrape first; results are persisted only when something came back.
    videos = scrape_kworb_with_direct_links()

    if not videos:
        print("Failed to scrape videos")
    else:
        # The URL list is read back from the CSV written just before it,
        # so these two calls must stay in this order.
        save_to_csv(videos, 'top_videos_kworb.csv')
        extract_urls_to_file('top_videos_kworb.csv', 'video_urls.txt')

In [None]:
# An 11-character YouTube video ID that stands as a whole token. The
# look-arounds stop the pattern from slicing 11 characters out of a longer
# run of ID characters (for example a long slug in an unrelated link).
_VIDEO_ID_RE = re.compile(
    r'(?<![a-zA-Z0-9_-])([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])'
)


def _absolute_youtube_url(href):
    """Return *href* as an absolute URL, adding an https scheme if missing."""
    if href.startswith('http'):
        return href
    if href.startswith('//'):
        # Protocol-relative link: only the scheme is missing.
        return 'https:' + href
    return 'https://' + href


def _parse_view_count(views):
    """Turn a comma-grouped count such as '1,234' into an int.

    Returns the original text unchanged when it is not numeric, so the
    caller always has something to store.
    """
    views_clean = views.replace(',', '')
    try:
        # Exact for arbitrarily large counts (no float round-trip).
        return int(views_clean)
    except ValueError:
        pass
    try:
        # Still accept decimal-looking text such as '123.0'.
        return int(float(views_clean))
    except (ValueError, OverflowError):
        return views


def scrape_kworb_with_direct_links():
    """Scrape the kworb.net top-videos table into a list of dicts.

    Fetches ``https://kworb.net/youtube/topvideos.html``, takes the first
    ``<table>`` on the page and reads every row that has exactly three
    ``<td>`` cells: the first cell is the title (and link), the second the
    view count.

    Returns:
        A list of dicts with the keys ``rank`` (1-based, counting only the
        rows that were kept), ``title``, ``views`` (text as shown on the
        page), ``views_num`` (int, or the raw text when it cannot be
        parsed) and ``youtube_url`` (``None`` when no link could be
        derived). On a request/parsing error the message is printed and
        whatever was collected so far is returned, so the result may be
        empty.
    """
    videos = []
    url = 'https://kworb.net/youtube/topvideos.html'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }

    try:
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        table = soup.find('table')
        if not table:
            print("Could not find table")
            return videos

        rank = 1
        for row in table.find_all('tr'):
            cells = row.find_all('td')

            # Only three-cell rows are treated as video entries.
            if len(cells) != 3:
                continue

            try:
                title_cell = cells[0]
                title = title_cell.get_text(strip=True)
                views = cells[1].get_text(strip=True)

                # Empty or very short titles are not real entries.
                if len(title) < 3:
                    continue

                youtube_link = None
                link_elem = title_cell.find('a')

                if link_elem:
                    href = link_elem.get('href', '')

                    # Debug output for the first few rows only.
                    if rank <= 3:
                        print(f"Row {rank}: Title='{title[:50]}'")
                        print(f"  href='{href}'")

                    if 'youtube.com' in href or 'youtu.be' in href:
                        youtube_link = _absolute_youtube_url(href)
                    elif href:
                        # Site-internal link: recover the video ID from it.
                        vid_match = _VIDEO_ID_RE.search(href)
                        if vid_match:
                            video_id = vid_match.group(1)
                            youtube_link = f"https://www.youtube.com/watch?v={video_id}"
                            if rank <= 3:
                                print(f"  Extracted video ID: {video_id}")

                videos.append({
                    'rank': rank,
                    'title': title,
                    'views': views,
                    'views_num': _parse_view_count(views),
                    'youtube_url': youtube_link,
                })
                rank += 1

            except Exception as e:
                # Best effort: one malformed row must not abort the scrape,
                # but say so instead of dropping it silently.
                print(f"Skipping row: {e}")
                continue

        return videos

    except Exception as e:
        print(f"Error: {e}")
        return videos


def save_to_csv(videos, csv_filename='top_videos_kworb.csv'):
    """Write the scraped video records to a CSV file.

    Args:
        videos: Sequence of per-video dicts; each dict becomes one row.
        csv_filename: Destination path of the CSV file.

    Prints a notice and writes nothing when ``videos`` is empty.
    """
    if videos:
        # index=False keeps the DataFrame's row index out of the file.
        pd.DataFrame(videos).to_csv(csv_filename, index=False)
    else:
        print("No videos to save")


def extract_urls_to_file(csv_filename='top_videos_kworb.csv', output_filename='video_urls.txt'):
    """Copy the usable URLs from a CSV's ``youtube_url`` column to a text file.

    Args:
        csv_filename: CSV file to read; it must have a ``youtube_url`` column.
        output_filename: Text file to (over)write, one URL per line.

    Missing values and entries that are not strings starting with ``http``
    are left out. Any error is printed as ``Error: ...`` and not re-raised.
    """
    try:
        frame = pd.read_csv(csv_filename)
        candidates = frame['youtube_url'].dropna().tolist()

        lines = [
            link + '\n'
            for link in candidates
            if isinstance(link, str) and link != 'nan' and link.startswith('http')
        ]

        with open(output_filename, 'w') as out:
            out.writelines(lines)

    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    # Scrape first; results are persisted only when something came back.
    videos = scrape_kworb_with_direct_links()

    if not videos:
        print("Failed to scrape videos")
    else:
        # The URL list is read back from the CSV written just before it,
        # so these two calls must stay in this order.
        save_to_csv(videos, 'top_videos_kworb.csv')
        extract_urls_to_file('top_videos_kworb.csv', 'video_urls.txt')

In [None]:
def save_to_csv(videos, csv_filename='top_videos_kworb.csv'):
    """Write the scraped video records to a CSV file.

    Args:
        videos: Sequence of per-video dicts; each dict becomes one row.
        csv_filename: Destination path of the CSV file.

    Prints a notice and writes nothing when ``videos`` is empty.
    """
    if videos:
        # index=False keeps the DataFrame's row index out of the file.
        pd.DataFrame(videos).to_csv(csv_filename, index=False)
    else:
        print("No videos to save")


def extract_urls_to_file(csv_filename='top_videos_kworb.csv', output_filename='video_urls.txt'):
    """Copy the usable URLs from a CSV's ``youtube_url`` column to a text file.

    Args:
        csv_filename: CSV file to read; it must have a ``youtube_url`` column.
        output_filename: Text file to (over)write, one URL per line.

    Missing values and entries that are not strings starting with ``http``
    are left out. Any error is printed as ``Error: ...`` and not re-raised.
    """
    try:
        frame = pd.read_csv(csv_filename)
        candidates = frame['youtube_url'].dropna().tolist()

        lines = [
            link + '\n'
            for link in candidates
            if isinstance(link, str) and link != 'nan' and link.startswith('http')
        ]

        with open(output_filename, 'w') as out:
            out.writelines(lines)

    except Exception as e:
        print(f"Error: {e}")

In [None]:
if __name__ == "__main__":
    # Scrape first; results are persisted only when something came back.
    videos = scrape_kworb_with_direct_links()

    if not videos:
        print("Failed to scrape videos")
    else:
        # The URL list is read back from the CSV written just before it,
        # so these two calls must stay in this order.
        save_to_csv(videos, 'top_videos_kworb.csv')
        extract_urls_to_file('top_videos_kworb.csv', 'video_urls.txt')