In [1]:
import requests
from bs4 import BeautifulSoup
import csv

In [2]:
# Target page: Wikipedia's list of the most-viewed YouTube videos
url = "https://en.wikipedia.org/wiki/List_of_most-viewed_YouTube_videos"

In [12]:
# Scrape the first "wikitable" on the page at `url` and save rank, name,
# artist, upload date and view count of each row to youtube_videos.csv.
try:
    # Send an HTTP GET request to the URL. The timeout keeps the cell from
    # hanging forever on a stalled connection; a timeout raises a
    # RequestException subclass, which is handled below.
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # Turn 4xx/5xx responses into exceptions

    # Create a BeautifulSoup object to parse the page
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the table containing the most viewed videos
    table = soup.find('table', {'class': 'wikitable'})
    if table is None:
        # soup.find returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on the next line.
        raise ValueError("No table with class 'wikitable' was found on the page.")

    # Parallel lists holding one entry per scraped table row
    ranks = []
    names = []
    artists = []
    upload_dates = []
    views = []

    # Cell layout of a data row, as seen in the raw row HTML of this table:
    #   0 = rank, 1 = video name, 2 = artist/uploader, 3 = view count,
    #   4 = upload date, 5 = footnote reference (not exported)
    for row in table.find_all('tr')[1:]:  # [1:] skips the first (header) row
        columns = row.find_all('td')

        # Only cells 0-4 are read, so five cells are enough
        if len(columns) >= 5:
            ranks.append(columns[0].text.strip())
            names.append(columns[1].text.strip())
            artists.append(columns[2].text.strip())
            views.append(columns[3].text.strip())
            upload_dates.append(columns[4].text.strip())

    # Save the data to a CSV file with UTF-8 encoding
    with open('youtube_videos.csv', 'w', newline='', encoding='utf-8') as file:
        fieldnames = ['Rank', 'Name', 'Artist', 'Upload Date', 'Views']
        writer = csv.DictWriter(file, fieldnames=fieldnames)

        writer.writeheader()
        # zip walks the parallel lists in step; the tuple order matches fieldnames
        for record in zip(ranks, names, artists, upload_dates, views):
            writer.writerow(dict(zip(fieldnames, record)))

    print("Data has been saved to youtube_videos.csv.")

except requests.exceptions.RequestException as e:
    # Network failures, timeouts and HTTP error statuses end up here
    print(f"HTTP request error: {e}")

except Exception as e:
    # Anything else (missing table, file-system errors, ...)
    print(f"An error occurred: {e}")


Data has been saved to youtube_videos.csv.


In [18]:
# Scrape the first "wikitable" on the Wikipedia page and print one summary
# line (rank, name, artist, upload date, views) per table row.

# Define the URL
url = "https://en.wikipedia.org/wiki/List_of_most-viewed_YouTube_videos"

# Send a GET request to the URL. The timeout makes a stalled connection raise
# an exception instead of blocking the kernel indefinitely.
response = requests.get(url, timeout=10)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content of the page using BeautifulSoup
    soup = BeautifulSoup(response.text, "html.parser")

    # Find the table containing the video details (None if nothing matches)
    table = soup.find("table", {"class": "wikitable"})

    # Initialize empty lists to store the details
    ranks = []
    names = []
    artists = []
    upload_dates = []
    views = []

    if table is None:
        # Report the problem instead of crashing with an AttributeError below
        print("Could not find a table with class 'wikitable' on the page.")
    else:
        # Iterate through rows in the table
        for row in table.find_all("tr")[1:]:  # Skip the header row
            columns = row.find_all("td")
            if len(columns) >= 5:  # Ensure the row has enough columns
                # Cell order in each row: rank, name, artist, views, upload date
                ranks.append(columns[0].text.strip())
                names.append(columns[1].text.strip())
                artists.append(columns[2].text.strip())
                views.append(columns[3].text.strip())
                upload_dates.append(columns[4].text.strip())

    # Print the scraped data; zip walks the parallel lists in step
    for rank, name, artist, upload_date, view_count in zip(
        ranks, names, artists, upload_dates, views
    ):
        print(f"Rank: {rank}, Name: {name}, Artist: {artist}, Upload Date: {upload_date}, Views: {view_count}")
else:
    print("Failed to retrieve the page. Status code:", response.status_code)
    

Rank: 1., Name: "Baby Shark Dance"[6], Artist: Pinkfong Baby Shark - Kids' Songs & Stories, Upload Date: June 17, 2016, Views: 13.48
Rank: 2., Name: "Despacito"[9], Artist: Luis Fonsi, Upload Date: January 12, 2017, Views: 8.28
Rank: 3., Name: "Johny Johny Yes Papa"[17], Artist: LooLoo Kids - Nursery Rhymes and Children's Songs, Upload Date: October 8, 2016, Views: 6.82
Rank: 4., Name: "Bath Song"[18], Artist: Cocomelon - Nursery Rhymes, Upload Date: May 2, 2018, Views: 6.45
Rank: 5., Name: "Shape of You"[19], Artist: Ed Sheeran, Upload Date: January 30, 2017, Views: 6.11
Rank: 6., Name: "See You Again"[22], Artist: Wiz Khalifa, Upload Date: April 6, 2015, Views: 6.05
Rank: 7., Name: "Wheels on the Bus"[27], Artist: Cocomelon - Nursery Rhymes, Upload Date: May 24, 2018, Views: 5.62
Rank: 8., Name: "Phonics Song with Two Words"[28], Artist: ChuChu TV Nursery Rhymes & Kids Songs, Upload Date: March 6, 2014, Views: 5.52
Rank: 9., Name: "Uptown Funk"[29], Artist: Mark Ronson, Upload Date: 

In [17]:
# Debug aid: print the raw HTML of every <tr> in `table` except the first one
rows_after_first = table.find_all("tr")[1:]
for row in rows_after_first:
    print(row)

<tr>
<td align="center">1.</td>
<td>"<a href="/wiki/Baby_Shark" title="Baby Shark">Baby Shark Dance</a>"<sup class="reference" id="cite_ref-Baby_Shark_Dance_6-0"><a href="#cite_note-Baby_Shark_Dance-6">[6]</a></sup></td>
<td><a href="/wiki/Pinkfong" title="Pinkfong">Pinkfong Baby Shark - Kids' Songs &amp; Stories</a></td>
<td align="center">13.48</td>
<td align="right">June 17, 2016</td>
<td align="center"><sup class="reference" id="cite_ref-9"><a href="#cite_note-9">[A]</a></sup>
</td></tr>
<tr>
<td align="center">2.</td>
<td>"<a href="/wiki/Despacito#Music_video" title="Despacito">Despacito</a>"<sup class="reference" id="cite_ref-Despacito_(feat._Daddy_Yankee)_10-0"><a href="#cite_note-Despacito_(feat._Daddy_Yankee)-10">[9]</a></sup></td>
<td><a href="/wiki/Luis_Fonsi" title="Luis Fonsi">Luis Fonsi</a></td>
<td align="center">8.28</td>
<td align="right">January 12, 2017</td>
<td align="center"><sup class="reference" id="cite_ref-despacitoview_18-0"><a href="#cite_note-despacitoview-1