In [1]:
!pip install bs4



### Extracting from a local HTML file

In [6]:
from bs4 import BeautifulSoup

with open("simple_page.html", "r", encoding="utf-8") as page1:
    html = page1.read()
    soup = BeautifulSoup(html, "html.parser")

print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   My Simple Page
  </title>
 </head>
 <body>
  <h1>
   Welcome to My Page
  </h1>
  <div id="about">
   <h2>
    About Me
   </h2>
   <p>
    I enjoy learning web development and Python.
   </p>
  </div>
  <div class="section">
   <h2>
    My Hobbies
   </h2>
   <ul>
    <li>
     Reading
    </li>
    <li>
     Coding
    </li>
    <li>
     Gaming
    </li>
   </ul>
  </div>
 </body>
</html>



In [21]:
title_1 = soup.title.text
print("Title of the page is:", title_1)

Title of the page is: My Simple Page


Find first element

In [23]:
div_tags = soup.find("div")
div_tags

<div id="about">
<h2>About Me</h2>
<p>I enjoy learning web development and Python.</p>
</div>

In [27]:
motivation = div_tags.p.text
print("My motivation is:", motivation)

My motivation is: I enjoy learning web development and Python.


Find all elements

In [15]:
div_tags = soup.find_all("div")
div_tags

[<div id="about">
 <h2>About Me</h2>
 <p>I enjoy learning web development and Python.</p>
 </div>,
 <div class="section">
 <h2>My Hobbies</h2>
 <ul>
 <li>Reading</li>
 <li>Coding</li>
 <li>Gaming</li>
 </ul>
 </div>]

In [16]:
section_div = soup.find("div", class_="section")
section_div

<div class="section">
<h2>My Hobbies</h2>
<ul>
<li>Reading</li>
<li>Coding</li>
<li>Gaming</li>
</ul>
</div>

In [17]:
about_div = soup.find("div", id="about")
about_div

<div id="about">
<h2>About Me</h2>
<p>I enjoy learning web development and Python.</p>
</div>

In [18]:
hobby_items = soup.select("div.section ul li")
hobby_items

[<li>Reading</li>, <li>Coding</li>, <li>Gaming</li>]

In [37]:
#Using a loop and lists
list_of_hobbies = []
for i in range(len(hobby_items)):
    var1 = hobby_items[i].text
    list_of_hobbies.append(var1)

list_of_hobbies

['Reading', 'Coding', 'Gaming']

In [38]:
# Using list comprehension
[i.text for i in hobby_items]

['Reading', 'Coding', 'Gaming']

In [39]:
from bs4 import BeautifulSoup
with open("simple_page.html", "r", encoding="utf-8") as f:
    html = f.read()
    soup = BeautifulSoup(html, "html.parser")
    print("Title:", soup.title.text)
    about_div = soup.find("div", id="about")
    print("About Me:", about_div.p.text)
    hobby_items = soup.select("div.section ul li")
    hobbies = [item.text for item in hobby_items]
    print("Hobbies:", hobbies)

Title: My Simple Page
About Me: I enjoy learning web development and Python.
Hobbies: ['Reading', 'Coding', 'Gaming']


### Extracting from an external website

In [40]:
import requests
from bs4 import BeautifulSoup

In [42]:
# Step 1: Send a GET request to the page
url = "https://www.python.org"
response = requests.get(url)
response.text



In [43]:
# Step 2: Parse the HTML content
soup = BeautifulSoup(response.text, 'lxml')
soup

<!DOCTYPE html>
<!--[if lt IE 7]>   <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9">   <![endif]--><!--[if IE 7]>      <html class="no-js ie7 lt-ie8 lt-ie9">          <![endif]--><!--[if IE 8]>      <html class="no-js ie8 lt-ie9">                 <![endif]--><!--[if gt IE 8]><!--><html class="no-js" dir="ltr" lang="en"> <!--<![endif]-->
<head>
<script data-domain="python.org" defer="" src="https://analytics.python.org/js/script.outbound-links.js"></script>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<link href="//ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js" rel="prefetch"/>
<link href="//ajax.googleapis.com/ajax/libs/jqueryui/1.12.1/jquery-ui.min.js" rel="prefetch"/>
<meta content="Python.org" name="application-name"/>
<meta content="The official home of the Python Programming Language" name="msapplication-tooltip"/>
<meta content="Python.org" name="apple-mobile-web-app-title"/>
<meta content="yes" name="apple-mobile-web-app-capable"/>
<meta

In [None]:
# Step 3: Find the event list
event_section = soup.find('div', {'class': 'event-widget'})
# print(event_section)

events = event_section.find_all('li')
print(events)

<div class="medium-widget event-widget last">
<div class="shrubbery">
<h2 class="widget-title"><span aria-hidden="true" class="icon-calendar"></span>Upcoming Events</h2>
<p class="give-me-more"><a href="/events/calendars/" title="More Events">More</a></p>
<ul class="menu">
<li>
<time datetime="2025-08-08T00:00:00+00:00"><span class="say-no-more">2025-</span>08-08</time>
<a href="/events/python-user-group/2081/">Buea - Creating Python Communities and outreach</a></li>
<li>
<time datetime="2025-08-11T00:00:00+00:00"><span class="say-no-more">2025-</span>08-11</time>
<a href="/events/python-events/2011/">DjangoCon Africa 2025</a></li>
<li>
<time datetime="2025-08-13T00:00:00+00:00"><span class="say-no-more">2025-</span>08-13</time>
<a href="/events/python-events/2077/">PyCon Somalia 2025</a></li>
<li>
<time datetime="2025-08-15T00:00:00+00:00"><span class="say-no-more">2025-</span>08-15</time>
<a href="/events/python-events/1973/">PyCon Korea 2025</a></li>
<li>
<time datetime="2025-08-18T

In [6]:
import requests
from bs4 import BeautifulSoup

# Step 1: Send a GET request to the page
url = "https://www.python.org"
response = requests.get(url)

# Step 2: Parse the HTML content
soup = BeautifulSoup(response.text, 'lxml')

# Step 3: Find the event list
event_section = soup.find('div', {'class': 'event-widget'})
events = event_section.find_all('li')

# Step 4: Print upcoming exvents
print("Upcoming Python Events:")
for event in events:
    date = event.find('time').text
    name = event.find('a').text
    link = event.find('a')['href']
    print(f"{date} - {name} ({url}{link})")


Upcoming Python Events:
2025-08-08 - Buea - Creating Python Communities and outreach (https://www.python.org/events/python-user-group/2081/)
2025-08-11 - DjangoCon Africa 2025 (https://www.python.org/events/python-events/2011/)
2025-08-13 - PyCon Somalia 2025 (https://www.python.org/events/python-events/2077/)
2025-08-15 - PyCon Korea 2025 (https://www.python.org/events/python-events/1973/)
2025-08-18 - EuroSciPy 2025 (https://www.python.org/events/python-events/1971/)
