In [None]:
# Virtualenv is a tool to create isolated Python environments.
# It allows you to manage dependencies for different projects separately.
# Not essential while running code in a Jupyter Notebook environmen.
!pip install virtualenv

# Selenium is a web automation tool commonly used for testing web applications.
# It can also be used for web scraping tasks.
!pip install selenium

# Requests is a Python library for making HTTP requests.
# It simplifies the process of sending HTTP requests and handling responses.
!pip install requests

# Urllib3 is a powerful HTTP client for Python.
# It provides features such as connection pooling, retries, and timeouts.
# Also A dependency for requests, often installed automatically.
!pip install urllib3

# BeautifulSoup is a Python library for parsing HTML and XML documents.
# It provides tools for navigating and manipulating the parse tree.
!pip install bs4

Collecting virtualenv
  Downloading virtualenv-20.26.2-py3-none-any.whl (3.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.9/3.9 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting distlib<1,>=0.3.7 (from virtualenv)
  Downloading distlib-0.3.8-py2.py3-none-any.whl (468 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.9/468.9 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: distlib, virtualenv
Successfully installed distlib-0.3.8 virtualenv-20.26.2
Collecting selenium
  Downloading selenium-4.21.0-py3-none-any.whl (9.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.25.1-py3-none-any.whl (467 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m467.7/467.7 kB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trio-websocket~=0.9 (from selenium)

In [None]:
# Imports the requests module, which was installed earlier.
# This module allows the Python script to make HTTP requests to web servers.
import requests

In [None]:
# Makes a GET request (to retrieve data) to the specified URL (the Wikipedia page).
# The timeout (in seconds) keeps the cell from hanging indefinitely if the
# server never responds; without it, requests waits with no time limit.
# The response from the website is stored in the "page" variable.
page = requests.get("https://en.wikipedia.org/wiki/Bangladesh", timeout=30)

# (Optional) Prints the HTTP status code returned by the server.
# A status code of 200 typically indicates a successful request.
# (Debug)
print(page.status_code)

# Raises requests.HTTPError for a 4xx/5xx response, so that an error page is
# never parsed further down as if it were the article.
page.raise_for_status()

# Prints only the first 500 bytes of the raw HTML content of the webpage.
# Printing the entire document would flood the notebook output.
# (Debug)
print(page.content[:500])

200
b'<!DOCTYPE html>\n<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-0 vector-feature-appearance-disabled vector-feature-appearance-pinned-clientpref-0 vector-feature-night-mode-disabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8">\n<title>Bangladesh - Wikipedia</title>\n<script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-f

In [None]:
# This line imports the BeautifulSoup class from the bs4 module.
# This class is used to parse HTML documents and extract data from them.
from bs4 import BeautifulSoup

In [None]:
# Creates a BeautifulSoup object (soup) by parsing the raw HTML content (page.content) using the built-in HTML parser.
soup = BeautifulSoup(page.content, 'html.parser')

# Uses the find_all method of the soup object to search for all HTML elements with the tag <p> (representing paragraphs).
# The results are stored in a list-like collection named "text" (each item is a
# parsed <p> element, not a plain string).
text = soup.find_all('p')

# Shows how many paragraphs were found and previews only the first three;
# printing every element would bury the notebook in markup.
# (Debug)
print(f"Found {len(text)} <p> elements")
print(text[:3])

[<p class="mw-empty-elt">
</p>, <p><b>Bangladesh</b>,<sup class="reference" id="cite_ref-22"><a href="#cite_note-22">[a]</a></sup> officially the <b>People's Republic of Bangladesh</b>,<sup class="reference" id="cite_ref-23"><a href="#cite_note-23">[b]</a></sup> is a country in <a href="/wiki/South_Asia" title="South Asia">South Asia</a>. It is the <a href="/wiki/List_of_countries_and_dependencies_by_population" title="List of countries and dependencies by population">eighth-most populous</a> country in the world and is among the <a href="/wiki/List_of_countries_and_dependencies_by_population_density" title="List of countries and dependencies by population density">most densely populated countries</a> with a population of nearly 170 million in an area of 148,460 square kilometres (57,320 sq mi). Bangladesh shares land borders with <a href="/wiki/India" title="India">India</a> to the north, west, and east, and <a href="/wiki/Myanmar" title="Myanmar">Myanmar</a> to the southeast. To the 

In [None]:
# Creating a .txt file that stores the texts.
# The encoding is set to UTF-8 explicitly: without it, open() uses the platform
# default encoding (for example cp1252 on Windows), and writing any character
# that encoding cannot represent raises UnicodeEncodeError.
with open("output.txt", "w", encoding="utf-8") as file:
    for paragraph in text:
        # get_text() method of the Beautiful Soup library extracts the text content within the HTML tags of a paragraph element (<p>).
        # It ignores the HTML tags.
        # One paragraph is written per line.
        file.write(paragraph.get_text() + "\n")

