Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion Web Scraping with BeautifulSoup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,38 @@
#pip3 install requests
#pip3 install bs4

# You can also run the same steps in a real browser with the help of ChromeDriver.

# ## Basic fundamentals of web scraping

# Import these two modules: bs4 makes it easy to select HTML tags.
from bs4 import BeautifulSoup
# The requests module is simple to work with; some people use urllib, but I prefer requests because it is easier to use.
import requests
from selenium import webdriver

# My own blog is used as the example URL here; you can change it.
BASE_URL = "https://getpython.wordpress.com/"
# Both names are kept bound to the same address so either one can be used.
url = BASE_URL

# The requests module is used to fetch the data from the given URL.
# A timeout keeps the script from waiting forever if the server never responds.
source = requests.get(url, timeout=10)


def get_chrome_web_driver(options):
    """Create a Chrome WebDriver using the ``./chromedriver`` binary.

    Args:
        options: Chrome options object applied to the new browser session
            (for example the object returned by ``webdriver.ChromeOptions()``).

    Returns:
        The ``webdriver.Chrome`` instance created with the given options.
    """
    # ``chrome_options=`` is the deprecated spelling of this keyword;
    # ``options=`` is its replacement.
    # NOTE(review): newer Selenium releases expect the driver path to be
    # supplied through a Service object - confirm the Selenium version in use.
    return webdriver.Chrome("./chromedriver", options=options)


def get_web_driver_options():
    """Return a fresh ``webdriver.ChromeOptions`` object.

    Returns:
        A new ``webdriver.ChromeOptions`` instance with no arguments added by
        this function.
    """
    return webdriver.ChromeOptions()


def set_ignore_certificate_error(options):
    """Add the ``--ignore-certificate-errors`` argument to *options*.

    Args:
        options: Object exposing ``add_argument(str)``; it is modified in
            place.

    Returns:
        None. The change is made on the passed-in object.
    """
    options.add_argument('--ignore-certificate-errors')


def set_browser_as_incognito(options):
    """Add the ``--incognito`` argument to *options*.

    Args:
        options: Object exposing ``add_argument(str)``; it is modified in
            place.

    Returns:
        None. The change is made on the passed-in object.
    """
    options.add_argument('--incognito')

# BeautifulSoup builds the HTML structure from the requests response (create your soup).
# The parser is named explicitly so the result does not depend on which
# optional parsers happen to be installed on the machine.
soup = BeautifulSoup(source.text, 'html.parser')

Expand Down