From 92b8d7938ff8b64b1090ad15d500f86ac1327199 Mon Sep 17 00:00:00 2001
From: sumitsisodiya <44099064+sumitsisodiya@users.noreply.github.com>
Date: Tue, 6 Oct 2020 10:32:23 +0530
Subject: [PATCH] Update Web Scraping with BeautifulSoup.py

---
 Web Scraping with BeautifulSoup.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/Web Scraping with BeautifulSoup.py b/Web Scraping with BeautifulSoup.py
index da4ce1e..36a7959 100644
--- a/Web Scraping with BeautifulSoup.py
+++ b/Web Scraping with BeautifulSoup.py
@@ -5,6 +5,7 @@
 #pip3 install requests
 #pip3 install bs4
 
+# Optional: the selenium helpers below drive a real Chrome browser through ./chromedriver
 
 # ## Basic fundamentals of web scraping
 
@@ -12,13 +13,34 @@
 from bs4 import BeautifulSoup
 # requests module is easy to operate some people use urllib but I prefer this one because it is easy to use.
 import requests
+from selenium import webdriver
 
 # I put here my own blog url ,you can change it.
 url="https://getpython.wordpress.com/"
-
+BASE_URL = "https://getpython.wordpress.com/"
 #Requests module use to data from given url
 source=requests.get(url)
 
+
+def get_chrome_web_driver(options):
+    """Return a Chrome WebDriver launched from ./chromedriver with the given options."""
+    return webdriver.Chrome("./chromedriver", options=options)
+
+
+def get_web_driver_options():
+    """Return a new, empty ChromeOptions object."""
+    return webdriver.ChromeOptions()
+
+
+def set_ignore_certificate_error(options):
+    """Add the --ignore-certificate-errors flag to options in place."""
+    options.add_argument('--ignore-certificate-errors')
+
+
+def set_browser_as_incognito(options):
+    """Add the --incognito flag to options in place."""
+    options.add_argument('--incognito')
+
 # BeautifulSoup is used for getting HTML structure from requests response.(craete your soup)
 soup=BeautifulSoup(source.text,'html')
 