Required modules:
- requests: load web page data
- BeautifulSoup (bs4): extract data from the HTML
- pandas: build a table from the extracted data and export it to CSV  
  
!pip install pandas  
!pip install bs4  
!pip install requests

In [74]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Loading data from webpage

In [75]:
# Request the codedamn landing page and report the HTTP status code returned.
url = 'https://codedamn.com'
res = requests.get(url)

print(res.status_code)

200


### Extract data with BeautifulSoup

In [78]:
# Download the landing page, parse the raw response bytes with the built-in
# HTML parser, and show the document's <title> tag.
page = requests.get("https://codedamn.com")
soup = BeautifulSoup(markup=page.content, features='html.parser')
print(soup.title)

<title>Learn To Code For Free — Interactive Courses And Practice Problems</title>


### Select items

Using select() method

In [25]:
# Fetch the classroom demo site and parse it.
page = requests.get('https://codedamn-classrooms.github.io/webscraper-python-codedamn-classroom-website/')
soup = BeautifulSoup(page.content, 'html.parser')

# select() takes a CSS selector and returns every matching tag.
heading01 = soup.select('h1')

# Show the matches, their container type, and the text of the first match,
# each separated by a blank line.
print(heading01, type(heading01), heading01[0].text, sep='\n\n')

[<h1>Test Sites</h1>, <h1>E-commerce training site</h1>]

<class 'bs4.element.ResultSet'>

Test Sites


Using find() method

In [22]:
# find() returns only the first matching tag (uses `soup` from the cell above).
first_heading = soup.find('h1')
print(first_heading)

<h1>Test Sites</h1>


Using find_all() method

In [27]:
# find_all() returns every matching tag (uses `soup` from the cell above).
heading02 = soup.find_all('h1')

# Show the matches, their container type, and the text of the first match,
# each separated by a blank line.
print(heading02, type(heading02), heading02[0].text, sep='\n\n')

[<h1>Test Sites</h1>, <h1>E-commerce training site</h1>]

<class 'bs4.element.ResultSet'>

Test Sites


#### Example: top items

In [46]:
# Scrape the product cards from the classroom demo site and collect each
# product's title and review count as a list of dicts.
page = requests.get('https://codedamn-classrooms.github.io/webscraper-python-codedamn-classroom-website/')

# Name the parser explicitly, as the other cells do: without it bs4 emits
# GuessedAtParserWarning and picks whichever parser happens to be installed,
# so the parsed tree can differ between machines.
soup = BeautifulSoup(page.content, 'html.parser')

top_items = []
products = soup.select('div.thumbnail')

for item in products:
    # Each card is expected to contain exactly one title link and one ratings
    # block; [0] raises IndexError if the page layout changes.
    title = item.select('h4 > a.title')[0].text.strip()
    rating = item.select('div.ratings')[0].text.strip()
    # NOTE(review): the 'title: ' key (trailing colon and space) looks like a
    # typo for 'title'; left unchanged so the recorded output still matches.
    top_items.append({'title: ': title, 'reviewed': rating})

top_items


[{'title: ': 'Asus AsusPro Adv...', 'reviewed': '7 reviews'},
 {'title: ': 'Asus ROG Strix G...', 'reviewed': '4 reviews'},
 {'title: ': 'Acer Aspire 3 A3...', 'reviewed': '2 reviews'}]

### Example: extract links

In [58]:
# Collect every anchor on the demo site as a {'label': ..., 'link': ...} record.
page = requests.get('https://codedamn-classrooms.github.io/webscraper-python-codedamn-classroom-website')

soup = BeautifulSoup(page.content, 'html.parser')

products = soup.select('a')
all_links = [
    {
        'label': anchor.text.strip(),
        # Anchors without an href attribute yield None; record those as ''.
        'link': (anchor.get('href') or '').strip(),
    }
    for anchor in products
]

all_links


[{'label': 'Toggle navigation', 'link': ''},
 {'label': '', 'link': '/webscraper-python-codedamn-classroom-website/'},
 {'label': '', 'link': '#page-top'},
 {'label': 'Web Scraper',
  'link': '/webscraper-python-codedamn-classroom-website/'},
 {'label': 'Cloud Scraper',
  'link': '/webscraper-python-codedamn-classroom-website/cloud-scraper'},
 {'label': 'Pricing',
  'link': '/webscraper-python-codedamn-classroom-website/pricing'},
 {'label': 'Learn', 'link': '#section3'},
 {'label': 'Documentation',
  'link': '/webscraper-python-codedamn-classroom-website/documentation'},
 {'label': 'Video Tutorials',
  'link': '/webscraper-python-codedamn-classroom-website/tutorials'},
 {'label': 'How to',
  'link': '/webscraper-python-codedamn-classroom-website/how-to-videos'},
 {'label': 'Test Sites',
  'link': '/webscraper-python-codedamn-classroom-website/test-sites'},
 {'label': 'Forum', 'link': 'https://forum.webscraper.io/'},
 {'label': 'Install',
  'link': 'https://chrome.google.com/webstore/d

### Generating CSV file from data

In [73]:
import re

# Scrape every product card from the classroom demo site and export the
# name, price, description, review count and image path to a CSV file.
page = requests.get('https://codedamn-classrooms.github.io/webscraper-python-codedamn-classroom-website')

soup = BeautifulSoup(page.content, 'html.parser')

products = soup.select('div.thumbnail')
all_items = []

for item in products:
    # Each card is expected to contain one of each element; [0] raises
    # IndexError if the page layout changes.
    item_name = item.select('h4 > a.title')[0].text
    item_price = item.select('h4.price')[0].text
    # Collapse each run of whitespace to a single space and trim the ends.
    # Replacing with '' glued all the words of the description together.
    # The pattern is a raw string because '\s' is an invalid escape otherwise.
    item_description = re.sub(r'\s+', ' ', item.select('p.description')[0].text).strip()
    item_review = item.select('div.ratings')[0].text.strip()
    item_image = item.select('img.img-responsive')[0].get('src')

    all_items.append({
        'name': item_name,
        'price': item_price,
        'description': item_description,  # key becomes the CSV column header
        'review': item_review,
        'image': item_image,
    })


# One row per product; dict keys become the column names.
data_frame = pd.DataFrame(all_items)
data_frame.to_csv('list_products.csv')
