In [3]:
import requests  # Importing the 'requests' library to make HTTP requests
from bs4 import BeautifulSoup  # Importing 'BeautifulSoup' from the 'bs4' library to parse HTML
import pandas as pd  # Importing 'pandas' library as 'pd' to work with data in DataFrame format


In [4]:
link = 'https://quotes.toscrape.com/'  # URL of the page to scrape

# Fetch the page. requests applies no timeout by default, so set one explicitly
# to keep this cell from hanging forever if the server never answers.
res = requests.get(link, timeout=10)

# Stop here on a 4xx/5xx response instead of silently parsing an error page
# in the cells below (raises requests.HTTPError).
res.raise_for_status()


In [5]:
# Build the parse tree from the response body using Python's built-in HTML parser.
soup = BeautifulSoup(markup=res.text, features='html.parser')

In [6]:
# One row per quote: [quote text, author name, author link, comma-joined tags].
data = []

# Every quote on the page is wrapped in its own <div class="quote"> container.
for quote_div in soup.find_all('div', class_='quote'):
    # Drop the first and last character of the span text
    # (presumably the quotation marks that wrap each quote on the page).
    quote_text = quote_div.find('span', class_='text').text[1:-1]

    # The author's name is the text of the container's <small> element.
    author_name = quote_div.find('small').text

    # href of the first <a> inside the container, e.g. '/author/Albert-Einstein'.
    author_href = quote_div.find('a').get('href')

    # Collect the text of every <a class="tag"> into a single comma-separated string.
    tag_string = ','.join(
        tag_link.text for tag_link in quote_div.find_all('a', class_='tag')
    )

    data.append([quote_text, author_name, author_href, tag_string])


In [7]:
# Sanity check: display the first scraped row ([quote, author, author_id, tags]).
data[0]

['The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.',
 'Albert Einstein',
 '/author/Albert-Einstein',
 'change,deep-thoughts,thinking,world']

In [8]:
# Column labels, in the same order as the values in each row of 'data'.
column_names = ['quote', 'author', 'author_id', 'tags']

# Build the table once from the accumulated rows.
df = pd.DataFrame(data, columns=column_names)

In [10]:
# Write the table to 'Quotes.csv' in the working directory; index=False omits the row-index column.
df.to_csv(path_or_buf='Quotes.csv', index=False)