In [15]:
# Import the necessary libraries
import requests             # For making HTTP requests to websites
import pandas as pd         # For data manipulation and analysis
from tqdm import tqdm       # For displaying progress bars in loops
from bs4 import BeautifulSoup  # For parsing HTML and extracting data

In [16]:
# Scrape the quotes listed on pages 1-10 of quotes.toscrape.com.
# Each entry appended to `data` is a list: [quote, author, author_id, tags].
data = []

# Reuse a single HTTP session so the connection can be kept alive between
# page requests instead of being re-established for every page.
with requests.Session() as session:

    # Loop through pages 1 to 10 (inclusive) to scrape data
    for page in tqdm(range(1, 11)):

        # Construct the URL for each page
        link = 'https://quotes.toscrape.com/page/' + str(page)

        # Without a timeout a stalled server would block this cell forever.
        res = session.get(link, timeout=10)

        # Fail loudly on 4xx/5xx responses; otherwise an error page would be
        # parsed as if it were a listing and silently contribute zero quotes.
        res.raise_for_status()

        # Parse the HTML content of the page using BeautifulSoup
        soup = BeautifulSoup(res.text, 'html.parser')

        # Every quote on the page lives in a <div class="quote"> element
        for sp in soup.find_all('div', class_='quote'):

            # Quote text with its first and last character (the enclosing
            # quotation marks) sliced off
            quote = sp.find('span', class_='text').text[1:-1]

            # Author's display name
            author = sp.find('small').text

            # href of the first <a> inside the quote block, stored as the
            # author's relative page path
            author_id = sp.find('a').get('href')

            # Text of every <a class="tag"> joined into one comma-separated string
            tags = ','.join(tag.text for tag in sp.find_all('a', class_='tag'))

            # Append the extracted data (quote, author, author_id, tags) to the data list
            data.append([quote, author, author_id, tags])


  0%|          | 0/10 [00:00<?, ?it/s]

100%|██████████| 10/10 [00:30<00:00,  3.09s/it]


In [17]:
# Sanity check: number of rows collected into `data` by the scraping loop
len(data)

100

In [18]:
# Turn the scraped rows into a DataFrame; the column labels follow the
# order in which each row's fields were appended to `data`.
df = pd.DataFrame(
    data=data,
    columns=['quote', 'author', 'author_id', 'tags'],
)

In [19]:
df.to_csv('total_Quotes.csv', index=False)  # Saves the DataFrame 'df' to a CSV file named 'total_Quotes.csv' without writing row indices

In [20]:
# Show the DataFrame as this cell's output
df

Unnamed: 0,quote,author,author_id,tags
0,The world as we have created it is a process o...,Albert Einstein,/author/Albert-Einstein,"change,deep-thoughts,thinking,world"
1,"It is our choices, Harry, that show what we tr...",J.K. Rowling,/author/J-K-Rowling,"abilities,choices"
2,There are only two ways to live your life. One...,Albert Einstein,/author/Albert-Einstein,"inspirational,life,live,miracle,miracles"
3,"The person, be it gentleman or lady, who has n...",Jane Austen,/author/Jane-Austen,"aliteracy,books,classic,humor"
4,"Imperfection is beauty, madness is genius and ...",Marilyn Monroe,/author/Marilyn-Monroe,"be-yourself,inspirational"
...,...,...,...,...
95,You never really understand a person until you...,Harper Lee,/author/Harper-Lee,better-life-empathy
96,You have to write the book that wants to be wr...,Madeleine L'Engle,/author/Madeleine-LEngle,"books,children,difficult,grown-ups,write,write..."
97,Never tell the truth to people who are not wor...,Mark Twain,/author/Mark-Twain,truth
98,"A person's a person, no matter how small.",Dr. Seuss,/author/Dr-Seuss,inspirational


In [21]:
# Build an absolute author URL by prefixing the site root to the relative
# path held in 'author_id' (radd computes prefix + column, element-wise).
site_root = 'https://quotes.toscrape.com'
df['author_link'] = df['author_id'].radd(site_root)

In [22]:
# Preview the first rows to confirm the new 'author_link' column
df.head()

Unnamed: 0,quote,author,author_id,tags,author_link
0,The world as we have created it is a process o...,Albert Einstein,/author/Albert-Einstein,"change,deep-thoughts,thinking,world",https://quotes.toscrape.com/author/Albert-Eins...
1,"It is our choices, Harry, that show what we tr...",J.K. Rowling,/author/J-K-Rowling,"abilities,choices",https://quotes.toscrape.com/author/J-K-Rowling
2,There are only two ways to live your life. One...,Albert Einstein,/author/Albert-Einstein,"inspirational,life,live,miracle,miracles",https://quotes.toscrape.com/author/Albert-Eins...
3,"The person, be it gentleman or lady, who has n...",Jane Austen,/author/Jane-Austen,"aliteracy,books,classic,humor",https://quotes.toscrape.com/author/Jane-Austen
4,"Imperfection is beauty, madness is genius and ...",Marilyn Monroe,/author/Marilyn-Monroe,"be-yourself,inspirational",https://quotes.toscrape.com/author/Marilyn-Monroe


In [23]:
# Write the DataFrame (now including 'author_link') to 'total_Quotes.csv',
# omitting the row index column.
df.to_csv(path_or_buf='total_Quotes.csv', index=False)