# Web Scraping

The objective of this project is to scrape the author's name, the quote text, and the tags for every quote on the "Quotes to Scrape" website (https://quotes.toscrape.com).

In [1]:
# Importing Libraries

import requests   # HTTP requests and content retrieval for web scraping.
from bs4 import BeautifulSoup   # Parsing HTML content and extracting specific elements for web scraping.
import pandas as pd   # Data manipulation and analysis using data frames.

In [2]:
# Collect every scraped quote as a [name, quote, tags] row.
# Re-creating the list here keeps the cell idempotent when it is re-run.
Content = []

# One session for all pages so the HTTP connection is reused between requests
with requests.Session() as session:

    for i in range(1, 11):

        # Build the URL of the i-th page of the paginated listing
        url = f"https://quotes.toscrape.com/page/{i}/"

        # Fetch the page; the timeout keeps a stalled connection from hanging the notebook
        response = session.get(url, timeout=10)

        # Stop on an HTTP error status instead of parsing an error page as if it held quotes
        response.raise_for_status()

        # Parse the raw HTML bytes of the page
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find every quote block on the page. Searching the whole document avoids
        # depending on the first <div> of the page being the one that wraps the quotes.
        rows = soup.find_all('div', class_='quote')

        # Extract the three fields from each quote block
        for row in rows:

            # Author name
            name = row.find('small', class_='author').text

            # Quote text
            quote = row.find('span', class_='text').text

            # Tags are read from the 'content' attribute of the <meta class="keywords"> element
            tags = row.find('meta', class_='keywords').attrs['content']

            # Append the name, quote, and tags to the Content list
            Content.append([name, quote, tags])

In [3]:
# Build the DataFrame from the Content list: one row per entry, one column per field
Quote_df = pd.DataFrame(
    data=Content,
    columns=['Name', 'Quote', 'Tags'],
)

In [4]:
Quote_df.head()    # Preview the first five rows (head() defaults to n=5).

Unnamed: 0,Name,Quote,Tags
0,Albert Einstein,“The world as we have created it is a process ...,"change,deep-thoughts,thinking,world"
1,J.K. Rowling,"“It is our choices, Harry, that show what we t...","abilities,choices"
2,Albert Einstein,“There are only two ways to live your life. On...,"inspirational,life,live,miracle,miracles"
3,Jane Austen,"“The person, be it gentleman or lady, who has ...","aliteracy,books,classic,humor"
4,Marilyn Monroe,"“Imperfection is beauty, madness is genius and...","be-yourself,inspirational"


In [5]:
Quote_df.tail()    # Preview the last five rows (tail() defaults to n=5).

Unnamed: 0,Name,Quote,Tags
95,Harper Lee,“You never really understand a person until yo...,better-life-empathy
96,Madeleine L'Engle,“You have to write the book that wants to be w...,"books,children,difficult,grown-ups,write,write..."
97,Mark Twain,“Never tell the truth to people who are not wo...,truth
98,Dr. Seuss,"“A person's a person, no matter how small.”",inspirational
99,George R.R. Martin,“... a mind needs books as a sword needs a whe...,"books,mind"


In [6]:
Quote_df.info()    # Prints a concise summary: index range, column names, non-null counts, dtypes and memory usage.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    100 non-null    object
 1   Quote   100 non-null    object
 2   Tags    100 non-null    object
dtypes: object(3)
memory usage: 2.5+ KB


In [7]:
Quote_df.describe()    # Summary statistics; every column here is object dtype, so the result is count / unique / top / freq rather than numeric statistics.

Unnamed: 0,Name,Quote,Tags
count,100,100,100
unique,50,100,84
top,Albert Einstein,“The world as we have created it is a process ...,love
freq,10,1,4


In [8]:
Quote_df.size    # Total number of cells in the DataFrame (rows x columns).

300

In [9]:
Quote_df.shape    # Dimensions of the DataFrame as a (rows, columns) tuple.

(100, 3)

In [10]:
# NOTE(review): with the default index=True the row index is also written, as an unnamed
# first column; pass index=False if that column is not wanted in the file.
Quote_df.to_csv('Quotes_Scrapped.csv')    # Writes the DataFrame to a CSV file named 'Quotes_Scrapped.csv'.

In [None]:
# End of Web Scraping