In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

###### WEB SCRAPING
Web scraping is the process of extracting and parsing data from websites in an automated fashion using a computer program. It's a useful technique for creating datasets for research and learning. 
Follow these steps to build a web scraping project from scratch using Python and its ecosystem of libraries:

1. Pick a website and describe your objective
2. Use the requests library to download web pages
3. Use Beautiful Soup to parse and extract information
4. Create CSV file(s) with the extracted information

Here are the steps we'll follow:

- We're going to scrape the laptop search results at https://www.flipkart.com/search?q=laptop&page=N, where N is the page number
- We will define the number of pages needed to be scraped
- We'll get a list of laptops. For each laptop, we'll get laptop name, laptop price, laptop rating and laptop page URL
- We'll save all the laptops to a single CSV file (`laptop_price.csv`) in the following format:

```
product name, product rate, product price, product link
HP 14s Core i3 11th Gen - (8 GB/256 GB SSD/Windows 10 Home) 14s- DY2501TU Thin and Light Laptop,
4.4,
"₹41,890",
www.flipkart.com/hp-14s-core-i3-11th-gen-8-gb-256-gb-ssd-windows-10-home-14s-dy2501tu-thin-light-laptop/p/itmdce0f13dd3a4e?
```

In [133]:
# Base search URL; the 1-based page number is appended to it.
laptop_link = "https://www.flipkart.com/search?q=laptop&page="


def get_document(page, timeout=30):
    """Download one search results page and parse it with Beautiful Soup.

    Args:
        page: Page number appended to ``laptop_link`` (converted with ``str``).
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        A ``BeautifulSoup`` document built from the response body.

    Raises:
        Exception: If the response status code is not 200. The message names
            the exact URL that failed and the status code received.
    """
    url = laptop_link + str(page)
    res = requests.get(url, timeout=timeout)
    if res.status_code != 200:
        # Report the URL that was actually requested, not just the base link.
        raise Exception(
            "Failed to load page {} (status code {})".format(url, res.status_code)
        )
    # Parse the raw bytes so Beautiful Soup can detect the encoding itself.
    document = BeautifulSoup(res.content, "html.parser")
    return document

In [134]:
# Column accumulators, one entry per scraped laptop. They are re-created on
# every run of this cell and are read by the DataFrame cell further down.
prod_name_list = []
prod_rate_list = []
prod_price_list = []
prod_link_list = []

# CSS classes used to locate each field in the search results markup.
# NOTE(review): these look like generated class names - confirm they still
# match the live page before trusting the output.
prod_name_selection_class = "_4rR01T"
prod_rating_selection_class = "_3LWZlK"
prod_price_selection_price = "_30jeq3 _1_WHN1"  # class id
prod_link_selection_class = "_1fQZEK"

print("Enter the number of pages needed: ")
pages = input()
for page in range(1, int(pages) + 1):
    document = get_document(page)

    # Collect every name, rating, price and link tag on this page.
    product_name_tags = document.find_all("div", {"class": prod_name_selection_class})
    prod_rating_tag = document.find_all("div", {"class": prod_rating_selection_class})
    prod_price_tags = document.find_all("div", {"class": prod_price_selection_price})
    prod_link_tags = document.find_all("a", {"class": prod_link_selection_class})

    # The four lists are gathered independently and paired by position. zip()
    # stops at the shortest list, so if any product lacks a field (for example
    # an unrated laptop) rows are silently dropped and the remaining values can
    # be attached to the wrong product. Surface that instead of hiding it.
    tag_counts = [
        len(product_name_tags),
        len(prod_rating_tag),
        len(prod_price_tags),
        len(prod_link_tags),
    ]
    if len(set(tag_counts)) > 1:
        print(
            "Warning: page {} has mismatched tag counts "
            "(names={}, ratings={}, prices={}, links={}); "
            "rows may be misaligned or dropped".format(page, *tag_counts)
        )

    # Distinct loop names so the selector strings above are never shadowed.
    for name_tag, rate_tag, price_tag, link_tag in zip(
        product_name_tags, prod_rating_tag, prod_price_tags, prod_link_tags
    ):
        prod_name_list.append(name_tag.text)
        prod_rate_list.append(rate_tag.text)
        prod_price_list.append(price_tag.text)
        # href is site-relative, so prefix the host to make it usable.
        prod_link_list.append("www.flipkart.com" + link_tag["href"])




Enter the number of pages needed: 
4


In [135]:
# Map each CSV column header to the list of values scraped for it.
columns = {
    "product name": prod_name_list,
    "product rate": prod_rate_list,
    "product price": prod_price_list,
    "product link": prod_link_list,
}
# Each list becomes one column of the table, in the order given above.
laptop_df = pd.DataFrame(data=columns)

In [136]:
# Write the table to disk without the row index, so the file holds only the
# four scraped columns. `index` is a boolean flag, so pass False explicitly
# rather than relying on None being falsy.
laptop_df.to_csv("laptop_price.csv", index=False)