---

# **Importing of Libraries**

- In this section, we have imported all the libraries that we will used to scrap flipkart products.

---

In [1]:
import pandas as pd
from selenium import webdriver
from bs4 import BeautifulSoup

---

# **Flipkart Scraper**

- In this section, we will scrap the items as highlighted in the red box in the following image.

![img](./images/flipkart-webpage.PNG?raw=true)

- We will use the following user defined function to extract products from flipkart website.

---

In [2]:
def get_url(search_item, page):
    """Search the item using the url."""

    init_url = "https://www.flipkart.com/search?q={}&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
    search_item = search_item.replace(" ", "+")
    final_url = init_url.format(search_item)
    final_url = final_url + "&page={}"
    return final_url.format(page)

def extractRow(item):
    """Extract details of each row of Flipkart Product."""

    title = item.find('div', attrs={'class': '_4rR01T'}).text

    url = "https://www.flipkart.com/" + item.get('href')

    try:
        price = item.find('div', attrs={'class': '_30jeq3 _1_WHN1'}).text.replace('₹', 'Rs.')
    except AttributeError:
        price = ''

    try:
        ratings = item.find('div', attrs={'class': '_3LWZlK'}).text
    except AttributeError:
        ratings = ''

    try:
        rating_reviews = item.find('span', attrs={'class': '_2_R_DZ'}).text.split(' ')
        total_ratings = rating_reviews[0]
        total_reviews = rating_reviews[2]
    except AttributeError:
        total_ratings = ''
        total_reviews = ''

    return (title, price, ratings, total_ratings, total_reviews, url)

records = list()

def main(search, pages):
    """Executes the main program."""

    # Initialize the Chrome driver
    driver = webdriver.Chrome("chromedriver")

    record = list()

    for page in range(1, pages+1):
        driver.get(get_url(search, page))
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        results = soup.findAll('a', href=True, attrs={'class':'_1fQZEK'})

        for result in results:
            record = extractRow(result)

            if record:
                records.append(record)

    driver.close()

    # # Saving file to a CSV
    data = pd.DataFrame(data=records, columns=['Title', 'Price', 'Ratings', 'Total Ratings', 'Total Reviews', 'URL'])
    data.to_csv('results.csv', index=False)
    return data.head()

In [3]:
# Search and get the data associated with the product
search = input("Enter the Product:")
main(search=search, pages=21)

Unnamed: 0,Title,Price,Ratings,Total Ratings,Total Reviews,URL
0,acer Aspire 7 Core i5 10th Gen - (8 GB/512 GB ...,"Rs.54,990",4.5,2809,Reviews,https://www.flipkart.com//acer-aspire-7-core-i...
1,Lenovo IdeaPad 3 Core i3 10th Gen - (8 GB/256 ...,"Rs.39,990",4.4,275,Reviews,https://www.flipkart.com//lenovo-ideapad-3-cor...
2,HP Ryzen 3 Dual Core 3250U - (8 GB/256 GB SSD/...,"Rs.38,990",4.6,110,Reviews,https://www.flipkart.com//hp-ryzen-3-dual-core...
3,Lenovo IdeaPad 3 Core i3 10th Gen - (8 GB/1 TB...,"Rs.39,990",4.2,390,Reviews,https://www.flipkart.com//lenovo-ideapad-3-cor...
4,HP Core i3 11th Gen - (8 GB/256 GB SSD/Windows...,"Rs.40,490",4.3,164,Reviews,https://www.flipkart.com//hp-core-i3-11th-gen-...
