In [29]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import pandas as pd

In [30]:
# utility to print any error passed
def log_error(e):
    print(e)

In [31]:
# check if the response returned is good or not
def is_good_response(resp):
    content_type = resp.headers['Content-Type'].lower()
    return (resp.status_code == 200 and content_type is not None and content_type.find('html') > -1)

In [32]:
# utility to get html content from a given url
def simple_get(url):
    try:
        with closing(get(url, stream=True)) as resp:
            if is_good_response(resp):
                return resp.content
            else :
                return None
    except RequestException as re:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None

In [33]:
# JioMart data scraping - Dals and Pulses

raw_html_dp = simple_get('https://www.jiomart.com/category/staples/dals-pulses')

# get html parsed using Beautiful soup
html_soup = BeautifulSoup(raw_html_dp, 'html.parser')
print(html_soup)

<!DOCTYPE html>
 <html lang="en-US"> <head> <title>Dals &amp; Pulses - JioMart</title> <meta charset="utf-8"/> <meta content="INDEX,FOLLOW" name="robots"/> <meta content="Buy Grocery Online in Mumbai, Pune, Bangalore at Reliance Smart online grocery store. Best price on fresh fruits &amp; vegetables, dairy &amp; bakery, packaged food." name="description"> <meta content="" name="keywords"> <meta content="width=device-width, height=device-height, initial-scale=1.0, maximum-scale=1.0, user-scalable=0" name="viewport"/> <meta content="JioMart" property="og:site_name"> <meta content="website" property="og:type"/> <meta content="Dals &amp; Pulses - JioMart" property="og:title"/> <meta content="Buy Grocery Online in Mumbai, Pune, Bangalore at Reliance Smart online grocery store. Best price on fresh fruits &amp; vegetables, dairy &amp; bakery, packaged food." property="og:description"/> <meta content="https://www.jiomart.com/category/staples/dals-pulses" property="og:url"/> <meta content="http

In [34]:
containers = html_soup.findAll("div", {"class" : "col-md-3 p-0"})
print (BeautifulSoup.prettify(containers[0]))

<div class="col-md-3 p-0">
 <div class="cat-item">
  <a class="category_name" href="https://www.jiomart.com/good-life-dal-mix-500-gm-491187271" title="Good Life Dal Mix 500 gm">
   <span class="cat-img">
    <img alt="Good Life Dal Mix 500 gm" class="product-image-photo" src="https://www.jiomart.com/images/product/150x150/491187271/good-life-dal-mix-500-gm-pp-0-20200518.jpg"/>
   </span>
   <span class="dis_section">
    <span>
     21
     <span class="per_txt">
      %
     </span>
    </span>
    <br/>
    off
   </span>
   <span class="clsgetname">
    Good Life Dal Mix 500 gm
   </span>
  </a>
  <a href="https://www.jiomart.com/manufacturers/private-label" title="Private Label">
   <span class="drug-varients ellipsis">
    Mfr: Private Label
   </span>
  </a>
  <div class="clearfix">
  </div>
  <span class="price-box">
   <strike id="price">
    Rs. 75.00
   </strike>
   <span id="final_price">
    Rs. 59.00
   </span>
  </span>
  <div class="cart_btn">
   <form class="cart-form">

In [35]:
filename = "JioMart_1.csv"
f = open(filename, "w")
headers = "product, price, final_price\n"
f.write(headers)

28

In [36]:
for container in containers:
    product = container.div.img["alt"]
    
    price_container = container.findAll("strike", {"id" : "price"})
    price = price_container[0].text.strip()
    
    final_price_container = container.findAll("span", {"id" : "final_price"})
    final_price = final_price_container[0].text.strip()
    
    #print ("Product:" + product)
    #print ("Price:" + price)
    #print ("Final Price:" + final_price)
    
    print (product + "," + price + "," + final_price + "\n")
    f.write(product + "," + price + "," + final_price + "\n")
    
f.close()

Good Life Dal Mix 500 gm,Rs. 75.00,Rs. 59.00

Plain Toor Dal 2 kg,Rs. 280.00,Rs. 190.00

Raw Peanuts 1 Kg (Loose),Rs. 127.00,Rs. 125.00

Good Life Raw Peanuts 1 kg,Rs. 180.00,Rs. 160.00

Good Life Toor Dal 1 kg,Rs. 137.00,Rs. 108.00

Loose Toor Dal Value 1 kg,Rs. 101.00,Rs. 95.00

Good Life Moong Whole 500 gm,Rs. 86.00,Rs. 75.00

Plain Moong Dal 2 kg,Rs. 320.00,Rs. 260.00

Good Life Urad Dal Chhilka 500 gm,Rs. 79.00,Rs. 68.00

Loose Chana Dal 1 kg,Rs. 68.00,Rs. 61.00

Pick N Cook Maha Toor Dal 1 kg,Rs. 144.00,Rs. 120.00

Good Life Chana Dal 500 gm,Rs. 49.00,Rs. 40.00

Loose Moong Dal 1 kg,Rs. 132.00,Rs. 130.00

Good Life Toor Dal 500 gm,Rs. 70.00,Rs. 56.00

Good Life Roasted Split Bengal Gram 200 gm,Rs. 35.00,Rs. 25.00

Plain Chana Dal 2 kg,Rs. 180.00,Rs. 122.00

Pick N Cook Regular Kabuli Chana 500 gm,Rs. 70.00,Rs. 63.00

Tata Sampann High Protein Unpolished Moong Dal 1 kg,Rs. 190.00,Rs. 176.70

Good Life Masoor Dal 500 gm,Rs. 62.00,Rs. 55.00



In [37]:
# JioMart - Vegetables
raw_html_veg = simple_get('https://www.jiomart.com/category/fruits-vegetables/fresh-vegetables')
html_soup_veg = BeautifulSoup(raw_html_veg, 'html.parser')
print(html_soup_veg)

<!DOCTYPE html>
 <html lang="en-US"> <head> <title>Fresh Vegetables - JioMart</title> <meta charset="utf-8"/> <meta content="INDEX,FOLLOW" name="robots"/> <meta content="Buy Grocery Online in Mumbai, Pune, Bangalore at Reliance Smart online grocery store. Best price on fresh fruits &amp; vegetables, dairy &amp; bakery, packaged food." name="description"> <meta content="" name="keywords"> <meta content="width=device-width, height=device-height, initial-scale=1.0, maximum-scale=1.0, user-scalable=0" name="viewport"/> <meta content="JioMart" property="og:site_name"> <meta content="website" property="og:type"/> <meta content="Fresh Vegetables - JioMart" property="og:title"/> <meta content="Buy Grocery Online in Mumbai, Pune, Bangalore at Reliance Smart online grocery store. Best price on fresh fruits &amp; vegetables, dairy &amp; bakery, packaged food." property="og:description"/> <meta content="https://www.jiomart.com/category/fruits-vegetables/fresh-vegetables" property="og:url"/> <meta 

In [38]:
containers_veg = html_soup_veg.findAll("div", {"class" : "col-md-3 p-0"})
print (BeautifulSoup.prettify(containers_veg[0]))

<div class="col-md-3 p-0">
 <div class="cat-item">
  <a class="category_name" href="https://www.jiomart.com/potato-per-kg-590000090" title="Potato per Kg">
   <span class="cat-img">
    <img alt="Potato per Kg" class="product-image-photo" src="https://www.jiomart.com/images/product/150x150/590000090/potato-kg-0-20200518.jpg"/>
   </span>
   <span class="clsgetname">
    Potato per Kg
   </span>
  </a>
  <a href="https://www.jiomart.com/manufacturers/na" title="-NA-">
   <span class="drug-varients ellipsis">
    Mfr: -NA-
   </span>
  </a>
  <div class="clearfix">
  </div>
  <span class="price-box">
   <span id="final_price">
    Rs. 27.00
   </span>
  </span>
  <div class="cart_btn">
   <form class="cart-form">
    <input class="cart_qty" name="qty" type="hidden" value="1"/>
    <input name="sku" type="hidden" value="590000090"/>
    <button class="toCart cartbag" data-change="no" data-sku="590000090" title="ADD TO CART" type="submit">
     Add to Cart
    </button>
   </form>
  </div>

In [39]:
filename = "JioMart_2.csv"
f = open(filename, "w")

In [40]:
for container in containers:
    product = container.div.img["alt"]

    #     price_container = container.findAll("strike", {"id" : "price"})
    #     price = price_container[0].text.strip()

    final_price_container = container.findAll("span", {"id" : "final_price"})
    final_price = final_price_container[0].text

    #     print ("Product:" + product)
    #     print ("Final Price:" + final_price)

    #     print (final_price)
    print(product + ","  + final_price + "\n")
    f.write(product + "," + price + "," + final_price + "\n")
    
f.close()

Good Life Dal Mix 500 gm,Rs. 59.00

Plain Toor Dal 2 kg,Rs. 190.00

Raw Peanuts 1 Kg (Loose),Rs. 125.00

Good Life Raw Peanuts 1 kg,Rs. 160.00

Good Life Toor Dal 1 kg,Rs. 108.00

Loose Toor Dal Value 1 kg,Rs. 95.00

Good Life Moong Whole 500 gm,Rs. 75.00

Plain Moong Dal 2 kg,Rs. 260.00

Good Life Urad Dal Chhilka 500 gm,Rs. 68.00

Loose Chana Dal 1 kg,Rs. 61.00

Pick N Cook Maha Toor Dal 1 kg,Rs. 120.00

Good Life Chana Dal 500 gm,Rs. 40.00

Loose Moong Dal 1 kg,Rs. 130.00

Good Life Toor Dal 500 gm,Rs. 56.00

Good Life Roasted Split Bengal Gram 200 gm,Rs. 25.00

Plain Chana Dal 2 kg,Rs. 122.00

Pick N Cook Regular Kabuli Chana 500 gm,Rs. 63.00

Tata Sampann High Protein Unpolished Moong Dal 1 kg,Rs. 176.70

Good Life Masoor Dal 500 gm,Rs. 55.00

