## 1. Install package

In [None]:
!pip install selenium
!pip install webdriver_manager

## 2. Import packages

In [1]:
import json
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

## 3. Define functions 

In [2]:
def tiki_crawer(webpage):
    driver = webdriver.Chrome(ChromeDriverManager().install())
    driver.get(webpage)

    #get product_name
    product_name = driver.find_element_by_xpath('//*[@id="product-name"]').text
    #get product_url
    product_url = driver.current_url
    #get product info dictionary
    default_product = driver.execute_script('return defaultProduct')
    #current_seller is a key in default_product, get seller
    current_seller = default_product["current_seller"]
    seller_id = current_seller["store_id"]
    store_name = current_seller["name"]
    seller = {"store_id": seller_id, "name": store_name}
    #get brand_name
    brand_name = driver.find_element_by_class_name("item-brand").text.split(":")[-1][1] #Thương hiệu: abc xyz
    #get category
    cat_ele = driver.find_element_by_xpath("//ul[@class='breadcrumb']")
    cat = cat_ele.text.split("\n")[1:-1] #0: Trang chủ, -1: sản phẩm hiện tại
    #get item_id
    the_id = current_seller["product_id"]
    sku = current_seller["sku"]
    product_id = driver.execute_script('return masterProductId')
    item_id = {"id": int(the_id), "sku": sku, "product_id": product_id}
    #get price_original
    price_original = default_product["list_price"]
    #get price_sale
    price_sale = default_product["price"]
    #get discount
    discount = default_product["discount_percent"]
    #stock information dictionary
    stock_dict = default_product["stock_item"]
    #get stock
    stock = stock_dict["qty"]
    #get min_stock_sale
    min_stock_sale = stock_dict["min_sale_qty"]
    #get max_stock_sale
    max_stock_sale = stock_dict["max_sale_qty"]
    #result dictionary
    output = {"product_name": product_name, "product_url": product_url, "seller": seller, 
              "brand_name": brand_name, "cat": cat, "seller_id": seller_id, "item_id": item_id, 
              "price_original": price_original, "price_sale": price_sale, "discount": discount, 
              "stock": stock, "min_stock_sale": min_stock_sale, "max_stock_sale": max_stock_sale}
    return output

## 4. Start the crawling process

In [3]:
webpage = r"https://tiki.vn/p39089308.html" # edit me
output = tiki_crawer(webpage)


Looking for [chromedriver 85.0.4183.87 win32] driver in cache 
File found in cache by path [C:\Users\DELL\.wdm\drivers\chromedriver\85.0.4183.87\win32\chromedriver.exe]


In [4]:
print(output)

{'product_name': 'Sữa Bột Enfagrow A+ 4 (400g)', 'product_url': 'https://tiki.vn/p39089308.html', 'seller': {'store_id': 40395, 'name': 'Tiki Trading'}, 'brand_name': 'E', 'cat': ['Đồ Chơi - Mẹ & Bé', 'Dinh dưỡng cho bé', 'Sữa bột cho bé', 'Sữa cho bé dưới 24 tháng'], 'seller_id': 40395, 'item_id': {'id': 39089309, 'sku': '6204438643240', 'product_id': 39089308}, 'price_original': 236000, 'price_sale': 218000, 'discount': 8, 'stock': 18, 'min_stock_sale': 1, 'max_stock_sale': 30}


## 5. Write result to json

In [5]:
with open('output.json', 'w', encoding='utf-8') as file:
    json.dump(output, file, ensure_ascii=False)