In [1]:
# ---- SCRAPE BUKUWARUNG -----

# Preparation #
import re
import time
from datetime import datetime

import numpy as np
import pandas as pd
from appium import webdriver
from appium.options.android import UiAutomator2Options
from appium.webdriver.common.appiumby import AppiumBy
from appium.webdriver.common.touch_action import TouchAction
from bs4 import BeautifulSoup as bs
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver.common.action_chains import ActionChains, ActionBuilder
from selenium.webdriver.common.actions import interaction
from selenium.webdriver.common.actions.pointer_input import PointerInput

# Pauses, in seconds, passed to time.sleep() between UI interactions.
swipe_delay = 0.5   # after a swipe gesture
back_delay = 1      # after a hardware key press
action_delay = 1    # after a tap, and between PPOB types
launch_delay = 2    # after (re)activating the app

def swipe(driver, start, end):
    """Drag a single touch pointer across the screen from ``start`` to ``end``.

    Args:
        driver: session the gesture is performed on; it is handed to both
            ``ActionChains`` and ``ActionBuilder``.
        start: indexable whose items 0 and 1 are the x and y where the
            pointer is pressed down.
        end: indexable whose items 0 and 1 are the x and y where the
            pointer is released.
    """
    from_x, from_y = start[0], start[1]
    to_x, to_y = end[0], end[1]

    chain = ActionChains(driver)
    # Swap the default pointer for a touch pointer so the gesture is a finger drag.
    touch_pointer = PointerInput(interaction.POINTER_TOUCH, "touch")
    chain.w3c_actions = ActionBuilder(driver, mouse=touch_pointer)

    finger = chain.w3c_actions.pointer_action
    finger.move_to_location(from_x, from_y)
    finger.pointer_down()
    finger.move_to_location(to_x, to_y)
    finger.release()

    chain.perform()

def start_app(app):
    """Launch ``app``, clear it through the recent-apps screen, then launch it again.

    The first launch shows a pop-up; opening, closing and reopening the app is
    the workaround for it. Relies on the notebook-level ``driver`` and on the
    ``launch_delay`` / ``back_delay`` / ``swipe_delay`` settings.

    Args:
        app: package name passed to ``driver.activate_app``.
    """
    app_switch_keycode = 187  # Android KEYCODE_APP_SWITCH (recent-apps screen)
    swipe_from, swipe_to = (500, 1250), (500, 750)

    def launch():
        # Bring the app to the foreground and give it time to draw.
        driver.activate_app(app)
        time.sleep(launch_delay)

    launch()

    driver.press_keycode(app_switch_keycode)
    time.sleep(back_delay)

    # Upward swipe on the recents screen -- presumably flicks the app card away.
    swipe(driver, swipe_from, swipe_to)
    time.sleep(swipe_delay)

    launch()

def process_data(rows, type, brand):
    """Turn scraped ``(SKU, price)`` rows into a labelled DataFrame.

    Args:
        rows: sequence of ``(SKU, price)`` pairs; both values are strings.
        type: PPOB category label written to the ``type`` column. The name
            shadows the built-in but is kept because callers pass it by keyword.
        brand: brand label written to the ``brand`` column.

    Returns:
        DataFrame with columns ``SKU``, ``price``, ``type`` and ``brand``.
        Every ``.`` is stripped from ``price`` (``'Rp15.115'`` -> ``'Rp15115'``);
        ``SKU`` is left untouched.
    """
    return (
        pd.DataFrame(rows, columns=['SKU', 'price'])
        .assign(
            # Literal replacement instead of the former non-raw '\.' pattern,
            # which was an invalid escape sequence (warns on current Pythons).
            price=lambda frame: frame.price.str.replace('.', '', regex=False),
            type=type,
            brand=brand,
        )
    )
    

In [5]:
# DRIVER #
# Settings for the target emulator, applied to the options object one
# attribute at a time in the order listed.
# NOTE(review): the camelCase names are assigned exactly as written; confirm
# the installed Appium client turns each of them into a session capability.
device_settings = {
    'automationName': 'UiAutomator2',
    'udid': 'emulator-5554',
    'platformName': 'Android',
    'platformVersion': '12',
    'deviceName': 'bwphone',
}

options = UiAutomator2Options()
for setting_name, setting_value in device_settings.items():
    setattr(options, setting_name, setting_value)

# Open the session against the Appium server on localhost.
appium_server_url = 'http://127.0.0.1:4723'
driver = webdriver.Remote(appium_server_url, options=options)

In [6]:
# ---- Open "Atur Harga Jual" and scrape the price list of every PPOB type ----

APP_PACKAGE = 'com.bukuwarung'
KEYCODE_BACK = 4  # Android KEYCODE_BACK
ATUR_HARGA_XPATH = "//android.widget.TextView[contains(@text, 'Atur Harga')]"
DRAWER_SWIPE = ((350, 1750), (350, 1300))  # swipe up inside the "Lihat Semua" drawer
LIST_SWIPE = ((500, 1400), (500, 1000))    # swipe up inside the brand / item list
MIN_BUTTON_Y = 1200     # the wanted "Atur Harga" entry is the one located lower on screen
MAX_DRAWER_SWIPES = 30  # stop with an error instead of swiping forever


def _page_soup(driver):
    """Parse the UI hierarchy of the screen currently shown by ``driver``."""
    return bs(driver.page_source, 'html.parser')


def _visible_brands(driver):
    """Return the non-empty text of every brand label on screen, in document order."""
    labels = _page_soup(driver).find_all(attrs={'resource-id': 'com.bukuwarung:id/tv_name'})
    return [label.get('text') for label in labels if label.get('text')]


def _visible_items(driver):
    """Return one tuple per product card on screen holding its first two texts.

    The texts are read from the ``text`` attribute of the card and its
    descendants, so values containing quotes or ``&`` come through unescaped
    and the result does not depend on how the markup is serialised. A fully
    visible card yields ``(SKU, price)``; a partially visible one may yield
    fewer values.
    """
    cards = _page_soup(driver).find_all(attrs={'resource-id': 'com.bukuwarung:id/cl_layout'})
    rows = []
    for card in cards:
        texts = [node.get('text') for node in [card, *card.find_all(True)] if node.get('text')]
        rows.append(tuple(texts[:2]))
    return rows


def _scrape_open_brand(driver):
    """Scroll through the currently opened brand and return every row seen.

    Stops as soon as a screen shows nothing that was not already collected.
    """
    collected = []
    while True:
        visible = _visible_items(driver)
        if set(visible) <= set(collected):
            return collected
        swipe(driver, *LIST_SWIPE)
        time.sleep(swipe_delay)
        collected.extend(visible)


# start the app
start_app(APP_PACKAGE)

# A popup appears on launch; BACK closes it. If BACK also left the app, go back in.
driver.press_keycode(KEYCODE_BACK)
time.sleep(back_delay)
if driver.current_package != APP_PACKAGE:
    driver.activate_app(APP_PACKAGE)
    time.sleep(launch_delay)

# click the "Lihat Semua" button
driver.find_element(by=AppiumBy.ID, value='com.bukuwarung:id/others_img').click()
time.sleep(action_delay)

# Swipe the drawer up until the lower "Atur Harga Jual" entry is on screen.
# Only lookup failures are retried; anything else (including Ctrl-C) propagates.
for _attempt in range(MAX_DRAWER_SWIPES):
    swipe(driver, *DRAWER_SWIPE)
    time.sleep(swipe_delay)
    try:
        atur_harga_button = driver.find_element(by=AppiumBy.XPATH, value=ATUR_HARGA_XPATH)
        is_lower_entry = atur_harga_button.location['y'] > MIN_BUTTON_Y
    except (NoSuchElementException, StaleElementReferenceException):
        continue
    if is_lower_entry:
        break
else:
    raise RuntimeError(
        f"'Atur Harga' entry not found below y={MIN_BUTTON_Y} after {MAX_DRAWER_SWIPES} swipes"
    )

# click the "Atur Harga Jual" button
atur_harga_button.click()
time.sleep(action_delay)

# scrape the data for each ppob type
ppob_types = ['Pulsa', 'Listrik', 'eMoney', 'Paket Data', "Voucher Game"]
frames = []
for ppob in ppob_types:
    # delay for switch between ppob types
    time.sleep(action_delay)
    # choose the ppob type through the filter
    driver.find_element(by=AppiumBy.ID, value='com.bukuwarung:id/iv_filter').click()
    time.sleep(action_delay)
    driver.find_element(by=AppiumBy.XPATH, value=f"//android.widget.TextView[@text='{ppob}']").click()
    time.sleep(action_delay)

    seen_brands = set()
    while True:
        visible_brands = _visible_brands(driver)
        if set(visible_brands) <= seen_brands:  # no new brand on screen -> done with this type
            break
        for position, brand in enumerate(visible_brands):
            try:
                # The first brand is opened automatically; the others need a tap.
                # NOTE(review): position 0 is treated as already open on every
                # screen of brands, not only the first -- confirm after scrolling.
                if position > 0:
                    driver.find_element(
                        by=AppiumBy.XPATH,
                        value=f"//android.widget.TextView[@text='{brand}']",
                    ).click()
                    time.sleep(action_delay)
                frames.append(process_data(_scrape_open_brand(driver), type=ppob, brand=brand))
            except Exception as exc:
                # Best effort: report the brand that failed and move on to the next one.
                print(f"skipped {ppob} / {brand}: {exc!r}")
                time.sleep(swipe_delay)
        swipe(driver, *LIST_SWIPE)
        time.sleep(swipe_delay)
        seen_brands.update(visible_brands)

results = (
    pd.concat(frames)
    .drop_duplicates()
    .dropna()  # rows from partially visible cards have no price
    .loc[lambda x: x.price != "Atur Harga"]  # cards showing the button text instead of a price
)
results






Unnamed: 0,SKU,price,type,brand
0,Telkomsel 15.000,Rp15115,Pulsa,Telkomsel
1,Telkomsel 25.000,Rp24900,Pulsa,Telkomsel
2,Telkomsel 100.000,Rp98100,Pulsa,Telkomsel
6,Telkomsel 5.000,Rp5350,Pulsa,Telkomsel
7,Telkomsel 1.000.000,Rp990900,Pulsa,Telkomsel
...,...,...,...,...
1,6480 Genesis Crystal,Rp1648900,Voucher Game,Genshin Impact Powered by Google Play
2,980 Genesis Crystal,Rp273900,Voucher Game,Genshin Impact Powered by Google Play
3,300 Genesis Crystal,Rp86900,Voucher Game,Genshin Impact Powered by Google Play
4,1980 Genesis Crystal,Rp526900,Voucher Game,Genshin Impact Powered by Google Play


In [7]:
# results = (
#     pd.concat(results)
#     .drop_duplicates()
#     .dropna() # remove NaN
# )

# results = results.loc[lambda x: x.price != "Atur Harga"]
# results