In [2]:
from appium import webdriver
from appium.options.android import UiAutomator2Options
from appium.webdriver.common.appiumby import AppiumBy
from appium.webdriver.common.touch_action import TouchAction
from selenium.webdriver.common.action_chains import ActionChains, ActionBuilder
from selenium.webdriver.common.actions.pointer_input import PointerInput
from selenium.webdriver.common.actions import interaction

from bs4 import BeautifulSoup as bs
import re
import numpy as np
import pandas as pd
import time
from datetime import datetime

# Pause lengths handed to time.sleep() (seconds) so the UI can settle between steps.
swipe_delay = 1   # after each swipe gesture
action_delay = 5  # after each key press, tap or element click
launch_delay = 3  # after activating the app

# define swipe
def swipe(driver, start, end):
    """Drag a single touch pointer across the screen from ``start`` to ``end``.

    Args:
        driver: Session the gesture is performed on; handed to both
            ``ActionChains`` and ``ActionBuilder``.
        start: Indexable pair; ``start[0]`` / ``start[1]`` are the x / y
            coordinates where the pointer is pressed down.
        end: Indexable pair; ``end[0]`` / ``end[1]`` are the x / y
            coordinates where the pointer is released.
    """
    gesture = ActionChains(driver)
    # Drive the gesture with a pointer of kind POINTER_TOUCH named "touch".
    gesture.w3c_actions = ActionBuilder(
        driver,
        mouse=PointerInput(interaction.POINTER_TOUCH, "touch"),
    )
    finger = gesture.w3c_actions.pointer_action
    finger.move_to_location(start[0], start[1])
    finger.pointer_down()
    finger.move_to_location(end[0], end[1])
    finger.release()
    gesture.perform()

def click(driver, x, y):
    """Tap the screen at the coordinates (``x``, ``y``).

    Args:
        driver: Session the gesture is performed on; handed to both
            ``ActionChains`` and ``ActionBuilder``.
        x: Horizontal coordinate of the tap.
        y: Vertical coordinate of the tap.
    """
    gesture = ActionChains(driver)
    # Drive the gesture with a pointer of kind POINTER_TOUCH named "touch".
    gesture.w3c_actions = ActionBuilder(
        driver,
        mouse=PointerInput(interaction.POINTER_TOUCH, "touch"),
    )
    finger = gesture.w3c_actions.pointer_action
    finger.move_to_location(x, y)
    finger.pointer_down()
    # Keep the pointer down for a pause of 0.1 before lifting it.
    finger.pause(0.1)
    finger.release()
    gesture.perform()

def process_bukalapak(rows, category, brand):
    """Turn scraped rows into a labelled DataFrame.

    Args:
        rows: Row records accepted by ``pd.DataFrame``; the positional
            columns 0, 1 and 2 are renamed to ``SKU``, ``price`` and ``note``.
        category: Value stored in the ``category`` column of every row.
        brand: Value stored in the ``brand`` column of every row.

    Returns:
        DataFrame with the renamed columns plus ``category`` and ``brand``.
        ``price`` stays a string with every ``R``, ``p`` and ``.`` character
        removed (``"Rp5.500"`` becomes ``"5500"``). When ``rows`` holds no
        data, an empty frame with those five columns is returned.
    """
    frame = pd.DataFrame(rows).rename(columns={0: 'SKU', 1: 'price', 2: 'note'})
    if frame.empty:
        # Nothing was scraped: there is no ``price`` column to clean, so hand
        # back an empty frame that still concatenates with real results.
        return pd.DataFrame(columns=['SKU', 'price', 'note', 'category', 'brand'])
    return frame.assign(
        # Raw string: '\.' inside a normal literal is an invalid escape sequence.
        price=lambda x: x.price.str.replace(r'[Rp.]', '', regex=True),
        category=category,
        brand=brand,
    )

In [8]:
# Prerequisite: start the Appium server first by running "appium --allow-cors" in a terminal.

# Register the capabilities through load_capabilities() so they are stored in
# the options' capability set; the client exposes capabilities as snake_case
# properties, so camelCase attribute assignments are not a documented way to
# set them.
options = UiAutomator2Options()
options.load_capabilities({
    'automationName': 'UiAutomator2',
    'udid': 'emulator-5554',
    'platformName': 'Android',
    'platformVersion': '12',
    'deviceName': 'bwphone',
})

# setup the driver
driver = webdriver.Remote('http://127.0.0.1:4723', options=options)

In [4]:
# Reduce warning noise: 'once' prints only the first occurrence of each
# matching warning; it does not suppress warnings entirely.
import warnings
warnings.filterwarnings('once')

In [5]:
# Press BACK until the launcher (home screen) is the foreground activity.
# Android key code 4 is KEYCODE_BACK; the previous value 5 is KEYCODE_CALL,
# which is not a back press.
while driver.current_activity != '.NexusLauncherActivity':
    driver.press_keycode(4)
    time.sleep(action_delay)

# open the bukalapak app then wait
driver.activate_app('com.bukalapak.mitra')
time.sleep(launch_delay)

# press BACK once after launch
driver.press_keycode(4)
time.sleep(action_delay)

# tap the burger icon (fixed screen coordinates)
click(driver, 1000, 150)
time.sleep(action_delay)
# swipe up
swipe(driver, (500, 1400), (500, 800))
time.sleep(swipe_delay)
# click "Harga Jual Produk Virtual" (9th entry of the menu RecyclerView)
driver.find_element(by=AppiumBy.XPATH, value="/hierarchy/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.view.ViewGroup/android.widget.FrameLayout/android.widget.LinearLayout/androidx.recyclerview.widget.RecyclerView/android.widget.FrameLayout[9]/android.widget.LinearLayout").click()
time.sleep(action_delay)

products = {1:"Pulsa", 2:"Paket Data", 3:"Token Listrik", 4:'Top Up Digital'}


def _open_product(driver, position):
    """Click the product entry at ``position`` (the FrameLayout index inside
    the product RecyclerView), then sleep for ``action_delay`` seconds."""
    driver.find_element(by=AppiumBy.XPATH, value=f"/hierarchy/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.view.ViewGroup/android.widget.LinearLayout[2]/androidx.recyclerview.widget.RecyclerView/android.widget.FrameLayout[{position}]/android.widget.LinearLayout/android.widget.LinearLayout/android.widget.LinearLayout/android.widget.TextView").click()
    time.sleep(action_delay)


def _collect_price_rows(driver):
    """Scroll through the price list on screen and gather every row shown.

    Each screen is parsed from ``driver.page_source``: every element whose
    ``text`` attribute starts with ``Rp`` marks a row, and all ``text="..."``
    values found in that element's parent become the row's cells.

    Returns:
        tuple: ``(rows, sku_list)``. ``rows`` holds one DataFrame per screen
        that showed at least one value of column 0 not seen before
        (consecutive screens can overlap, so rows may repeat across frames).
        ``sku_list`` holds the column-0 values of those frames in the order
        collected.
    """
    price_pattern = re.compile(r'^Rp.+$')
    rows = []
    sku_list = []
    while True:
        # 'html' lets BeautifulSoup choose among the installed HTML parsers.
        soup = bs(driver.page_source, 'html')
        price_tags = soup.find_all(attrs={'text': price_pattern})
        if not price_tags:
            # No price rows on this screen; pd.concat would raise on an empty list.
            break
        # NOTE(review): `.+` is greedy, so on a line holding several attributes
        # the capture runs to the last `"` on that line - confirm this matches
        # the page-source layout.
        new_rows = pd.concat(
            [pd.Series(re.findall(r'text="(.+)"', str(tag.parent))) for tag in price_tags],
            axis=1,
        ).T
        # Stop once a screen brings nothing new. The check must run BEFORE the
        # SKUs are recorded; checking afterwards is always true and ended the
        # loop on the very first screen.
        if set(new_rows[0]).issubset(sku_list):
            break
        rows.append(new_rows)
        sku_list.extend(new_rows[0])
        swipe(driver, (500, 1400), (500, 800))
        time.sleep(swipe_delay)
    return rows, sku_list


# Scrape every product and keep each result under its product name, so a later
# product no longer overwrites an earlier one.
scraped_rows = {}
for prod, product_name in products.items():
    _open_product(driver, prod)
    rows, sku_list = _collect_price_rows(driver)
    scraped_rows[product_name] = rows

# -- PULSA
# Re-open the first product and scrape it again; `rows` and `sku_list` end up
# holding this result.
_open_product(driver, 1)
rows, sku_list = _collect_price_rows(driver)





KeyboardInterrupt: 

In [5]:
# Sanity check: iterating a dict yields its keys, so this prints 1 to 4.
products = dict(enumerate(["Pulsa", "Paket Data", "Token Listrik", 'Top Up Digital'], start=1))
for prod in products.keys():
    print(prod)

1
2
3
4


In [9]:
# Snapshot the current screen and tabulate every price row on it: one row per
# element whose `text` attribute starts with "Rp", with the cells taken from
# all text="..." values found in that element's parent.
source = driver.page_source
soup = bs(source, 'html')
price_pattern = re.compile(r'^Rp.+$')
row_series = []
for price_tag in list(soup.find_all(attrs={'text': price_pattern})):
    cell_texts = re.findall(r'text="(.+)"', str(price_tag.parent))
    row_series.append(pd.Series(cell_texts))
new_rows = pd.concat(row_series, axis=1).T



In [5]:
### OLD CODE - REPLACED ON SEPT 14, 2023
# # keeps clicking back until reach home screen
# while driver.current_activity != '.NexusLauncherActivity':
#     driver.press_keycode(4)
#     time.sleep(action_delay)

# # open the bukalapak app then wait
# driver.activate_app('com.bukalapak.mitra')
# time.sleep(launch_delay)

# # dismiss login
# driver.find_element(by=AppiumBy.XPATH, value='//android.widget.TextView[@text="Gabung sekarang!"]').click()
# time.sleep(action_delay)
# driver.find_element(by=AppiumBy.XPATH, value='//android.widget.TextView[@text="Lewati"]').click()
# time.sleep(action_delay)

# # press pulsa -> access to pulsa, paket, token listrik
# driver.find_element(by=AppiumBy.XPATH, value='//android.widget.TextView[@text="Pulsa"]').click()
# time.sleep(action_delay)

# results = []
# for category in ['Pulsa', 'Paket Data']:
#     driver.find_element(by=AppiumBy.XPATH, value=f'//android.widget.TextView[@text="{category}"]').click()    

#     brands = [i.text for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.widget.LinearLayout/android.widget.TextView')]
#     for brand in brands:
#         print(f'scrap {brand}')
#         driver.find_element(by=AppiumBy.XPATH, value=f'//android.widget.LinearLayout/android.widget.TextView[@text="{brand}"]').click()

#         rows = []
#         while True:
#             source = driver.page_source
#             soup = bs(source, 'html.parser')
#             new_rows = [tuple(re.findall(r'text="(.+)"', str(i.parent))) for i in list(soup.find_all(attrs={'text': re.compile(r'^Rp.+$')}))]
#             if set(new_rows).intersection(rows) == set(new_rows):
#                 break
#             swipe(driver, (500, 1400), (500, 800))
#             time.sleep(swipe_down_delay)
#             rows.extend(new_rows)
#         results.append(process_bukalapak(rows, category, brand))

#         while True:
#             try:
#                 el = driver.find_element(by=AppiumBy.XPATH, value=f'//android.widget.LinearLayout/android.widget.TextView[@text="{brand}"]')
#                 time.sleep(action_delay)
#                 el.click()
#                 break
#             except:
#                 swipe(driver, (500, 800), (500, 1400))
#                 time.sleep(swipe_up_delay)

# driver.find_element(by=AppiumBy.XPATH, value=f'//android.widget.TextView[@text="Token Listrik"]').click()
# time.sleep(action_delay)
# rows = []
# while True:
#     source = driver.page_source
#     soup = bs(source, 'html.parser')
#     new_rows = [tuple(re.findall(r'text="(.+)"', str(i.parent))) for i in list(soup.find_all(attrs={'text': re.compile(r'^Rp.+$')}))]
#     if set(new_rows).intersection(rows) == set(new_rows):
#         break
#     swipe(driver, (500, 1400), (500, 800))
#     time.sleep(swipe_down_delay)
#     rows.extend(new_rows)
# results.append(process_bukalapak(rows, category='Token Listrik', brand=None))

# results_df = (
#     pd.concat(results)
#     .drop_duplicates()
#     .assign(
#         platform = 'mitra bukalapak',
#     )
# )

NoSuchElementException: Message: An element could not be located on the page using the given search parameters.; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
NoSuchElementError: An element could not be located on the page using the given search parameters.
    at AndroidUiautomator2Driver.findElOrEls (C:\Users\bukuw\.appium\node_modules\appium-uiautomator2-driver\node_modules\appium-android-driver\lib\commands\find.js:75:11)
    at processTicksAndRejections (node:internal/process/task_queues:95:5)
    at AndroidUiautomator2Driver.findElOrElsWithProcessing (C:\Users\bukuw\AppData\Roaming\npm\node_modules\appium\node_modules\@appium\base-driver\lib\basedriver\commands\find.ts:60:12)
    at AndroidUiautomator2Driver.findElement (C:\Users\bukuw\AppData\Roaming\npm\node_modules\appium\node_modules\@appium\base-driver\lib\basedriver\commands\find.ts:75:12)

In [5]:
# Write the scrape result to result/bukalapak_scrape_<YYYY_MM_DD>.csv.
# NOTE(review): in the visible notebook `results_df` is only assigned inside
# the commented-out legacy cell - confirm it is built before this cell runs.
from datetime import date
from pathlib import Path

today = date.today().strftime("%Y_%m_%d")
output_dir = Path('result')
# to_csv does not create missing directories, so make sure the folder exists.
output_dir.mkdir(parents=True, exist_ok=True)
results_df.to_csv(output_dir / f'bukalapak_scrape_{today}.csv')