In [11]:
from appium import webdriver
from appium.options.android import UiAutomator2Options
from appium.webdriver.common.appiumby import AppiumBy
from appium.webdriver.common.touch_action import TouchAction
from selenium.webdriver.common.action_chains import ActionChains, ActionBuilder
from selenium.webdriver.common.actions.pointer_input import PointerInput
from selenium.webdriver.common.actions import interaction

from bs4 import BeautifulSoup as bs
import re
import numpy as np
import pandas as pd
import time
from datetime import datetime

swipe_delay = 1
action_delay = 2
launch_delay = 3

# define swipe
def swipe(driver, start, end):
    actions = ActionChains(driver)
    actions.w3c_actions = ActionBuilder(driver, mouse=PointerInput(interaction.POINTER_TOUCH, "touch"))
    actions.w3c_actions.pointer_action.move_to_location(start[0], start[1])
    actions.w3c_actions.pointer_action.pointer_down()
    actions.w3c_actions.pointer_action.move_to_location(end[0], end[1])
    actions.w3c_actions.pointer_action.release()
    actions.perform()

def process_payfazz(rows, category, brand=None):
    if category in ['Uang Elektronik']:
        return (
            pd.DataFrame(rows, columns=['entry'])
            .assign(
                SKU = lambda x: x.entry.str.extract('^(.+)\\nRp.+\\nRp', expand=False),
                price = lambda x: x.entry.str.extract('^.+\\nRp (.+)\\nRp ', expand=False).str.replace('\.','', regex=True),
                admin_fee = lambda x: x.entry.str.extract('\\nRp ([0-9\.]+)\\nAtur$', expand=False).str.replace('\.','', regex=True),
                category = category, 
                brand=brand, 
            )
            .drop(columns=['entry'])
        )
    if category in ['Pulsa', 'Data', 'Voucher Game', 'Transfer Bank', 'Kirim Tunai', 'Tarik Tunai', 'Isi Deposit Aplikasi', 
                   'Bayar E-Commerce', 'Token PLN', 'Pulsa Pascabayar', 'Multifinance', 'Tagihan PLN', 'PDAM', 'Tagihan Gas', 
                   'TV Kabel Berlangganan', 'BPJS', 'PBB', 'TELKOM']:
        return (
            pd.DataFrame(rows, columns=['entry'])
            .assign(
                SKU = lambda x: x.entry.str.extract('^(.+)\\nRp ', expand=False),
                price = lambda x: x.entry.str.extract('Rp ([0-9\.]+)\\nAtur', expand=False).str.replace('\.','', regex=True),
                category = category, 
                brand = brand,
            )
            .drop(columns=['entry'])
        )



In [12]:
# dont forget to run "appium --allow-cors" in terminal

options = UiAutomator2Options()
options.automationName = 'UiAutomator2'
options.udid = 'emulator-5554'
options.platformName = 'Android'
options.platformVersion = '12'
options.deviceName = 'bwphone'

# setup the driver
driver = webdriver.Remote('http://127.0.0.1:4723', options=options)

In [13]:
# remove the warnings
import warnings
warnings.filterwarnings('once')

In [158]:
temp = [x for x in content1 if (str(x) != 'None') & ('Axis' in str(x))]

In [166]:
temp

['Axis\nAxis 5.000\nMasa Aktif 7 Hari \nRp 5.990\nPROMO - DC Axis 5.000\nAxis 5.000 - Bonus sd 1GB di aplikasi AXISnet (Masa berlaku bonus 1 hari)\nRp 6.120\nPromo\nAxis 10.000\nMasa Aktif 15 Hari \nRp 10.970\nRp 10.870\nPromo\nPROMO - DC Axis 10.000\nAxis 10.000 - Bonus sd 2GB di aplikasi AXISnet (Masa berlaku bonus 3 hari)\nRp 11.082\nRp 10.982\nPromo\nPROMO - DC Axis 15.000\nAxis 15.000 - Bonus sd 3GB di aplikasi AXISnet (Masa berlaku bonus 3 hari)\nRp 15.100\nRp 15.000\nPromo\nPROMO - DC Axis 25.000\nAxis 25.000 - Bonus sd 5GB di aplikasi AXISnet (Masa berlaku bonus 3 hari)\nRp 25.050\nRp 24.900\nPromo\nPROMO - DC Axis 30.000\nAxis 30.000 - Bonus sd 5GB di AXISnet (Masa berlaku bonus 3 hari)\nRp 29.990\nRp 29.890\nPromo\nPROMO - DC Axis 50.000\nAxis 50.000 - Bonus sd 3GB di aplikasi AXISnet (Masa berlaku bonus 3 hari)\nRp 49.990\nRp 49.840\nPromo\nPROMO - DC Axis 100.000\nAxis 100.000 - Bonus sd 5GB di aplikasi AXISnet(Masa berlaku bonus 3 hari)\nRp 99.500\nRp 99.350\nPROMO - DC Ax

In [186]:
(
    pd.DataFrame(temp, columns=['entry'])
    .assign(
        SKU = lambda x: x.entry.str.extract('^(?:\\nAxis)', expand=False),
        price = lambda x: x.entry.str.extract('Rp ([0-9\.]+)', expand=False).str.replace('\.','', regex=True),
        category = 'Pulsa', 
        brand = 'Axis',
    )
)

  price = lambda x: x.entry.str.extract('Rp ([0-9\.]+)', expand=False).str.replace('\.','', regex=True),


ValueError: pattern contains no capture groups

In [None]:
# keeps clicking back until reach home screen
while driver.current_activity != '.NexusLauncherActivity':
    driver.press_keycode(4)
    time.sleep(action_delay)

# open the payfazz app then wait
driver.activate_app('com.payfazz.android')
time.sleep(launch_delay)

# click the "Masuk tanpa daftar" text
driver.find_element(by=AppiumBy.ACCESSIBILITY_ID, value="Masuk tanpa Daftar").click()
time.sleep(action_delay)

# popup will appear. we will click back and if we are out then will just go back in
driver.press_keycode(4)
time.sleep(action_delay)
if driver.current_package != 'com.payfazz.android':
    driver.activate_app('com.payfazz.android')
    time.sleep(launch_delay)

# click the "Masuk tanpa daftar" text
driver.find_element(by=AppiumBy.ACCESSIBILITY_ID, value="Masuk tanpa Daftar").click()
time.sleep(action_delay)

# click pulsa 
driver.find_element(by=AppiumBy.XPATH, value="//android.view.View[@content-desc=\"%\nPulsa\"]/android.widget.ImageView").click()
time.sleep(action_delay)

coupon_types = ['Pulsa', 'Data', 'Token PLN', 'Voucher Game', 'Uang Elektronik', 'Kode Voucher Google Play']
results = []
for type_ in coupon_types:
    print(type_)
    driver.find_element(by=AppiumBy.XPATH, value=f"//android.widget.Button[@content-desc='{type_}']").click()
    swipe(driver, (450, 300), (300, 300))
    time.sleep(swipe_delay)
    rows = []
    while True:
        new_rows = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View')]
        if set(new_rows).intersection(rows) == set(new_rows):
            break
        swipe(driver, (500, 1400), (500, 800))
        time.sleep(swipe_delay)
        rows.extend(new_rows)
    rows = pd.Series(rows).dropna().drop_duplicates().tolist()
    rows = [i for i in rows if i not in ['Daftar Harga', 'Masuk sekarang juga!', 'Mulai Jualan Biar Makin Cuan']]
    
    for row in rows:
        driver.find_element(by=AppiumBy.ACCESSIBILITY_ID, value=row)
        time.sleep(action_delay)

        content = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View')]
        content = [x for x in content if (str(x) != 'None') & ('Axis' in str(x))] 

        # while True:
        #     try:
        #         driver.find_element(by=AppiumBy.XPATH, value=f"//android.widget.ImageView[@content-desc='{i}']").click()
        #         time.sleep(action_delay)
        #         break
        #     except:
        #         swipe(driver, (500, 1200), (500, 1400))
        #         time.sleep(swipe_delay)
        # content1 = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View')]
        # driver.find_element(by=AppiumBy.XPATH, value='//android.view.View[contains(@content-desc, "KETENTUAN")]').click()
        # content2 = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View')][-1:]
        # results.append([type_] + content1 + content2)

        # go back
        # driver.press_keycode(4)
        # time.sleep(action_delay)


In [5]:
coupon_types = ['Pulsa', 'Data', 'Token PLN', 'Voucher Game', 'Uang Elektronik', 'Kode Voucher Google Play']


In [5]:
# keeps clicking back until reach home screen
while driver.current_activity != '.NexusLauncherActivity':
    driver.press_keycode(4)
    time.sleep(action_delay)

# open the payfazz app then wait
driver.activate_app('com.payfazz.android')
time.sleep(launch_delay)

# click the "Masuk tanpa daftar" text
driver.find_element(by=AppiumBy.ACCESSIBILITY_ID, value="Masuk tanpa Daftar").click()
time.sleep(action_delay)

# popup will appear. we will click back and if we are out then will just go back in
driver.press_keycode(4)
time.sleep(action_delay)
if driver.current_package != 'com.payfazz.android':
    driver.activate_app('com.payfazz.android')
    time.sleep(launch_delay)

# click the "Masuk tanpa daftar" text
driver.find_element(by=AppiumBy.ACCESSIBILITY_ID, value="Masuk tanpa Daftar").click()
time.sleep(action_delay)

### Scrap promotions
results = []
## Click on coupon
driver.find_element(by=AppiumBy.XPATH, value=f"//android.widget.ImageView[@index='1']").click()

coupon_types = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.widget.Button')]
coupon_types = list(set(coupon_types).difference(['Semua']))
for type_ in coupon_types:
    print(type_)
    driver.find_element(by=AppiumBy.XPATH, value=f"//android.widget.Button[@content-desc='{type_}']").click()
    rows = []
    while True:
        new_rows = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.widget.ImageView')]
        if set(new_rows).intersection(rows) == set(new_rows):
            break
        swipe(driver, (500, 1400), (500, 800))
        time.sleep(swipe_down_delay)
        rows.extend(new_rows)
    rows = pd.Series(rows).dropna().drop_duplicates().tolist()
    
    for k, i in enumerate(rows[::-1]):
        print(k)
        while True:
            try:
                driver.find_element(by=AppiumBy.XPATH, value=f"//android.widget.ImageView[@content-desc='{i}']").click()
                time.sleep(action_delay)
                break
            except:
                swipe(driver, (500, 1200), (500, 1400))
                time.sleep(swipe_up_delay)
        content1 = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View')]
        driver.find_element(by=AppiumBy.XPATH, value='//android.view.View[contains(@content-desc, "KETENTUAN")]').click()
        content2 = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View')][-1:]
        results.append([type_] + content1 + content2)

        driver.press_keycode(4)
        time.sleep(action_delay)
        
driver.press_keycode(4)
time.sleep(action_delay)
results = (
    pd.DataFrame(results)
    [[0,7,8,13,14]]
    .assign(
        first_detected = datetime.now().strftime('%Y-%m-%d'),
        Platform = 'payfazz'
    )
    .rename(columns={
        0:'Product',
        7:'Header',
        8:'Expiry',
        13:'Description',
        14:'Terms & Condition',
        'first_detected':'First detected',
    })
)
results.to_pickle('data/google_scrapping/payfazz_promo.pkl')

results = []
### Scroll to prabayar and click on atur harga jual
while True:
    try:
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)
        el = driver.find_element(by=AppiumBy.XPATH, value=f"//android.view.View[@content-desc='Alat Warung']")
        if el.location['y'] < 1200:
            break
    except:
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)
driver.find_element(by=AppiumBy.XPATH, value=f"//android.view.View[@content-desc='Atur Harga Jual']").click()

##### scrap finance products
keuangan = ['Transfer Bank', 'Kirim Tunai', 'Tarik Tunai', 'Isi Deposit Aplikasi', 'Bayar E-Commerce']
for category in keuangan:
    driver.find_element(by=AppiumBy.XPATH, value=f"//android.view.View[@content-desc='{category}']").click()
    time.sleep(action_delay)
    rows = []
    while True:
        new_rows = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View[contains(@content-desc, "Rp")]')]
        if set(new_rows).intersection(rows) == set(new_rows):
            break
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)
        rows.extend(new_rows)

    driver.press_keycode(4)
    time.sleep(action_delay)
    results.append(process_payfazz(rows, category))

# scroll to prabayar
while True:
    try:
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)
        el = driver.find_element(by=AppiumBy.XPATH, value=f"//android.view.View[@content-desc='Prabayar']")
        if el.location['y'] < 1200:
            break
    except:
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)

#### Scrap prabayar
prabayar = ['Uang Elektronik', 'Pulsa', 'Data', 'Voucher Game']#, 'Token PLN'
for category in prabayar:
    print(f'click on {category}')
    driver.find_element(by=AppiumBy.XPATH, value=f"//android.view.View[@content-desc='{category}']").click()
    time.sleep(action_delay)

    # identify brands within products
    print('search brands')
    brands = []
    while True:
        source = driver.page_source
        soup = bs(source, 'html')
        new_brands = [i['content-desc'].replace('\n', ' ') for i in list(soup.find_all(attrs={'content-desc': re.compile(r'^.+$')}))]
        if set(new_brands).intersection(brands) == set(new_brands):
            break
        swipe(driver, (500, 1400), (500, 700))
        time.sleep(swipe_down_delay)
        brands.extend(new_brands)
    print(brands)

    # scrap each SKU 
    for brand in pd.Series(brands).loc[lambda x: x!=category].drop_duplicates().tolist():
        # print(brand)
        el = driver.find_element(by=AppiumBy.XPATH, value=f"//android.widget.ImageView[contains(@text, 'Cari')]")
        el.click()
        el.send_keys(brand)
        time.sleep(action_delay)
        driver.press_keycode(4)
        time.sleep(action_delay)

        rows = []
        cat_item = driver.find_elements(by=AppiumBy.XPATH, value=f"//android.widget.ImageView[contains(@content-desc, '{brand}')]")
        for item in cat_item:
            item.click()
            time.sleep(action_delay)
            while True:
                new_rows = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View[contains(@content-desc, "Rp")]')]
                # print(new_rows)
                if set(new_rows).intersection(rows) == set(new_rows):
                    break
                swipe(driver, (500, 1400), (500, 1000))
                time.sleep(swipe_down_delay)
                rows.extend(new_rows)
            # process result
            results.append(process_payfazz(rows, category, brand))
            # go back
            driver.press_keycode(4)
            time.sleep(action_delay)
            # print(f'{brand} complete')
    
    driver.press_keycode(4)
    time.sleep(action_delay)
    driver.press_keycode(4)
    time.sleep(action_delay)

category = 'Token PLN'
print(f'click on {category}')
driver.find_element(by=AppiumBy.XPATH, value=f"//android.view.View[@content-desc='{category}']").click()
time.sleep(action_delay)
rows = []
while True:
    new_rows = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View[contains(@content-desc, "Rp")]')]
    if set(new_rows).intersection(rows) == set(new_rows):
        break
    swipe(driver, (500, 1400), (500, 800))
    time.sleep(swipe_down_delay)
    rows.extend(new_rows)
# process result
results.append(process_payfazz(rows, category))
# go back
driver.press_keycode(4)
time.sleep(action_delay)

#### Scrap pascabayar
while True:
    try:
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)
        el = driver.find_element(by=AppiumBy.XPATH, value=f"//android.view.View[@content-desc='Pascabayar']")
        if el.location['y'] < 1200:
            break
    except:
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)

pascabayar = ['Pulsa Pascabayar', 'Multifinance', 'Tagihan PLN', 'PDAM', 'Tagihan Gas', 'TV Kabel Berlangganan', 'BPJS', 'PBB', 'TELKOM']
for category in pascabayar:
    print(f'click on {category}')
    driver.find_element(by=AppiumBy.XPATH, value=f"//android.view.View[@content-desc='{category}']").click()
    time.sleep(action_delay)
    
    rows = []
    while True:
        new_rows = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View[contains(@content-desc, "Rp")]')]
        if set(new_rows).intersection(rows) == set(new_rows):
            break
        swipe(driver, (500, 1400), (500, 800))
        time.sleep(swipe_down_delay)
        rows.extend(new_rows)
    # process result
    results.append(process_payfazz(rows, category))
    # go back
    driver.press_keycode(4)
    time.sleep(action_delay)

driver.press_keycode(4)
time.sleep(action_delay)

results = (
    pd.concat(results)
    .drop_duplicates()
    .assign(
        scrap_date = datetime.now().strftime('%Y-%m-%d'),
        platform = 'payfazz',
    )
)
results.to_pickle('data/google_scrapping/payfazz.pkl')
results


Kendala Nomor Handphone?


StaleElementReferenceException: Message: Cached elements 'By.xpath: //android.widget.ImageView' do not exist in DOM anymore; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Stacktrace:
io.appium.uiautomator2.common.exceptions.StaleElementReferenceException: Cached elements 'By.xpath: //android.widget.ImageView' do not exist in DOM anymore
	at io.appium.uiautomator2.model.ElementsCache.restore(ElementsCache.java:78)
	at io.appium.uiautomator2.model.ElementsCache.get(ElementsCache.java:153)
	at io.appium.uiautomator2.handler.GetElementAttribute.safeHandle(GetElementAttribute.java:23)
	at io.appium.uiautomator2.handler.request.SafeRequestHandler.handle(SafeRequestHandler.java:59)
	at io.appium.uiautomator2.server.AppiumServlet.handleRequest(AppiumServlet.java:277)
	at io.appium.uiautomator2.server.AppiumServlet.handleHttpRequest(AppiumServlet.java:271)
	at io.appium.uiautomator2.http.ServerHandler.channelRead(ServerHandler.java:68)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:366)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:352)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:345)
	at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:366)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:352)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:345)
	at io.netty.channel.CombinedChannelDuplexHandler$DelegatingChannelHandlerContext.fireChannelRead(CombinedChannelDuplexHandler.java:435)
	at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:293)
	at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:267)
	at io.netty.channel.CombinedChannelDuplexHandler.channelRead(CombinedChannelDuplexHandler.java:250)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:366)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:352)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:345)
	at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:266)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:366)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:352)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:345)
	at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:366)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:352)
	at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911)
	at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)
	at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:611)
	at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:552)
	at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:466)
	at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:438)
	at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:140)
	at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
	at java.lang.Thread.run(Thread.java:920)
