In [1]:
from appium import webdriver
from appium.options.android import UiAutomator2Options
from appium.webdriver.common.appiumby import AppiumBy
from appium.webdriver.common.touch_action import TouchAction
from selenium.webdriver.common.action_chains import ActionChains, ActionBuilder
from selenium.webdriver.common.actions.pointer_input import PointerInput
from selenium.webdriver.common.actions import interaction

from bs4 import BeautifulSoup as bs
import re
import numpy as np
import pandas as pd
import time
from datetime import datetime

swipe_delay = 1
action_delay = 2
launch_delay = 3

# define swipe
def swipe(driver, start, end):
    """Drag a single touch pointer across the screen.

    Parameters
    ----------
    driver
        Driver object the W3C action chain is built for and performed on.
    start, end
        Indexable pairs; element 0 is used as the x coordinate and
        element 1 as the y coordinate of the press and release points.
    """
    chain = ActionChains(driver)
    touch_input = PointerInput(interaction.POINTER_TOUCH, "touch")
    chain.w3c_actions = ActionBuilder(driver, mouse=touch_input)

    # all four steps are queued on the same pointer before anything is sent
    finger = chain.w3c_actions.pointer_action
    finger.move_to_location(start[0], start[1])
    finger.pointer_down()
    finger.move_to_location(end[0], end[1])
    finger.release()

    chain.perform()

# def process_payfazz(rows, category, brand=None):
#     if category in ['Uang Elektronik']:
#         return (
#             pd.DataFrame(rows, columns=['entry'])
#             .assign(
#                 SKU = lambda x: x.entry.str.extract('^(.+)\\nRp.+\\nRp', expand=False),
#                 price = lambda x: x.entry.str.extract('^.+\\nRp (.+)\\nRp ', expand=False).str.replace('\.','', regex=True),
#                 admin_fee = lambda x: x.entry.str.extract('\\nRp ([0-9\.]+)\\nAtur$', expand=False).str.replace('\.','', regex=True),
#                 category = category, 
#                 brand=brand, 
#             )
#             .drop(columns=['entry'])
#         )
#     if category in ['Pulsa', 'Data', 'Voucher Game', 'Transfer Bank', 'Kirim Tunai', 'Tarik Tunai', 'Isi Deposit Aplikasi', 
#                    'Bayar E-Commerce', 'Token PLN', 'Pulsa Pascabayar', 'Multifinance', 'Tagihan PLN', 'PDAM', 'Tagihan Gas', 
#                    'TV Kabel Berlangganan', 'BPJS', 'PBB', 'TELKOM']:
#         return (
#             pd.DataFrame(rows, columns=['entry'])
#             .assign(
#                 SKU = lambda x: x.entry.str.extract('^(.+)\\nRp ', expand=False),
#                 price = lambda x: x.entry.str.extract('Rp ([0-9\.]+)\\nAtur', expand=False).str.replace('\.','', regex=True),
#                 category = category, 
#                 brand = brand,
#             )
#             .drop(columns=['entry'])
#         )

def extract_five(stri):
    """Return the first five ``\\w+`` tokens of ``stri`` joined by single spaces.

    Fewer than five tokens are returned as-is; a string without any word
    characters yields an empty string.
    """
    tokens = re.findall(r'\w+', stri)
    leading = tokens[:5]
    return " ".join(leading)

def process_payfazz(results, brand, typ):
    """Turn raw scraped product strings into a tidy price table.

    Parameters
    ----------
    results : list
        Raw text entries, one per product. Exact duplicates are dropped.
    brand
        Value written to the ``brand`` column of every row.
    typ
        Value written to the ``type`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``SKU``, ``price``, ``brand``, ``type`` and ``note``.
        The index keeps the positions of the surviving entries in
        ``results`` (it is not reset after de-duplication).
    """
    return (
        pd.DataFrame({'raw': results})
        .drop_duplicates()
        .assign(
            # everything before the last "- " separator; NaN when there is none
            SKU=lambda x: x.raw.str.extract(r'^(.+)- ', expand=False),
            # fallback name: first five word tokens (same tokenisation as
            # extract_five), done with vectorised string methods so missing
            # entries stay missing instead of raising
            SKU_2=lambda x: x.raw.str.findall(r'\w+').str[:5].str.join(' '),
            # first "Rp <digits and dots>" amount, kept as text with its dots
            price=lambda x: x.raw.str.extract(r'Rp ([0-9\.]+)', expand=False),
            brand=brand,
            type=typ,
            # everything after the first "- " separator; NaN when there is none
            note=lambda x: x.raw.str.extract(r'- (.+)', expand=False),
        )
        # fill empty SKU with SKU_2
        .assign(SKU=lambda x: x.SKU.fillna(x.SKU_2))
        .drop(columns=['raw', 'SKU_2'])
    )

In [2]:
# Don't forget to run "appium --allow-cors" in a terminal first.

# Attributes set on the options object, applied in this order.
# NOTE(review): these are camelCase names assigned as plain attributes;
# confirm the installed Appium client forwards them as capabilities.
options = UiAutomator2Options()
for _attr, _value in (
    ('automationName', 'UiAutomator2'),
    ('udid', 'emulator-5554'),
    ('platformName', 'Android'),
    ('platformVersion', '12'),
    ('deviceName', 'bwphone'),
):
    setattr(options, _attr, _value)

# set up the driver against the local Appium server
driver = webdriver.Remote('http://127.0.0.1:4723', options=options)

In [3]:
# Reduce warning noise: with the 'once' action each distinct warning is
# printed only the first time it occurs (warnings are not silenced entirely).
import warnings
warnings.filterwarnings('once')

In [4]:
# keeps clicking back until reach home screen
# while driver.current_activity != '.NexusLauncherActivity':
#     driver.press_keycode(4)
#     time.sleep(action_delay)

# # open the payfazz app then wait
# driver.activate_app('com.payfazz.android')
# time.sleep(launch_delay)

# # click the "Masuk tanpa daftar" text
# driver.find_element(by=AppiumBy.ACCESSIBILITY_ID, value="Masuk tanpa Daftar").click()
# time.sleep(action_delay)

# # popup will appear. we will click back and if we are out then will just go back in
# driver.press_keycode(4)
# time.sleep(action_delay)
# if driver.current_package != 'com.payfazz.android':
#     driver.activate_app('com.payfazz.android')
#     time.sleep(launch_delay)

# # click the "Masuk tanpa daftar" text
# driver.find_element(by=AppiumBy.ACCESSIBILITY_ID, value="Masuk tanpa Daftar").click()
# time.sleep(action_delay)

# # click pulsa 
# driver.find_element(by=AppiumBy.XPATH, value="//android.view.View[@content-desc=\"%\nPulsa\"]/android.widget.ImageView").click()
# time.sleep(action_delay)



# Categories to scrape; each name is used as the accessibility id of the
# element that is clicked to open that category.
coupon_types = ['Pulsa', 'Data', 'Token PLN', 'Voucher Game', 'Uang Elektronik', 'Kode Voucher Google Play']

# content-desc values that show up among the views but are skipped when
# looping over brands
non_brand_labels = {'Daftar Harga', 'Masuk sekarang juga!', 'Mulai Jualan Biar Makin Cuan'}

results = []
for type_ in coupon_types:
    print(type_)
    driver.find_element(by=AppiumBy.ACCESSIBILITY_ID, value=type_).click()
    time.sleep(action_delay)
    # swipe the category part
    swipe(driver, (450, 300), (300, 300))
    time.sleep(swipe_delay)

    # every content-desc already seen for this category, in first-seen order
    brands = []
    while True:
        # distinct, non-null content-desc values on the current display
        visible_descs = [
            view.get_attribute('content-desc')
            for view in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View')
        ]
        new_brands = list(dict.fromkeys(d for d in visible_descs if d is not None))

        # Entries that were already on an earlier screen have been scraped,
        # so only the unseen ones are processed. When a swipe reveals nothing
        # new the list is exhausted and the loop stops.
        unseen_brands = [b for b in new_brands if b not in brands]
        if not unseen_brands:
            break
        brands.extend(unseen_brands)

        # loop for each brand that has not been scraped yet
        for new_brand in unseen_brands:
            # empty labels cannot be located or split on (new_brand[0] below)
            if not new_brand or new_brand in non_brand_labels:
                continue

            # click the brand box
            driver.find_element(by=AppiumBy.ACCESSIBILITY_ID, value=new_brand).click()
            time.sleep(action_delay)

            # re-read the views and keep the descriptions mentioning the brand
            opened_descs = [
                view.get_attribute('content-desc')
                for view in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View')
            ]
            content_raw = [d for d in opened_descs if d is not None and new_brand in d]

            # Split the joined text into one string per product. The brand's
            # first letter after a newline is doubled before splitting on it,
            # so every piece after the first still starts with that letter.
            # NOTE(review): '\nM' is rewritten to ' - M' before the split, so
            # for brands starting with "M" nothing is left to split on;
            # confirm this is intended.
            first_letter = new_brand[0]
            content_list = (
                ''.join(content_raw)
                .replace('Promo', '')
                .replace('PROMO - DC ', '')
                .replace('\nM', ' - M')
                .replace(f'\n{first_letter}', f'\n{first_letter + first_letter}')
                .split(f'\n{first_letter}')
            )
            # only pieces that carry a price are product entries
            content_list = [c for c in content_list if "Rp" in c]

            # Leave the brand view by clicking the first ImageView under the
            # view whose content-desc equals the first matching description
            # (presumably the back icon; raises IndexError if nothing matched).
            driver.find_element(by=AppiumBy.XPATH, value=f'//android.view.View[@content-desc="{content_raw[0]}"]/android.widget.ImageView[1]').click()
            time.sleep(action_delay)
            results.append(process_payfazz(content_list, brand=new_brand, typ=type_))

        # swipe down once
        swipe(driver, (500, 1400), (500, 800))
        time.sleep(swipe_delay)

res_df = pd.concat(results).drop_duplicates()


Pulsa
Data
Token PLN
Voucher Game
Uang Elektronik
Kode Voucher Google Play


In [5]:
from datetime import date
from pathlib import Path

# date stamp used in the output file name, formatted as YYYY_MM_DD
today = date.today().strftime("%Y_%m_%d")

# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing
output_dir = Path('result')
output_dir.mkdir(parents=True, exist_ok=True)
res_df.to_csv(output_dir / f'payfazz_scrape_{today}.csv')