In [None]:
import re
import time
from datetime import datetime

import numpy as np
import pandas as pd
from appium import webdriver
from appium.options.android import UiAutomator2Options
from appium.webdriver.common.appiumby import AppiumBy
from appium.webdriver.common.touch_action import TouchAction
from bs4 import BeautifulSoup as bs
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains, ActionBuilder
from selenium.webdriver.common.actions import interaction
from selenium.webdriver.common.actions.pointer_input import PointerInput

# Pause lengths handed to time.sleep(), i.e. expressed in seconds.
swipe_down_delay = 1  # after each swipe that moves further down a list/page
swipe_up_delay = 1  # after each swipe in the opposite direction
action_delay = 2  # after taps, text entry and back-key presses
launch_delay = 3  # after (re)activating the app

# define swipe
def swipe(driver, start, end):
    """Drag one touch pointer across the screen from ``start`` to ``end``.

    Args:
        driver: Driver on which the gesture is performed.
        start: Indexable ``(x, y)`` position where the pointer is pressed.
        end: Indexable ``(x, y)`` position where the pointer is released.
    """
    gesture = ActionChains(driver)
    # Swap the default input devices for a single touch-type pointer.
    touch = PointerInput(interaction.POINTER_TOUCH, "touch")
    gesture.w3c_actions = ActionBuilder(driver, mouse=touch)

    finger = gesture.w3c_actions.pointer_action
    finger.move_to_location(start[0], start[1])
    finger.pointer_down()
    finger.move_to_location(end[0], end[1])
    finger.release()

    gesture.perform()

def process_payfazz(rows, category, brand=None):
    """Parse raw Payfazz price-list entries into a DataFrame.

    Every entry is a newline-separated string whose first line is the SKU
    name, followed by one or two ``Rp <amount>`` lines and a final ``Atur``
    line. Thousands separators (``.``) are stripped from the amounts, which
    are returned as digit strings (not converted to numbers).

    Args:
        rows: Raw entry strings, one per scraped list row.
        category: Product category; it selects the entry layout and is copied
            into the ``category`` column.
        brand: Optional brand label copied into the ``brand`` column.

    Returns:
        A DataFrame with columns ``SKU``, ``price``, ``category`` and
        ``brand`` (plus ``admin_fee`` between ``price`` and ``category`` for
        'Uang Elektronik'), or ``None`` when ``category`` is not recognised.
    """
    with_admin_fee = ('Uang Elektronik',)
    price_only = (
        'Pulsa', 'Data', 'Voucher Game', 'Transfer Bank', 'Kirim Tunai',
        'Tarik Tunai', 'Isi Deposit Aplikasi', 'Bayar E-Commerce',
        'Token PLN', 'Pulsa Pascabayar', 'Multifinance', 'Tagihan PLN',
        'PDAM', 'Tagihan Gas', 'TV Kabel Berlangganan', 'BPJS', 'PBB',
        'TELKOM',
    )

    # Raw strings keep the regex escapes intact; the previous '\.' literals
    # were invalid Python escape sequences.
    if category in with_admin_fee:
        # Layout: "<SKU>\nRp <price>\nRp <admin fee>\nAtur"
        patterns = {
            'SKU': r'^(.+)\nRp.+\nRp',
            'price': r'^.+\nRp (.+)\nRp ',
            'admin_fee': r'\nRp ([0-9.]+)\nAtur$',
        }
    elif category in price_only:
        # Layout: "<SKU>\nRp <price>\nAtur"
        patterns = {
            'SKU': r'^(.+)\nRp ',
            'price': r'Rp ([0-9.]+)\nAtur',
        }
    else:
        # Unknown categories yield no frame, exactly as before.
        return None

    entries = pd.DataFrame(rows, columns=['entry'])['entry']
    parsed = pd.DataFrame(index=entries.index)
    for column, pattern in patterns.items():
        values = entries.str.extract(pattern, expand=False)
        if column != 'SKU':
            # Drop thousands separators: "10.500" -> "10500".
            values = values.str.replace('.', '', regex=False)
        parsed[column] = values
    return parsed.assign(category=category, brand=brand)

In [None]:
# Prerequisite: an Appium server must already be running; start it with
# "appium --allow-cors" in a terminal before executing this cell.

# Capabilities are registered through load_capabilities() so that they are
# actually sent to the server. Assigning camelCase attributes such as
# ``options.platformName`` only creates plain Python attributes on the
# options object and does not add anything to the capability set.
options = UiAutomator2Options()
options.load_capabilities({
    'automationName': 'UiAutomator2',
    'udid': 'emulator-5554',
    'platformName': 'Android',
    'platformVersion': '12',
    'deviceName': 'bwphone',
})

# setup the driver
driver = webdriver.Remote('http://127.0.0.1:4723', options=options)

In [None]:
# Reduce warning noise: the 'once' action prints only the first occurrence
# of each matching warning (it does not silence warnings entirely).
import warnings
warnings.filterwarnings('once')

In [None]:
# Keep pressing the back key (keycode 4) until the launcher activity is in
# the foreground, i.e. until we are on the home screen.
while driver.current_activity != '.NexusLauncherActivity':
    driver.press_keycode(4)
    time.sleep(action_delay)

# open the Payfazz app then wait for it to finish launching
driver.activate_app('com.payfazz.android')
time.sleep(launch_delay)

# A popup appears on launch: dismiss it with the back key. If that back press
# left the app instead, simply bring the app to the foreground again.
driver.press_keycode(4)
time.sleep(action_delay)
if driver.current_package != 'com.payfazz.android':
    driver.activate_app('com.payfazz.android')
    time.sleep(launch_delay)


### Scrape promotions
results = []
## Click on coupon
driver.find_element(by=AppiumBy.XPATH, value="//android.widget.ImageView[@index='1']").click()

# The buttons on the coupon screen are used as coupon-type filters. 'Semua'
# is skipped, and buttons without a content-desc (None) are dropped because
# the XPath lookup below could never match them.
coupon_types = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.widget.Button')]
coupon_types = list(set(coupon_types).difference(['Semua', None]))
for type_ in coupon_types:
    print(type_)
    driver.find_element(by=AppiumBy.XPATH, value=f"//android.widget.Button[@content-desc='{type_}']").click()

    # Swipe through the list collecting coupon descriptions until a screen
    # shows nothing that has not been collected already.
    rows = []
    while True:
        new_rows = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.widget.ImageView')]
        if set(new_rows).issubset(rows):
            break
        swipe(driver, (500, 1400), (500, 800))
        time.sleep(swipe_down_delay)
        rows.extend(new_rows)
    rows = pd.Series(rows).dropna().drop_duplicates().tolist()

    # Open the coupons in reverse collection order (last collected first),
    # swiping the opposite way whenever the wanted coupon cannot be clicked.
    for k, coupon_desc in enumerate(rows[::-1]):
        print(k)
        while True:
            try:
                driver.find_element(by=AppiumBy.XPATH, value=f"//android.widget.ImageView[@content-desc='{coupon_desc}']").click()
            except WebDriverException:
                # Only driver errors trigger a retry, so Ctrl-C and other
                # non-driver failures can still stop this loop.
                swipe(driver, (500, 1200), (500, 1400))
                time.sleep(swipe_up_delay)
            else:
                time.sleep(action_delay)
                break
        # Texts of every view on the coupon detail screen.
        content1 = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View')]
        # After tapping the "KETENTUAN" view, keep only the last view's text.
        driver.find_element(by=AppiumBy.XPATH, value='//android.view.View[contains(@content-desc, "KETENTUAN")]').click()
        content2 = [i.get_attribute('content-desc') for i in driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View')][-1:]
        results.append([type_] + content1 + content2)

        driver.press_keycode(4)
        time.sleep(action_delay)

driver.press_keycode(4)
time.sleep(action_delay)
# Columns are positional: 0 is the coupon type, the remaining positions index
# into the view texts gathered above, so they depend on the screen layout.
results = (
    pd.DataFrame(results)
    [[0,7,8,13,14]]
    .assign(
        first_detected = datetime.now().strftime('%Y-%m-%d'),
        Platform = 'payfazz'
    )
    .rename(columns={
        0:'Product',
        7:'Header',
        8:'Expiry',
        13:'Description',
        14:'Terms & Condition',
        'first_detected':'First detected',
    })
)
results.to_pickle('data/google_scrapping/payfazz_promo.pkl')

results = []
### Scroll until the 'Alat Warung' heading is high enough on screen, then
### click on 'Atur Harga Jual'
while True:
    try:
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)
        el = driver.find_element(by=AppiumBy.XPATH, value="//android.view.View[@content-desc='Alat Warung']")
        # Stop once the heading's top edge is above y = 1200.
        if el.location['y'] < 1200:
            break
    except WebDriverException:
        # Heading not reachable yet: swipe once more and try again. Only
        # driver errors are retried so the loop can still be interrupted.
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)
driver.find_element(by=AppiumBy.XPATH, value="//android.view.View[@content-desc='Atur Harga Jual']").click()

##### Scrape finance products
keuangan = ['Transfer Bank', 'Kirim Tunai', 'Tarik Tunai', 'Isi Deposit Aplikasi', 'Bayar E-Commerce']
price_row_xpath = '//android.view.View[contains(@content-desc, "Rp")]'
for category in keuangan:
    category_xpath = f"//android.view.View[@content-desc='{category}']"
    driver.find_element(by=AppiumBy.XPATH, value=category_xpath).click()
    time.sleep(action_delay)

    # Keep swiping until the visible price rows bring nothing new.
    rows = []
    while True:
        visible = driver.find_elements(by=AppiumBy.XPATH, value=price_row_xpath)
        new_rows = [view.get_attribute('content-desc') for view in visible]
        if set(new_rows).issubset(rows):
            break
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)
        rows.extend(new_rows)

    # Leave the category screen before parsing what was collected.
    driver.press_keycode(4)
    time.sleep(action_delay)
    results.append(process_payfazz(rows, category))

# Scroll until the 'Prabayar' heading's top edge is above y = 1200.
while True:
    try:
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)
        el = driver.find_element(by=AppiumBy.XPATH, value="//android.view.View[@content-desc='Prabayar']")
        if el.location['y'] < 1200:
            break
    except WebDriverException:
        # Heading not reachable yet: swipe once more and try again. Only
        # driver errors are retried so the loop can still be interrupted.
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)

#### Scrape the prabayar categories ('Token PLN' is scraped separately
#### further down in this script)
prabayar = ['Uang Elektronik', 'Pulsa', 'Data', 'Voucher Game']
for category in prabayar:
    print(f'click on {category}')
    category_xpath = f"//android.view.View[@content-desc='{category}']"
    driver.find_element(by=AppiumBy.XPATH, value=category_xpath).click()
    time.sleep(action_delay)

    # Identify the brands of this category: parse the page source and keep
    # every non-empty content-desc (newlines flattened to spaces), swiping
    # until a screen contributes nothing new.
    print('search brands')
    brands = []
    non_empty = re.compile(r'^.+$')
    while True:
        soup = bs(driver.page_source, 'html')
        new_brands = [
            node['content-desc'].replace('\n', ' ')
            for node in soup.find_all(attrs={'content-desc': non_empty})
        ]
        if set(new_brands).issubset(brands):
            break
        swipe(driver, (500, 1400), (500, 700))
        time.sleep(swipe_down_delay)
        brands.extend(new_brands)
    print(brands)

    # Scrape each SKU, one brand at a time: first-seen order, no duplicates,
    # and without the entry that merely repeats the category name.
    unique_brands = list(dict.fromkeys(b for b in brands if b != category))
    for brand in unique_brands:
        # Type the brand into the search field, then press back once.
        el = driver.find_element(by=AppiumBy.XPATH, value="//android.widget.ImageView[contains(@text, 'Cari')]")
        el.click()
        el.send_keys(brand)
        time.sleep(action_delay)
        driver.press_keycode(4)
        time.sleep(action_delay)

        # rows is shared by all matches of this brand, so it keeps growing
        # from one matching item to the next.
        rows = []
        brand_xpath = f"//android.widget.ImageView[contains(@content-desc, '{brand}')]"
        cat_item = driver.find_elements(by=AppiumBy.XPATH, value=brand_xpath)
        for item in cat_item:
            item.click()
            time.sleep(action_delay)
            while True:
                price_views = driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View[contains(@content-desc, "Rp")]')
                new_rows = [view.get_attribute('content-desc') for view in price_views]
                if set(new_rows).issubset(rows):
                    break
                swipe(driver, (500, 1400), (500, 1000))
                time.sleep(swipe_down_delay)
                rows.extend(new_rows)
            # process result
            results.append(process_payfazz(rows, category, brand))
            # go back
            driver.press_keycode(4)
            time.sleep(action_delay)

    # Two back presses once the category is finished.
    for _ in range(2):
        driver.press_keycode(4)
        time.sleep(action_delay)

# 'Token PLN' is scraped without a brand step: its price rows are read
# straight from the category screen.
category = 'Token PLN'
print(f'click on {category}')
token_xpath = f"//android.view.View[@content-desc='{category}']"
driver.find_element(by=AppiumBy.XPATH, value=token_xpath).click()
time.sleep(action_delay)

# Keep swiping until the visible price rows bring nothing new.
rows = []
while True:
    price_views = driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View[contains(@content-desc, "Rp")]')
    new_rows = [view.get_attribute('content-desc') for view in price_views]
    if set(new_rows).issubset(rows):
        break
    swipe(driver, (500, 1400), (500, 800))
    time.sleep(swipe_down_delay)
    rows.extend(new_rows)

# Parse what was collected, then go back one screen.
results.append(process_payfazz(rows, category))
driver.press_keycode(4)
time.sleep(action_delay)

#### Scrape pascabayar
# Scroll until the 'Pascabayar' heading's top edge is above y = 1200.
while True:
    try:
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)
        el = driver.find_element(by=AppiumBy.XPATH, value="//android.view.View[@content-desc='Pascabayar']")
        if el.location['y'] < 1200:
            break
    except WebDriverException:
        # Heading not reachable yet: swipe once more and try again. Only
        # driver errors are retried so the loop can still be interrupted.
        swipe(driver, (500, 1400), (500, 1000))
        time.sleep(swipe_down_delay)

pascabayar = ['Pulsa Pascabayar', 'Multifinance', 'Tagihan PLN', 'PDAM', 'Tagihan Gas', 'TV Kabel Berlangganan', 'BPJS', 'PBB', 'TELKOM']
for category in pascabayar:
    print(f'click on {category}')
    category_xpath = f"//android.view.View[@content-desc='{category}']"
    driver.find_element(by=AppiumBy.XPATH, value=category_xpath).click()
    time.sleep(action_delay)

    # Keep swiping until the visible price rows bring nothing new.
    rows = []
    while True:
        price_views = driver.find_elements(by=AppiumBy.XPATH, value='//android.view.View[contains(@content-desc, "Rp")]')
        new_rows = [view.get_attribute('content-desc') for view in price_views]
        if set(new_rows).issubset(rows):
            break
        swipe(driver, (500, 1400), (500, 800))
        time.sleep(swipe_down_delay)
        rows.extend(new_rows)

    # Parse what was collected, then return to the category list.
    results.append(process_payfazz(rows, category))
    driver.press_keycode(4)
    time.sleep(action_delay)

# One more back press after the last category.
driver.press_keycode(4)
time.sleep(action_delay)

# Stack every per-category frame, drop exact duplicate rows, then stamp the
# scrape date and platform on what is left before pickling it.
combined = pd.concat(results).drop_duplicates()
results = combined.assign(
    scrap_date=datetime.now().strftime('%Y-%m-%d'),
    platform='payfazz',
)
results.to_pickle('data/google_scrapping/payfazz.pkl')
results
