In [8]:
import re
import time
from datetime import datetime

import numpy as np
import pandas as pd
from appium import webdriver
from appium.options.android import UiAutomator2Options
from appium.webdriver.common.appiumby import AppiumBy
from appium.webdriver.common.touch_action import TouchAction
from bs4 import BeautifulSoup as bs
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains, ActionBuilder
from selenium.webdriver.common.actions.pointer_input import PointerInput
from selenium.webdriver.common.actions import interaction

# Pause lengths, in seconds, handed to time.sleep() between UI interactions.

# Wait after each scrolling swipe while a product list is being paged through.
swipe_down_delay = 1
# NOTE(review): not referenced by any cell visible in this notebook.
swipe_up_delay = 1
# Wait after taps, key presses and text entry so the screen can settle.
action_delay = 7
# Wait after (re)activating the app and after dismissing its start-up popup.
launch_delay = 10

# define swipe
def swipe(driver, start, end):
    """Drag one touch pointer from ``start`` to ``end``.

    Args:
        driver: WebDriver session the W3C action sequence is performed on.
        start: Indexable pair; ``start[0]`` / ``start[1]`` are used as the
            x / y location where the pointer is pressed down.
        end: Indexable pair; ``end[0]`` / ``end[1]`` are used as the x / y
            location where the pointer is released.
    """
    gesture = ActionChains(driver)
    # Swap the default mouse pointer for a touch pointer named "touch".
    touch_input = PointerInput(interaction.POINTER_TOUCH, "touch")
    gesture.w3c_actions = ActionBuilder(driver, mouse=touch_input)

    finger = gesture.w3c_actions.pointer_action
    finger.move_to_location(start[0], start[1])
    finger.pointer_down()
    finger.move_to_location(end[0], end[1])
    finger.release()

    gesture.perform()

def click(driver, x, y):
    """Tap the screen at ``(x, y)`` with a touch pointer.

    The pointer is moved to the location, pressed, held for a 0.1 pause and
    then released.

    Args:
        driver: WebDriver session the W3C action sequence is performed on.
        x: Horizontal location passed to ``move_to_location``.
        y: Vertical location passed to ``move_to_location``.
    """
    tap = ActionChains(driver)
    # Swap the default mouse pointer for a touch pointer named "touch".
    touch_input = PointerInput(interaction.POINTER_TOUCH, "touch")
    tap.w3c_actions = ActionBuilder(driver, mouse=touch_input)

    finger = tap.w3c_actions.pointer_action
    finger.move_to_location(x, y)
    finger.pointer_down()
    finger.pause(0.1)
    finger.release()

    tap.perform()

def process_toko_pulsa(df, telco, category):
    """Normalise the scraped "Pulsa" rows of one telco into a tidy frame.

    Positional columns are interpreted as: 0 and 1 hold the base price and
    the product value (their order depends on whether the row has a promo),
    2 the discounted price, 3 the promo notes and 4 the promo label.
    A row counts as promoted when its notes column (3) is not null.

    Args:
        df: Iterable of DataFrames with integer column labels, one per
            scraped screen (despite the name, this is not a single frame).
        telco: Value stored in the ``brand`` column.
        category: Value stored in the ``category`` column.

    Returns:
        De-duplicated DataFrame restricted to rows whose ``base_price``
        contains ``'Rp'``.

    Raises:
        ValueError: If ``df`` is empty (raised by ``pd.concat``).
    """
    # Use the argument, not the notebook-global ``rows`` the original read.
    combined = pd.concat(df)

    # Columns 2 and 3 only exist if some scraped row was that wide (e.g. at
    # least one promo on screen). Add the missing ones as all-NaN so the
    # column lookups below cannot fail.
    for position in range(4):
        if position not in combined.columns:
            combined[position] = np.nan

    cleaned = (
        combined
        .rename(columns={
            0: 'base_price_temp',
            1: 'product_value_temp',
            2: 'disc_price_temp',
            3: 'notes',
            4: 'promo',
        })
        .assign(
            has_promo=lambda x: np.where(x.notes.isnull(), 0, 1),
            # Without a promo the first two scraped texts arrive swapped.
            product_value=lambda x: np.where(x.has_promo == 1, x.product_value_temp, x.base_price_temp),
            base_price=lambda x: np.where(x.has_promo == 1, x.base_price_temp, x.product_value_temp),
            disc_price=lambda x: np.where(x.has_promo == 1, x.disc_price_temp, np.nan),
            brand=telco,
            category=category,
        )
        .drop(columns=['base_price_temp', 'product_value_temp', 'disc_price_temp'])
        .drop_duplicates()
    )
    # astype(str) turns missing prices into 'nan'/'None', which simply do not
    # match, instead of producing an NA-containing mask that cannot index.
    return cleaned[cleaned['base_price'].astype(str).str.contains('Rp')]

def process_toko_paket(df, telco, category):
    """Normalise the scraped "Paket Data" rows of one telco.

    Args:
        df: Iterable of DataFrames with integer column labels 0-5, one per
            scraped screen (despite the name, this is not a single frame).
        telco: Value stored in the ``brand`` column.
        category: Value stored in the ``category`` column.

    Returns:
        De-duplicated DataFrame of the rows whose ``base_price`` (scraped
        column 3) starts with ``'Rp'``, with scraped column 4 removed.
    """
    column_names = {
        0: 'SKU',
        1: 'disc_price',
        2: 'disc_rate',
        3: 'base_price',
        5: 'note',
    }
    stacked = pd.concat(df).rename(columns=column_names)
    # NOTE(review): how replace() treats an explicit ``None`` value has
    # differed between pandas releases - confirm on the installed version.
    stacked = stacked.replace('Lihat Detail', None)

    # Keep only rows whose base price text begins with the currency prefix.
    starts_with_rp = stacked.base_price.astype(str).str.contains('^Rp')
    priced = stacked.loc[starts_with_rp].drop(columns=[4])

    labelled = priced.assign(brand=telco, category=category)
    return labelled.drop_duplicates()

def process_toko_pln(df):
    """Normalise the scraped PLN (electricity token) rows.

    Args:
        df: Iterable of DataFrames with integer column labels 0-3, one per
            scraped screen (despite the name, this is not a single frame).

    Returns:
        De-duplicated DataFrame with columns ``SKU``, ``base_price``,
        ``product_value`` and ``category`` for the rows whose ``base_price``
        (scraped column 1) starts with ``'Rp'``.
    """
    return (
        pd.concat(df)
        .rename(columns={
            0: 'SKU',
            1: 'base_price',
        })
        .loc[lambda x: x.base_price.astype(str).str.contains('^Rp')]
        .drop(columns=[2, 3])
        .assign(
            # price=lambda x: x.price.str.replace('[Rp\.]', '', regex=True),
            # Raw string: '\.' in a normal literal is an invalid escape
            # sequence (a warning today, slated to become an error).
            # NOTE(review): the character class swaps every single 'R', 'p'
            # or '.' for the text 'PLN Prepaid', and the trailing
            # Series.replace only matches cells that are exactly '.', so it
            # looks like a no-op. Behaviour is kept as-is; confirm intent.
            product_value=lambda x: x.SKU.str.replace(r'[Rp\.]', 'PLN Prepaid', regex=True).replace('.', ''),
            category='PLN',
        )
        .drop_duplicates()
    )

def process_toko_voucher(df, brand):
    """Normalise the scraped "Voucher Game" rows of one brand.

    The layout of a row is inferred from how many of its cells are filled
    (``cnt``): with 2 cells column 1 is the price; otherwise column 2 is
    taken as the price, and with 4 cells column 1 is additionally the
    discount rate and column 3 the discounted price.

    Args:
        df: Iterable of DataFrames with integer column labels, one per
            scraped screen (despite the name, this is not a single frame).
        brand: Value stored in the ``brand`` column.

    Returns:
        De-duplicated DataFrame restricted to rows whose ``base_price``
        contains ``'Rp'``.

    Raises:
        ValueError: If ``df`` is empty (raised by ``pd.concat``).
    """
    combined = pd.concat(df)

    # Columns 2 and 3 only exist if some scraped row was that wide (i.e. at
    # least one discounted item). Add the missing ones as all-NaN so the
    # positional lookups below cannot raise KeyError; NaN cells are not
    # counted by ``count`` so ``cnt`` is unaffected.
    for position in (1, 2, 3):
        if position not in combined.columns:
            combined[position] = np.nan

    cleaned = (
        combined
        .rename(columns={0: 'SKU'})
        .assign(
            cnt=lambda x: x.count(axis=1),
            base_price=lambda x: np.where(x.cnt == 2, x[1], x[2]),
            disc_price=lambda x: np.where(x.cnt == 4, x[3], np.nan),
            disc_rate=lambda x: np.where(x.cnt == 4, x[1], np.nan),
            brand=brand,
            category='Voucher Game',
        )
        .drop_duplicates()
    )
    # astype(str) turns missing prices into 'nan'/'None', which simply do not
    # match, instead of producing an NA-containing mask that cannot index.
    return cleaned[cleaned['base_price'].astype(str).str.contains('Rp')]

In [9]:
# Prerequisite: start the Appium server first by running "appium --allow-cors" in a terminal.

# Capabilities describing the UiAutomator2 session and the Android device to drive.
options = UiAutomator2Options()
options.automationName = 'UiAutomator2'
# NOTE(review): 'emulator-5554' looks like a local emulator serial - adjust for another device.
options.udid = 'emulator-5554'
options.platformName = 'Android'
options.platformVersion = '12'
options.deviceName = 'bwphone'

# Set up the driver: opens a remote session against the Appium server at 127.0.0.1:4723.
driver = webdriver.Remote('http://127.0.0.1:4723', options=options)

In [10]:
# rows = []
# source = driver.page_source
# soup = bs(source, 'html')
# new_rows = pd.concat([pd.Series(re.findall(r'text="(.+)"', str(i.parent))) for i in list(soup.find_all(attrs={'text': re.compile(r'^Rp.+$')}))], axis=1).T
# rows.append(new_rows)

# (
#     pd.concat(rows)
#     .assign(
#         cnt = lambda x: x.count(axis=1),
#         base_price = lambda x: np.where(x.cnt == 2, x[1], x[2]),
#         disc_price = lambda x: np.where(x.cnt == 4, x[3], np.nan),
#         disc_rate = lambda x: np.where(x.cnt == 4, x[1], np.nan)
#     )
# )

# cleaned =\
#     (
#         pd.concat(df)
#         .rename(columns={
#             0:'SKU', 
#             1:'disc_rate_temp',
#             2:'base_price_temp',
#             3:'disc_price_temp'
#         })
#         .assign(
#             has_promo = lambda x: np.where(x.notes.isnull(), 0, 1),
#             base_price = lambda x: np.where(x.has_promo == 1, x.base_price_temp, x.disc_rate_temp),
#             disc_price = lambda x: np.where(x.has_promo == 1, x.disc_price_temp, np.nan),
#             brand = telco,
#             category = category
#         )
#         .drop(columns=['base_price_temp', 'disc_rate_temp', 'disc_price_temp'])
#         .drop_duplicates()
#     )

In [11]:
'''
0	1	2	cnt
25 Crystal + 5 Topaz	Rp5.550	NaN	2
50 Crystal + 10 Topaz	Rp11.100	NaN	2
110 Crystal + 20 Topaz	Rp22.200	NaN	2
3	300 Crystal + 30 Topaz	Rp55.500	NaN	2
4	600 Crystal + 75 Topaz	Rp111.000	NaN	2
5	1250 Crystal + 200 Topaz	Rp222.000	NaN	2
6	3800 Crystal + 500 Topaz	Rp555.000	NaN	2
7	Gunakan kode promo	Total Harga	Rp0	3
'''




'\n0\t1\t2\tcnt\n25 Crystal + 5 Topaz\tRp5.550\tNaN\t2\n50 Crystal + 10 Topaz\tRp11.100\tNaN\t2\n110 Crystal + 20 Topaz\tRp22.200\tNaN\t2\n3\t300 Crystal + 30 Topaz\tRp55.500\tNaN\t2\n4\t600 Crystal + 75 Topaz\tRp111.000\tNaN\t2\n5\t1250 Crystal + 200 Topaz\tRp222.000\tNaN\t2\n6\t3800 Crystal + 500 Topaz\tRp555.000\tNaN\t2\n7\tGunakan kode promo\tTotal Harga\tRp0\t3\n'

In [12]:
# Silence every Python warning from this point on.
# NOTE(review): this is a blanket filter with no category or module given, so
# deprecation notices from the libraries used here are hidden as well.
import warnings
warnings.filterwarnings('ignore')

In [13]:
# Phone-number prefix typed into the app so that it shows a given telco's products.
telcos_prefix = {
    'smartfren':'0881',
    'telkomsel':'0812',
    'im3':'0814',
    'xl':'0818',
    'axis':'0831',
    '3':'0894',
}

# Android key code for the BACK button (android.view.KeyEvent.KEYCODE_BACK).
# The loop below used to send 5, which is KEYCODE_CALL, not BACK.
KEYCODE_BACK = 4

# Keep pressing BACK until the launcher (home screen) activity is in front.
while driver.current_activity != '.NexusLauncherActivity':
    driver.press_keycode(KEYCODE_BACK)
    time.sleep(action_delay)

# Open the app, then wait for it to finish loading.
driver.activate_app('com.tokopedia.kelontongapp')
time.sleep(launch_delay)

# A popup appears on launch. Press BACK to dismiss it; if that press left the
# app instead, bring the app to the foreground again.
driver.press_keycode(KEYCODE_BACK)
time.sleep(launch_delay)
if driver.current_package != 'com.tokopedia.kelontongapp':
    driver.activate_app('com.tokopedia.kelontongapp')
    time.sleep(launch_delay)


results = []
# p = 'Pulsa'
# for telco in list(telcos_prefix.keys()):
#     # Go into pulsa
#     # notes: easier to just mention the coordinate
#     click(driver, 118, 1313)
#     time.sleep(action_delay)

#     el3 = driver.find_element(by=AppiumBy.XPATH, value='//android.widget.EditText')
#     el3.send_keys(telcos_prefix[telco])
#     time.sleep(action_delay)
#     driver.press_keycode(4)
#     time.sleep(action_delay)

#     rows = []
#     while True:
#         source = driver.page_source
#         soup = bs(source, 'html')
#         new_rows = pd.concat([pd.Series(re.findall(r'text="(.+)"', str(i.parent))) for i in list(soup.find_all(attrs={'text': re.compile(r'^Rp.+$')}))], axis=1).T
#         rows.append(new_rows)
#         if bool(re.search('Mengapa Harus', source)):
#             break
#         swipe(driver, (500, 1400), (500, 800))
#         time.sleep(swipe_down_delay)
#     results.append(process_toko_pulsa(rows, telco, p))

#     driver.press_keycode(4)
#     time.sleep(action_delay)

# p = 'Paket Data'
# for telco in list(telcos_prefix.keys()):
#     print(p, telco)

#     # dismiss interstitial banner
#     try:
#         e = driver.find_element(by=AppiumBy.XPATH, value='/hierarchy/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.view.ViewGroup/android.widget.RelativeLayout/android.webkit.WebView/android.webkit.WebView/android.view.View[2]/android.app.Dialog/android.view.View/android.view.View[1]/android.widget.Button')
#         e.click()
#         time.sleep(action_delay)
#     except:
#         pass

#     driver.find_element(by=AppiumBy.XPATH, value='//android.view.View[@text="Paket Data"]').click()
#     time.sleep(action_delay)

#     el3 = driver.find_element(by=AppiumBy.XPATH, value='//android.widget.EditText')
#     el3.send_keys(telcos_prefix[telco])
#     time.sleep(action_delay)
#     driver.press_keycode(4)
#     time.sleep(action_delay)

#     rows = []
#     while True:
#         source = driver.page_source
#         soup = bs(source, 'html')
#         new_rows = pd.concat([pd.Series(re.findall(r'text="(.+)"', str(i.parent))) for i in list(soup.find_all(attrs={'text': re.compile(r'^Rp.+$')}))], axis=1).T
#         rows.append(new_rows)
#         if bool(re.search('Mengapa Harus', source)):
#             break
#         swipe(driver, (500, 1400), (500, 800))
#         time.sleep(swipe_down_delay)
#     results.append(process_toko_paket(rows, telco, p))
        
#     driver.press_keycode(4)
#     time.sleep(action_delay)

# p = 'PLN'
# driver.find_element(by=AppiumBy.XPATH, value='//android.view.View[@text="PLN"]').click()
# time.sleep(action_delay)
# driver.press_keycode(4)
# time.sleep(action_delay)

# rows = []
# while True:
#     source = driver.page_source
#     soup = bs(source, 'html')
#     new_rows = pd.concat([pd.Series(re.findall(r'text="(.+)"', str(i.parent))) for i in list(soup.find_all(attrs={'text': re.compile(r'^Rp.+$')}))], axis=1).T
#     rows.append(new_rows)
#     if bool(re.search('Mengapa Harus', source)):
#         break
#     swipe(driver, (500, 1400), (500, 800))
#     time.sleep(swipe_down_delay)
# results.append(process_toko_pln(rows))
# driver.press_keycode(4)
# time.sleep(action_delay)


# Absolute XPath of the WebView node under which the voucher tiles (children
# 5..13, clicked below) and the element read as the brand name (child 5 on a
# product page) are looked up.
WEBVIEW_ROOT_XPATH = (
    "/hierarchy/android.widget.FrameLayout/android.widget.LinearLayout"
    "/android.widget.FrameLayout/android.widget.LinearLayout"
    "/android.widget.FrameLayout/android.view.ViewGroup"
    "/android.widget.RelativeLayout/android.webkit.WebView"
    "/android.webkit.WebView/android.view.View"
)

# Upper bound on scroll steps per product page, so a page that never shows the
# 'Mengapa Harus' footer cannot keep the notebook scrolling forever.
MAX_PAGE_SWIPES = 50


def _extract_price_rows(page_source):
    """Return one row per element whose text starts with 'Rp', or None if the page has none.

    Each row holds every ``text="..."`` value found in the markup of the
    parent of such an element.
    """
    soup = bs(page_source, 'html')
    price_nodes = soup.find_all(attrs={'text': re.compile(r'^Rp.+$')})
    if not price_nodes:
        # pd.concat([]) would raise ValueError on a screen without prices.
        return None
    return pd.concat(
        [pd.Series(re.findall(r'text="(.+)"', str(node.parent))) for node in price_nodes],
        axis=1,
    ).T


driver.find_element(by=AppiumBy.XPATH, value='//android.view.View[@text="Voucher Game"]').click()
time.sleep(action_delay)

for swipes in range(5):
    for boxes in range(5, 14):
        try:
            tile = driver.find_element(
                by=AppiumBy.XPATH,
                value=f"{WEBVIEW_ROOT_XPATH}/android.view.View[{boxes}]/android.view.View[2]",
            )
        except NoSuchElementException:
            # Not every position 5..13 necessarily exists on the current
            # screen; skip it rather than aborting the whole scrape.
            print(f'Voucher tile {boxes} not found on screen {swipes}, skipping')
            continue
        tile.click()
        time.sleep(action_delay)
        brand = driver.find_element(
            by=AppiumBy.XPATH,
            value=f"{WEBVIEW_ROOT_XPATH}/android.view.View[5]",
        ).text

        rows = []
        for _ in range(MAX_PAGE_SWIPES):
            source = driver.page_source
            new_rows = _extract_price_rows(source)
            if new_rows is not None:
                rows.append(new_rows)
            # The 'Mengapa Harus' text marks the end of the product list.
            if re.search('Mengapa Harus', source):
                break
            swipe(driver, (500, 1400), (500, 800))
            time.sleep(swipe_down_delay)

        # go back (4 is the Android BACK key code)
        driver.press_keycode(4)
        time.sleep(action_delay)
        if rows:
            results.append(process_toko_voucher(rows, brand=brand))

    # Scroll the tile list before visiting positions 5..13 again.
    swipe(driver, (500, 1400), (500, 800))
    time.sleep(swipe_down_delay)


# Stack every collected frame, drop exact duplicate rows and tag the source platform.
results_df = pd.concat(results).drop_duplicates().assign(platform='mitra tokopedia')

NoSuchElementException: Message: An element could not be located on the page using the given search parameters.; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
NoSuchElementError: An element could not be located on the page using the given search parameters.
    at AndroidUiautomator2Driver.findElOrEls (C:\Users\bukuw\.appium\node_modules\appium-uiautomator2-driver\node_modules\appium-android-driver\lib\commands\find.js:75:11)
    at processTicksAndRejections (node:internal/process/task_queues:95:5)
    at AndroidUiautomator2Driver.findElOrElsWithProcessing (C:\Users\bukuw\AppData\Roaming\npm\node_modules\appium\node_modules\@appium\base-driver\lib\basedriver\commands\find.ts:60:12)
    at AndroidUiautomator2Driver.findElement (C:\Users\bukuw\AppData\Roaming\npm\node_modules\appium\node_modules\@appium\base-driver\lib\basedriver\commands\find.ts:75:12)

In [None]:
from datetime import date
from pathlib import Path

# to_csv does not create missing parent folders, so make sure the output
# folder exists before writing.
Path('result').mkdir(parents=True, exist_ok=True)

# Date-stamped file name, e.g. result/tokopedia_scrape_2024_01_31.csv
today = date.today().strftime("%Y_%m_%d")
results_df.to_csv(f'result/tokopedia_scrape_{today}.csv')