In [1]:
# ----- CODE FOR SHOPEE MITRA SCRAPING
from appium import webdriver
from appium.options.android import UiAutomator2Options
from appium.webdriver.common.appiumby import AppiumBy
from appium.webdriver.common.touch_action import TouchAction
from selenium.webdriver.common.action_chains import ActionChains, ActionBuilder
from selenium.webdriver.common.actions.pointer_input import PointerInput
from selenium.webdriver.common.actions import interaction

from bs4 import BeautifulSoup as bs
import re
import numpy as np
import pandas as pd
import time
from datetime import datetime

# Pauses, in seconds, handed to time.sleep() so the app UI can settle between actions.
swipe_down_delay = 1  # pause used after most swipe() calls in the later category sections
swipe_up_delay = 1  # pause used after swipe() calls in the PULSA / PAKET DATA sections and when scrolling back up
action_delay = 2  # pause after a click or a back-key press
launch_delay = 3  # pause after (re)activating the app

# define swipe
def swipe(driver, start, end):
    """Perform a single-finger drag from `start` to `end`.

    Args:
        driver: driver the W3C touch action is built for and performed on.
        start: indexable (x, y) pair; the pointer is moved here, then pressed.
        end: indexable (x, y) pair; the pointer is moved here, then released.
    """
    touch_input = PointerInput(interaction.POINTER_TOUCH, "touch")
    gesture = ActionChains(driver)
    # Swap the default input for a touch pointer so the move/down/up sequence is a touch gesture.
    gesture.w3c_actions = ActionBuilder(driver, mouse=touch_input)
    finger = gesture.w3c_actions.pointer_action
    finger.move_to_location(start[0], start[1])
    finger.pointer_down()
    finger.move_to_location(end[0], end[1])
    finger.release()
    gesture.perform()

# organize the result
def process_mitra_shopee(rows, telco, category):
    """Turn scraped (SKU, price) pairs into a labelled DataFrame.

    Args:
        rows: iterable of 2-item records, loaded into the columns 'SKU' and 'price'.
            The price values must be strings (the `.str` accessor is applied to them).
        telco: value stored in the 'brand' column of every row.
        category: value stored in the 'category' column of every row.

    Returns:
        DataFrame with columns SKU, price, category, brand. Every '.' is removed
        from the price text (e.g. '5.500' -> '5500'); the price stays a string.
    """
    return (
        pd.DataFrame(rows, columns=['SKU', 'price'])
        .assign(
            # Literal (non-regex) replacement: same result as the regex '\.' but
            # without the invalid escape sequence in a normal string literal.
            price=lambda frame: frame['price'].str.replace('.', '', regex=False),
            category=category,
            brand=telco,
        )
    )

In [2]:
# Prerequisite: start the Appium server first by running "appium --allow-cors" in a terminal.

# Session options for the Android emulator.
# NOTE(review): these are camelCase attribute assignments; confirm that the installed
# Appium-Python-Client maps them to session capabilities (its option properties appear
# to be snake_case, e.g. platform_version / device_name) -- TODO verify.
options = UiAutomator2Options()
options.automationName = 'UiAutomator2'
options.udid = 'emulator-5554'
options.platformName = 'Android'
options.platformVersion = '12'
options.deviceName = 'bwphone'

# Open a session against the Appium server at http://127.0.0.1:4723; every later cell uses `driver`.
driver = webdriver.Remote('http://127.0.0.1:4723', options=options)

In [3]:
# Reduce warning noise: with the 'once' action only the first occurrence of each
# matching warning is printed (warnings are de-duplicated, not removed entirely).
import warnings
warnings.filterwarnings('once')

In [4]:
# ----- Constants shared by every category section in this cell -----
SHOPEE_PACKAGE = 'com.shopee.mitra.id'
HOME_ACTIVITY = '.NexusLauncherActivity'
KEYCODE_BACK = 4  # key code this script sends to "go back"
TILE_MAX_Y = 1200  # a home-screen tile is tapped only once its y coordinate is smaller than this
MAX_SCROLL_ATTEMPTS = 100  # upper bound for every "swipe until found" loop (avoids hanging forever)
BRAND_LIST_TITLE = 'Pilih Produk'  # screen title, excluded when listing brand names
ANY_TEXT_VIEW_XPATH = "//android.widget.TextView"
# "Atur Harga" button on the top right of a category screen
ATUR_HARGA_XPATH = (
    "/hierarchy/android.widget.FrameLayout/android.widget.LinearLayout"
    "/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.FrameLayout"
    "/android.view.ViewGroup/android.view.ViewGroup/android.view.ViewGroup[1]"
    "/android.view.ViewGroup[2]/android.widget.TextView"
)
PRICE_TEXT = re.compile(r'^[\d\.]+$')  # a text made only of digits and dots
TEXT_ATTRIBUTE = re.compile('text="(.+)"')
PULSA_HOME_SWIPE = ((350, 1750), (350, 1500))  # swipe up to open the bottom drawer
HOME_SWIPE = ((500, 1400), (500, 1000))
PRODUCT_SWIPE = ((500, 1400), (500, 1000))
BRAND_SWIPE = ((500, 1400), (500, 800))
BRAND_SWIPE_BACK = ((500, 800), (500, 1400))


# ----- Helpers -----
def press_back(driver, times=1):
    """Send the back key `times` times, sleeping `action_delay` after each press."""
    for _ in range(times):
        driver.press_keycode(KEYCODE_BACK)
        time.sleep(action_delay)


def text_view_xpath(text):
    """Return the XPath selecting the TextView whose text equals `text`.

    The literal is double-quoted when `text` contains an apostrophe, so such
    labels do not produce a malformed XPath expression.
    """
    quote = '"' if "'" in text else "'"
    return f"//android.widget.TextView[@text={quote}{text}{quote}]"


def open_category(driver, label, swipe_path, swipe_delay, load_delay):
    """Swipe the home screen until tile `label` is in position, tap it, then tap "Atur Harga".

    Args:
        driver: Appium driver.
        label: exact text of the home-screen tile (e.g. 'PULSA').
        swipe_path: (start, end) coordinate pairs passed to swipe().
        swipe_delay: seconds to sleep after each swipe.
        load_delay: seconds to sleep after tapping the tile.

    Raises:
        RuntimeError: the tile was not found with y < TILE_MAX_Y within
            MAX_SCROLL_ATTEMPTS attempts.
    """
    tile_xpath = text_view_xpath(label)
    start, end = swipe_path
    for _ in range(MAX_SCROLL_ATTEMPTS):
        try:
            swipe(driver, start, end)
            time.sleep(swipe_delay)
            tile = driver.find_element(by=AppiumBy.XPATH, value=tile_xpath)
            # only accept the tile once it sits above TILE_MAX_Y, so the right box gets clicked
            if tile.location['y'] < TILE_MAX_Y:
                break
        except Exception:
            # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still stops the cell
            swipe(driver, start, end)
            time.sleep(swipe_delay)
    else:
        raise RuntimeError(f"Could not bring '{label}' into view after {MAX_SCROLL_ATTEMPTS} attempts")

    driver.find_element(by=AppiumBy.XPATH, value=tile_xpath).click()
    time.sleep(load_delay)
    driver.find_element(by=AppiumBy.XPATH, value=ATUR_HARGA_XPATH).click()
    time.sleep(action_delay)


def visible_brands(driver):
    """Return the text of every TextView currently on screen, except the screen title."""
    # read each element's text once (each `.text` access is a separate driver call)
    texts = [view.text for view in driver.find_elements(by=AppiumBy.XPATH, value=ANY_TEXT_VIEW_XPATH)]
    return [text for text in texts if text != BRAND_LIST_TITLE]


def scrape_visible_rows(driver):
    """Parse the current page source into tuples of texts, one per price-like node.

    For every node whose `text` attribute consists only of digits and dots, the
    serialized parent node is searched for `text="..."` attributes and the
    matches are returned as one tuple.
    """
    soup = bs(driver.page_source, 'html.parser')
    return [
        tuple(TEXT_ATTRIBUTE.findall(str(node.parent)))
        for node in soup.find_all(attrs={'text': PRICE_TEXT})
    ]


def scrape_product_list(driver, swipe_delay):
    """Collect rows from a product list, swiping until a page shows nothing new.

    The loop stops as soon as every row on the current page has already been
    collected, i.e. the swipe no longer reveals new products. Rows seen on
    several pages are kept as duplicates; they are dropped when the per-brand
    frames are combined.
    """
    rows = []
    seen = set()
    while True:
        new_rows = scrape_visible_rows(driver)
        fresh = set(new_rows)
        if fresh <= seen:
            break
        swipe(driver, *PRODUCT_SWIPE)
        time.sleep(swipe_delay)
        rows.extend(new_rows)
        seen |= fresh
    return rows


def tap_brand(driver, brand, scroll_to_find=False):
    """Tap the brand entry `brand`, then sleep `action_delay`.

    With `scroll_to_find=True` a failed attempt swipes the brand list and
    retries, raising RuntimeError after MAX_SCROLL_ATTEMPTS attempts. Without
    it, a missing element raises immediately.
    """
    brand_xpath = text_view_xpath(brand)
    if not scroll_to_find:
        driver.find_element(by=AppiumBy.XPATH, value=brand_xpath).click()
        time.sleep(action_delay)
        return
    for _ in range(MAX_SCROLL_ATTEMPTS):
        try:
            driver.find_element(by=AppiumBy.XPATH, value=brand_xpath).click()
            time.sleep(action_delay)
            return
        except Exception:
            swipe(driver, *BRAND_SWIPE)
            time.sleep(swipe_down_delay)
    raise RuntimeError(f"Could not find brand '{brand}' after {MAX_SCROLL_ATTEMPTS} attempts")


def scrape_brand(driver, brand, category, swipe_delay, scroll_to_find=False):
    """Open `brand`, scrape its product list and return it as a DataFrame.

    The caller is responsible for pressing back afterwards.
    """
    tap_brand(driver, brand, scroll_to_find=scroll_to_find)
    rows = scrape_product_list(driver, swipe_delay)
    return process_mitra_shopee(rows, telco=brand, category=category)


# ------- GO TO THE APP -----------
# keep pressing back until we reach the home screen
while driver.current_activity != HOME_ACTIVITY:
    press_back(driver)

# open the shopee app then wait
driver.activate_app(SHOPEE_PACKAGE)
time.sleep(launch_delay)

# a popup will appear: press back, and if that took us out of the app just go back in
press_back(driver)
if driver.current_package != SHOPEE_PACKAGE:
    driver.activate_app(SHOPEE_PACKAGE)
    time.sleep(launch_delay)

results = []

# ------- PULSA -----------
open_category(driver, 'PULSA', PULSA_HOME_SWIPE, swipe_delay=swipe_up_delay, load_delay=action_delay)

# list all the brands there
brands = visible_brands(driver)
for brand in brands:
    results.append(scrape_brand(driver, brand, category='Pulsa', swipe_delay=swipe_up_delay))
    press_back(driver)  # back to the brand list

# go back to homepage
press_back(driver, times=2)


# ------- PAKET DATA -----------
open_category(driver, 'PAKET DATA', HOME_SWIPE, swipe_delay=swipe_down_delay, load_delay=action_delay)

brands = visible_brands(driver)
for brand in brands:
    results.append(scrape_brand(driver, brand, category='Paket Data', swipe_delay=swipe_up_delay))
    press_back(driver)  # back to the brand list

# go back to homepage
press_back(driver, times=2)


# ------- LISTRIK PLN -----------
# this screen gets a longer wait after the tile is tapped
open_category(driver, 'LISTRIK PLN', HOME_SWIPE, swipe_delay=swipe_down_delay, load_delay=action_delay * 5)

# no brand list here: the products are scraped directly, with an empty brand
rows = scrape_product_list(driver, swipe_down_delay)
results.append(process_mitra_shopee(rows, category='PLN', telco=''))

# go back to homepage
press_back(driver, times=2)


# ------- VOUCHER GAME -----------
open_category(driver, 'VOUCHER GAME', HOME_SWIPE, swipe_delay=swipe_down_delay, load_delay=action_delay * 5)

# the brand list is scrolled here: keep swiping until a page shows no unseen brand
brands = []
while True:
    new_brands = visible_brands(driver)
    if set(new_brands).issubset(brands):
        break
    swipe(driver, *BRAND_SWIPE)
    time.sleep(swipe_down_delay)
    brands.extend(new_brands)

# scroll back until the first collected brand is visible again
if brands:  # nothing to scroll back to (and no brands[0]) when the list came back empty
    for _ in range(MAX_SCROLL_ATTEMPTS):
        swipe(driver, *BRAND_SWIPE_BACK)
        time.sleep(swipe_up_delay)
        if brands[0] in visible_brands(driver):
            break
    else:
        raise RuntimeError(f"Could not scroll back to '{brands[0]}' after {MAX_SCROLL_ATTEMPTS} attempts")

# scrape each brand once, in first-seen order
for brand in dict.fromkeys(brands):
    results.append(
        scrape_brand(driver, brand, category='Voucher Game', swipe_delay=swipe_down_delay, scroll_to_find=True)
    )
    press_back(driver)  # back to the brand list

# go back to homepage
press_back(driver, times=2)


# Hard-coded e-wallet top-up rows added to the scraped data.
# NOTE(review): these prices are ints while the scraped prices are strings
# (process_mitra_shopee only strips the dots), so the combined 'price' column has mixed types.
ewallet_rows = pd.DataFrame(
    {
        'SKU': ['Topup GoPay', 'Topup OVO', 'Topup Dana', 'Topup LinkAja'],
        'price': [2000, 1500, 1000, 2000],
        'category': 'E-Wallet',
        'brand': ['GoPay', 'OVO', 'Dana', 'LinkAja'],
        'platform': 'shopee',
    }
)

# Combine every per-brand frame, drop rows scraped more than once, tag the platform,
# then add the e-wallet rows. pd.concat replaces DataFrame.append (removed in pandas 2.0);
# as with append, the original row indexes are kept.
results_df = pd.concat(
    [
        pd.concat(results).drop_duplicates().assign(platform='shopee'),
        ewallet_rows,
    ]
)

  pd.concat(results)


In [1]:
# Display the combined scrape result. `results_df` exists only after the scraping cell
# has run in the current kernel; otherwise this cell raises NameError.
results_df

NameError: name 'results_df' is not defined

In [6]:
from datetime import date
from pathlib import Path

# Write the result to result/shopee_scrape_<YYYY_MM_DD>.csv, stamped with today's date.
today = date.today().strftime("%Y_%m_%d")
output_path = Path('result') / f'shopee_scrape_{today}.csv'
# Create the output folder if needed; to_csv fails when the directory does not exist.
output_path.parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(output_path)