### Translation using Google

In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import importlib.util
import os

In [2]:
def get_module(module_name, file_name):
    name = file_name.split('.')[0]

    module_path = os.path.join(os.getcwd(), '..', module_name, file_name)
    spec = importlib.util.spec_from_file_location(name, module_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

In [3]:
driver_module = get_module('common', 'web_driver.py')
get_driver = driver_module.get_driver

translate_module = get_module('common', 'translate.py')
Translate = translate_module.Translate

In [4]:
driver = get_driver()

In [5]:
translator = Translate(driver)

In [6]:
from langdetect import detect
import requests

In [7]:
COUNTRIES = ['Ghana', 'Kenya', 'Liberia', 
             'Malawi', 'Mauritania', 'Morocco',
             'Mozambique', 'Nigeria', 'Sierra Leone', 
             'Somalia', 'South Africa', 'Tanzania', 
             'Zambia', 'Thailand']

LANGUAGES = {
    'Ghana': ['English'],
    'Kenya': ['English', 'Swahili'],
    'Liberia': ['English'],
    'Malawi': ['Chichewa', 'English'],
    'Mauritania': ['Arabic'],
    'Morocco': ['Arabic'],
    'Mozambique': ['Portuguese'],
    'Nigeria': ['English'],
    'Sierra Leone': ['English'],
    'Somalia': ['Somali'],
    'South Africa': ['Afrikaans', 'English', 'Zulu'],
    'Tanzania': ['Swahili', 'English'],
    'Zambia': ['English'],
    'Thailand': ['Thai']
}

LANGUAGES_CODES = {
    'English': 'en',
    'Swahili': 'sw',
    'Arabic': 'ar',
    'Portuguese': 'pt',
    'Somali': 'so',
    'Afrikaans': 'af',
    'Chichewa': 'ny',
    'Northern Sotho': 'ns',
    'Ndebele': 'nr',
    'Siswati': 'ss',
    'Tswana': 'tn',
    'Tsonga': 'ts',
    'Venda': 've',
    'Xhosa': 'xh',
    'Zulu': 'zu',
    'Thai': 'th'
}

COUNTRY_CODES = {
    'Ghana': 'GH',
    'Kenya': 'KE',
    'Liberia': 'LR',
    'Malawi': 'MW',
    'Mauritania': 'MR',
    'Morocco': 'MA',
    'Mozambique': 'MZ',
    'Nigeria': 'NG',
    'Sierra Leone': 'SL',
    'Somalia': 'SO',
    'South Africa': 'ZA',
    'Tanzania': 'TZ',
    'Zambia': 'ZM',
    'Thailand': 'TH'
}


In [8]:
def getAPIURL(text, from_lang, to_lang):
    return f"https://api.mymemory.translated.net/get?q={text}&langpair={from_lang}%7C{to_lang}"

In [9]:
def translate_using_google(text):
    return translator.translate(text)

In [10]:
def translateUsingApi(text):
    from_lang = detect(text)
    url = getAPIURL(text, from_lang, 'en')
    response = requests.get(url, verify=False)
    data = response.json()
    response.raise_for_status() # Raise an exception for HTTP errors
    data['responseData']['translatedText']
    return data['responseData']['translatedText']

In [11]:
translate_using_google("ŸÖÿ±ÿ≠ÿ®Ÿãÿß ÿ®ÿßŸÑÿ¨ŸÖŸäÿπÿå ÿ£ÿ™ŸÖŸÜŸâ ŸÑŸÉŸÖ ŸäŸàŸÖŸãÿß ÿ¨ÿØŸäÿØŸãÿß ŸÖŸÑŸäÿ¶Ÿãÿß ÿ®ÿßŸÑÿ∑ÿßŸÇÿ© ŸàÿßŸÑÿ≠ÿ∏.")

In [12]:
translate_using_google("‡∏ã‡∏±‡∏°‡∏ã‡∏∏‡∏á‡∏Å‡∏±‡∏ö‡πÇ‡∏Å‡∏î‡∏±‡∏Å‡∏ô‡∏±‡πâ‡∏ô‡∏°‡∏µ‡∏õ‡∏±‡∏ç‡∏´‡∏≤‡∏Å‡∏±‡∏ô‡∏≠‡∏¢‡∏π‡πà‡πÉ‡∏ô‡∏Å‡∏£‡∏ì‡∏µ‡∏™‡∏¥‡∏ó‡∏ò‡∏¥‡∏ö‡∏±‡∏ï‡∏£‡∏ô‡∏µ‡πâ ‡∏Å‡πà‡∏≠‡∏ô‡∏´‡∏ô‡πâ‡∏≤‡∏ô‡∏µ‡πâ ‡πÇ‡∏Å‡∏î‡∏±‡∏Å‡∏Å‡πá‡πÑ‡∏î‡πâ‡∏£‡πâ‡∏≠‡∏á‡πÄ‡∏£‡∏µ‡∏¢‡∏ô‡∏Å‡∏±‡∏ö‡∏´‡∏ô‡πà‡∏ß‡∏¢‡∏á‡∏≤‡∏ô‡πÄ‡∏î‡∏µ‡∏¢‡∏ß‡∏Å‡∏±‡∏ô‡∏ô‡∏µ‡πâ ‡∏ß‡πà‡∏≤‡∏ã‡∏±‡∏°‡∏ã‡∏∏‡∏á‡πÅ‡∏•‡∏∞‡πÅ‡∏≠‡∏•‡∏à‡∏µ‡∏•‡∏∞‡πÄ‡∏°‡∏¥‡∏î‡∏™‡∏¥‡∏ó‡∏ò‡∏¥‡∏ö‡∏±‡∏ï‡∏£‡∏Ç‡∏≠‡∏á‡πÇ‡∏Å‡∏î‡∏±‡∏Å ‡∏ã‡∏∂‡πà‡∏á‡∏Ñ‡∏ì‡∏∞‡∏Å‡∏£‡∏£‡∏°‡∏Å‡∏≤‡∏£‡∏Å‡πá‡πÑ‡∏î‡πâ‡∏£‡∏±‡∏ö‡πÄ‡∏£‡∏∑‡πà‡∏≠‡∏á‡∏£‡πâ‡∏≠‡∏á‡πÄ‡∏£‡∏µ‡∏¢‡∏ô‡∏ô‡∏±‡πâ‡∏ô‡πÑ‡∏ß‡πâ‡∏û‡∏¥‡∏à‡∏≤‡∏£‡∏ì‡∏≤ ‡∏Ñ‡∏ì‡∏∞‡∏Å‡∏£‡∏£‡∏°‡∏Å‡∏≤‡∏£‡∏ô‡∏µ‡πâ‡∏°‡∏µ‡∏≠‡∏≥‡∏ô‡∏≤‡∏à‡πÉ‡∏ô‡∏Å‡∏≤‡∏£‡∏´‡∏¢‡∏∏‡∏î‡∏Å‡∏≤‡∏£‡∏ô‡∏≥‡πÄ‡∏Ç‡πâ‡∏≤‡∏™‡∏¥‡∏ô‡∏Ñ‡πâ‡∏≤‡∏ó‡∏µ‡πà‡∏•‡∏∞‡πÄ‡∏°‡∏¥‡∏î‡∏™‡∏¥‡∏ó‡∏ò‡∏¥‡∏ö‡∏±‡∏ï‡∏£ ‡∏à‡∏∂‡∏á‡∏ó‡∏≥‡πÉ‡∏´‡πâ‡πÄ‡∏õ‡πá‡∏ô‡∏ä‡πà‡∏≠‡∏á‡∏ó‡∏≤‡∏á‡∏ó‡∏µ‡πà‡∏°‡∏±‡∏Å‡πÉ‡∏ä‡πâ‡πÉ‡∏ô‡∏Å‡∏≤‡∏£‡∏£‡πâ‡∏≠‡∏á‡πÄ‡∏£‡∏µ‡∏¢‡∏ô‡πÄ‡∏£‡∏∑‡πà‡∏≠‡∏á‡∏ô‡∏µ‡πâ")

### Google Scanner

In [13]:
with open('../common/logger.py') as f:
    exec(f.read())

logger = get_logger(name='google')
logger.info('Start crawl multi-country google')
logger

<Logger google (DEBUG)>

In [14]:
from time import sleep
google_count = 0
def get_google(url):
    google_count += 1
    if google_count % 40 == 0:
        sleep(60)
    driver.get(url)

In [15]:
TAT_CA_CAC_TU = 'bypass knox samsung'
CUM_TU_CHINH_XAC = ''
BAT_KY_TU_NAO = ''
KHONG_TU_NAO = ''

In [16]:
TAT_CA_CAC_TU = 'samsung'
CUM_TU_CHINH_XAC = '"bypass KNOX" OR "bypass samsung"' # chua 1 trong cac cum tu
BAT_KY_TU_NAO = 'KNOX KG hack bypass attack' # ch∆∞a 1 trong cac tu nay
KHONG_TU_NAO = ''

In [17]:
FROM_DAY = 15
FROM_MONTH = 4
FROM_YEAR = 2024
TO_DAY = 15
TO_MONTH = 10
TO_YEAR = 2024

In [18]:
try:
    get_google("https://www.google.com/advanced_search")
    elems = driver.find_elements(By.TAG_NAME, "input")
    elems[0].send_keys(TAT_CA_CAC_TU)
    elems[1].send_keys(CUM_TU_CHINH_XAC)
    elems[2].send_keys(BAT_KY_TU_NAO)
    elems[3].send_keys(KHONG_TU_NAO)
    elems[14].send_keys(Keys.ENTER)
except Exception as e:
    logger.error(f'Selenium google search advanced: {e}')
    

In [19]:
# INPUT = "samsung AND (unlock OR hacking OR bypass OR crack OR furid OR jabsasho OR ka gudubid OR jebin) AND (Knox guard OR knoxguard OR KG OR MDM OR Maareynta Aaladaha Gacanta)"
INPUTS = ["\"samsung\" \"knox\" \"guard\" hack",
         "\"samsung\" \"knox\" \"guard\" crack",
         "\"samsung\" \"knox\" \"guard\" bypass",
         "\"samsung\" \"knox\" \"guard\" unlock",
         "\"samsung\" \"mdm\" hack",
         "\"samsung\" \"mdm\" crack",
         "\"samsung\" \"mdm\" bypass",
         "\"samsung\" \"mdm\" unlock",
         "\"samsung\" \"kg\" hack",
         "\"samsung\" \"kg\" crack",
         "\"samsung\" \"kg\" bypass",
         "\"samsung\" \"kg\" unlock",
         "\"samsung\" \"finance\" plus hack",
         "\"samsung\" \"finance\" plus crack",
         "\"samsung\" \"finance\" plus bypass",
         "\"samsung\" \"finance\" plus unlock"
         ]

INPUT = "\"samsung\" (\"knox\" \"guard\" OR \"MDM\" OR \"KG\" OR \"finance\") (\"hack\" OR \"crack\" OR \"bypass\" OR \"unlock\")"

In [20]:
urlByCountry = []
COUNTRYY = ['Thailand']

In [21]:
def getUrlByCountry():
    url = driver.current_url
    for country in COUNTRYY:
        new_url = url + "&cr=country" + COUNTRY_CODES[country]
        urlByCountry.append(new_url)

In [22]:
# driver.get("https://www.google.com/search?q="+INPUT)
# getUrlByCountry()

for input in INPUTS:
    get_google("https://www.google.com/search?q="+input)
    getUrlByCountry()

urlByCountry

UnboundLocalError: cannot access local variable 'google_count' where it is not associated with a value

In [None]:
len(urlByCountry)

16

In [None]:
def preprocess():
    try:
        cur_url = driver.current_url
        # driver.get(cur_url+"&udm=14&tbs=qdr:w")
        get_google(cur_url+f"&udm=14&tbs=cdr%3A1%2Ccd_min%3A{FROM_MONTH}%2F{FROM_DAY}%2F{FROM_YEAR}%2Ccd_max%3A{TO_MONTH}%2F{TO_DAY}%2F{TO_YEAR}")
    except Exception as e:
        logger.error(f'Selenium google filter search result: {e}')

In [None]:
def process():    
    list_ignore = ['youtube.com', 'samsung.com']
    try:
        while(1):
            elem = driver.find_elements(By.CSS_SELECTOR, 'div.g')
            for e in elem:
                title       = e.find_element(By.TAG_NAME, 'h3').text
                link        = e.find_element(By.TAG_NAME, 'a').get_attribute("href")
                elem        = e.find_elements(By.TAG_NAME, 'span')
                website     = elem[2].text 
                elem        = e.find_element(By.TAG_NAME, 'div')
                elem        = elem.find_elements(By.TAG_NAME, 'div')
                description = elem[-2].text
                if all(ignore not in link for ignore in list_ignore):
                    google_links.append({'title': title, 'link': link, 'description': description, 'website': website})

            # next page    
            elem = driver.find_element(By.ID, "pnnext")
            elem.click()

    except NoSuchElementException:
        logger.info('Searched all pages!')
    except Exception as e:
        logger.info(f'Selenium search filter many pages: {e}')

In [None]:
google_links = []
def getAllGoogleLinks():
    for url in urlByCountry:
        get_google(url)
        preprocess()
        process()

In [None]:
def remove_duplicate_links():
    seen_links = set()
    result = []
    
    for link in google_links:
        if link['link'] not in seen_links:
            result.append(link)
            seen_links.add(link['link'])
    
    return result

In [None]:
getAllGoogleLinks()
google_links_all = google_links
google_links = remove_duplicate_links()
google_links

[{'title': 'Sandee Phoenix',
  'link': 'https://www.facebook.com/paviwat.suwannasansak/?locale=th_TH',
  'description': '2024. 5. 29. ‚Äî ... Knox Guard MDM/KG and IT admin restrictions on various Samsung Galaxy smartphone models. Supported models include Galaxy Z Fold, Z Flip, S series, Note ...',
  'website': 'Facebook'},
 {'title': 'T-Tool Team',
  'link': 'https://www.facebook.com/photo.php?fbid=2938584213119832&id=2027539344224328&set=a.2036003380044591&locale=th_TH',
  'description': '2024. 7. 8. ‚Äî ... samsung knox Guard (KG) status (4 cases) ‚úîÔ∏èKG Prenormal ‚úîÔ∏èKG Locked ‚úîÔ∏èKG Completed ‚úîÔ∏èKG Checking Wipe userdata & Reset UserLocks [7 Methods] ‚úîÔ∏è ...',
  'website': 'Facebook'},
 {'title': 'Clean IMEI',
  'link': 'https://www.facebook.com/phonereform/?locale=th_TH',
  'description': '2024. 7. 5. ‚Äî Samsung #kg Removal. CLEANIMEI.COM. Samsung KG (Knox Guard) Lock Removal Service ... for Samsung Unlock, Samsung IMEI Repair, and. Samsung FRP Unlock services ...',


In [None]:
for link in google_links:
    try:
        if(detect(link["description"])!= 'en'):
            original = link["description"]
            translate = translate_using_google(original)
            link["description"] = "Translate by API: " + translate + "\nOriginal: " + original
    except:
        pass
    try:
        if(detect(link["title"])!= 'en'):
            original = link["title"]
            translate = translate_using_google(original)
            link["title"] = "Translate by API: " + translate + "\nOriginal: " + original
    except:
        pass



In [None]:
google_links

[{'title': 'Clean IMEI',
  'link': 'https://www.facebook.com/phonereform/?locale=th_TH',
  'description': '5 thg 7, 2024 ‚Äî Samsung #kg Removal. CLEANIMEI.COM. Samsung KG (Knox Guard) Lock Removal Service ... for Samsung Unlock, Samsung IMEI Repair, and. Samsung FRP services ...',
  'website': 'Facebook'},
 {'title': 'T-Tool Team',
  'link': 'https://www.facebook.com/photo.php?fbid=2938584213119832&id=2027539344224328&set=a.2036003380044591&locale=th_TH',
  'description': '8 thg 7, 2024 ‚Äî ... samsung knox Guard (KG) status (4 cases) ‚úîÔ∏èKG Prenormal ‚úîÔ∏èKG Locked ‚úîÔ∏èKG Completed ‚úîÔ∏èKG Checking Wipe userdata & Reset UserLocks [7 Methods] ‚úîÔ∏è ...',
  'website': 'Facebook'},
 {'title': 'Translate by API: Unlock True Center Pro Phone\nOriginal: ‡∏õ‡∏•‡∏î‡∏•‡πá‡∏≠‡∏Ñ‡πÇ‡∏ó‡∏£‡∏®‡∏±‡∏û‡∏ó‡πå‡∏ï‡∏¥‡∏î‡πÇ‡∏õ‡∏£‡∏®‡∏π‡∏ô‡∏¢‡πå‡∏ó‡∏£‡∏π',
  'link': 'https://pantip.com/topic/39500035/desktop',
  'description': 'Translate by API: 1 ng√†y tr·∫£c ‚Äî Samsung Galaxy A50 Samsung... bou

In [None]:
len(google_links)

43

### Save data

In [None]:
COL_TYPE = 'Type'
COL_LINK = 'Link'
COL_TITLE = 'Title'
COL_DES = 'Short description'
COL_CONTENT = 'Web content'
COL_SUMMARY = 'Summary'

In [None]:
data = []

for i, link in enumerate(google_links):
    row = {
        COL_TYPE:'google',
        COL_LINK: link["link"],
        COL_TITLE: link["title"],
        COL_DES: link["description"]
    }
    data.append(row)

In [None]:
len(google_links)

43

In [None]:
import os
import pandas as pd
from datetime import datetime
try:
    today = datetime.today().date()
    file_path = f'..//output//output_{today}.xlsx'
    sheet_name = f'google_{today}'

    columns = [COL_TYPE, COL_LINK, COL_TITLE, COL_DES]
    df = pd.DataFrame(data, columns=columns)

    # Check file exist, delete old sheet before add new sheet
    if os.path.exists(file_path):
        with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='new') as writer:
            if sheet_name in writer.book.sheetnames:
                writer.book.remove(writer.book[sheet_name])
            df.to_excel(writer, sheet_name=sheet_name, index=False)
    else:
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=False)

    logger.info(f'Exported {len(data)} data successful')
except Exception as e:
    logger.error(f'Save data fail: {e}')