### Get link

Doc: https://developers.google.com/youtube/v3/docs/search/list?hl=vi

In [1]:
import certifi
certifi.where()
cert_path = certifi.where()

### Logger

In [2]:
with open('../common/logger.py') as f:
    exec(f.read())

logger = get_logger(name='youtube')
logger.info('Start crawl youtube')

### Import

In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException

import os
import importlib.util
from time import sleep

import warnings
warnings.filterwarnings('ignore')
TAG = 'YOUTUBE'

In [4]:
def get_module(module_name, file_name):
    name = file_name.split('.')[0]

    module_path = os.path.join(os.getcwd(), '..', module_name, file_name)
    spec = importlib.util.spec_from_file_location(name, module_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

In [5]:
variable_module = get_module('internal', 'variable.py')
API_KEY = variable_module.API_KEY

### Get youtube search api

In [6]:
# limit time query
from datetime import datetime
from dateutil.relativedelta import relativedelta

def get_previous_date(month, day):
    today = datetime.now()

    # divide date
    previous_month_date = today - relativedelta(months=month, days=day)

    # Convert to require config "YYYY-MM-DDTHH:MM:SSZ"
    formatted_date = previous_month_date.strftime("%Y-%m-%dT%H:%M:%SZ")

    return formatted_date


In [7]:
# URL for API YouTube
import requests
API_URL = 'https://www.googleapis.com/youtube/v3/search'

def search_youtube(query, before, after):
    params = {
        'q': query,
        'part': 'snippet',
        'type': 'video',
        'order': 'date',
        'publishedBefore': before,
        'publishedAfter': after,
        'maxResults': 50,
        'key': API_KEY
    }
    
    response = requests.get(API_URL, params=params, verify=False)
    response.raise_for_status()  # Raise an exception for HTTP errors
    
    items = response.json()['items']
    return items

In [8]:
try:
    query = "samsung+kg+mdm+unlock"
    before_date = get_previous_date(month=0, day=0) # end
    after_date = get_previous_date(month=1, day=0) # start

    search_result = search_youtube(query= query, before=before_date, after=after_date)

    
    logger.info(f'Search youtube api success with {len(search_result)} videos')
except Exception as e:
    logger.error(f'Youtube search api err: {e}')

In [9]:
video_ids = []
try:
    video_ids = [item['id']['videoId'] for item in search_result]
except Exception as e:
    logger.error(f'Youtube search json: {e}')

### Get youtube video data API

In [10]:
# get video id
from urllib.parse import urlparse, parse_qs

def get_video_id(url):
    parsed_url = urlparse(url)
    query_params = parse_qs(parsed_url.query)
    return query_params.get('v', [None])[0]

In [11]:
import json

def prinJson(data):
    formatted_json = json.dumps(data, indent=4, ensure_ascii=False)
    print(formatted_json)

In [12]:
COL_TYPE = 'Type'
COL_LINK = 'Link'
COL_TITLE = 'Title'
COL_PUBLISHED = 'Published at'
COL_DES = 'Short description'
COL_CONTENT = 'Web content'
COL_SUMMARY = 'Summary'

In [13]:
import requests
# requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = 'ALL:@SECLEVEL=1'

# URL for API YouTube
API_URL_VIDEO_INFO = 'https://www.googleapis.com/youtube/v3/videos'

def get_video_info(video_id):
    params = {
        'part': 'snippet,contentDetails,statistics',
        'id': video_id,
        'key': API_KEY
    }
    
    response = requests.get(API_URL_VIDEO_INFO, params=params, verify=False)
    response.raise_for_status()  # Raise an exception for HTTP errors
    
    video_info = response.json()['items'][0]
    return video_info

def get_data(link, video_info):
    title = video_info['snippet']['title']
    description = video_info['snippet']['description']
    published_at = video_info['snippet']['publishedAt']
    res = {
        COL_TYPE: 'youtube',
        COL_LINK: link,
        COL_PUBLISHED: published_at,
        COL_TITLE: title,
        COL_DES: description
    }
    return res

In [14]:
#ignore title
ignore_word = ["T Mobile", "US Cellular", "Sprint USA", "Unlock Service", "Xfinity USA", "Cricket USA", "FRP", "Boost USA", "Verizon USA", "Spectrum", "Lost mode", "Huawei",
                   "Xiaomi", "screen lock", "TFN", "iphone", "icloud"]
def checkContain(title):
    title = title.lower()
    for word in ignore_word:
        if word.lower() in title:
            return True
    else:
        return False

In [15]:
data = []
try:
    for i, video_id in enumerate(video_ids):
        video_info = get_video_info(video_id)
        link = f"https://www.youtube.com/watch?v={video_id}"
        data_row = get_data(link, video_info)
        title = data_row[COL_TITLE]
        if checkContain(title = title):
            data.append(data_row)
            print (f'{i} {data_row[COL_TITLE]}')
    logger.info(f'Get video info success')
except Exception as e:
    logger.error(f'Get video info err: {e}')

1 Samsung a035f kg lock | #shorts #shortvideo #samsung #realmefrpunlocknewtrick2022 #factoryresetphone
6 PL Tool V1.0  Qualcomm Samsung Unlock Tool  FRP, MDM, KG, Screenlock  Remove
19 S21 FE 5G Finance Plus 2024/Samsung Frp Bypass/New Security ADB Enable 2024_Samsung Finance Unlock
24 iPhone iCloud, Erase Frp,Factory Reset, Bootloader, kg,MDM, imei Repair With Griffin Unlocker
30 TSM Pro Tool Activation - Samsung KG lock MDM FRP Remove Xiaomi LG Huawei more
31 SAMSUNG A15  KG LOCKED FRP ON MDM KG BYPASS ALL BINARY
34 Gsm Power tool / Samsung Frp Remove Tool Android Version 12/13/14
35 Samsung KG & MDM Reset | Restore imei ,Erase FRP,Format Safe Bootloader Unlock TSM Tool One Click


### GENAI

In [16]:
genai_module = get_module('common', 'genai.py')
Genai = genai_module.Genai

In [17]:
genai = Genai()

Open list conversation


In [18]:
for i, row in enumerate(data):
    try:
        summary  = genai.search(row[COL_DES])
        print(summary)
        print(f"{i}. -----------------------------")
        row[COL_SUMMARY] = summary

    except Exception as e:
        logger.error(f'Query genai fail: {e}')
        row[COL_SUMMARY] = ''

Fail
0. -----------------------------
Fail
1. -----------------------------
Fail
2. -----------------------------
Fail
3. -----------------------------
Fail
4. -----------------------------
Fail
5. -----------------------------
Fail
6. -----------------------------
Fail
7. -----------------------------


### Save data

In [19]:
data

[{'Type': 'youtube',
  'Link': 'https://www.youtube.com/watch?v=wAb_ZqDnDLE',
  'Published at': '2024-10-02T12:54:17Z',
  'Title': 'Samsung a035f kg lock | #shorts #shortvideo #samsung #realmefrpunlocknewtrick2022 #factoryresetphone',
  'Short description': 'Samsung kg lock | all Samsung Android 14 kg unlock | Samsung a23 5g kg unlock | Samsung a236e u7 kg #samsung #frplock #how #mobilephone #mobilesolution #realme #realmemobiles #samsungmobile #security #umt \nJoin my group \nhttps://chat.whatsapp.com/Hlt1pDpVHqg6eJb1NOiBgJ\nTelegram \nhttps://t.me/Ajaytechnicalsoftwearsolution\nfrplock #how #mobilephone #mobilesolution #realme #realmemobiles #samsung #samsungmobile #security #umt\nMDM fix tool Samsung Android 14 kg Remove‚úÖ All CPU support ‚úÖ QC MTK Exynos Spd Mdm fix tool 2024 kg #samsung\u200b #mdm\u200b #androidphone\u200b #tech\u200b #frp\u200b #frpbypass\u200b #repai\u200b #samsunggalaxy\u200b #vivo\u200b #frpsolution\u200b\n#samsung\u200b #vivoy16frpbypassandroid12\u200b #mdm

In [20]:
import pandas as pd
import os
from datetime import datetime
from openpyxl import load_workbook
try:
    today = datetime.today().date()
    df = pd.DataFrame(data)

    file_path = f'..//output//output_{today}.xlsx'
    sheet_name = f'youtube_{today}'

    if os.path.exists(file_path):
        with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='new') as writer:
            if sheet_name in writer.book.sheetnames:
            # X√≥a sheet c≈©
                writer.book.remove(writer.book[sheet_name])
            df.to_excel(writer, sheet_name=sheet_name, index=False)
    else:
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=False)

    logger.info(f'Export {len(data)} data successful')
except Exception as e:
    logger.error(f"Save data fail: {e}")

query"
("Samsung Knox Guard" OR "Samsung MDM" OR "Samsung KG") AND ("unlock" OR "unfasten" OR "unbolt" OR "open" OR "release" OR "unlatch" OR "disengage" OR "free" OR "unseal" OR "uncover" OR "access") AND ("bypass" OR "circumvent" OR "avoid" OR "sidestep" OR "evade" OR "skip" OR "dodge" OR "work around" OR "ignore" OR "overcome" OR "elude") AND ("removal" OR "elimination" OR "deletion" OR "eradication" OR "extraction" OR "withdrawal" OR "dismissal" OR "expulsion" OR "displacement" OR "ouster" OR "exclusion") AND ("tool" OR "software" OR "method" OR "technique" OR "unlocker" OR "key generator" OR "exploit" OR "vulnerability" OR "APK") AND ("ADB" OR "flash firmware") AND ("guide" OR "tutorial" OR "step-by-step" OR "how-to") AND ("legal" OR "issues" OR "compatibility" OR "support") AND ("community forums" OR "troubleshooting") AND ("2024" OR "updated methods") AND ("Galaxy S-series" OR "Note-series" OR "latest security patch") AND (date:2024-08)

32 words only for google search:
("Samsung Knox Guard" OR "Samsung MDM" OR "Samsung KG") AND ("unlock" OR "bypass" OR "removal") AND ("tool" OR "method" OR "software" OR "guide") AND ("August 2024" OR "latest update")
