# In [None]:
import time
import re
import requests
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
# from googleapiclient.errors import HttpError

# from google.oauth2 import service_account
# from googleapiclient.discovery import build

# Print a start banner with the current local time.
print("스크립트 시작됨: ", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

# Google Sheets access: service-account key file and the OAuth scope requested
# for it (read/write access to spreadsheets).
SERVICE_ACCOUNT_FILE = r'C:\Users\이근행\Desktop\파이썬 코드\고객 관리\고객 관리_업데이트\kirby-customer-management-d21001a30bf2.json'
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
# NOTE(review): `service_account` and `build` are referenced below, but the only
# imports of them visible in this file are commented out. As written this
# raises NameError at module load unless the names are bound elsewhere — confirm
# and restore the imports (same for `HttpError`, used by append_sheet_data).
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
# Sheets API v4 client; passed to append_sheet_data() by get_shorts_monthly_views().
sheets_service = build('sheets', 'v4', credentials=credentials)
# ID of the target spreadsheet that scraped rows are appended to.
SPREADSHEET_ID = '1JQ5I1SC9Qhk0CwJyHBfWvv8kKN_q3qunGILHLmPegO8'


def append_sheet_data(sheets_service, spreadsheet_id, values, range_name="raw_videos!A2",
                      max_attempts=3, retry_delay=5):
    """Append rows to a Google Sheet, retrying on Google API errors.

    Args:
        sheets_service: Sheets API client exposing
            ``spreadsheets().values().append(...).execute()``.
        spreadsheet_id: ID of the spreadsheet to write to.
        values: List of rows (each row a list of cell values).
        range_name: A1-notation range the append is anchored to.
        max_attempts: Total number of attempts before giving up (default 3).
        retry_delay: Seconds to wait between attempts (default 5).

    Returns:
        None.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        HttpError: Re-raised unchanged when the last attempt fails.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    body = {'values': values}
    for attempt in range(max_attempts):
        # Keep the try body to the API call only, so a problem while logging
        # the result is never mistaken for (or retried as) an API failure.
        try:
            result = sheets_service.spreadsheets().values().append(
                spreadsheetId=spreadsheet_id,
                range=range_name,
                valueInputOption='USER_ENTERED',
                insertDataOption='INSERT_ROWS',
                body=body
            ).execute()
        except HttpError as e:
            print(f"❌ [시도 {attempt+1}] Google API 오류 발생: {e}")
            if attempt >= max_attempts - 1:
                # All attempts used up: propagate with the original traceback.
                raise
            print(f"⏳ {retry_delay}초 후 재시도...")
            time.sleep(retry_delay)
            continue

        # The response may lack an 'updates' section; do not crash on the log line.
        updates = (result or {}).get('updates') or {}
        print(f"✅ Sheet append 완료: {updates.get('updatedCells')} 셀 업데이트됨.")
        return

def scroll_down(driver, scroll_pause_time=2, min_videos=220, max_scrolls=25):
    """Scroll the current page until enough Shorts tiles are loaded.

    After every scroll the Shorts tiles in the DOM are counted. Scrolling
    stops when the count reaches ``min_videos``, when the count has not
    changed on two consecutive checks, or after ``max_scrolls`` scrolls.

    Args:
        driver: Selenium WebDriver positioned on the page to scroll.
        scroll_pause_time: Seconds to wait after each scroll.
        min_videos: Tile count at which scrolling stops.
        max_scrolls: Upper bound on the number of scroll steps.
    """
    stalled_rounds = 0
    previous_total = 0
    performed = 0
    while performed < max_scrolls:
        driver.execute_script("window.scrollBy(0, 10000);")
        time.sleep(scroll_pause_time)

        total = len(driver.find_elements(By.XPATH, "//ytm-shorts-lockup-view-model-v2"))
        print(f"현재 영상 개수: {total}")

        # Track how many checks in a row produced no new tiles.
        stalled_rounds = stalled_rounds + 1 if total == previous_total else 0
        if stalled_rounds >= 2:
            print("2번 연속으로 영상 개수가 늘어나지 않았으므로 스크롤을 중단합니다.")
            return
        if total >= min_videos:
            return

        previous_total = total
        performed += 1

def parse_view_count(view_text):
    """Convert a Korean view-count label into an integer.

    The "조회수" prefix is removed, the first numeric token is read, and any
    directly following unit characters are applied as multipliers:
    천 = 1,000, 만 = 10,000, 억 = 100,000,000. Units combine multiplicatively,
    so "1천만" is 10,000,000.

    Args:
        view_text: Label text such as "조회수 1,234회" or "1.2만".

    Returns:
        The view count as an int, or 0 when no valid number is found.
    """
    from decimal import Decimal, InvalidOperation

    unit_multipliers = {"천": 1_000, "만": 10_000, "억": 100_000_000}

    text_cleaned = view_text.replace("조회수", "").strip()
    match = re.search(r'([\d,.]+)([억만천]*)', text_cleaned)
    if not match:
        return 0

    number_str = match.group(1).replace(',', '')
    try:
        # Decimal keeps "32.3" exact; with floats, 32.3 * 1000 evaluates to
        # 32299.999999999996 and int() would truncate it to 32299.
        number = Decimal(number_str)
    except InvalidOperation:
        return 0

    for unit_char in match.group(2):
        number *= unit_multipliers[unit_char]
    return int(number)

def parse_upload_date(date_text: str):
    """Parse a Korean upload-date label into a ``datetime``.

    Recognised forms, checked in this order:

    * "N시간 ...", "N분 ...", "N초 전": returns now minus one day.
    * "N일 전", "N주 전", "N개월 전", "N년 전": returns now minus that span
      (a month counts as 30 days, a year as 365 days).
    * "M월 D일", optionally preceded by "YYYY년": when the year is missing the
      current year is used, or the previous year if that date is in the future.
    * "YYYY.MM.DD", with optional whitespace after the dots and optional
      trailing dots (e.g. "2024. 5. 3.").

    Args:
        date_text: Raw label text.

    Returns:
        A naive ``datetime``, or None when the text cannot be interpreted.
    """
    date_text = date_text.strip()
    now = datetime.now()

    # Sub-day ages are all mapped to "one day ago".
    if re.search(r'\d+시간|\d+분|\d+초\s*전', date_text):
        return now - timedelta(days=1)

    if "전" in date_text:
        m = re.search(r'(\d+)\s*(일|주|개월|년)', date_text)
        if not m:
            return None
        days_per_unit = {"일": 1, "주": 7, "개월": 30, "년": 365}
        return now - timedelta(days=days_per_unit[m.group(2)] * int(m.group(1)))

    m = re.search(r'(?:(\d{4})년\s*)?(\d{1,2})월\s*(\d{1,2})일', date_text)
    if m:
        month = int(m.group(2))
        day = int(m.group(3))
        try:
            if m.group(1):
                # An explicit year is authoritative; do not guess.
                return datetime(int(m.group(1)), month, day)
            parsed_date = datetime(now.year, month, day)
            if parsed_date > now:
                parsed_date = datetime(now.year - 1, month, day)
            return parsed_date
        except ValueError:
            # Impossible calendar date; fall through to the dotted format.
            pass

    m = re.fullmatch(r'(\d{4})\.\s*(\d{1,2})\.\s*(\d{1,2})\.*', date_text)
    if m:
        try:
            return datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)))
        except ValueError:
            return None
    return None

def get_channel_name(html: str) -> str:
    """Return the channel name found in the channel page's header.

    Looks for the header ``<h1>`` and, inside it, the text ``<span>`` that
    carries the name.

    Args:
        html: Page source of the channel page.

    Returns:
        The stripped channel name, or "알 수 없음" when either element is missing.
    """
    fallback = "알 수 없음"
    document = BeautifulSoup(html, 'lxml')

    heading = document.find('h1', class_='dynamic-text-view-model-wiz__h1')
    if not heading:
        return fallback

    name_span = heading.find(
        'span',
        class_='yt-core-attributed-string yt-core-attributed-string--white-space-pre-wrap',
    )
    if not name_span:
        return fallback
    return name_span.get_text(strip=True)

def extract_subscriber_count(html: str):
    """Extract the subscriber count from a channel page.

    Scans the channel metadata spans for text starting with "구독자" and
    parses the number that precedes "명". Plain counts ("구독자 1,234명") and
    abbreviated counts with a 천 / 만 / 억 unit ("구독자 1.2만명") are both
    supported.

    Args:
        html: Page source of the channel page.

    Returns:
        The subscriber count as an int, or "" when no count could be parsed.
    """
    from decimal import Decimal, InvalidOperation

    unit_multipliers = {"": 1, "천": 1_000, "만": 10_000, "억": 100_000_000}

    soup = BeautifulSoup(html, 'lxml')
    span_tags = soup.find_all("span", class_="yt-core-attributed-string yt-content-metadata-view-model-wiz__metadata-text yt-core-attributed-string--white-space-pre-wrap yt-core-attributed-string--link-inherit-color")
    for span in span_tags:
        text = span.get_text(strip=True)
        if not text.startswith("구독자"):
            continue
        match = re.search(r'구독자\s*([\d,.]+)\s*([천만억]?)\s*명', text)
        if not match:
            continue
        try:
            # Decimal avoids float truncation when the unit multiplier is applied.
            count = Decimal(match.group(1).replace(',', ''))
        except InvalidOperation:
            # Malformed number (e.g. "1.2.3"); try the next span.
            continue
        return int(count * unit_multipliers[match.group(2)])
    return ""

def main_process_combined(html: str):
    """Pull view count, upload date and title out of a Shorts video page.

    Every ``role="text"`` span is inspected: spans mentioning "좋아요" are
    skipped, purely numeric spans are read as the view count, and spans that
    contain a date keyword are passed to ``parse_upload_date``. When several
    spans qualify, the last one wins.

    Args:
        html: Page source of the video page.

    Returns:
        Tuple ``(views, upload_date, title)``; each item is None when it was
        not found.
    """
    date_keywords = ("시간", "분", "월", "일", "전")
    document = BeautifulSoup(html, 'lxml')

    text_spans = document.find_all('span', attrs={
        'class': 'yt-core-attributed-string yt-core-attributed-string--white-space-pre-wrap',
        'role': 'text'
    })

    views = None
    uploaded = None
    for node in text_spans:
        label = node.get_text(strip=True)
        if "좋아요" in label:
            continue
        if re.match(r'^[\d,]+$', label):
            views = parse_view_count(label)
            continue
        if not any(kw in label for kw in date_keywords):
            continue
        candidate = parse_upload_date(label)
        if candidate:
            uploaded = candidate

    heading = document.find(
        'span',
        attrs={'class': 'yt-core-attributed-string yt-core-attributed-string--white-space-pre-wrap yt-core-attributed-string--link-inherit-color'},
    )
    if heading:
        title = heading.get_text(separator=" ", strip=True)
    else:
        title = None

    return views, uploaded, title

def _close_extra_tabs(driver, main_handle):
    """Close every tab except ``main_handle`` and leave the driver focused on it."""
    for handle in driver.window_handles:
        if handle != main_handle:
            driver.switch_to.window(handle)
            driver.close()
    driver.switch_to.window(main_handle)


def get_shorts_monthly_views(driver, channel_url):
    """Scrape every Short listed on a channel's Shorts tab and append the rows to the sheet.

    The channel's ``/shorts`` page is opened and scrolled, then each video is
    opened in a new tab to read its view count, upload date and title. Rows
    are sorted by upload date and appended to the ``raw_videos`` sheet through
    ``append_sheet_data``.

    Args:
        driver: Selenium WebDriver used for all navigation.
        channel_url: Channel URL; a missing scheme is filled in with
            ``https://`` and a trailing ``/featured`` is dropped.

    Returns:
        List of ``(upload_datetime, row)`` tuples sorted by upload date, where
        ``row`` is the list written to the sheet. Empty when the page is a 404
        or no videos were found.
    """
    if not re.match(r'^https?://', channel_url):
        channel_url = "https://" + channel_url
    if "/shorts" in channel_url:
        shorts_url = channel_url
    else:
        shorts_url = re.sub(r'/featured$', '', channel_url.rstrip("/")) + "/shorts"

    driver.get(shorts_url)
    time.sleep(10)

    if "404" in driver.title:
        print(f"{shorts_url} : 404 Not Found - 스킵합니다.")
        return []

    channel_html = driver.page_source
    channel_name = get_channel_name(channel_html)
    subscriber_count = extract_subscriber_count(channel_html)

    driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    scroll_down(driver)

    video_elements = driver.find_elements(By.XPATH, "//ytm-shorts-lockup-view-model-v2")
    if not video_elements:
        print(f"{channel_name} : 동영상이 하나도 없습니다. 스킵합니다.")
        return []

    video_data_rows = []
    accumulation_str = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
    today_str = datetime.now().strftime("%Y-%m-%d")
    print(f"{channel_name} 채널의 Shorts 탭에서 {len(video_elements)}개의 영상이 발견되었습니다.")

    # Remember the channel tab so cleanup can never close it by mistake.
    main_handle = driver.current_window_handle

    for idx, video in enumerate(video_elements):
        try:
            link_element = video.find_element(By.TAG_NAME, "a")
            video_link = link_element.get_attribute("href")
            driver.execute_script("window.open(arguments[0]);", video_link)

            # Pick the tab that is not the channel tab rather than trusting
            # handle order; an IndexError here means no tab was opened and is
            # reported by the handler below.
            new_handles = [h for h in driver.window_handles if h != main_handle]
            driver.switch_to.window(new_handles[-1])
            time.sleep(10)

            html = driver.page_source
            found_views, found_date, title = main_process_combined(html)

            if found_views is None or found_date is None:
                print(f"영상 {idx+1}: 조회수/업로드일을 찾지 못했습니다. (조회수={found_views}, 업로드일={found_date})")
                continue

            upload_str = found_date.strftime("%Y-%m-%d")
            print(f"영상 {idx+1}: 채널명: {channel_name}, 제목: {title}, 업로드일: {upload_str}, 조회수: {found_views}, 구독자: {subscriber_count}")

            video_data_rows.append((found_date, [accumulation_str, today_str, channel_name, title, video_link, upload_str, found_views, subscriber_count]))
        except Exception as e:
            # Per-video boundary: report and move on to the next video.
            print(f"영상 {idx+1} 처리 중 오류 발생: {e}")
            continue
        finally:
            # Runs on success, skip and error alike: close only the video
            # tab(s) and return focus to the channel tab.
            _close_extra_tabs(driver, main_handle)

        # Short pause between successfully processed videos.
        time.sleep(2)

    if video_data_rows:
        video_data_rows.sort(key=lambda x: x[0])
        sorted_rows = [row[1] for row in video_data_rows]
        append_sheet_data(sheets_service, SPREADSHEET_ID, sorted_rows, range_name="raw_videos!A2")

    return video_data_rows

def main(channel_urls):
    """Scrape Shorts data for each channel using one headless Chrome session.

    Args:
        channel_urls: Iterable of channel URLs to process in order.

    The browser is always shut down, even when processing a channel raises.
    """
    options = Options()
    options.add_argument("--headless")                # run without a visible window
    options.add_argument("--no-sandbox")              # avoid permission problems under schedulers
    options.add_argument("--disable-dev-shm-usage")   # avoid shared-memory issues
    options.add_argument("--disable-gpu")             # no GPU (headless stability)
    options.add_argument("--window-size=1920,1080")   # large enough viewport
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
    )
    s = Service(r'C:\Users\이근행\Desktop\파이썬 코드\chromedriver-win64\chromedriver.exe')
    driver = webdriver.Chrome(service=s, options=options)

    try:
        for url in channel_urls:
            print(f"\n채널 처리 중: {url}")
            sorted_rows = get_shorts_monthly_views(driver, url)
            if sorted_rows:
                print("\n추가된 데이터:")
                for row in sorted_rows:
                    print(row)
    finally:
        # Without this, an exception would leave the Chrome process running.
        driver.quit()

if __name__ == "__main__":
    # Channels to scrape, in processing order.
    channels = [
        'http://www.youtube.com/channel/UCuO9qb9PdlO_WFWd_wrTBkg',
        'http://www.youtube.com/channel/UC1lhiz5lHfCw2rLZWqwhxnw',
        'http://www.youtube.com/channel/UCgeTWh3tYz3LwkhCzbUsVzQ',
        'http://www.youtube.com/channel/UC4LrX_37ZWrNcMS2NtXgKrA',
        'http://www.youtube.com/channel/UCO8gltLqmlaN3a6M9i55OLA',
        'http://www.youtube.com/channel/UCrRSAxplRF1_3MjoMC9_W9Q',
        'http://www.youtube.com/channel/UCjHC_N682cOiH3GZ4AjeCDQ',
        'http://www.youtube.com/channel/UCvBvw9pYAP5tB2quJ-BiF_A',
        'http://www.youtube.com/channel/UC31ZzQkagJzsSHg2a0MXLeA',
        'http://www.youtube.com/channel/UCi1Z70ZED1eCl8dJB9E4JTw',
        'http://www.youtube.com/channel/UClL7bo4m9EmOGgSyn2oGfvA',
        'http://www.youtube.com/channel/UC2_aGeWBT47euc6w5MCF2lw',
        'http://www.youtube.com/channel/UCYwLHEPzSNs8v1Ko4hJonAg',
        'http://www.youtube.com/channel/UCeUlwIfwei6U3VnTsUxNC9w',
        'http://www.youtube.com/channel/UCtcHIOm9SK89MJpRZqIEEVA',
        'http://www.youtube.com/channel/UC8nUDc4Fc4VvZkRP1oePY-w',
        'http://www.youtube.com/channel/UCCi4CqLzemXkZ-6fQC3A3ag',
        'http://www.youtube.com/channel/UCbnz_dXMThEAaq5lz6q6IGA',
        'http://www.youtube.com/channel/UCKIBIgAKYheiaaC7hVHJzrQ',
        'http://www.youtube.com/channel/UCp8p0NTwUjxU5G3ZqEn2FQg',
        'http://www.youtube.com/channel/UCkavWA4JMTdOKfLr1Qilx8g',
        'http://www.youtube.com/channel/UC2hfZKwDwWmhuqJbjKEh3aQ',
        'http://www.youtube.com/channel/UCcN0eBs98KRjSoqBaPLq1jw',
        'http://www.youtube.com/channel/UCFS3tu-34eounHIcU9Hu_xg',
        'http://www.youtube.com/channel/UCYbqe2OyzTyPn8-vtUCJM6Q',
        'http://www.youtube.com/channel/UC4Lv-mrKtNwv1YNZUpw1ftw',
        'http://www.youtube.com/channel/UCfnmh_tZqEA095970xoPhUQ',
        'http://www.youtube.com/channel/UCpyYMUbVskUHlOrPrGfT4Rw',
        'http://www.youtube.com/channel/UC1TQrg-nROTEyRFewdoORyQ',
        'http://www.youtube.com/channel/UC_hbyIBAKfTNXbQU8p5YHzg',
        'http://www.youtube.com/channel/UCZ5e3Gd0BTbOLIfTWlcbReA',
        'http://www.youtube.com/channel/UCAA_OMo9r0b-iUs3VrPuwPw',
        'http://www.youtube.com/channel/UCHisNUl3gc00Ywy-o0ttypw',
        'http://www.youtube.com/channel/UCbnz_dXMThEAaq5lz6q6IGA',
    ]

    # The list contains a repeated channel (UCbnz_dXMThEAaq5lz6q6IGA); drop
    # repeats while keeping first-seen order so no channel is scraped and
    # appended to the sheet twice in one run.
    main(list(dict.fromkeys(channels)))