In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time, pandas as pd, os, pickle

# Account credentials.
# Read from the TWITTER_ID / TWITTER_PW environment variables so real secrets do not
# have to be written into this file; the placeholder literals below are used only
# when the corresponding variable is not set.
TWITTER_ID = os.environ.get("TWITTER_ID", "여기다가 너 x 아이디")
TWITTER_PW = os.environ.get("TWITTER_PW", "여기다가 너 X 비번")

# File in which the session cookies are pickled between runs.
COOKIE_PATH = "twitter_cookies.pkl"

# Chrome configuration
options = Options()
options.add_argument("--headless")  # run without opening a browser window
options.add_argument("--disable-gpu")
# Chrome documents the --window-size value as "width,height" (comma separated);
# the previous "1920x1080" spelling does not match that format.
options.add_argument("--window-size=1920,1080")
# webdriver_manager downloads (or reuses) a chromedriver binary and returns its path.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

def save_cookies(driver, path):
    """Pickle the cookies of the current browser session into the file at *path*.

    Args:
        driver: Object exposing ``get_cookies()`` (a Selenium WebDriver).
        path: Destination file; it is created or overwritten.
    """
    with open(path, 'wb') as cookie_file:
        serialized = pickle.dumps(driver.get_cookies())
        cookie_file.write(serialized)

def load_cookies(driver, path):
    """Feed the cookies pickled at *path* into the given browser session.

    Args:
        driver: Object exposing ``add_cookie(cookie_dict)`` (a Selenium WebDriver).
        path: Pickle file containing an iterable of cookie dicts.

    NOTE: unpickling can execute arbitrary code, so only load cookie files that
    this script wrote itself.
    """
    with open(path, 'rb') as cookie_file:
        stored_cookies = pickle.load(cookie_file)

    for entry in stored_cookies:
        # Strip the 'sameSite' option when present (it can clash depending on the selenium version).
        entry.pop('sameSite', None)
        driver.add_cookie(entry)

def is_logged_in(driver):
    """Report whether the browser session appears to be authenticated.

    Navigates to the home timeline URL, waits three seconds, then looks for both
    a link pointing at "/home" and the element labelled "Account menu".

    Args:
        driver: Selenium WebDriver to inspect. It is navigated away from whatever
            page it was showing.

    Returns:
        True when both elements are found, False when either lookup raises.
    """
    driver.get("https://twitter.com/home")
    time.sleep(3)
    try:
        # Both the home link and the account menu must be present.
        driver.find_element(By.XPATH, '//a[@href="/home"]')
        driver.find_element(By.XPATH, '//div[@aria-label="Account menu"]')
    except Exception:
        # Any lookup failure counts as "not logged in". Catching Exception instead of
        # using a bare except lets KeyboardInterrupt and SystemExit propagate.
        return False
    return True

def login_and_save_cookies():
    """Log in through the web form and persist the resulting session cookies.

    Uses the module-level ``driver``, ``TWITTER_ID``, ``TWITTER_PW`` and
    ``COOKIE_PATH``. On success the cookies are written via ``save_cookies`` and
    the function returns normally.

    Raises:
        SystemExit: with status 1, after quitting the driver, when the form cannot
            be filled in, the login check fails, or saving the cookies raises.
    """
    driver.get("https://twitter.com/login")
    time.sleep(3)

    try:
        # Enter the account ID
        id_input = driver.find_element(By.NAME, "text")
        id_input.send_keys(TWITTER_ID)
        id_input.send_keys(Keys.RETURN)
        time.sleep(2)

        # Enter the password
        pw_input = driver.find_element(By.NAME, "password")
        pw_input.send_keys(TWITTER_PW)
        pw_input.send_keys(Keys.RETURN)
        time.sleep(5)

        if is_logged_in(driver):
            print("✅ 로그인 성공")
            save_cookies(driver, COOKIE_PATH)
            print("✅ 쿠키 저장 완료")
            return

        print("❌ 로그인 실패: 로그인 후 프로필을 찾을 수 없음")
    except Exception as e:
        print("❌ 로그인 실패:", e)

    # Shared failure path: release the browser, then stop the program.
    # SystemExit is raised directly instead of calling exit(): exit() is an
    # interactive helper installed by the site module and is not intended for use
    # in programs. The non-zero status marks the run as failed.
    driver.quit()
    raise SystemExit(1)

# Reuse the saved cookies when the cookie file exists; otherwise perform a fresh login.
# The site is opened first so the browser is on a page before cookies are added.
driver.get("https://twitter.com")
time.sleep(3)

if not os.path.exists(COOKIE_PATH):
    login_and_save_cookies()
else:
    try:
        load_cookies(driver, COOKIE_PATH)
        driver.refresh()
        time.sleep(3)

        if is_logged_in(driver):
            print("✅ 쿠키로 로그인 성공")
        else:
            # Stored cookies were rejected: fall back to the login form.
            print("⚠️ 쿠키로 로그인 실패, 새로 로그인 시도")
            login_and_save_cookies()
    except Exception as cookie_error:
        # Loading or applying the cookies failed: fall back to the login form.
        print("⚠️ 쿠키 불러오기 실패, 새로 로그인 시도:", cookie_error)
        login_and_save_cookies()

# Crawl profiles using the logged-in session established above.

user_ids = [17140361]  # example

results = []


def _element_value(xpath, attribute=None):
    """Return the text (or *attribute*) of the first element matching *xpath*.

    Any failure during the lookup yields "" so that one missing field does not
    abort the whole profile. Uses the module-level ``driver``.
    """
    try:
        element = driver.find_element(By.XPATH, xpath)
        if attribute is None:
            return element.text
        return element.get_attribute(attribute)
    except Exception:
        # Exception (not a bare except) so Ctrl-C still interrupts the crawl.
        return ""


try:
    for user_id in user_ids:
        try:
            driver.get(f"https://twitter.com/i/user/{user_id}")
            time.sleep(3)

            # The handle is taken from the last path segment of the URL the browser
            # ended up on. Any query string and trailing slash are dropped first so
            # they do not end up in (or replace) the handle.
            current_url = driver.current_url
            username = current_url.split("?")[0].rstrip("/").split("/")[-1]

            # Basic profile fields
            display_name = _element_value('//div[@data-testid="UserName"]//span[1]')
            bio = _element_value('//div[@data-testid="UserDescription"]')
            profile_pic = _element_value('//img[@alt="Image"]', attribute="src")
            followers = _element_value('//a[contains(@href,"/followers")]//span[1]/span')
            following = _element_value('//a[contains(@href,"/following")]//span[1]/span')
            posts = _element_value('//a[contains(@href,"/with_replies")]//span[1]/span')

            # Derived features
            nums_in_username = sum(c.isdigit() for c in username)
            nums_in_fullname = sum(c.isdigit() for c in display_name)
            external_url = "http" in bio
            # NOTE(review): depends on this exact English phrase being present in the
            # page HTML - confirm it matches what the site currently renders.
            is_private = "This account is private" in driver.page_source

            results.append({
                "user_id": user_id,
                "username": username,
                "display_name": display_name,
                "profile_pic": profile_pic,
                "fullname words": len(display_name.split()),
                "nums/len username": f"{nums_in_username}/{len(username)}",
                "nums/len fullname": f"{nums_in_fullname}/{len(display_name)}",
                "name==username": display_name.lower() == username.lower(),
                "description length": len(bio),
                "external URL": external_url,
                "private": is_private,
                "#posts": posts,
                "#followers": followers,
                "#follows": following,
            })

            print(f"✅ @{username} 크롤링 완료")

        except Exception as e:
            # One failing profile is reported and skipped; the crawl continues.
            print(f"❌ {user_id} 실패: {e}")
finally:
    # Always release the browser, even when the crawl is interrupted.
    driver.quit()

# utf-8-sig writes a BOM at the start of the CSV file.
df = pd.DataFrame(results)
df.to_csv("x_user_data.csv", index=False, encoding='utf-8-sig')
print("📦 저장 완료: x_user_data.csv")


✅ 로그인 성공
✅ @CourtneyBaker 크롤링 완료
📦 저장 완료: x_user_data.csv
