In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

import pandas as pd
import time
# CSV 파일 불러오기
df = pd.read_csv("sampled_label.csv")  # 기존 데이터 파일 (id, label)
df["id"] = df["id"].astype(str).str.replace("^u", "", regex=True)  # "u" 제거

In [5]:
df.head()

Unnamed: 0,id,label
0,17140361,human
1,1165940942106312704,human
2,133482732,human
3,284933167,human
4,757907959989080064,human


In [8]:

# 🔹 Selenium 설정 (Chrome Headless 모드)
options = Options()
options.add_argument("--headless")  # 브라우저 창 없이 실행
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")

test_id = 17140361
# 🔹 Chrome 드라이버 실행
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

user_ids = df["id"].tolist()

# 🔹 결과 저장
id_to_username = {}

for user_id in test_id:
    try:
        url = f"https://twitter.com/i/user/{user_id}"
        driver.get(url)
        time.sleep(3)  # 페이지 로딩 대기

        # 🔹 리디렉션된 URL에서 @username 추출
        redirected_url = driver.current_url  # 최종 URL (예: https://twitter.com/elonmusk)
        username = redirected_url.split("/")[-1]  # 마지막 부분이 @username

        id_to_username[user_id] = username
        print(f"✅ {user_id} -> @{username}")

    except Exception as e:
        print(f"❌ {user_id} 크롤링 실패: {e}")
        id_to_username[user_id] = None

# 🔹 Selenium 종료
driver.quit()

# 🔹 결과를 CSV 파일로 저장
df["username"] = df["id"].map(id_to_username)  # ID 매핑
df.to_csv("twitter_usernames.csv", index=False)

print("🚀 크롤링 완료! 'twitter_usernames.csv' 파일 저장됨.")

✅ 17140361 -> @login?redirect_after_login=%2FCourtneyBaker
✅ 1165940942106312704 -> @404
✅ 133482732 -> @login?redirect_after_login=%2FDAVET800
✅ 284933167 -> @JaredTNelson
✅ 757907959989080064 -> @login?redirect_after_login=%2FNeuroRune
✅ 41555830 -> @SNCmusic
✅ 6382502 -> @login?redirect_after_login=%2Fmseckington
✅ 1437714819189182468 -> @m_musterman12
✅ 943389062 -> @login?redirect_after_login=%2FLevel39CW
✅ 705785298094047232 -> @login?redirect_after_login=%2FRogueEconomist1
✅ 1419696140413554688 -> @login?redirect_after_login=%2FJHayestetra
✅ 1040081969047060480 -> @login?redirect_after_login=%2FRenay48937
✅ 91127416 -> @katlynnmil
✅ 1441295237788278787 -> @SamsungDSGlobal
✅ 1485253775392710664 -> @login?redirect_after_login=%2FENDRICKCHRISTO1
✅ 2233223270 -> @login?redirect_after_login=%2FmyAccessHub
✅ 2981885181 -> @Zeke_Cohen
✅ 764449471 -> @login?redirect_after_login=%2FONAChicago
✅ 3424712931 -> @login?redirect_after_login=%2FMamaetMoi15
✅ 27278391 -> @login?redirect_after_l

KeyboardInterrupt: 

In [10]:
pip install -U pybind11

Collecting pybind11
  Downloading pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB)
Downloading pybind11-2.13.6-py3-none-any.whl (243 kB)
   ---------------------------------------- 0.0/243.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/243.3 kB ? eta -:--:--
   - -------------------------------------- 10.2/243.3 kB ? eta -:--:--
   ---- ---------------------------------- 30.7/243.3 kB 660.6 kB/s eta 0:00:01
   ------ -------------------------------- 41.0/243.3 kB 487.6 kB/s eta 0:00:01
   ----------------------- ---------------- 143.4/243.3 kB 1.2 MB/s eta 0:00:01
   ---------------------------------------- 243.3/243.3 kB 1.4 MB/s eta 0:00:00
Installing collected packages: pybind11
Successfully installed pybind11-2.13.6
Note: you may need to restart the kernel to use updated packages.


In [12]:
pip install -U scikit-learn

Note: you may need to restart the kernel to use updated packages.
