In [None]:
import csv  # CSV 파일 저장을 위한 모듈
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time, random, os
from dotenv import load_dotenv
from fake_useragent import UserAgent
from datetime import datetime, timedelta

# Shared User-Agent generator; create_driver() reads `ua.random` from it
# each time it builds a browser.
ua = UserAgent()

def create_driver(proxy=None):
    """Create and configure a Chrome Selenium WebDriver.

    Parameters:
        proxy (str | None): Optional proxy passed to Chrome's
            ``--proxy-server`` switch (e.g. ``"host:port"`` or
            ``"socks5://host:port"``). When None, no proxy is configured.

    Returns:
        The ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    options = webdriver.ChromeOptions()
    # Disable the Blink "AutomationControlled" feature flag.
    options.add_argument("--disable-blink-features=AutomationControlled")
    # Use a random User-Agent string for this browser session.
    options.add_argument(f"user-agent={ua.random}")

    # The parameter used to be accepted but ignored; honour it when provided.
    if proxy:
        options.add_argument(f"--proxy-server={proxy}")

    return webdriver.Chrome(options=options)

def _load_credentials():
    """Read the X account credentials from the environment (.env is loaded first)."""
    load_dotenv()
    email_or_phone = os.getenv('MY_EMAIL_OR_PHONE')
    username = os.getenv('MY_USERNAME')
    password = os.getenv('MY_PASSWORD')

    # Fail early with a clear message instead of an obscure error from
    # send_keys(None) after the browser has already been launched.
    if not email_or_phone or not password:
        raise RuntimeError(
            "MY_EMAIL_OR_PHONE and MY_PASSWORD must be set in the environment or .env file"
        )
    return email_or_phone, username, password


def _build_search_url(keyword, start_date, end_date):
    """Build the X search URL for one keyword and one [start_date, end_date) window."""
    from urllib.parse import quote

    # Percent-encode the raw keyword so spaces, '#', '&', etc. cannot break
    # or alter the query string.
    encoded_keyword = quote(keyword, safe="")
    return (
        f"https://x.com/search?q={encoded_keyword}%20(bullish%20OR%20bearish%20OR%20volatile)%20"
        f"-%23ad%20-%23sponsored%20-FUD%20-NGMI%20-free%20-giveaway%20-hack%20-joke%20-meme%20"
        f"-promo%20-retweet%20-scam%20-spam%20-win%20lang%3Aen%20until%3A{end_date}%20since%3A{start_date}%20"
        f"-filter%3Alinks%20-filter%3Areplies&src=typed_query"
    )


def _login_to_x(driver, wait, email_or_phone, username, password):
    """Drive the X login flow: identifier, optional username check, then password."""
    from selenium.common.exceptions import WebDriverException

    driver.get('https://x.com/i/flow/login')

    # Step 1 - e-mail / phone input.
    email_input = wait.until(EC.presence_of_element_located((By.NAME, 'text')))
    email_input.send_keys(email_or_phone)
    email_input.send_keys(Keys.ENTER)
    time.sleep(3)

    # Optional step - a username confirmation prompt may reuse the 'text' field.
    # This stays best-effort, but only Selenium-level errors are ignored, so
    # KeyboardInterrupt and programming errors are no longer swallowed.
    try:
        username_input = driver.find_element(By.NAME, 'text')
        if username is not None:
            username_input.send_keys(username)
            username_input.send_keys(Keys.ENTER)
            time.sleep(3)
    except WebDriverException:
        pass  # No confirmation prompt was shown (or it vanished); continue.

    # Step 2 - password input.
    password_input = wait.until(EC.presence_of_element_located((By.NAME, 'password')))
    password_input.send_keys(password)
    password_input.send_keys(Keys.ENTER)
    time.sleep(5)


def scrape_tweets(keyword, start_date, end_date, max_tweets=50):
    """
    Log in to X and scrape tweets matching a keyword within a date window.

    Parameters:
        keyword (str): Raw (not pre-encoded) search keyword.
        start_date (str): Start date, format 'YYYY-MM-DD' (used for ``since:``).
        end_date (str): End date, format 'YYYY-MM-DD' (used for ``until:``).
        max_tweets (int): Maximum number of tweets to collect for this window.

    Returns:
        tweets (list): Dicts of the form ``{"Date": start_date, "Tweet": text}``,
        de-duplicated by tweet text, in the order they were found.

    Raises:
        RuntimeError: If the login credentials are missing from the environment.
        Selenium exceptions raised during login or while opening the search
        page propagate to the caller. Errors raised while scrolling/collecting
        are printed and the tweets gathered so far are returned. The browser
        is closed in every case.
    """
    email_or_phone, username, password = _load_credentials()
    url = _build_search_url(keyword, start_date, end_date)

    tweets = []
    seen_texts = set()  # O(1) duplicate check instead of rebuilding a list per element

    driver = create_driver()
    try:
        # Login is inside the try/finally so a timeout here no longer leaks
        # the Chrome process.
        wait = WebDriverWait(driver, 10)
        _login_to_x(driver, wait, email_or_phone, username, password)

        driver.get(url)

        try:
            last_height = driver.execute_script("return document.body.scrollHeight")

            while len(tweets) < max_tweets:
                elements = driver.find_elements(By.XPATH, '//div[@data-testid="tweetText"]')
                for element in elements:
                    tweet_text = element.text.strip()
                    if tweet_text and tweet_text not in seen_texts:
                        seen_texts.add(tweet_text)
                        tweets.append({"Date": start_date, "Tweet": tweet_text})
                        print(f"[{len(tweets)}] 트윗 저장: {tweet_text}")

                    if len(tweets) >= max_tweets:
                        break

                # Quota reached: skip the pointless extra scroll and 10-15 s wait.
                if len(tweets) >= max_tweets:
                    break

                # Scroll to the bottom to trigger loading of more results.
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(random.uniform(10, 15))  # randomized pause between scrolls

                new_height = driver.execute_script("return document.body.scrollHeight")
                if new_height == last_height:
                    # Page height did not grow, so nothing new was loaded.
                    print(f"{start_date} ~ {end_date} 스크롤 완료: 더 이상 새로운 데이터가 없습니다.")
                    break
                last_height = new_height

        except Exception as e:
            # Keep whatever was collected before the failure.
            print(f"Error during scraping: {e}")

    finally:
        driver.quit()

    return tweets

# Script entry point: scrape the last 14 days one day at a time and save to CSV.
if __name__ == "__main__":
    keyword = 'bitcoin'
    output_path = "tweets_with_dates.csv"

    # Window covers the two weeks ending today.
    current_date = datetime.today()
    start_date = current_date - timedelta(days=14)

    # Day boundaries, inclusive of today: N day windows need N + 1 boundaries.
    # The previous list stopped one boundary short, so the most recent day
    # was never scraped.
    day_count = (current_date - start_date).days
    all_dates = [
        (start_date + timedelta(days=offset)).strftime('%Y-%m-%d')
        for offset in range(day_count + 1)
    ]

    all_tweets = []  # accumulated tweets across all days

    # Each consecutive pair of boundaries is one [since, until) day window.
    for day_start, day_end in zip(all_dates, all_dates[1:]):
        print(f'{day_start} 트윗을 수집 중...')

        try:
            # At most 100 tweets per day.
            tweets = scrape_tweets(keyword, day_start, day_end, max_tweets=100)
        except Exception as e:
            # A failure on one day (e.g. a login timeout) must not discard
            # the days already collected; report it and move on.
            print(f"{day_start} 데이터 수집 실패: {e}")
            continue

        all_tweets.extend(tweets)
        print(f"{day_start} 데이터 수집 완료.")

    print(f"총 {len(all_tweets)}개의 트윗을 수집했습니다.")

    # Write everything to CSV in one go.
    with open(output_path, mode="w", newline="", encoding="utf-8") as file:
        fieldnames = ["Date", "Tweet"]
        writer = csv.DictWriter(file, fieldnames=fieldnames)

        writer.writeheader()
        writer.writerows(all_tweets)

    # Report the file that was actually written (the old message named a different file).
    print(f"트윗 데이터가 '{output_path}' 파일에 저장되었습니다!")

2025-03-07 트윗을 수집 중...
[1] 트윗 저장: Portfolio : -80%
Mental health: unstable 
Relationship : AI girlfriend

Me when Bitcoin rebounce +8% :
[2] 트윗 저장: Be bullish when everyone else is gripped by fear. 

#Bitcoin
[3] 트윗 저장: i am bullish but thats because i am always bullish
[4] 트윗 저장: ive never ever been more bullish on #bitcoin and #crypto than i am right now. and price will eventually reflect that. easiest investment hold of my life. lets get rich, buy big dips.

$btc $sol $xrp $eth $ada
[5] 트윗 저장: I’m sickeningly bullish on #bitcoin and America right now.
[6] 트윗 저장: Metaplanet is making bold moves by acquiring more Bitcoin, adding 497 BTC to its already impressive stash. This aggressive accumulation strategy reflects a significant bullish sentiment on Bitcoin's future value, which aligns with the growing optimism within the community
[7] 트윗 저장: The Biggest Bitcoin Investment Mistakes (And How to Avoid Them) 

1. Panic Selling 

Bitcoin is volatile—always has been, always will be. If you

TimeoutException: Message: 
Stacktrace:
0   chromedriver                        0x0000000104829804 cxxbridge1$str$ptr + 2785964
1   chromedriver                        0x0000000104821ddc cxxbridge1$str$ptr + 2754692
2   chromedriver                        0x0000000104375ea8 cxxbridge1$string$len + 92928
3   chromedriver                        0x00000001043bd1d0 cxxbridge1$string$len + 384552
4   chromedriver                        0x00000001043fe678 cxxbridge1$string$len + 651984
5   chromedriver                        0x00000001043b135c cxxbridge1$string$len + 335796
6   chromedriver                        0x00000001047eecd4 cxxbridge1$str$ptr + 2545532
7   chromedriver                        0x00000001047f1fa0 cxxbridge1$str$ptr + 2558536
8   chromedriver                        0x00000001047ced04 cxxbridge1$str$ptr + 2414508
9   chromedriver                        0x00000001047f2800 cxxbridge1$str$ptr + 2560680
10  chromedriver                        0x00000001047bfba0 cxxbridge1$str$ptr + 2352712
11  chromedriver                        0x000000010481245c cxxbridge1$str$ptr + 2690820
12  chromedriver                        0x00000001048125e4 cxxbridge1$str$ptr + 2691212
13  chromedriver                        0x0000000104821a50 cxxbridge1$str$ptr + 2753784
14  libsystem_pthread.dylib             0x00000001865fa06c _pthread_start + 148
15  libsystem_pthread.dylib             0x00000001865f4e2c thread_start + 8
