In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import os

class YahooFinanceCrawler:
    
    def __init__(self, headless=False):
        options = webdriver.ChromeOptions()
        if headless:
            options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument('--disable-blink-features=AutomationControlled')
        options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
        
        self.driver = webdriver.Chrome(options=options)
        self.wait = WebDriverWait(self.driver, 20)
    
    def crawl_stock_history(self, ticker, max_scroll=20):
        results = []
        
        try:
            url = f"https://finance.yahoo.com/quote/{ticker}/history/"
            self.driver.get(url)
            time.sleep(3)
            
            # 팝업 닫기
            try:
                got_it_btn = self.driver.find_element(By.XPATH, "//button[contains(text(), 'Got it')]")
                got_it_btn.click()
                time.sleep(1)
            except:
                pass
            
            # 날짜 범위 Max 설정
            try:
                date_button = self.wait.until(
                    EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'Dec 12, 2024')]"))
                )
                self.driver.execute_script("arguments[0].click();", date_button)
                time.sleep(1)
                
                max_option = self.driver.find_element(By.XPATH, "//button[contains(text(), 'Max')]")
                self.driver.execute_script("arguments[0].click();", max_option)
                time.sleep(2)
            except:
                pass
            
            # 동적 스크롤
            last_date = None
            no_change_count = 0
            
            for scroll_num in range(max_scroll):
                try:
                    rows = self.driver.find_elements(By.CSS_SELECTOR, "table tbody tr")
                    if rows:
                        last_row = rows[-1]
                        current_last_date = last_row.find_elements(By.TAG_NAME, "td")[0].text
                        
                        if current_last_date == last_date:
                            no_change_count += 1
                            if no_change_count >= 3:
                                break
                        else:
                            no_change_count = 0
                            last_date = current_last_date
                except:
                    pass
                
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(1.5)
            
            # 데이터 파싱
            rows = self.driver.find_elements(By.CSS_SELECTOR, "table tbody tr")
            
            for row in rows:
                try:
                    cols = row.find_elements(By.TAG_NAME, "td")
                    
                    if len(cols) >= 7:
                        date_text = cols[0].text.strip()
                        open_price = cols[1].text.strip().replace(',', '')
                        high = cols[2].text.strip().replace(',', '')
                        low = cols[3].text.strip().replace(',', '')
                        close = cols[4].text.strip().replace(',', '')
                        adj_close = cols[5].text.strip().replace(',', '')
                        volume = cols[6].text.strip().replace(',', '')
                        
                        if date_text and open_price and open_price != '-' and 'Dividend' not in open_price:
                            results.append({
                                "Date": date_text,
                                "Open": float(open_price),
                                "High": float(high),
                                "Low": float(low),
                                "Close": float(close),
                                "Adj Close": float(adj_close),
                                "Volume": int(volume) if volume != '-' else 0
                            })
                except:
                    continue
        
        except:
            pass
        
        return pd.DataFrame(results)
    
    def crawl_multiple_stocks(self, stock_dict, save_dir="data/"):
        os.makedirs(save_dir, exist_ok=True)
        all_data = {}
        
        for name, ticker in stock_dict.items():
            df = self.crawl_stock_history(ticker, max_scroll=20)
            
            if not df.empty:
                df['Date'] = pd.to_datetime(df['Date'])
                df = df.sort_values('Date').reset_index(drop=True)
                
                filename = f"{save_dir}{name}_{ticker}.csv"
                df.to_csv(filename, index=False, encoding='utf-8-sig')
                
                all_data[name] = df
            
            time.sleep(3)
        
        return all_data
    
    def close(self):
        self.driver.quit()


if __name__ == "__main__":
    AI_STOCKS = {
        "NVIDIA": "NVDA",
        "Microsoft": "MSFT",
        "Google": "GOOGL",
        "Meta": "META",
        "Amazon": "AMZN",
        "AMD": "AMD",
        "Tesla": "TSLA",
        "Intel": "INTC"
    }
    
    crawler = YahooFinanceCrawler(headless=False)
    
    try:
        stock_data = crawler.crawl_multiple_stocks(AI_STOCKS, save_dir="ai_stock_data/")
        
        print("="*70)
        print("수집 결과 요약")
        print("="*70)
        
        for name, df in stock_data.items():
            start_date = df['Date'].min().strftime('%Y-%m-%d')
            end_date = df['Date'].max().strftime('%Y-%m-%d')
            start_price = df.iloc[0]['Close']
            end_price = df.iloc[-1]['Close']
            growth = ((end_price - start_price) / start_price) * 100
            
            print(f"\n[{name}]")
            print(f"  기간: {start_date} ~ {end_date}")
            print(f"  데이터: {len(df)}일")
            print(f"  시작가: ${start_price:,.2f}")
            print(f"  종가: ${end_price:,.2f}")
            print(f"  성장률: {growth:+.2f}%")
        
    finally:
        crawler.close()


수집 결과 요약

[NVIDIA]
  기간: 2024-12-13 ~ 2025-12-12
  데이터: 250일
  시작가: $134.25
  종가: $175.02
  성장률: +30.37%

[Microsoft]
  기간: 2024-12-13 ~ 2025-12-12
  데이터: 250일
  시작가: $447.27
  종가: $478.53
  성장률: +6.99%

[Google]
  기간: 2024-12-13 ~ 2025-12-12
  데이터: 250일
  시작가: $189.82
  종가: $309.29
  성장률: +62.94%

[Meta]
  기간: 2024-12-13 ~ 2025-12-12
  데이터: 250일
  시작가: $620.35
  종가: $644.23
  성장률: +3.85%

[Amazon]
  기간: 2024-12-13 ~ 2025-12-12
  데이터: 250일
  시작가: $227.46
  종가: $226.19
  성장률: -0.56%

[AMD]
  기간: 2024-12-13 ~ 2025-12-12
  데이터: 250일
  시작가: $126.91
  종가: $210.78
  성장률: +66.09%

[Tesla]
  기간: 2024-12-13 ~ 2025-12-12
  데이터: 250일
  시작가: $436.23
  종가: $458.96
  성장률: +5.21%

[Intel]
  기간: 2024-12-13 ~ 2025-12-12
  데이터: 250일
  시작가: $20.34
  종가: $37.81
  성장률: +85.89%


In [18]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import os
import glob
import warnings

# 경고 메시지 숨기기
warnings.filterwarnings('ignore')

# 한글 폰트 설정
plt.rcParams['font.family'] = 'Malgun Gothic'
plt.rcParams['axes.unicode_minus'] = False

# 작업 디렉토리 설정
data_dir = r"C:\Users\pc\ai_stock_data"
os.chdir(data_dir)

# CSV 파일 리스트
csv_files = glob.glob("*.csv")
print(f"발견된 CSV 파일: {len(csv_files)}개")
for file in csv_files:
    print(f"  - {file}")

# LSTM 파라미터 설정
lookback_days = 60
forecast_days = 30

# 결과 저장용
results = {}

for csv_file in csv_files:
    try:
        df = pd.read_csv(csv_file, encoding='utf-8-sig')
        print(f"\n[{csv_file}] 처리 시작 - 총 {len(df)}개 행")
        
        min_required = lookback_days + 20
        if len(df) < min_required:
            print(f"  ⚠ 데이터 부족: {len(df)}개 행 (최소 {min_required}개 필요)")
            continue
        
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.sort_values('Date').reset_index(drop=True)
        close_prices = df['Close'].values.reshape(-1, 1)
        
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(close_prices)
        
        def create_dataset(data, lookback):
            X, y = [], []
            for i in range(lookback, len(data)):
                X.append(data[i-lookback:i, 0])
                y.append(data[i, 0])
            return np.array(X), np.array(y)
        
        X, y = create_dataset(scaled_data, lookback_days)
        train_size = int(len(X) * 0.8)
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]
        
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
        
        # 수정된 모델 구축 (Input 레이어 사용)
        model = Sequential([
            Input(shape=(lookback_days, 1)),  # 여기가 변경된 부분
            LSTM(50, return_sequences=True),
            Dropout(0.2),
            LSTM(50, return_sequences=False),
            Dropout(0.2),
            Dense(25),
            Dense(1)
        ])
        
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
        
        history = model.fit(
            X_train, y_train,
            batch_size=32,
            epochs=50,
            validation_data=(X_test, y_test),
            verbose=0
        )
        
        predictions = model.predict(X_test, verbose=0)
        predictions = scaler.inverse_transform(predictions)
        y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))
        
        last_sequence = scaled_data[-lookback_days:]
        future_predictions = []
        
        for _ in range(forecast_days):
            current_sequence = last_sequence.reshape(1, lookback_days, 1)
            next_pred = model.predict(current_sequence, verbose=0)
            future_predictions.append(next_pred[0, 0])
            last_sequence = np.append(last_sequence[1:], next_pred, axis=0)
        
        future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))
        
        current_price = close_prices[-1][0]
        predicted_price = future_predictions[-1][0]
        growth_rate = ((predicted_price - current_price) / current_price) * 100
        
        ticker = csv_file.replace('.csv', '')
        results[ticker] = {
            'current_price': current_price,
            'predicted_price': predicted_price,
            'growth_rate': growth_rate,
            'data_points': len(df),
            'train_loss': history.history['loss'][-1],
            'val_loss': history.history['val_loss'][-1]
        }
        
        print(f"  ✓ 완료 - 현재가: ${current_price:.2f}, 예상가({forecast_days}일 후): ${predicted_price:.2f}, 성장률: {growth_rate:+.2f}%")
        
        plt.figure(figsize=(14, 6))
        
        plt.subplot(1, 2, 1)
        test_dates = df['Date'].iloc[-len(y_test_actual):].values
        plt.plot(test_dates, y_test_actual, label='실제 가격', linewidth=2)
        plt.plot(test_dates, predictions, label='LSTM 예측', linewidth=2, linestyle='--')
        plt.title(f'{ticker} - 테스트 데이터 예측 정확도')
        plt.xlabel('날짜')
        plt.ylabel('주가 (USD)')
        plt.legend()
        plt.xticks(rotation=45)
        plt.grid(True, alpha=0.3)
        
        plt.subplot(1, 2, 2)
        last_30_days = df['Date'].iloc[-30:].values
        last_30_prices = close_prices[-30:]
        future_dates = pd.date_range(start=df['Date'].iloc[-1], periods=forecast_days+1, freq='D')[1:]
        
        plt.plot(last_30_days, last_30_prices, label='과거 30일 실제 가격', linewidth=2, color='blue')
        plt.plot(future_dates, future_predictions, label=f'향후 {forecast_days}일 예측', linewidth=2, color='red', linestyle='--')
        plt.axhline(y=current_price, color='gray', linestyle=':', alpha=0.5)
        plt.title(f'{ticker} - 향후 {forecast_days}일 주가 예측')
        plt.xlabel('날짜')
        plt.ylabel('주가 (USD)')
        plt.legend()
        plt.xticks(rotation=45)
        plt.grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.savefig(f'{ticker}_LSTM_분석.png', dpi=300, bbox_inches='tight')
        plt.close()
        
    except Exception as e:
        print(f"  ✗ 오류: {str(e)}")

print("\n" + "="*80)
print("LSTM 시계열 분석 결과 요약")
print("="*80)

if results:
    results_df = pd.DataFrame.from_dict(results, orient='index')
    results_df = results_df.sort_values('growth_rate', ascending=False)
    
    print(f"\n총 {len(results)}개 종목 분석 완료\n")
    print(results_df.to_string())
    
    results_df.to_csv('LSTM_분석_결과_요약.csv', encoding='utf-8-sig')
    print(f"\n결과 저장: LSTM_분석_결과_요약.csv")
    
    print(f"\n[ 향후 {forecast_days}일 예상 성장률 TOP 3 ]")
    for idx, (ticker, row) in enumerate(results_df.head(3).iterrows(), 1):
        print(f"{idx}. {ticker}: {row['growth_rate']:+.2f}% (${row['current_price']:.2f} → ${row['predicted_price']:.2f})")
else:
    print("\n분석 가능한 데이터가 없습니다.")


발견된 CSV 파일: 10개
  - AI주_기간별_성장률_분석.csv
  - Amazon_AMZN.csv
  - AMD_AMD.csv
  - Google_GOOGL.csv
  - Intel_INTC.csv
  - LSTM_분석_결과_요약.csv
  - Meta_META.csv
  - Microsoft_MSFT.csv
  - NVIDIA_NVDA.csv
  - Tesla_TSLA.csv

[AI주_기간별_성장률_분석.csv] 처리 시작 - 총 8개 행
  ⚠ 데이터 부족: 8개 행 (최소 80개 필요)

[Amazon_AMZN.csv] 처리 시작 - 총 250개 행
  ✓ 완료 - 현재가: $226.19, 예상가(30일 후): $213.21, 성장률: -5.74%

[AMD_AMD.csv] 처리 시작 - 총 250개 행
  ✓ 완료 - 현재가: $210.78, 예상가(30일 후): $147.87, 성장률: -29.85%

[Google_GOOGL.csv] 처리 시작 - 총 250개 행
  ✓ 완료 - 현재가: $309.29, 예상가(30일 후): $281.79, 성장률: -8.89%

[Intel_INTC.csv] 처리 시작 - 총 250개 행
  ✓ 완료 - 현재가: $37.81, 예상가(30일 후): $43.83, 성장률: +15.92%

[LSTM_분석_결과_요약.csv] 처리 시작 - 총 8개 행
  ⚠ 데이터 부족: 8개 행 (최소 80개 필요)

[Meta_META.csv] 처리 시작 - 총 250개 행
  ✓ 완료 - 현재가: $644.23, 예상가(30일 후): $714.60, 성장률: +10.92%

[Microsoft_MSFT.csv] 처리 시작 - 총 250개 행
  ✓ 완료 - 현재가: $478.53, 예상가(30일 후): $492.56, 성장률: +2.93%

[NVIDIA_NVDA.csv] 처리 시작 - 총 250개 행
  ✓ 완료 - 현재가: $175.02, 예상가(30일 후): $191.11, 성장률: +9.19%

[Tesla_TS

In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

plt.rcParams['font.family'] = 'Malgun Gothic'
plt.rcParams['axes.unicode_minus'] = False

data_dir = r"C:\Users\pc\ai_stock_data"
os.chdir(data_dir)

csv_files = [f for f in glob.glob("*.csv") if not f.startswith('LSTM_') and not f.startswith('AI주_')]

periods = {'1개월': 30, '3개월': 90, '6개월': 180, '1년': 250}
growth_data = []

for csv_file in csv_files:
    df = pd.read_csv(csv_file, encoding='utf-8-sig')
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.sort_values('Date').reset_index(drop=True)
    
    ticker = csv_file.replace('.csv', '')
    company_name = ticker.split('_')[0]
    current_price = df['Close'].iloc[-1]
    
    period_growth = {'종목명': company_name, '티커': ticker.split('_')[1], '현재가': current_price}
    
    for period_name, days in periods.items():
        if len(df) >= days:
            past_price = df['Close'].iloc[-days]
            period_growth[period_name] = ((current_price - past_price) / past_price) * 100
        else:
            period_growth[period_name] = None
    
    growth_data.append(period_growth)

growth_df = pd.DataFrame(growth_data)
growth_df.to_csv('AI주_기간별_성장률_분석.csv', index=False, encoding='utf-8-sig')

# 시각화 1: 기간별 성장률 비교
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('AI 관련 주식 단기간 성장률 종합 분석', fontsize=18, fontweight='bold')

period_list = list(periods.keys())
for idx, period_name in enumerate(period_list):
    row, col = idx // 2, idx % 2
    ax = axes[row, col]
    
    plot_data = growth_df[['종목명', period_name]].dropna().sort_values(period_name, ascending=True)
    colors = ['#FF6B6B' if x < 0 else '#4ECDC4' for x in plot_data[period_name]]
    
    bars = ax.barh(plot_data['종목명'], plot_data[period_name], color=colors, alpha=0.8, edgecolor='black')
    
    for i, (value, bar) in enumerate(zip(plot_data[period_name], bars)):
        ax.text(value + (2 if value > 0 else -2), i, f'{value:+.1f}%', 
                va='center', ha='left' if value > 0 else 'right', fontsize=10, fontweight='bold')
    
    ax.axvline(x=0, color='black', linestyle='-', linewidth=0.8)
    ax.set_xlabel('성장률 (%)', fontsize=11, fontweight='bold')
    ax.set_title(f'{period_name} 성장률', fontsize=13, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.savefig('AI주_기간별_성장률_종합비교.png', dpi=300, bbox_inches='tight')
plt.close()

# 시각화 2: 히트맵
plt.figure(figsize=(12, 8))
heatmap_data = growth_df.set_index('종목명')[period_list]
sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn', center=0, 
            cbar_kws={'label': '성장률 (%)'}, linewidths=1, linecolor='black',
            vmin=-50, vmax=50)
plt.title('AI 관련 주식 기간별 성장률 히트맵', fontsize=16, fontweight='bold', pad=20)
plt.xlabel('분석 기간', fontsize=12, fontweight='bold')
plt.ylabel('종목', fontsize=12, fontweight='bold')
plt.tight_layout()
plt.savefig('AI주_성장률_히트맵.png', dpi=300, bbox_inches='tight')
plt.close()

# 시각화 3: 추세선
plt.figure(figsize=(14, 8))
for _, row in growth_df.iterrows():
    values = [row[period] for period in period_list if row[period] is not None]
    x_positions = [i for i, period in enumerate(period_list) if row[period] is not None]
    plt.plot(x_positions, values, marker='o', linewidth=2.5, markersize=8, label=row['종목명'])

plt.xticks(range(len(period_list)), period_list, fontsize=11)
plt.xlabel('분석 기간', fontsize=12, fontweight='bold')
plt.ylabel('성장률 (%)', fontsize=12, fontweight='bold')
plt.title('AI 관련 주식 기간별 성장률 추세', fontsize=16, fontweight='bold')
plt.axhline(y=0, color='black', linestyle='--', linewidth=0.8, alpha=0.5)
plt.legend(loc='best', fontsize=10)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('AI주_성장률_추세선.png', dpi=300, bbox_inches='tight')
plt.close()

# 요약 출력
print("="*60)
print("AI 관련 주식 단기간 성장률 분석 완료")
print("="*60)
print(f"\n분석 종목: {len(growth_df)}개")
print("\n[ 기간별 평균 성장률 ]")
for period_name in period_list:
    avg = growth_df[period_name].mean()
    print(f"  {period_name}: {avg:+.2f}%")

print(f"\n결과 파일:")
print(f"  - AI주_기간별_성장률_분석.csv")
print(f"  - AI주_기간별_성장률_종합비교.png")
print(f"  - AI주_성장률_히트맵.png")
print(f"  - AI주_성장률_추세선.png")
print("="*60)


AI 관련 주식 단기간 성장률 분석 완료

분석 종목: 8개

[ 기간별 평균 성장률 ]
  1개월: -5.23%
  3개월: +23.49%
  6개월: +57.53%
  1년: +32.60%

결과 파일:
  - AI주_기간별_성장률_분석.csv
  - AI주_기간별_성장률_종합비교.png
  - AI주_성장률_히트맵.png
  - AI주_성장률_추세선.png
