In [1]:
!pip install finance-datareader



In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, concatenate

In [3]:
import pandas as pd
import numpy as np
import FinanceDataReader as fdr
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv1D, Flatten, BatchNormalization, Dropout
from sklearn.metrics import mean_squared_error

# Load stock data.
# Alternative ticker selections kept for reference (disabled):
# '373220'
# stock_num_li = ['005930', '000660', '207940', '005935', '005380', '005490', '000270', '051910', '035420', '006400', '068270', '105560', '012330', '003670', '028260', '055550', '066570', '035720', '032830', '096770', '003550', '086790', '000810','033780', '017670', '015760', '138040', '034730', '009150', '018260', '329180', '010130', '047050', '323410', '316140', '024110', '034020', '352820']
stock_num_li = ['323410']

# Collection window shared by every ticker and by the benchmark indices.
start_date = '1995-01-01'  # first date to collect
end_date = '2022-12-31'    # last date to collect

# The benchmark closes do not depend on the ticker, so they are fetched once
# here instead of being downloaded again on every loop iteration.
kospi_data = fdr.DataReader('KS11', start=start_date, end=end_date)
kospi_data = kospi_data['Close'].rename('Kospi')

sp500_data = fdr.DataReader('US500', start=start_date, end=end_date)
sp500_data = sp500_data['Close'].rename('S&P500')

for stock_num in stock_num_li:
    stock_code = stock_num  # ticker code processed in this iteration
    stock_data = fdr.DataReader(stock_code, start=start_date, end=end_date)

    # 5-, 10- and 20-row simple moving averages of the close
    for ma_window in (5, 10, 20):
        stock_data[f'ma{ma_window}'] = stock_data['Close'].rolling(window=ma_window).mean()

    # Attach the KOSPI and S&P 500 closes as extra columns. The inner merge on
    # the index keeps only the dates present in both the stock frame and the
    # benchmark series, in the stock frame's order.
    for benchmark_close in (kospi_data, sp500_data):
        stock_data = stock_data.merge(benchmark_close, left_index=True, right_index=True, how='inner')
    stock_data.index.name = 'Date'

    # RSI 계산을 위한 함수
    def calculate_rsi(data, period=14):
        """Return the RSI series computed from ``data['Close']``.

        Row-to-row changes of the close are split into gains and losses, each
        is averaged with a simple rolling mean over ``period`` rows, and the
        ratio of the two averages is mapped through ``100 - 100 / (1 + rs)``.

        Args:
            data: Frame with a 'Close' column.
            period: Rolling window length (rows) for the averages.

        Returns:
            Series aligned with ``data``; the first ``period - 1`` rows are NaN
            because the rolling window is not yet full.
        """
        change = data['Close'].diff(1)

        # Keep only upward moves / only downward moves (as positive magnitudes);
        # everything else, including the leading NaN, is replaced by 0.
        ups = change.where(change > 0, 0)
        downs = -change.where(change < 0, 0)

        mean_up = ups.rolling(window=period).mean()
        mean_down = downs.rolling(window=period).mean()

        relative_strength = mean_up / mean_down
        return 100 - (100 / (1 + relative_strength))

    # 14-period RSI (the default period of calculate_rsi), stored in the 'rsi' column
    stock_data['rsi'] = calculate_rsi(stock_data)

    # %K 계산을 위한 함수
    def calculate_stochastic_k(data, period=14):
        """Return the stochastic %K series for ``data``.

        %K is the position of the close inside the rolling low/high range of
        the last ``period`` rows, expressed as a percentage.

        Args:
            data: Frame with 'Low', 'High' and 'Close' columns.
            period: Rolling window length (rows) for the low/high range.

        Returns:
            Series aligned with ``data``; NaN until the window is full.
        """
        window_low = data['Low'].rolling(window=period).min()
        window_high = data['High'].rolling(window=period).max()

        close_above_low = data['Close'] - window_low
        range_width = window_high - window_low
        return close_above_low / range_width * 100

    # %D 계산을 위한 함수
    def calculate_stochastic_d(data, period=3):
        """Return the stochastic %D series: a rolling mean of %K.

        %K is obtained from ``calculate_stochastic_k(data)`` with that
        function's default window; ``period`` only controls the smoothing
        window applied on top of it.

        Args:
            data: Frame accepted by ``calculate_stochastic_k``.
            period: Rolling window length (rows) for the mean of %K.

        Returns:
            Series aligned with ``data``.
        """
        return calculate_stochastic_k(data).rolling(window=period).mean()

    # Add the stochastic oscillator columns: %K and its rolling-mean smoothing %D
    # (both computed with the helpers' default window lengths).
    stock_data['%K'] = calculate_stochastic_k(stock_data)
    stock_data['%D'] = calculate_stochastic_d(stock_data)

    # 볼린저 밴드 계산을 위한 함수
    def calculate_bollinger_bands(data, window=20, num_std=2):
        """Return the Bollinger bands computed from ``data['Close']``.

        The middle band is the simple rolling mean of the close; the upper and
        lower bands sit ``num_std`` rolling standard deviations above and
        below it.

        Args:
            data: Frame with a 'Close' column.
            window: Rolling window length (rows) for the mean and std.
            num_std: Band half-width as a multiple of the rolling standard
                deviation. Defaults to 2, the width that was previously
                hard-coded.

        Returns:
            Tuple ``(upper_band, sma, lower_band)`` of Series aligned with
            ``data``; values are NaN until the window is full.
        """
        closes = data['Close'].rolling(window=window)
        sma = closes.mean()
        band_offset = closes.std() * num_std

        upper_band = sma + band_offset
        lower_band = sma - band_offset

        return upper_band, sma, lower_band

    # Bollinger band columns: upper band, middle band (SMA) and lower band
    stock_data['bb_upper'], stock_data['bb_sma'], stock_data['bb_lower'] = calculate_bollinger_bands(stock_data)

    # 5-row moving average of the traded volume
    stock_data['volume_ma5'] = stock_data['Volume'].rolling(window=5).mean()

    # Momentum: close minus the close five rows earlier
    stock_data['momentum'] = stock_data['Close'].diff(5)

    # Spread between the high and the low of each row
    stock_data['high_low_diff'] = stock_data['High'] - stock_data['Low']

    # Drop every row that still has a NaN in any column (e.g. rows where the
    # rolling windows above were not yet full).
    stock_data = stock_data.dropna()

    # NOTE(review): the scaler is created here but never fitted or applied in
    # this cell, so the frame printed and saved below holds unscaled values.
    scaler = MinMaxScaler()

    # The CSV is written with index=False, so copy the date index into a real
    # 'Date' column and place it first.
    date_column = stock_data.index.to_series(name='Date')
    stock_data.insert(0, 'Date', date_column)

    # Show the feature frame that is about to be saved
    print(stock_data)

    # Destination path of the per-ticker CSV file
    output_file = f'../stock_code_learning/high50data/{stock_num}.csv'

    # Save the feature frame (the date is carried by the 'Date' column)
    stock_data.to_csv(output_file, index=False)

    # The message no longer claims the data is normalized: no scaling is applied above.
    print(f'주식 데이터를 {output_file} 파일로 저장했습니다.')

                 Date   Open   High    Low  Close     Volume    Change  \
Date                                                                     
2021-09-03 2021-09-03  82900  83000  80200  80800    3987547 -0.013431   
2021-09-07 2021-09-07  77300  77300  72600  73000    6001690 -0.056848   
2021-09-08 2021-09-08  73400  75800  71400  72300    5849714 -0.009589   
2021-09-09 2021-09-09  71300  73000  70300  72000    5748242 -0.004149   
2021-09-10 2021-09-10  71000  72400  68100  68900    6750339 -0.043056   
...               ...    ...    ...    ...    ...        ...       ...   
2022-12-22 2022-12-22  26700  26800  26200  26350  131020654  0.001901   
2022-12-23 2022-12-23  26000  26200  25050  25100    2382779 -0.047438   
2022-12-27 2022-12-27  25100  25850  25100  25800    1518799  0.027888   
2022-12-28 2022-12-28  25500  25650  24900  25050    1716525 -0.029070   
2022-12-29 2022-12-29  24850  25400  23950  24300    1906638 -0.029940   

                ma5     ma10     ma20

In [31]:
# Print stock_data (disabled)
# print(stock_data)


# Fetch the 'KS11' series for 1999-08-02 through 2021-12-30 and print its 'Open' column.
df = fdr.DataReader('KS11', '1999-08-02', '2021-12-30')
print(df['Open'])



Date
1999-08-02   1999-08-02
1999-08-03   1999-08-03
1999-08-04   1999-08-04
1999-08-05   1999-08-05
1999-08-06   1999-08-06
                ...    
2021-12-24   2021-12-24
2021-12-27   2021-12-27
2021-12-28   2021-12-28
2021-12-29   2021-12-29
2021-12-30   2021-12-30
Name: Date, Length: 5537, dtype: datetime64[ns]
Date
1999-08-02     3770
1999-08-03     3800
1999-08-04     3890
1999-08-05     3720
1999-08-06     3660
              ...  
2021-12-24    80200
2021-12-27    80600
2021-12-28    80200
2021-12-29    80200
2021-12-30    78900
Name: Open, Length: 5537, dtype: int64
Date
1999-08-02     3860
1999-08-03     3920
1999-08-04     3920
1999-08-05     3800
1999-08-06     3740
              ...  
2021-12-24    80800
2021-12-27    80600
2021-12-28    80400
2021-12-29    80200
2021-12-30    79500
Name: High, Length: 5537, dtype: int64
Date
1999-08-02     3630
1999-08-03     3730
1999-08-04     3800
1999-08-05     3620
1999-08-06     3480
              ...  
2021-12-24    80200
2021-12-27

In [8]:
# Fetch and print the raw price frame for a single ticker.
# NOTE(review): this reassigns `stock_data`, replacing the feature frame built by the loop cell above.
# NOTE(review): the saved output below this cell shows an empty DataFrame for a different symbol — re-run and confirm this ticker has data in the requested range.
stock_code = '373220'  # stock ticker code to fetch
start_date = '1995-01-01'  # first date of the collection window
end_date = '2021-12-31'   # last date of the collection window
stock_data = fdr.DataReader(stock_code, start=start_date, end=end_date)
print(stock_data)

"027940" invalid symbol or has no data
Empty DataFrame
Columns: []
Index: []
