In [74]:
!pip install finance-datareader



In [75]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, concatenate

In [76]:
import pandas as pd
import numpy as np
import FinanceDataReader as fdr
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv1D, Flatten, BatchNormalization, Dropout
from sklearn.metrics import mean_squared_error

# 주식 데이터 불러오기
stock_code = '066570'  # 주식 코드 (066570 주식)
start_date = '2015-01-01'  # 데이터 수집 시작 날짜
end_date = '2019-12-31'   # 데이터 수집 종료 날짜
stock_data = fdr.DataReader(stock_code, start=start_date, end=end_date)

# 5일 이동평균
stock_data['ma5'] = stock_data['Close'].rolling(window=5).mean()

# 10일 이동평균
stock_data['ma10'] = stock_data['Close'].rolling(window=10).mean()

# 20일 이동평균
stock_data['ma20'] = stock_data['Close'].rolling(window=20).mean()

# RSI 계산을 위한 함수
def calculate_rsi(data, period=14):
    delta = data['Close'].diff(1)
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    avg_gain = gain.rolling(window=period).mean()
    avg_loss = loss.rolling(window=period).mean()

    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    
    return rsi

# 14일 RSI
stock_data['rsi'] = calculate_rsi(stock_data)

# %K 계산을 위한 함수
def calculate_stochastic_k(data, period=14):
    lowest_low = data['Low'].rolling(window=period).min()
    highest_high = data['High'].rolling(window=period).max()
    stochastic_k = (data['Close'] - lowest_low) / (highest_high - lowest_low) * 100
    
    return stochastic_k

# %D 계산을 위한 함수
def calculate_stochastic_d(data, period=3):
    stochastic_k = calculate_stochastic_k(data)
    stochastic_d = stochastic_k.rolling(window=period).mean()
    
    return stochastic_d

# 주요 값 추가
stock_data['%K'] = calculate_stochastic_k(stock_data)
stock_data['%D'] = calculate_stochastic_d(stock_data)

# 볼린저 밴드 계산을 위한 함수
def calculate_bollinger_bands(data, window=20):
    sma = data['Close'].rolling(window=window).mean()
    rolling_std = data['Close'].rolling(window=window).std()

    upper_band = sma + (rolling_std * 2)
    lower_band = sma - (rolling_std * 2)

    return upper_band, sma, lower_band

# 볼린저 밴드 값 추가
stock_data['bb_upper'], stock_data['bb_sma'], stock_data['bb_lower'] = calculate_bollinger_bands(stock_data)

# 거래량 이동평균
stock_data['volume_ma5'] = stock_data['Volume'].rolling(window=5).mean()

# 모멘텀 (Close 가격의 5일 차)
stock_data['momentum'] = stock_data['Close'].diff(5)

# 최고가와 최저가 차이
stock_data['high_low_diff'] = stock_data['High'] - stock_data['Low']
stock_data.dropna(inplace=True)
# MinMax 스케일링을 위한 객체 생성
scaler = MinMaxScaler()

# 정규화할 컬럼 선택 (예: 종가, 시작가, 거래량제외, 최고가, 최저가, 이동평균, RSI, %K, %D, 볼린저 밴드, 거래량 이동평균, 모멘텀, 최고가와 최저가 차이)
columns_to_normalize = ['Close', 'Open', 'High', 'Low', 'ma5', 'ma10', 'ma20', 'rsi', '%K', '%D', 'bb_upper', 'bb_sma', 'bb_lower', 'volume_ma5', 'momentum', 'high_low_diff']

# 선택한 컬럼을 정규화
stock_data[columns_to_normalize] = scaler.fit_transform(stock_data[columns_to_normalize])

# 거래량이 10억 이상인 행 찾기
high_volume_rows = stock_data[stock_data['Volume'] >= 20000]

# 찾은 행과 그 앞의 10개 행 선택
selected_indices = []
for index in high_volume_rows.index:
    index_loc = stock_data.index.get_loc(index)
    if index_loc >= 10:  # 10 이전의 행이 있는지 확인
        selected_indices.extend(list(range(index_loc - 10, index_loc + 1)))

# 선택된 인덱스로 데이터프레임 업데이트
stock_data = stock_data.iloc[selected_indices]

# 거래량을 정규화할 컬럼 선택
volume_column = ['Volume']

# 거래량 컬럼을 정규화
stock_data[volume_column] = scaler.fit_transform(stock_data[volume_column])

# 상한가, 하한가를 넘어서게 변화한 데이터 삭제
stock_data = stock_data[(stock_data['Change'] >= -0.3) & (stock_data['Change'] <= 0.3)]

# 정규화된 데이터 출력
print(stock_data)

# # CSV 파일로 저장할 경로와 파일 이름 정의
# output_file = '15_20_066570.csv'

# # 정규화된 데이터프레임을 CSV 파일로 저장
# stock_data.to_csv(output_file, index=False)

# print(f'정규화된 주식 데이터를 {output_file} 파일로 저장했습니다.')

                Open      High       Low     Close    Volume    Change  \
Date                                                                     
2015-01-29  0.333783  0.323470  0.317992  0.309362  0.084334 -0.014173   
2015-01-30  0.313554  0.301950  0.297071  0.284939  0.166516 -0.028754   
2015-02-02  0.289278  0.279085  0.290098  0.280868  0.077978 -0.004934   
2015-02-03  0.285233  0.277740  0.290098  0.278155  0.068692 -0.003306   
2015-02-04  0.289278  0.287155  0.295676  0.291723  0.089672  0.016584   
...              ...       ...       ...       ...       ...       ...   
2019-12-23  0.448415  0.436449  0.454672  0.443691  0.008099  0.000000   
2019-12-24  0.447067  0.437794  0.454672  0.435550  0.002450 -0.008276   
2019-12-26  0.440324  0.431069  0.444909  0.427408  0.025861 -0.008345   
2019-12-27  0.430883  0.429724  0.443515  0.434193  0.045281  0.007013   
2019-12-30  0.437626  0.431069  0.450488  0.438263  0.012101  0.004178   

                 ma5      ma10      m

In [77]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv1D, Flatten
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# 데이터 로드 및 전처리
# (위에서 주신 데이터 및 전처리 과정 사용)

# 데이터를 입력(X)와 출력(y)로 분할
X = stock_data.drop('Close', axis=1).values  # 종가를 제외한 모든 특성을 입력으로 사용
y = stock_data['Close'].values  # 종가를 출력으로 사용

# 학습 및 테스트 데이터로 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 데이터를 3D 형태로 변환 (samples, timesteps, features)
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
print(X_train)
print(X_test)

[[[0.10316925 0.09885676 0.09414226 ... 0.31771928 0.61392405 0.1039604 ]]

 [[0.16250843 0.16341627 0.16317992 ... 0.22286554 0.58227848 0.08910891]]

 [[0.20161834 0.20511096 0.19804742 ... 0.15101879 0.44303797 0.14851485]]

 ...

 [[0.19892111 0.19300605 0.19525802 ... 0.13566702 0.51476793 0.07920792]]

 [[0.22454484 0.21452589 0.22315202 ... 0.13700715 0.45147679 0.03960396]]

 [[0.75859744 0.78480161 0.77405858 ... 0.61562411 0.82278481 0.32673267]]]
[[[0.52124073 0.5198386  0.53138075 ... 0.19498188 0.61181435 0.0990099 ]]

 [[0.3054619  0.30598521 0.31380753 ... 0.2216254  0.26582278 0.06930693]]

 [[0.33108564 0.32078009 0.31799163 ... 0.20215167 0.47679325 0.14851485]]

 ...

 [[0.09710047 0.10154674 0.09902371 ... 0.27567456 0.33755274 0.08910891]]

 [[0.15576534 0.16341627 0.15620642 ... 0.40855983 0.46413502 0.13861386]]

 [[0.85839514 0.84532616 0.85355649 ... 0.12460478 0.46835443 0.20792079]]]


In [81]:
class StockModelEnsemble:
    def __init__(self, X_train, y_train, X_test, y_test):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

    def build_lstm_model(self):
        model = Sequential()
        model.add(LSTM(50, input_shape=(1, self.X_train.shape[2]))
        model.add(Dense(1, activation='linear'))

        model.compile(optimizer='adam', loss='mean_squared_error')
        model.fit(self.X_train, self.y_train, epochs=50, batch_size=32, validation_data=(self.X_test, self.y_test))
        return model

    def build_dnn_model(self):
        model = Sequential()
        model.add(Dense(64, input_shape=(1, self.X_train.shape[2]), activation='relu'))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(1, activation='linear'))

        model.compile(optimizer='adam', loss='mean_squared_error')
        model.fit(self.X_train, self.y_train, epochs=50, batch_size=32, validation_data=(self.X_test, self.y_test))
        return model

    def build_cnn_model(self):
        model = Sequential()
        model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(1, self.X_train.shape[2]))
        model.add(Flatten())
        model.add(Dense(1, activation='linear'))

        model.compile(optimizer='adam', loss='mean_squared_error')
        model.fit(self.X_train, self.y_train, epochs=50, batch_size=32, validation_data=(self.X_test, self.y_test))
        return model

    def predict(self, model, X):
        return model.predict(X)

# 모델 초기화 및 학습
stock_ensemble = StockModelEnsemble(X_train, y_train, X_test, y_test)
lstm_model = stock_ensemble.build_lstm_model()
dnn_model = stock_ensemble.build_dnn_model()
cnn_model = stock_ensemble.build_cnn_model()

# 각 모델로 예측 생성
lstm_predictions = stock_ensemble.predict(lstm_model, X_test)
dnn_predictions = stock_ensemble.predict(dnn_model, X_test)
cnn_predictions = stock_ensemble.predict(cnn_model, X_test)


SyntaxError: invalid syntax (3278938477.py, line 11)

In [None]:
import numpy as np

class EnsembleModel:
    def __init__(self, models):
        self.models = models

    def predict(self, X):
        # 모든 모델의 예측을 생성
        predictions = [model.predict(X) for model in self.models]
        # 예측을 평균
        ensemble_predictions = np.mean(predictions, axis=0)
        return ensemble_predictions

# 모든 모델을 리스트에 넣어서 앙상블 모델을 생성
models = [lstm_model, dnn_model, cnn_model]
ensemble = EnsembleModel(models)

# 테스트 데이터로 앙상블 모델의 예측 생성
X_test_reshaped = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
ensemble_predictions = ensemble.predict(X_test_reshaped)

# 평균 앙상블의 예측 결과 출력
print(ensemble_predictions)
