<a href="https://colab.research.google.com/github/sincereQK/QK-ML-Data-study/blob/main/LSTM_First_20250904.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
import matplotlib.pyplot as plt

# 1. Load and preprocess the data
df = pd.read_csv('20230610_20250610_일봉_000050_경방.csv')

# Parse the '일자' (date) column as datetime and sort in ascending order so
# that row position corresponds to chronological order.
df['일자'] = pd.to_datetime(df['일자'], format='%Y%m%d')
df = df.sort_values('일자')

# Use only the '현재가' (current / closing price) column, as an (N, 1) array.
data = df['현재가'].values.reshape(-1, 1)

# Chronological split: the first 80% of rows are for training, the rest for
# testing.
train_data_len = int(np.ceil(len(data) * 0.8))

# Each sample needs 60 preceding values plus one target value.
if train_data_len <= 60:
    raise ValueError(
        f'Need more than 60 training rows to build 60-step windows, '
        f'got {train_data_len}.'
    )

# Fit the scaler on the training rows ONLY. Fitting on the full series would
# leak the test period's min/max into the training features (look-ahead
# bias). The whole series is then transformed with the training statistics,
# so test values may fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data[:train_data_len])
scaled_data = scaler.transform(data)

train_data = scaled_data[0:train_data_len, :]
# The test slice starts 60 rows early so the first test target has a full
# 60-step history available.
test_data = scaled_data[train_data_len - 60:, :]

# Build the training set: each input is 60 consecutive scaled prices and the
# target is the scaled price that immediately follows.
x_train = np.array(
    [train_data[end - 60:end, 0] for end in range(60, len(train_data))]
)
y_train = train_data[60:, 0].copy()

# The LSTM expects input shaped (samples, time steps, features).
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))


# 2. Build the LSTM model
# Two stacked 50-unit LSTM layers, each followed by 20% dropout, then a
# 25-unit dense layer and a single-unit output layer.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    Dense(units=25),
    Dense(units=1),
])

# Compile with the Adam optimizer and mean-squared-error loss.
model.compile(loss='mean_squared_error', optimizer='adam')

# 3. Train the model
model.fit(x_train, y_train, epochs=50, batch_size=32)


# 4. Build the test set and predict
# Unscaled actual prices for the test period (not used further in this
# section).
y_test = data[train_data_len:, :]

# One 60-step input window per test target.
x_test = np.array(
    [test_data[stop - 60:stop, 0] for stop in range(60, len(test_data))]
)
# Reshape to (samples, time steps, features) as the model expects.
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# Predict in scaled space, then map back to the original price scale.
predictions = scaler.inverse_transform(model.predict(x_test))


# 5. Visualize the results
train = df[:train_data_len]
# Take an explicit copy: adding a column to a bare slice of df triggers
# pandas' SettingWithCopyWarning and leaves it ambiguous which object is
# being modified.
valid = df[train_data_len:].copy()
# Flatten the predictions to 1-D so each value maps to exactly one row.
valid['Predictions'] = np.ravel(predictions)

plt.figure(figsize=(16,8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price KRW', fontsize=18)
# Training-period prices, then actual vs. predicted prices for the test period.
plt.plot(train['일자'], train['현재가'])
plt.plot(valid['일자'], valid[['현재가', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.savefig('stock_prediction.png')

# 6. Predict the next day's price from the last 60 scaled values
last_60_days = scaled_data[-60:]
# Shape the window as a single sample: (1, time steps, 1 feature).
x_predict = last_60_days.reshape(1, last_60_days.shape[0], 1)
# Predict in scaled space and convert back to the original price scale.
predicted_price = scaler.inverse_transform(model.predict(x_predict))
print(f'다음 날 예측 주가: {predicted_price[0][0]}')