모듈 ∙ 라이브러리 import

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
tf.config.experimental.set_visible_devices([], 'GPU')
# tf.config.experimental.list_physical_devices('GPU')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt

데이터 불러오기 ∙ 전처리

In [2]:
def filter_df(df): #데이터프레임 필터링
    # 첫 번째 열에서 같은 값을 가진 행의 수를 계산합니다.
    row_counts = df['0'].value_counts()

    # 가장 많은 행의 수를 찾습니다.
    max_row_count = row_counts.max()

    # 가장 많은 행의 수에 해당하는 행만 분류합니다.
    filtered = pd.DataFrame(df[df['0'].isin(row_counts[row_counts == max_row_count].index)])

    return filtered

# 데이터 로드   
stock_df = pd.read_csv('/Users/moon/Desktop/Moon SeungHoo/Stock_Machine_Learning/StockData_5%_test.csv',low_memory=False)

#데이터 필터링
filter_stock = filter_df(stock_df)
filter_label = filter_stock['24']

# 불필요한 데이터 삭제
filter_stock = filter_stock.drop({'0','1','7','24'},axis=1) #날자, 상승율, 5%이상 상승여부 삭제 

데이터 전처리

In [3]:
scaler = MinMaxScaler()
stock_label = scaler.fit_transform(filter_label.values.reshape(-1, 1))

count_zeros = np.sum(stock_label == 0)
count_ones = np.sum(stock_label == 1)
total_samples = len(stock_label)

ratio_zeros = (count_zeros / total_samples).round(2)
ratio_ones = (count_ones / total_samples).round(2)

print("Ratio of zeros (0s):", ratio_zeros)
print("Ratio of ones (1s):", ratio_ones)

Ratio of zeros (0s): 0.95
Ratio of ones (1s): 0.05


데이터 분할

In [4]:
# 훈련 및 테스트 데이터로 분할
X_train, X_test, y_train, y_test = train_test_split(filter_stock, stock_label, test_size=0.2, random_state=42)

under_sampler = RandomUnderSampler(sampling_strategy='auto' , random_state=42)
X_resampled, y_resampled = under_sampler.fit_resample(X_train, y_train)
X_test_resampled, y_test_resampled = under_sampler.fit_resample(X_test, y_test)

# smote = SMOTE(sampling_strategy=0.7 ,random_state=42)
# X_train_resampled, y_train_resampled = smote.fit_resample(X_train,y_train)
# X_test_resampled, y_test_resampled = smote.fit_resample(X_test,y_test)

count_zeros = np.sum(y_resampled == 0)
count_ones = np.sum(y_resampled == 1)
total_samples = len(y_resampled)

ratio_zeros = (count_zeros / total_samples).round(2)
ratio_ones = (count_ones / total_samples).round(2)

print("Ratio of zeros (0s):", ratio_zeros)
print("Ratio of ones (1s):", ratio_ones)

Ratio of zeros (0s): 0.5
Ratio of ones (1s): 0.5


GRU 모델 정의

In [5]:
# GRU 모델 정의
tf.random.set_seed(42)
model = tf.keras.Sequential()
model.add(tf.keras.layers.GRU(32, input_shape=(X_resampled.shape[1],1),kernel_initializer='glorot_uniform', recurrent_dropout=0.0 ,return_sequences=True))
model.add(tf.keras.layers.GRU(32, return_sequences=True))
model.add(tf.keras.layers.GRU(32, return_sequences=True))
model.add(tf.keras.layers.GRU(32, return_sequences=True))
model.add(tf.keras.layers.GRU(32, return_sequences=True))
model.add(tf.keras.layers.GRU(32, return_sequences=True))
model.add(tf.keras.layers.GRU(32, return_sequences=True))
model.add(tf.keras.layers.GRU(32, return_sequences=True))
model.add(tf.keras.layers.GRU(32, return_sequences=False))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 조기 종료 콜백 정의
early_stopping = EarlyStopping(monitor='accuracy', patience=30, restore_best_weights=True)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 25, 32)            3360      
                                                                 
 gru_1 (GRU)                 (None, 25, 32)            6336      
                                                                 
 gru_2 (GRU)                 (None, 25, 32)            6336      
                                                                 
 gru_3 (GRU)                 (None, 25, 32)            6336      
                                                                 
 gru_4 (GRU)                 (None, 25, 32)            6336      
                                                                 
 gru_5 (GRU)                 (None, 25, 32)            6336      
                                                                 
 gru_6 (GRU)                 (None, 25, 32)            6

모델 훈련

In [6]:
# 모델 훈련
model.fit(X_resampled,y_resampled,epochs=500,
validation_data=(X_test_resampled,y_test_resampled),callbacks=[early_stopping])
 
# 모델 저장
model.save("GRU_Model_9L_32_5%.h5")

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

  saving_api.save_model(


모델 평가

In [7]:
# 모델 로드
loaded_model = tf.keras.models.load_model("GRU_Model_9L_32_5%.h5")

# 모델을 사용하여 주가 상승 여부 예측
test = loaded_model.predict(X_test)



In [8]:
y_pred_binary = (test > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred_binary)
conf_matrix = confusion_matrix(y_test, y_pred_binary)
classification_rep = classification_report(y_test, y_pred_binary)

print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(classification_rep)

Accuracy: 0.8915185824650407
Confusion Matrix:
[[183115  22487]
 [   949   9486]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.99      0.89      0.94    205602
         1.0       0.30      0.91      0.45     10435

    accuracy                           0.89    216037
   macro avg       0.65      0.90      0.69    216037
weighted avg       0.96      0.89      0.92    216037



In [9]:
# 예측 결과를 이진값(0 또는 1)으로 변환
binary_predictions = (test > 0.5).astype(int)

# 예측 결과 출력
correct_predictions = np.equal(binary_predictions, y_test)  # 정확하게 예측한 경우 True, 그렇지 않으면 False
accuracy = np.mean(correct_predictions)  # 정확도 계산

print(f"정확도: {accuracy * 100:.2f}%")

정확도: 89.15%
