모듈 ∙ 라이브러리 import

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
tf.config.experimental.set_visible_devices([], 'GPU')
# tf.config.experimental.list_physical_devices('GPU')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt

데이터 불러오기 ∙ 전처리

In [2]:
def filter_df(df): #데이터프레임 필터링
    # 첫 번째 열에서 같은 값을 가진 행의 수를 계산합니다.
    row_counts = df['Ticker'].value_counts()

    # 가장 많은 행의 수를 찾습니다.
    max_row_count = row_counts.max()

    # 가장 많은 행의 수에 해당하는 행만 분류합니다.
    filtered = pd.DataFrame(df[df['Ticker'].isin(row_counts[row_counts == max_row_count].index)])

    return filtered

# 데이터 로드   
stock_df = pd.read_csv('/Users/moon/Desktop/Moon SeungHoo/Stock_Machine_Learning/StockData_3%_test.csv',low_memory=False)

#데이터 필터링
filter_stock = filter_df(stock_df)
filter_label = filter_stock['Label']

# 불필요한 데이터 삭제
filter_stock = filter_stock.drop({'Ticker','Date','Change','Label'},axis=1) #날자, 상승율, 5%이상 상승여부 삭제 

데이터 전처리

In [3]:
scaler = MinMaxScaler()
stock_label = scaler.fit_transform(filter_label.values.reshape(-1, 1))

count_zeros = np.sum(stock_label == 0)
count_ones = np.sum(stock_label == 1)
total_samples = len(stock_label)

ratio_zeros = (count_zeros / total_samples).round(2)
ratio_ones = (count_ones / total_samples).round(2)

print("Ratio of zeros (0s):", ratio_zeros)
print("Ratio of ones (1s):", ratio_ones)

Ratio of zeros (0s): 0.79
Ratio of ones (1s): 0.21


데이터 분할

In [4]:
# 훈련 및 테스트 데이터로 분할
X_train, X_test, y_train, y_test = train_test_split(filter_stock, stock_label, test_size=0.2, random_state=42)

# under_sampler = RandomUnderSampler(sampling_strategy='auto' , random_state=42)
# X_resampled, y_resampled = under_sampler.fit_resample(X_train, y_train)
# X_test_resampled, y_test_resampled = under_sampler.fit_resample(X_test, y_test)

smote = SMOTE(sampling_strategy='auto' ,random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train,y_train)
X_test_resampled, y_test_resampled = smote.fit_resample(X_test,y_test)

count_zeros = np.sum(y_train_resampled == 0)
count_ones = np.sum(y_train_resampled == 1)
total_samples = len(y_train_resampled)

ratio_zeros = (count_zeros / total_samples).round(2)
ratio_ones = (count_ones / total_samples).round(2)

print("Ratio of zeros (0s):", ratio_zeros)
print("Ratio of ones (1s):", ratio_ones)

Ratio of zeros (0s): 0.5
Ratio of ones (1s): 0.5


GRU 모델 정의

In [5]:
# GRU 모델 정의
model = tf.keras.Sequential()
model.add(tf.keras.layers.GRU(128, kernel_initializer='random_uniform',input_shape=(X_train_resampled.shape[1],1),return_sequences=True))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.GRU(128, kernel_initializer='random_uniform',return_sequences=True))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.GRU(64, kernel_initializer='random_uniform',return_sequences=True))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.GRU(64, kernel_initializer='random_uniform',return_sequences=True))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.GRU(32, kernel_initializer='random_uniform',return_sequences=True))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.GRU(32, kernel_initializer='random_uniform',return_sequences=False))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(1, kernel_initializer='random_uniform',activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 조기 종료 콜백 정의
early_stopping = EarlyStopping(monitor='accuracy', patience=10, restore_best_weights=True)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 35, 128)           50304     
                                                                 
 dropout (Dropout)           (None, 35, 128)           0         
                                                                 
 gru_1 (GRU)                 (None, 35, 128)           99072     
                                                                 
 dropout_1 (Dropout)         (None, 35, 128)           0         
                                                                 
 gru_2 (GRU)                 (None, 35, 64)            37248     
                                                                 
 dropout_2 (Dropout)         (None, 35, 64)            0         
                                                                 
 gru_3 (GRU)                 (None, 35, 64)            2

모델 훈련

In [6]:
# 모델 훈련
model.fit(X_train_resampled,y_train_resampled,epochs=1,
validation_data=(X_test_resampled, y_test_resampled),callbacks=[early_stopping])
 
# 모델 저장
model.save("RaspberryPi_test.h5")



  saving_api.save_model(


모델 평가

In [7]:
# 모델 로드
# loaded_model = tf.keras.models.load_model("GRU_Model_8L_64_10%.h5")

# 모델을 사용하여 주가 상승 여부 예측
test = model.predict(X_test_resampled)



In [8]:
y_pred_binary = (test > 0.5).astype(int)

accuracy = accuracy_score(y_test_resampled, y_pred_binary)
conf_matrix = confusion_matrix(y_test_resampled, y_pred_binary)
classification_rep = classification_report(y_test_resampled, y_pred_binary)

print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(classification_rep)

Accuracy: 0.8190779528352593
Confusion Matrix:
[[200197  37947]
 [ 48224 189920]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.81      0.84      0.82    238144
         1.0       0.83      0.80      0.82    238144

    accuracy                           0.82    476288
   macro avg       0.82      0.82      0.82    476288
weighted avg       0.82      0.82      0.82    476288

