In [39]:
from pykrx import stock
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind
from sklearn.linear_model import LassoCV, RidgeCV
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.metrics import MeanSquaredError
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import numpy as np
import tensorflow as tf
# 시드 설정
np.random.seed(190)
tf.random.set_seed(180)
tf.keras.utils.set_random_seed(170)

# SK하이닉스 종목코드
ticker = "000660"

# 시작 날짜와 종료 날짜 설정
start_date = "2022-03-20"
end_date = "2024-03-28"
# 주어진 기간 동안의 일별 거래량 정보 가져오기
df = stock.get_market_ohlcv_by_date(fromdate=start_date, todate=end_date, ticker=ticker)


def calculate_macd(df, short_window=12, long_window=26, signal_window=9):
    """MACD 및 MACD 신호 계산"""
    df['EMA_short'] = df['종가'].ewm(span=short_window, adjust=False).mean()
    df['EMA_long'] = df['종가'].ewm(span=long_window, adjust=False).mean()
    df['MACD'] = df['EMA_short'] - df['EMA_long']
    df['MACD_Signal'] = df['MACD'].ewm(span=signal_window, adjust=False).mean()
    return df

def calculate_rsi(df, window=14, signal_window=9):
    """RSI 및 RSI 신호 계산"""
    delta = df['종가'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()

    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))
    
    # RSI 신호선 추가
    df['RSI_Signal'] = df['RSI'].ewm(span=signal_window, adjust=False).mean()
    return df

# MACD 및 MACD 신호 계산
df = calculate_macd(df)

df = calculate_rsi(df)

# '종가' 컬럼이 포함된 DataFrame을 가정합니다. 예를 들어, df라고 합시다.
# df = pd.read_csv('path_to_your_data.csv') # 데이터 파일을 로드하는 예시

# 지정된 기간에 대한 SMA 계산
periods = [5, 20, 60, 120, 240]
for period in periods:
    df[f'SMA_{period}'] = df['종가'].rolling(window=period).mean()

# 지정된 기간에 대한 EMA 계산
for period in periods:
    df[f'EMA_{period}'] = df['종가'].ewm(span=period, adjust=False).mean()

for i in range(len(periods)):
    for j in range(i + 1, len(periods)):
        smaller_period = periods[i]
        larger_period = periods[j]
        df[f'SMA_{smaller_period}_minus_SMA_{larger_period}'] = df[f'SMA_{smaller_period}'] - df[f'SMA_{larger_period}']

# EMA 간의 차이 계산
for i in range(len(periods)):
    for j in range(i + 1, len(periods)):
        smaller_period = periods[i]
        larger_period = periods[j]
        df[f'EMA_{smaller_period}_minus_EMA_{larger_period}'] = df[f'EMA_{smaller_period}'] - df[f'EMA_{larger_period}']

for column in ['시가', '고가', '저가','거래량','MACD','MACD_Signal','RSI','RSI_Signal']:
    df[f'{column}_등락률'] = df[column].pct_change() * 100

# SMA 및 EMA의 전 거래일 대비 등락률 계산 및 DataFrame에 추가
periods = [5, 20, 60, 120, 240]

# SMA 등락률 계산 및 추가
for period in periods:
    df[f'SMA_{period}_등락률'] = df[f'SMA_{period}'].pct_change() * 100

# EMA 등락률 계산 및 추가
for period in periods:
    df[f'EMA_{period}_등락률'] = df[f'EMA_{period}'].pct_change() * 100

# 'MACD'가 0 이상일 때 1, 아니면 0을 할당
df['MACD_Binary'] = (df['MACD'] >= 0).astype(int)

# 'MACD - MACD_Signal'이 0 이상일 때 1, 아니면 0을 할당
df['MACD_minus_Signal_Binary'] = ((df['MACD'] - df['MACD_Signal']) >= 0).astype(int)

# 'RSI - RSI_Signal'이 0 이상일 때 1, 아니면 0을 할당
df['RSI_minus_Signal_Binary'] = ((df['RSI'] - df['RSI_Signal']) >= 0).astype(int)

# 시가 대비 종가 등락률 컬럼 추가
df['시가_대비_종가_등락률'] = ((df['종가'] - df['시가']) / df['시가']) * 100
df['시가_대비_저가_등락률'] = ((df['저가'] - df['시가']) / df['시가']) * 100
df['시가_대비_고가_등락률'] = ((df['고가'] - df['시가']) / df['시가']) * 100
df['저가_대비_종가_등락률'] = ((df['종가'] - df['저가']) / df['저가']) * 100
df['저가_대비_고가_등락률'] = ((df['고가'] - df['저가']) / df['저가']) * 100
df['고가_대비_종가_등락률'] = ((df['종가'] - df['고가']) / df['고가']) * 100
# 종가 - SMA [5, 20, 60, 120, 240] 값의 차이 컬럼 추가
for period in [5, 20, 60, 120, 240]:
    df[f'종가_minus_SMA_{period}'] = df['종가'] - df[f'SMA_{period}']

# 종가 - EMA [5, 20, 60, 120, 240] 값의 차이 컬럼 추가
for period in [5, 20, 60, 120, 240]:
    df[f'종가_minus_EMA_{period}'] = df['종가'] - df[f'EMA_{period}']



df['next_day_return'] = df['등락률'].shift(-1)

df1=df.dropna()  # 마지막 행 삭제

first_column_name = df1.columns[0]

#df1= df.iloc[300:, :]
X = df1.drop(['next_day_return'], axis=1)
y = df1['next_day_return']


# 시계열 데이터와 타겟 생성 함수
def generate_time_series_data(n_samples, n_steps):
    freq1, freq2, offsets1, offsets2 = np.random.rand(4, n_samples, 1)
    time = np.linspace(0, 1, n_steps)
    series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10))  # 웨이브 패턴 1
    series += 0.2 * np.sin((time - offsets2) * (freq2 * 20 + 20))  # 웨이브 패턴 2
    series += 0.1 * (np.random.rand(n_samples, n_steps) - 0.5)  # 잡음 추가
    return series[..., np.newaxis].astype(np.float32)

# 데이터셋 생성
n_steps = 50
series = generate_time_series_data(10000, n_steps + 1)
#X, y = series[:, :-1], series[:, -1]
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=0)

# LSTM 모델 구축
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=[None, 1]),
    LSTM(50),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=[MeanSquaredError()])



# # df.dropna(inplace=True)  # 마지막 행 삭제
# X_last = df.iloc[-1:].drop('next_day_return', axis=1)
        

# y_pred1 = model.predict(X_last)

# # first_column_name = df.columns[0]
# # X_last = df.iloc[-1:,:]

# # y_pred = model.predict(X_last)
# print("예측된 y 값:", y_pred1[0][0])
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model.evaluate(X_valid, y_valid, batch_size=128)
print("test loss, test acc:", results)

  super().__init__(**kwargs)


Evaluate on test data
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 6.4986 - mean_squared_error: 6.4986
test loss, test acc: [6.4986042976379395, 6.4986042976379395]


In [40]:
results

[6.4986042976379395, 6.4986042976379395]

In [35]:
df1

Unnamed: 0_level_0,시가,고가,저가,종가,거래량,등락률,EMA_short,EMA_long,MACD,MACD_Signal,...,종가_minus_SMA_20,종가_minus_SMA_60,종가_minus_SMA_120,종가_minus_SMA_240,종가_minus_EMA_5,종가_minus_EMA_20,종가_minus_EMA_60,종가_minus_EMA_120,종가_minus_EMA_240,next_day_return
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-08,87700,87800,86400,86800,2790989,-2.362205,89370.277490,89589.451318,-219.173828,483.965686,...,-4205.0,581.666667,-70.833333,-8042.500000,-1507.817474,-2916.064727,-1541.564006,-2498.380615,-8111.901674,-1.382488
2023-03-09,87300,87800,85300,85600,3315016,-1.382488,88790.234799,89293.936406,-503.701606,286.432227,...,-4940.0,-686.666667,-1209.166667,-9084.583333,-1805.211649,-3724.058562,-2651.676661,-3637.250357,-9234.624482,-2.686916
2023-03-10,83800,84400,83100,83300,3559824,-2.686916,87945.583292,88849.941116,-904.357825,48.274217,...,-6685.0,-3023.333333,-3436.666667,-11217.083333,-2736.807766,-5450.338699,-4789.326607,-5839.113988,-11438.901457,1.200480
2023-03-13,83200,84700,82400,84300,2860341,1.200480,87384.724324,88512.908441,-1128.184117,-187.017450,...,-5225.0,-2056.666667,-2379.166667,-10066.250000,-1157.871844,-4026.496918,-3665.086390,-4759.128633,-10352.271569,-3.795967
2023-03-14,82600,83000,81100,81100,3359381,-3.795967,86417.843659,87963.804112,-1545.960453,-458.806051,...,-7950.0,-5245.000000,-5505.000000,-13112.500000,-2905.247896,-6538.259117,-6640.001591,-7827.572787,-13439.804585,-2.466091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-21,165200,171400,162600,170000,9205769,8.626198,162666.328305,158488.232086,4178.096218,5034.645454,...,7515.0,22898.333333,32900.000000,47539.166667,6368.627257,9579.074825,20351.721799,30334.958483,42555.981392,-0.117647
2024-03-22,169000,173900,167400,169800,5400916,-0.117647,163763.816258,159326.140821,4437.675437,4915.251450,...,6650.0,22210.000000,32255.833333,47001.666667,4112.418171,8485.829604,19491.009609,29636.859996,42004.479472,-0.235571
2024-03-25,170500,174800,168500,169400,3434516,-0.235571,164630.921449,160072.352612,4558.568837,4843.914928,...,5850.0,21330.000000,31421.666667,46265.000000,2474.945448,7315.750594,18465.074868,28753.606111,41259.214082,4.250295
2024-03-26,172000,179500,171900,176600,6549669,4.250295,166472.318149,161296.622789,5175.695360,4910.271014,...,12310.0,27935.000000,38125.000000,53092.500000,6449.963632,13133.298157,24823.597003,35359.331630,48057.062928,2.604757


In [36]:
X

array([[[-0.11320735],
        [-0.08504713],
        [-0.06968293],
        ...,
        [ 0.17206527],
        [ 0.11090159],
        [ 0.09625275]],

       [[-0.61489815],
        [-0.551992  ],
        [-0.51827884],
        ...,
        [-0.40664008],
        [-0.37025896],
        [-0.31324098]],

       [[ 0.34823957],
        [ 0.18824752],
        [-0.07509039],
        ...,
        [ 0.5739155 ],
        [ 0.5521308 ],
        [ 0.36520612]],

       ...,

       [[ 0.26131982],
        [ 0.22128558],
        [ 0.24608314],
        ...,
        [ 0.64097416],
        [ 0.55524105],
        [ 0.4193573 ]],

       [[ 0.3767924 ],
        [ 0.30704266],
        [ 0.11688843],
        ...,
        [ 0.20439193],
        [ 0.3577781 ],
        [ 0.4445568 ]],

       [[-0.37744656],
        [-0.5311025 ],
        [-0.6202653 ],
        ...,
        [ 0.43759882],
        [ 0.25651425],
        [-0.092332  ]]], dtype=float32)