In [1]:
import numpy as np
import pandas as pd
import FinanceDataReader as fdr
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [2]:
# Load price history for ticker '005930' starting from 2000 (the result is
# already indexed by 'Date') and discard rows containing missing values.
dataframe = fdr.DataReader('005930', '2000').dropna()
dataframe

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-04,6000,6110,5660,6110,1483967,0.148496
2000-01-05,5800,6060,5520,5580,1493604,-0.086743
2000-01-06,5750,5780,5580,5620,1087810,0.007168
2000-01-07,5560,5670,5360,5540,806195,-0.014235
2000-01-10,5600,5770,5580,5770,937615,0.041516
...,...,...,...,...,...,...
2021-11-29,71700,73000,71400,72300,16682559,0.000000
2021-11-30,73200,73900,70500,71300,30364841,-0.013831
2021-12-01,72000,74800,71600,74400,21954856,0.043478
2021-12-02,73900,75800,73800,75800,23652940,0.018817


In [3]:
def trend_separater(x, up_threshold=0.0016639, down_threshold=-0.001):
    """Classify a change ratio as an up-trend, a down-trend, or neutral.

    The default thresholds are asymmetric on purpose: they were picked so
    that the number of up and down labels comes out roughly equal.

    Args:
        x: Change ratio to classify.
        up_threshold: Values strictly greater than this are labelled up.
        down_threshold: Values strictly less than this are labelled down.

    Returns:
        1 for an up-trend, -1 for a down-trend, and None when ``x`` lies in
        the neutral band ``[down_threshold, up_threshold]`` or is NaN.
    """
    if x > up_threshold:
        return 1
    if x < down_threshold:
        return -1
    # Neutral band (NaN also lands here because both comparisons are False):
    # no label is assigned.
    return None

def updown(dataframe):
    """Attach a next-day trend label column ('UD_Trend') to ``dataframe``.

    The frame is modified in place and also returned. Each row receives the
    trend label computed from the *following* row's 'Change' value, because
    the goal is to predict the next day's trend. Rows left with a missing
    value afterwards are dropped.
    """
    same_day_trend = dataframe['Change'].map(trend_separater)
    dataframe['UD_Trend'] = same_day_trend
    # Pull the following row's label up by one row.
    dataframe['UD_Trend'] = same_day_trend.shift(-1)
    # Remove rows that contain missing values.
    dataframe.dropna(inplace=True)
    return dataframe

 

In [4]:
# Label a copy so that `dataframe` itself is not modified by updown().
labeled_df = updown(dataframe.copy())
total_count = labeled_df['UD_Trend'].count()
labeled_df['UD_Trend'].value_counts()

-1.0    2515
 1.0    2514
Name: UD_Trend, dtype: int64

In [5]:
# Work on a fresh frame whose index has been reset to the default integer
# index; the previous index is kept as a regular column.
target_df = labeled_df.reset_index()
target_df

Unnamed: 0,Date,Open,High,Low,Close,Volume,Change,UD_Trend
0,2000-01-04,6000,6110,5660,6110,1483967,0.148496,-1.0
1,2000-01-05,5800,6060,5520,5580,1493604,-0.086743,1.0
2,2000-01-06,5750,5780,5580,5620,1087810,0.007168,-1.0
3,2000-01-07,5560,5670,5360,5540,806195,-0.014235,1.0
4,2000-01-11,5820,6100,5770,5770,1194974,0.000000,-1.0
...,...,...,...,...,...,...,...,...
5024,2021-11-25,75100,75100,73600,73700,12559258,-0.014706,-1.0
5025,2021-11-29,71700,73000,71400,72300,16682559,0.000000,-1.0
5026,2021-11-30,73200,73900,70500,71300,30364841,-0.013831,1.0
5027,2021-12-01,72000,74800,71600,74400,21954856,0.043478,1.0


In [7]:
# Full data set: every column except the label and the date is a feature.
X_all = np.array(target_df.drop(['UD_Trend', 'Date'], axis=1))
# Labels as a 1-D vector. Selecting with a list (`[['UD_Trend']]`) yields an
# (n, 1) column vector, which makes scikit-learn emit a DataConversionWarning
# when fitting.
y_all = np.array(target_df['UD_Trend'])

# Training set: the first 4000 rows.
X_train = X_all[:4000]
y_train = y_all[:4000]

# Validation set: all remaining rows.
X_test = X_all[4000:]
y_test = y_all[4000:]

In [14]:

# Two-step pipeline: a StandardScaler followed by an SVC, both constructed
# with their default arguments.
pipeline_steps = [
    ("scaler", StandardScaler()),
    ("svc", SVC()),
]
svm_cla = Pipeline(pipeline_steps)


In [15]:
svm_cla.fit(X_train, y_train) # Fit the pipeline on the training set

  return f(**kwargs)


Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])

In [16]:
print("train_set score: ", svm_cla.score(X_train, y_train)) # Score on the training set
print("test_set score : ", svm_cla.score(X_test, y_test)) # Score on the validation set

train_set score:  0.5325
test_set score :  0.49854227405247814


In [21]:
# Rows labelled 4000 and later, with the model's prediction for each row
# appended as a 'predicted' column.
holdout_predictions = svm_cla.predict(X_all[4000:])
sample = target_df.loc[4000:].assign(predicted=holdout_predictions)
sample

Unnamed: 0,Date,Open,High,Low,Close,Volume,Change,UD_Trend,predicted
4000,2017-04-26,42700,42800,42520,42800,295896,0.002342,1.0,1.0
4001,2017-04-27,42700,44520,41960,43840,460645,0.024299,1.0,1.0
4002,2017-04-28,45780,45800,44520,44620,453714,0.017792,1.0,1.0
4003,2017-05-02,45500,45500,44760,44900,281366,0.006275,1.0,1.0
4004,2017-05-04,45700,45700,44860,45520,273802,0.013808,1.0,1.0
...,...,...,...,...,...,...,...,...,...
5024,2021-11-25,75100,75100,73600,73700,12559258,-0.014706,-1.0,1.0
5025,2021-11-29,71700,73000,71400,72300,16682559,0.000000,-1.0,1.0
5026,2021-11-30,73200,73900,70500,71300,30364841,-0.013831,1.0,1.0
5027,2021-12-01,72000,74800,71600,74400,21954856,0.043478,1.0,1.0


In [18]:
def create_trade_book(sample):
    """Build a new trade book from ``sample``.

    The book holds the 'Close', 'UD_Trend', 'predicted' and 'Date' columns
    (in that order) plus a 'trade' column initialised to the empty string.
    ``sample`` itself is not modified.
    """
    book_columns = ['Close', 'UD_Trend', 'predicted', 'Date']
    return sample[book_columns].assign(trade='')

def returns(book):
    """Compute per-trade and accumulated returns for a trade book.

    A position is entered on a row whose 'trade' is 'buy' while the previous
    row's is '' and is exited on a row whose 'trade' is '' while the previous
    row's is 'buy'; both use that row's 'Close'. The exit row's 'return' is
    set to ``(sell - buy) / buy + 1``; every other row keeps 1.0. A position
    still open on the last row is not closed and contributes no return.

    Args:
        book: DataFrame with 'Close', 'Date' (datetime-like values) and
            'trade' columns. It is modified in place: the 'return' and
            'acc return' columns are (re)written.

    Returns:
        The accumulated return (product of all per-row returns) rounded to
        four decimals; 1.0 for an empty book.
    """
    # Float from the start so per-trade returns can be stored without
    # changing the column's dtype.
    book['return'] = 1.0

    # Previous row's trade state, computed once instead of shifting the whole
    # frame on every iteration. The first row has no predecessor and is
    # treated as following a flat ('') state, so a position already open on
    # the first row gets an entry price instead of dividing by zero on exit.
    prev_trade = book['trade'].shift(1, fill_value='')

    buy = 0.0
    for i in book.index:
        trade = book.loc[i, 'trade']
        before = prev_trade.loc[i]
        if trade == 'buy' and before == '':
            buy = book.loc[i, 'Close']
            print('매수일 : ', book.loc[i, 'Date'].strftime('%Y-%m-%d'), '매수가격 : ', buy)
        elif trade == '' and before == 'buy':
            sell = book.loc[i, 'Close']
            rtn = (sell - buy) / buy + 1
            book.loc[i, 'return'] = rtn
            print('매도일 : ', book.loc[i, 'Date'].strftime('%Y-%m-%d'), '매도가격 : ', sell, ' | return:', round(rtn, 4))

    # Running product of the per-row returns.
    book['acc return'] = book['return'].cumprod()
    # Defined even when the book is empty (the original raised NameError for
    # books with fewer than two rows).
    acc_rtn = float(book['acc return'].iloc[-1]) if len(book) else 1.0

    print('Accumulated return :', round(acc_rtn, 4))
    return round(acc_rtn, 4)

def tradings(sample, book):
    """Fill the 'trade' column of ``book`` from its 'predicted' column.

    Rule: buy when the next day is predicted to rise, sell when it is
    predicted to fall. Concretely, walking the rows after the first one:

    * if the previous row is 'buy' (holding): a prediction of -1 sets the
      row to '' (exit); any other prediction keeps it 'buy';
    * otherwise (flat): a prediction of 1 sets the row to 'buy'; any other
      prediction leaves the row's existing value untouched.

    The first row is never modified.

    Args:
        sample: Frame whose index supplies the row labels to walk. It is
            assumed to have the same index, in the same order, as ``book``
            (``book`` is built from ``sample`` by ``create_trade_book``).
        book: Trade book with 'predicted' and 'trade' columns; modified in
            place.

    Returns:
        The same ``book`` object.
    """
    labels = sample.index
    if len(labels) == 0:
        return book

    # Carry the previous row's trade state forward instead of re-shifting the
    # entire frame on every iteration (which made the loop quadratic).
    prev_trade = book.loc[labels[0], 'trade']
    for i in labels[1:]:
        predicted = book.loc[i, 'predicted']
        if prev_trade == 'buy':
            book.loc[i, 'trade'] = '' if predicted == -1 else 'buy'
        elif predicted == 1:
            book.loc[i, 'trade'] = 'buy'
        # Read back rather than assume: a flat row may keep a pre-existing value.
        prev_trade = book.loc[i, 'trade']
    return book

In [13]:
book = create_trade_book(sample) # Build the trade book (ledger) from the sample
tradings(sample, book) # Run the trading rule; fills book's 'trade' column in place
book.tail(100)

Unnamed: 0,Close,UD_Trend,predicted,Date,trade
5000,70200,1.0,1.0,2021-10-21,
5001,70400,-1.0,1.0,2021-10-22,buy
5002,70200,1.0,1.0,2021-10-25,buy
5003,71100,-1.0,1.0,2021-10-26,buy
5004,70100,1.0,1.0,2021-10-27,buy
5005,70700,-1.0,1.0,2021-10-28,buy
5006,69900,1.0,1.0,2021-11-01,buy
5007,71500,-1.0,1.0,2021-11-02,buy
5008,70400,1.0,1.0,2021-11-03,buy
5009,70600,-1.0,1.0,2021-11-04,buy


In [39]:
returns(book) # Compute the returns

매수일 :  2021-10-22 매수가격 :  70400
Accumulated return : 1.0


1.0