In [10]:
import numpy as np
import pandas as pd
import FinanceDataReader as fdr

import matplotlib as mpl
%matplotlib inline
mpl.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import os
from shutil import copyfile, move
from pathlib import Path

In [11]:
def removeOutput(finput):
    """Delete ``finput`` when it exists as a regular file.

    Nothing happens (and nothing is raised) when the path does not exist
    or refers to something that is not a regular file, e.g. a directory.

    Args:
        finput: Path of the file to delete.
    """
    target = Path(finput)
    if not target.is_file():
        return
    os.remove(finput)
        
def csv_initiator(market, ticker, head_date, tail_date):
    """Download price data for one ticker and save it as a CSV file.

    The data returned by ``fdr.DataReader`` is written to
    ``<cwd>/dataset/raw_data/<market>_<ticker>.csv``. The directory is
    created when missing and an existing file of the same name is replaced.

    Args:
        market: Exchange identifier; passed to ``fdr.DataReader`` as
            ``exchange`` and used as the file-name prefix.
        ticker: Ticker symbol; passed to ``fdr.DataReader`` and used in the
            file name.
        head_date: Second positional argument of ``fdr.DataReader``
            (presumably the first date of the range -- confirm against the
            FinanceDataReader documentation).
        tail_date: Third positional argument of ``fdr.DataReader``
            (presumably the last date of the range -- confirm likewise).

    Returns:
        The path of the CSV file that was written.
    """
    df = fdr.DataReader(ticker, head_date, tail_date, exchange=market)

    # Build the path with os.path.join so the separator matches the host OS
    # (on Windows this yields the same backslash-separated path as before).
    filedir = os.path.join(os.getcwd(), 'dataset', 'raw_data')
    filename = "{}_{}.csv".format(market, ticker)
    filepath = os.path.join(filedir, filename)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(filedir, exist_ok=True)
    removeOutput(filepath)
    df.to_csv(filepath)

    print("csv file saved as : {}".format(filepath))

    return filepath

In [12]:
def seq_seq_trend(csv_path, seq_len, trend_len, gap=0, seq_mode=None, trend_mode=None):
    """Label sliding windows of a daily price CSV and write the labels to a text file.

    For every window start ``i`` the closing price on the last day of the
    ``seq_len``-day window is compared with the closing price of the following
    row: the label is 1 when the price rose, otherwise 0. One line
    ``<file stem>--<i>,<label>`` is written per window to
    ``<cwd>/dataset/labeled_data/<csv stem>_label_seq<seq_len>_tseq<trend_len>.txt``.
    An existing file of the same name is overwritten.

    Args:
        csv_path: Path of a CSV file whose first column is the index and
            which contains a ``Close`` column. Both ``\\`` and ``/``
            separators are accepted.
        seq_len: Number of rows in each input window (must be >= 1).
        trend_len: Only printed and embedded in the output file name; it
            does not influence the labels.
        gap: Only printed; it does not influence the labels.
        seq_mode: Accepted but currently unused.
        trend_mode: Accepted but currently unused.

    Returns:
        The path of the label file that was written.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    print("Creating label . . .")
    print("type : Sequence to Sequence")
    print("sequence_length : {}, trend_sequence_length : {}, gap : {}".format(seq_len, trend_len, gap))

    window = int(seq_len)
    if window < 1:
        raise ValueError("seq_len must be at least 1, got {}".format(seq_len))

    # Load the daily price data. Rows without a closing price cannot be
    # labeled (and filling them with 0 would divide by zero below), so they
    # are dropped.
    df = pd.read_csv(csv_path, parse_dates=True, index_col=0)
    df = df.dropna(subset=["Close"])
    closes = df["Close"].tolist()

    # Directory and name of the label file,
    # e.g. KRX_005930_label_seq30_tseq10.txt
    # The base name is extracted by hand so that a Windows-style path is
    # also understood when running on another OS.
    base_name = str(csv_path).replace('\\', '/').rsplit('/', 1)[-1]
    stem = os.path.splitext(base_name)[0]
    filedir = os.path.join(os.getcwd(), 'dataset', 'labeled_data')
    filename = "{}_label_seq{}_tseq{}.txt".format(stem, seq_len, trend_len)
    filepath = os.path.join(filedir, filename)
    line_prefix = os.path.splitext(filename)[0]

    # Create the directory when missing; opening with 'w' below replaces any
    # previous file of the same name.
    os.makedirs(filedir, exist_ok=True)

    # Labeling: the file is opened once instead of once per window.
    # NOTE(review): the range stops one window short of the last complete
    # (seq_len + 1)-row window; kept as in the original -- confirm intent.
    with open(filepath, 'w') as the_file:
        for i in range(0, len(closes) - window - 1):
            # NOTE(review): int() truncates fractional prices, as the
            # original did -- confirm this is wanted for non-integer quotes.
            starting = int(closes[i + window - 1])  # close on the last day of the window
            endvalue = int(closes[i + window])      # close on the following day
            tmp_rtn = endvalue / starting - 1
            label = 1 if tmp_rtn > 0 else 0
            # One labeled window per line.
            the_file.write("{}--{},{}\n".format(line_prefix, i, label))

    print("Create label finished.")
    return filepath

In [13]:
# Run configuration: instrument, date range and labeling window sizes.
market = 'KRX'
ticker = '005930'
seq_len = 20
trend_seq_len = 10
head_date = '2000-01-01'
tail_date = '2022-01-01'
dimension = 60
use_volume = False

# Step 1: download the price history and store it as a CSV file.
data_csv_path = csv_initiator(
    market=market,
    ticker=ticker,
    head_date=head_date,
    tail_date=tail_date,
)

# Step 2: build the label file from the stored CSV.
label_set_path = seq_seq_trend(
    csv_path=data_csv_path,
    seq_len=seq_len,
    trend_len=trend_seq_len,
    gap=5,
)

csv file saved as : C:\Users\SeungHyuck\Documents\github_remote\AI-Trade\Trend_Labeling\dataset\raw_data\KRX_005930.csv
Creating label . . .
type : Sequence to Sequence
sequence_length : 20, trend_sequence_length : 10, gap : 5
Create label finished.
