In [6]:
import numpy as np
import pandas as pd
import FinanceDataReader as fdr

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import glob
import argparse
import os

from shutil import copyfile, move
from pathlib import Path
from mpl_finance import candlestick2_ochl, volume_overlay

In [7]:
def isnan(value):
    """Return True when ``value`` converts to a float NaN, otherwise False.

    Args:
        value: Anything accepted by ``float()`` (numbers, numeric strings such
            as ``"nan"``), or an arbitrary object.

    Returns:
        bool: ``True`` only if ``float(value)`` is NaN. Values that cannot be
        converted to a float (wrong type, unparsable text, integers too large
        for a float) yield ``False``.
    """
    # Imported locally because the notebook's import cell does not provide it.
    import math

    try:
        return math.isnan(float(value))
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a NaN"; anything else
        # (KeyboardInterrupt, SystemExit, unrelated bugs) must propagate.
        return False

def removeOutput(finput):
    """Delete ``finput`` when it exists as a regular file; otherwise do nothing.

    Args:
        finput: Path of the file to delete.
    """
    target = Path(finput)
    if not target.is_file():
        # Missing paths and non-file entries (e.g. directories) are left alone.
        return
    os.remove(finput)
    
def csv_initiator(ticker, head_date, tail_date):
    """Fetch data with ``fdr.DataReader`` and save it as ``<ticker>.csv``.

    Args:
        ticker: Symbol passed to ``fdr.DataReader``; also used as the CSV stem.
        head_date: Second argument forwarded to ``fdr.DataReader``.
        tail_date: Third argument forwarded to ``fdr.DataReader``.

    Returns:
        str: Name of the CSV file that was written (relative to the current
        working directory).
    """
    prices = fdr.DataReader(ticker, head_date, tail_date)

    csv_name = ticker + '.csv'
    # Clear any file left over from a previous run before writing the new one.
    removeOutput(csv_name)
    prices.to_csv(csv_name)

    print("csv file saved as : {}".format(csv_name))
    return csv_name

In [8]:
def createLabel(fname, seq_len):
    """Write one up/down label per full ``seq_len``-row window of a price CSV.

    Window ``i`` covers rows ``i .. i + seq_len - 1``. Its label is ``1`` when
    the Close of the window's last row is greater than that row's Open, else
    ``0``. Labels are written to ``<fname minus its last 4 chars>_label_<seq_len>.txt``,
    one line per window in the form ``<label file path>--<i>,<label>``.

    Args:
        fname: Path of the CSV to read. It must contain ``Open`` and ``Close``
            columns; the first column is used as the index.
        seq_len: Window length in rows, as an int or a numeric string.

    Raises:
        ValueError: If ``seq_len`` is not a positive integer.

    Notes:
        * Missing values are replaced by 0 before labelling.
        * The label file is rewritten from scratch on every call; it is left
          empty when the CSV has fewer than ``seq_len`` rows.
        * NOTE(review): each line is keyed by the label file's own path rather
          than by an image name -- confirm this is what downstream code
          expects before relying on it.
    """
    print("Creating label . . .")

    window = int(seq_len)
    if window < 1:
        raise ValueError(
            "seq_len must be a positive integer, got {!r}".format(seq_len))

    df = pd.read_csv(fname, parse_dates=True, index_col=0)
    # fillna returns a new frame; without the assignment it has no effect.
    df = df.fillna(0)

    outputname = "{}_label_{}.txt".format(fname[0:-4], seq_len)

    # The last row of window i is row i + window - 1, so the rows from
    # window - 1 onwards are exactly the "last rows" of every full window,
    # in window order. Comparing Close with Open directly gives the same
    # answer as (Close / Open - 1) > 0 for non-negative prices and cannot
    # divide by zero when a missing Open was filled with 0.
    last_rows = df.iloc[window - 1:]
    labels = (last_rows["Close"] > last_rows["Open"]).astype(int).tolist()

    # Open once in write mode: truncates any previous output and avoids
    # reopening the file for every row.
    with open(outputname, 'w') as the_file:
        for i, label in enumerate(labels):
            the_file.write("{}--{},{}".format(outputname, i, label))
            the_file.write("\n")
    print("Create label finished.")


In [9]:
def _hide_axes(ax):
    """Turn off the grid, tick labels and axis frame of ``ax``."""
    ax.grid(False)
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.axis('off')


def ohlc2cs(fname, seq_len, dataset_type, dimension, use_volume):
    """Render sliding ``seq_len``-row windows of a price CSV as candlestick PNGs.

    Images are saved under
    ``dataset/<seq_len>_<dimension>/<symbol>/<dataset_type>/`` in the current
    working directory, named ``<fname minus its last 4 chars>-<i>.png`` where
    ``i`` is the index of the window's first row. ``symbol`` is the part of
    ``fname`` before its first ``.``.

    Args:
        fname: CSV path; must contain ``Date`` (as index column), ``Open``,
            ``Close``, ``High``, ``Low`` and, when ``use_volume`` is true,
            ``Volume``.
        seq_len: Window length in rows, as an int or a numeric string.
        dataset_type: Name of the innermost output directory.
        dimension: Figure side length; the figure is created as
            ``dimension / 96`` inches at 96 dpi.
        use_volume: When true, overlay volume bars on a twin axis.

    Notes:
        * Missing values are replaced by 0 before plotting.
        * ``plt.style.use('dark_background')`` changes the global matplotlib
          style and is not reverted afterwards.
        * NOTE(review): the loop stops at ``len(df) - seq_len``, so the last
          full window (the one starting at that index) is not rendered, while
          ``createLabel`` does write a label for it. Confirm whether that
          one-window difference is intentional.
    """
    print("Converting ohlc to candlestick")
    symbol = fname.split('.')[0]
    print(symbol)
    path = "{}".format(os.getcwd())
    print(path)

    # Build the target directory once; exist_ok removes the check-then-create race.
    out_dir = "{}/dataset/{}_{}/{}/{}".format(path, seq_len, dimension, symbol, dataset_type)
    os.makedirs(out_dir, exist_ok=True)

    df = pd.read_csv(fname, parse_dates=True, index_col=0)
    # fillna returns a new frame; without the assignment it has no effect.
    df = df.fillna(0)

    plt.style.use('dark_background')
    df.reset_index(inplace=True)
    df['Date'] = df['Date'].map(mdates.date2num)

    # Loop invariants.
    window = int(seq_len)
    my_dpi = 96
    side_inches = dimension / my_dpi

    for i in range(0, len(df) - window):
        c = df.iloc[i:i + window, :]
        if len(c) != window:
            # Only complete windows are rendered.
            continue

        fig = plt.figure(figsize=(side_inches, side_inches), dpi=my_dpi)
        ax1 = fig.add_subplot(1, 1, 1)
        candlestick2_ochl(ax1, c['Open'], c['Close'], c['High'], c['Low'],
                          width=1, colorup='#77d879', colordown='#db3f3f')
        _hide_axes(ax1)

        if use_volume:
            ax2 = ax1.twinx()
            bc = volume_overlay(ax2, c['Open'], c['Close'], c['Volume'],
                                colorup='#77d879', colordown='#db3f3f',
                                alpha=0.5, width=1)
            # NOTE(review): volume_overlay presumably attaches the collection
            # to ax2 already; confirm whether this second add is needed.
            ax2.add_collection(bc)
            _hide_axes(ax2)

        pngfile = 'dataset/{}_{}/{}/{}/{}-{}.png'.format(
            seq_len, dimension, symbol, dataset_type, fname[:-4], i)
        fig.savefig(pngfile, pad_inches=0, transparent=False)
        # Close every figure explicitly so the loop does not accumulate them.
        plt.close(fig)
    print("Converting olhc to candlestick finished")
                
                

In [10]:
# Run configuration: symbol, window length (rows per image) and date range.
ticker = '005930'
seq_len = '20'
head_date = '2020-01-01'
tail_date = '2022-03-01'

# Pipeline: fdr -> <ticker>.csv, then CSV -> label text file, then CSV -> candlestick PNGs.

# csv_initiator loads the data through FinanceDataReader and saves it as a CSV file.
data_csv = csv_initiator(ticker, head_date=head_date, tail_date=tail_date)
createLabel(data_csv, seq_len=seq_len)
ohlc2cs(data_csv, seq_len, dataset_type='stock', dimension=20, use_volume=False)

csv file saved as : 005930.csv
Creating label . . .
Create label finished.
Converting ohlc to candlestick
005930
C:\Users\SeungHyuck\Documents\github_remote\AI-Trade\DL Quant Strategy
Converting olhc to candlestick finished
