In [3]:
import talib as ta
import pandas
import core
import FinanceDataReader as fdr
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname('labeler'))))
from order_creator import gatherer

In [6]:
def init(code='005930', start='2017-01-01', end='2017-03-31'):
    '''Load the sample price frame that the labelling functions operate on.

    Parameters
    ----------
    code : str
        Ticker code forwarded to ``Gatherer.get_stock`` (default '005930').
    start, end : str
        Date-range bounds forwarded to ``Gatherer.get_stock``.

    Returns
    -------
    The first element of the pair returned by ``Gatherer.get_stock``; the
    second element is discarded.
    '''
    gath = gatherer.Gatherer()
    df, _ = gath.get_stock(code, start, end)
    return df

In [7]:
def candle_type(df):
    '''Label every candle as bullish or bearish.

    Adds (or overwrites) the ``candle_type`` column on ``df`` in place.

    Categories
    ----------
    'red'  : close > open (bullish candle)
    'blue' : close < open (bearish candle)
    False  : neither comparison holds (e.g. close == open)

    Returns
    -------
    The same ``df`` object, or 0 when ``core.is_df(df)`` is falsy.
    '''
    if not core.is_df(df):
        return 0
    # The column mixes False with string labels, so make it object dtype up
    # front instead of relying on pandas silently upcasting a bool column.
    df['candle_type'] = False
    df['candle_type'] = df['candle_type'].astype(object)
    # Boolean masks address rows positionally, so repeated index labels
    # cannot cause neighbouring rows to be relabelled.
    df.loc[df['close'] > df['open'], 'candle_type'] = 'red'
    df.loc[df['close'] < df['open'], 'candle_type'] = 'blue'
    return df

In [8]:
def candle_shape(df):
    '''Label full-bodied (shadowless) candles.

    Adds (or overwrites) the ``candle_shape`` column on ``df`` in place.

    Categories
    ----------
    'full_red'  : open == low and close == high, with close > open
    'full_blue' : open == high and close == low, with close < open
    False       : anything else, including a flat candle where
                  open == high == low == close (it has no body, so it is
                  neither a full red nor a full blue candle)

    Returns
    -------
    The same ``df`` object, or 0 when ``core.is_df(df)`` is falsy.
    '''
    if not core.is_df(df):
        return 0
    # Object dtype from the start: the column mixes False with strings.
    df['candle_shape'] = False
    df['candle_shape'] = df['candle_shape'].astype(object)

    rising = df['close'] > df['open']
    falling = df['close'] < df['open']
    # Requiring a non-zero body keeps flat candles out of both categories;
    # without it a flat candle satisfies both equality tests at once.
    full_red = rising & (df['open'] == df['low']) & (df['close'] == df['high'])
    full_blue = falling & (df['open'] == df['high']) & (df['close'] == df['low'])

    df.loc[full_red, 'candle_shape'] = 'full_red'
    df.loc[full_blue, 'candle_shape'] = 'full_blue'
    return df

In [9]:
def three_red(df, num=3):
    '''Label "three red candles" (three white soldiers) patterns.

    A window of ``num`` consecutive candles matches when, for every adjacent
    pair inside it, both candles are red (close > open) and the later candle
    has a strictly higher low and a strictly higher high than the earlier
    one. The LAST row of each matching window is labelled.

    Adds (or overwrites) the ``three_red`` column on ``df`` in place.

    Parameters
    ----------
    num : int
        Number of consecutive candles that make up the pattern. With
        ``num == 1`` there are no pairs to test, so every row is labelled.

    Categories
    ----------
    1 : last candle of a matching window;  False : everything else.

    Returns
    -------
    The same ``df`` object, or 0 when validation through ``core`` fails.
    '''
    if not (core.is_df(df) & core.is_pos(num)):
        return 0

    # Object dtype up front: the column mixes False with the integer 1.
    df['three_red'] = False
    df['three_red'] = df['three_red'].astype(object)

    opens = df['open'].to_numpy()
    highs = df['high'].to_numpy()
    lows = df['low'].to_numpy()
    closes = df['close'].to_numpy()

    # pair_ok[k] describes the adjacent pair (k, k + 1). Computing it once
    # replaces the per-window reset_index() on a slice of the caller's frame.
    pair_ok = (
        (lows[:-1] < lows[1:])
        & (highs[:-1] < highs[1:])
        & (closes[:-1] > opens[:-1])
        & (closes[1:] > opens[1:])
    )

    label_col = df.columns.get_loc('three_red')
    for last in range(num - 1, len(df)):
        # The window ending at `last` covers pairs (last-num+1 .. last-1).
        if pair_ok[last - num + 1:last].all():
            # Positional write: safe even when index labels repeat.
            df.iloc[last, label_col] = 1
    return df

In [10]:
def three_blue(df, num=3):
    '''Label "three blue candles" (three black crows) patterns.

    A window of ``num`` consecutive candles matches when, for every adjacent
    pair inside it, both candles are blue (close < open) and the later candle
    has a strictly lower low and a strictly lower high than the earlier one.
    The LAST row of each matching window is labelled.

    Adds (or overwrites) the ``three_blue`` column on ``df`` in place.

    Parameters
    ----------
    num : int
        Number of consecutive candles that make up the pattern. With
        ``num == 1`` there are no pairs to test, so every row is labelled.

    Categories
    ----------
    1 : last candle of a matching window;  False : everything else.

    Returns
    -------
    The same ``df`` object, or 0 when validation through ``core`` fails.
    '''
    if not (core.is_df(df) & core.is_pos(num)):
        return 0

    # Object dtype up front: the column mixes False with the integer 1.
    df['three_blue'] = False
    df['three_blue'] = df['three_blue'].astype(object)

    opens = df['open'].to_numpy()
    highs = df['high'].to_numpy()
    lows = df['low'].to_numpy()
    closes = df['close'].to_numpy()

    # pair_ok[k] describes the adjacent pair (k, k + 1). Computing it once
    # replaces the per-window reset_index() on a slice of the caller's frame.
    pair_ok = (
        (lows[:-1] > lows[1:])
        & (highs[:-1] > highs[1:])
        & (closes[:-1] < opens[:-1])
        & (closes[1:] < opens[1:])
    )

    label_col = df.columns.get_loc('three_blue')
    for last in range(num - 1, len(df)):
        # The window ending at `last` covers pairs (last-num+1 .. last-1).
        if pair_ok[last - num + 1:last].all():
            # Positional write: safe even when index labels repeat.
            df.iloc[last, label_col] = 1
    return df

In [11]:
def n_gap(df, num=0):
    '''Label gap-up / gap-down candles relative to the previous close.

    Adds (or overwrites) the column ``gap_{num}%`` on ``df`` in place.

    Parameters
    ----------
    num : int or float, >= 0
        Minimum gap size in percent: the current open must be at least
        ``num`` percent above (gap up) or below (gap down) the previous close.

    Categories
    ----------
    'gap_up'   : previous and current candles are both red (close > open)
                 and open >= previous close * (1 + num/100)
    'gap_down' : previous and current candles are both blue (close < open)
                 and open <= previous close * (1 - num/100)
    False      : everything else (always the first row, which has no
                 previous candle)

    Returns
    -------
    The same ``df`` object, or 0 when validation fails.
    '''
    # Check the type before comparing so a non-numeric `num` is rejected with
    # the usual 0 instead of raising TypeError from `num >= 0`.
    is_number = core.is_integer(num) or core.is_float(num)
    if not (core.is_df(df) and is_number and num >= 0):
        return 0

    c_name = f'gap_{num}%'
    # Object dtype up front: the column mixes False with string labels.
    df[c_name] = False
    df[c_name] = df[c_name].astype(object)

    # shift(1) lines each row up with its predecessor, which removes the need
    # to reset the index and restore a hard-coded 'Date' index afterwards.
    prev_open = df['open'].shift(1)
    prev_close = df['close'].shift(1)

    both_red = (prev_close > prev_open) & (df['close'] > df['open'])
    both_blue = (prev_close < prev_open) & (df['close'] < df['open'])
    gap_up = both_red & (df['open'] >= prev_close * (1 + num / 100))
    gap_down = both_blue & (df['open'] <= prev_close * (1 - num / 100))

    # The two masks cannot overlap (a candle is not red and blue at once).
    df.loc[gap_up.to_numpy(), c_name] = 'gap_up'
    df.loc[gap_down.to_numpy(), c_name] = 'gap_down'
    return df

In [12]:
def roc(df, period=12, target='close'):
    '''Label the sign of the rate of change (ROC) of one or more columns.

    For every column in ``target`` this adds (or overwrites) the column
    ``roc_{period}({column})`` on ``df`` in place.

    Parameters
    ----------
    period : int
        Look-back period handed to ``ta.ROC`` as ``timeperiod``.
    target : str or iterable of str
        Column name(s) whose rate of change is labelled.

    Categories
    ----------
    'plus'  : ROC > 0
    'minus' : ROC < 0
    False   : ROC is zero or undefined (NaN warm-up rows)

    Returns
    -------
    The same ``df`` object, or 0 when validation through ``core`` fails.
    '''
    if not (core.is_df(df) & core.is_pos(period)
            & core.is_dflen(df, period) & core.is_column(df, target)):
        return 0

    if isinstance(target, str):
        target = [target]
    for tar in target:
        c_name = f'roc_{period}({tar})'
        # Object dtype up front: the column mixes False with string labels.
        df[c_name] = False
        df[c_name] = df[c_name].astype(object)

        # Use the requested column and period; the labels are named after
        # them, so computing on 'close' with a fixed 14 mislabels the data.
        rate = ta.ROC(df[tar], timeperiod=period)
        # Boolean masks address rows positionally, so no index reset (and no
        # assumption that the index is called 'Date') is needed.
        df.loc[rate < 0, c_name] = 'minus'
        df.loc[rate > 0, c_name] = 'plus'
    return df

In [13]:
def sma_cross(df, short=5, long=20, target='close'):
    '''Label golden / dead crosses of two simple moving averages.

    For every column in ``target`` this adds (or overwrites) the column
    ``ma_cross_{short}_{long}({column})`` on ``df`` in place.

    Parameters
    ----------
    short, long : int
        Periods of the short-term and long-term averages (``ta.MA``).
    target : str or iterable of str
        Column name(s) the averages are computed on.

    Categories
    ----------
    'golden_cross' : rows selected by ``core.cross_up(short_ma, long_ma)``
    'dead_cross'   : rows selected by ``core.cross_down(short_ma, long_ma)``
    False          : everything else

    Returns
    -------
    The same ``df`` object, or 0 when validation through ``core`` fails.
    '''
    if not (core.is_df(df) & core.is_pos(short) & core.is_pos(long)
            & core.is_dflen(df, [short, long]) & core.is_column(df, target)):
        return 0

    if isinstance(target, str):
        target = [target]

    # Work on a positional 0..n-1 index (as the cross helpers are used here),
    # then restore the caller's own index whatever it is called, even if a
    # step below raises.
    original_index = df.index
    df.reset_index(drop=True, inplace=True)
    try:
        for tar in target:
            c_name = f'ma_cross_{short}_{long}({tar})'
            # Object dtype up front: the column mixes False with strings.
            df[c_name] = False
            df[c_name] = df[c_name].astype(object)

            short_ma = ta.MA(df[tar], timeperiod=short)
            long_ma = ta.MA(df[tar], timeperiod=long)

            df.loc[core.cross_up(short_ma, long_ma), c_name] = 'golden_cross'
            df.loc[core.cross_down(short_ma, long_ma), c_name] = 'dead_cross'
    finally:
        df.index = original_index
    return df

In [14]:
def dema_cross(df, short=5, long=20, target='close'):
    '''Label golden / dead crosses of two double exponential moving averages.

    For every column in ``target`` this adds (or overwrites) the column
    ``dema_cross_{short}_{long}({column})`` on ``df`` in place.

    Parameters
    ----------
    short, long : int
        Periods of the short-term and long-term averages (``ta.DEMA``).
    target : str or iterable of str
        Column name(s) the averages are computed on.

    Categories
    ----------
    'golden_cross' : rows selected by ``core.cross_up(short_ma, long_ma)``
    'dead_cross'   : rows selected by ``core.cross_down(short_ma, long_ma)``
    False          : everything else

    Returns
    -------
    The same ``df`` object, or 0 when validation through ``core`` fails.
    '''
    if not (core.is_df(df) & core.is_pos(short) & core.is_pos(long)
            & core.is_dflen(df, [short, long]) & core.is_column(df, target)):
        return 0

    if isinstance(target, str):
        target = [target]

    # Work on a positional 0..n-1 index (as the cross helpers are used here),
    # then restore the caller's own index whatever it is called, even if a
    # step below raises.
    original_index = df.index
    df.reset_index(drop=True, inplace=True)
    try:
        for tar in target:
            c_name = f'dema_cross_{short}_{long}({tar})'
            # Object dtype up front: the column mixes False with strings.
            df[c_name] = False
            df[c_name] = df[c_name].astype(object)

            short_ma = ta.DEMA(df[tar], timeperiod=short)
            long_ma = ta.DEMA(df[tar], timeperiod=long)

            df.loc[core.cross_up(short_ma, long_ma), c_name] = 'golden_cross'
            df.loc[core.cross_down(short_ma, long_ma), c_name] = 'dead_cross'
    finally:
        df.index = original_index
    return df

In [15]:
def vwma_cross(df, short=5, long=20, target='close'):
    '''Label golden / dead crosses of two volume-weighted moving averages.

    The volume-weighted average of a column over a period is computed as
    ``MA(column * volume, period) / MA(volume, period)``.

    For every column in ``target`` this adds (or overwrites) the column
    ``vwma_cross_{short}_{long}({column})`` on ``df`` in place.

    Parameters
    ----------
    short, long : int
        Periods of the short-term and long-term averages.
    target : str or iterable of str
        Column name(s) the averages are computed on; ``df`` must also hold
        a ``volume`` column.

    Categories
    ----------
    'golden_cross' : rows selected by ``core.cross_up(short, long)``
    'dead_cross'   : rows selected by ``core.cross_down(short, long)``
    False          : everything else

    Returns
    -------
    The same ``df`` object, or 0 when validation through ``core`` fails.
    '''
    if not (core.is_df(df) & core.is_pos(short) & core.is_pos(long)
            & core.is_dflen(df, [short, long]) & core.is_column(df, target)):
        return 0

    if isinstance(target, str):
        target = [target]

    # Work on a positional 0..n-1 index (as the cross helpers are used here),
    # then restore the caller's own index whatever it is called, even if a
    # step below raises.
    original_index = df.index
    df.reset_index(drop=True, inplace=True)
    try:
        # The volume averages do not depend on the target column.
        short_vol = ta.MA(df['volume'], timeperiod=short)
        long_vol = ta.MA(df['volume'], timeperiod=long)
        for tar in target:
            c_name = f'vwma_cross_{short}_{long}({tar})'
            # Object dtype up front: the column mixes False with strings.
            df[c_name] = False
            df[c_name] = df[c_name].astype(object)

            pv = df[tar] * df['volume']
            short_vwma = ta.MA(pv, timeperiod=short) / short_vol
            long_vwma = ta.MA(pv, timeperiod=long) / long_vol

            df.loc[core.cross_up(short_vwma, long_vwma), c_name] = 'golden_cross'
            df.loc[core.cross_down(short_vwma, long_vwma), c_name] = 'dead_cross'
    finally:
        df.index = original_index
    return df

In [16]:
def macd(df, short=12, long=26, target='close'):
    '''Label the sign of the MACD line.

    The MACD line is ``EMA(column, short) - EMA(column, long)``. For every
    column in ``target`` this adds (or overwrites) the column
    ``macd_{short}_{long}({column})`` on ``df`` in place.

    Parameters
    ----------
    short, long : int
        Periods of the short-term and long-term exponential averages.
    target : str or iterable of str
        Column name(s) the indicator is computed on.

    Categories
    ----------
    'plus'  : MACD > 0
    'minus' : MACD < 0
    False   : MACD is zero or undefined (NaN warm-up rows)

    Returns
    -------
    The same ``df`` object, or 0 when validation through ``core`` fails.

    Notes
    -----
    TODO (from the original author): compute MACD over the full history
    since listing, label that, and then attach only the rows of the
    requested period to the input frame.
    '''
    # Validate BOTH periods; `short` used to be checked twice and `long`
    # not at all.
    if not (core.is_df(df) & core.is_pos(short) & core.is_pos(long)
            & core.is_dflen(df, [short, long]) & core.is_column(df, target)):
        return 0

    if isinstance(target, str):
        target = [target]
    for tar in target:
        col_name = f'macd_{short}_{long}({tar})'
        # Object dtype up front: the column mixes False with string labels.
        df[col_name] = False
        df[col_name] = df[col_name].astype(object)

        macd_line = ta.EMA(df[tar], short) - ta.EMA(df[tar], long)
        # Boolean masks address rows positionally, so repeated index labels
        # cannot cause neighbouring rows to be relabelled.
        df.loc[macd_line > 0, col_name] = 'plus'
        df.loc[macd_line < 0, col_name] = 'minus'
    return df

In [17]:
def bbands(df, period=20, multid=2, target='close'):
    '''Label where a price sits relative to its Bollinger Bands.

    For every column in ``target`` this adds (or overwrites) the column
    ``bb_{period}_{multid}({column})`` on ``df`` in place.

    Parameters
    ----------
    period : int
        Window handed to ``ta.BBANDS`` as ``timeperiod``.
    multid : int or float, >= 0
        Band width multiplier, handed to ``ta.BBANDS`` as both ``nbdevup``
        and ``nbdevdn``.
    target : str or iterable of str
        Column name(s) the bands are computed on.

    Categories
    ----------
    'over_up'             : price >= upper band
    'between_center_up'   : middle band <= price < upper band
    'between_center_down' : lower band <= price < middle band
    'under_down'          : price < lower band
    False                 : bands undefined (NaN warm-up rows)

    Returns
    -------
    The same ``df`` object, or 0 when validation fails.
    '''
    # Check the type before comparing so a non-numeric `multid` is rejected
    # with the usual 0 instead of raising TypeError from `multid >= 0`.
    is_number = core.is_integer(multid) or core.is_float(multid)
    if not (core.is_df(df) and core.is_pos(period) and is_number
            and multid >= 0 and core.is_column(df, target)):
        return 0

    if isinstance(target, str):
        target = [target]
    for tar in target:
        col_name = f'bb_{period}_{multid}({tar})'
        # Object dtype up front: the column mixes False with string labels.
        df[col_name] = False
        df[col_name] = df[col_name].astype(object)

        upper, middle, lower = ta.BBANDS(
            df[tar], timeperiod=period, nbdevup=multid, nbdevdn=multid)
        price = df[tar]

        # The four masks are mutually exclusive; comparisons against NaN are
        # False, so rows without bands keep the False default.
        df.loc[price >= upper, col_name] = 'over_up'
        df.loc[(price < upper) & (price >= middle), col_name] = 'between_center_up'
        df.loc[(price < middle) & (price >= lower), col_name] = 'between_center_down'
        df.loc[price < lower, col_name] = 'under_down'
    return df

In [18]:
def macd_cross(df, short=12, long=26, signal=9, target='close'):
    '''Label golden / dead crosses between the MACD line and its signal line.

    MACD is ``EMA(column, short) - EMA(column, long)`` and the signal line is
    ``EMA(MACD, signal)``. For every column in ``target`` this adds (or
    overwrites) the column ``macd_cross_{short}_{long}_{signal}({column})``
    on ``df`` in place.

    Parameters
    ----------
    short, long : int
        Periods of the short-term and long-term exponential averages.
    signal : int
        Period of the exponential average applied to the MACD line.
    target : str or iterable of str
        Column name(s) the indicator is computed on.

    Categories
    ----------
    'golden_cross' : rows selected by ``core.cross_up(macd, signal_line)``
    'dead_cross'   : rows selected by ``core.cross_down(macd, signal_line)``
    False          : everything else

    Returns
    -------
    The same ``df`` object, or 0 when validation through ``core`` fails.
    '''
    if not (core.is_df(df) & core.is_pos(short) & core.is_pos(long)
            & core.is_pos(signal) & core.is_dflen(df, [short, long, signal])
            & core.is_column(df, target)):
        return 0

    if isinstance(target, str):
        target = [target]

    # Work on a positional 0..n-1 index (as the cross helpers are used here),
    # then restore the caller's own index whatever it is called, even if a
    # step below raises.
    original_index = df.index
    df.reset_index(drop=True, inplace=True)
    try:
        for tar in target:
            c_name = f'macd_cross_{short}_{long}_{signal}({tar})'
            # Object dtype up front: the column mixes False with strings.
            df[c_name] = False
            df[c_name] = df[c_name].astype(object)

            macd_line = ta.EMA(df[tar], short) - ta.EMA(df[tar], long)
            signal_line = ta.EMA(macd_line, signal)

            df.loc[core.cross_up(macd_line, signal_line), c_name] = 'golden_cross'
            df.loc[core.cross_down(macd_line, signal_line), c_name] = 'dead_cross'
    finally:
        df.index = original_index
    return df

In [19]:
def stochf(df, fastk_period=5, fastd_period=3):
    '''Label golden / dead crosses of the fast stochastic oscillator.

    Adds (or overwrites) the column ``stochf_{fastk_period}_{fastd_period}``
    on ``df`` in place. The %K and %D lines come from ``ta.STOCHF`` applied
    to the ``high``, ``low`` and ``close`` columns.

    Parameters
    ----------
    fastk_period : int
        Period of the %K line.
    fastd_period : int
        Period of the %D line.

    Categories
    ----------
    'golden_cross' : rows selected by ``core.cross_up(%K, %D)``
    'dead_cross'   : rows selected by ``core.cross_down(%K, %D)``
    False          : everything else

    Returns
    -------
    The same ``df`` object, or 0 when validation through ``core`` fails.
    '''
    if not (core.is_df(df) & core.is_pos(fastk_period) & core.is_pos(fastd_period)
            & core.is_dflen(df, [fastk_period, fastd_period])):
        return 0

    # Work on a positional 0..n-1 index (as the cross helpers are used here),
    # then restore the caller's own index whatever it is called, even if a
    # step below raises.
    original_index = df.index
    df.reset_index(drop=True, inplace=True)
    try:
        c_name = f'stochf_{fastk_period}_{fastd_period}'
        # Object dtype up front: the column mixes False with string labels.
        df[c_name] = False
        df[c_name] = df[c_name].astype(object)

        fastk, fastd = ta.STOCHF(
            df['high'], df['low'], df['close'],
            fastk_period=fastk_period, fastd_period=fastd_period)

        df.loc[core.cross_up(fastk, fastd), c_name] = 'golden_cross'
        df.loc[core.cross_down(fastk, fastd), c_name] = 'dead_cross'
    finally:
        df.index = original_index
    return df

In [20]:
def stoch(df, fastk_period=5, slowk_period=3, slowd_period=3):
    '''Label golden / dead crosses of the slow stochastic oscillator.

    Adds (or overwrites) the column
    ``stoch_{fastk_period}_{slowk_period}_{slowd_period}`` on ``df`` in
    place. The slow %K and slow %D lines come from ``ta.STOCH`` applied to
    the ``high``, ``low`` and ``close`` columns.

    Parameters
    ----------
    fastk_period : int
        Period of the fast %K line.
    slowk_period : int
        Period of the slow %K line.
    slowd_period : int
        Period of the slow %D line.

    Categories
    ----------
    'golden_cross' : rows selected by ``core.cross_up(slow %K, slow %D)``
    'dead_cross'   : rows selected by ``core.cross_down(slow %K, slow %D)``
    False          : everything else

    Returns
    -------
    The same ``df`` object, or 0 when validation through ``core`` fails.
    '''
    if not (core.is_df(df) & core.is_pos(fastk_period) & core.is_pos(slowk_period)
            & core.is_pos(slowd_period)
            & core.is_dflen(df, [fastk_period, slowk_period, slowd_period])):
        return 0

    # Work on a positional 0..n-1 index (as the cross helpers are used here),
    # then restore the caller's own index whatever it is called, even if a
    # step below raises.
    original_index = df.index
    df.reset_index(drop=True, inplace=True)
    try:
        c_name = f'stoch_{fastk_period}_{slowk_period}_{slowd_period}'
        # Object dtype up front: the column mixes False with string labels.
        df[c_name] = False
        df[c_name] = df[c_name].astype(object)

        # Build the stochastic indicator lines.
        slowk, slowd = ta.STOCH(
            df['high'], df['low'], df['close'],
            fastk_period=fastk_period, slowk_period=slowk_period,
            slowd_period=slowd_period)

        df.loc[core.cross_up(slowk, slowd), c_name] = 'golden_cross'
        df.loc[core.cross_down(slowk, slowd), c_name] = 'dead_cross'
    finally:
        df.index = original_index
    return df

In [34]:
def make_label(df, RRPB=0.03, RFPT=0.02, TBR=0.003, BBR=0.003, target='close'):
    '''Label minimum-return turning points (tops / bottoms) and their zones.

    For every column in ``target`` the returned frame gains four 0/1 columns:
    ``{column}_top``, ``{column}_bottom``, ``{column}_top_zone`` and
    ``{column}_bottom_zone``.

    Parameters
    ----------
    df : DataFrame
        Price frame whose index is named 'Date' (the helpers move the index
        into a column and restore it with ``set_index('Date')``). The input
        frame itself is not modified.
    RRPB : float
        Minimum return rate, forwarded to ``core.is_RRPB``.
    RFPT : float
        Sideways-range ratio, forwarded to ``core.is_RFPT``.
    TBR : float
        Top-zone ratio: a row belongs to a top's zone while its value stays
        within ``[top * (1 - TBR), top]``, walking outward from the top.
    BBR : float
        Bottom-zone ratio: a row belongs to a bottom's zone while its value
        stays within ``[bottom, bottom * (1 + BBR)]``, walking outward from
        the bottom.
    target : str or list of str
        Column name(s) to label.

    Returns
    -------
    A new DataFrame holding the input columns plus the label columns. When a
    series has no local maximum or no local minimum, its label columns are
    all 0.
    '''

    def set_minmax(df, label_target):
        '''Flag local minima / maxima of ``label_target`` on every row.'''
        df = df.reset_index()
        # Interior rows: compare each point with its two neighbours.
        for i in range(1, len(df) - 1):
            # previous point
            prev = df.loc[i - 1, label_target]
            # current point
            curr = df.loc[i, label_target]
            # following point
            following = df.loc[i + 1, label_target]

            ### 1. falls from the left, flat to the right
            if prev > curr and following == curr:
                df.loc[i, 'local_min'] = 1
            ### 2. flat from the left, rises to the right
            elif prev == curr and following > curr:
                df.loc[i, 'local_min'] = 1
            ### 3. rises from the left, flat to the right
            elif prev < curr and following == curr:
                df.loc[i, 'local_max'] = 1
            ### 4. flat from the left, falls to the right
            elif prev == curr and following < curr:
                df.loc[i, 'local_max'] = 1
            ### 5. V shape
            elif prev > curr and following > curr:
                df.loc[i, 'local_min'] = 1
            ### 6. inverted V shape
            elif prev < curr and following < curr:
                df.loc[i, 'local_max'] = 1

        max_idx = df[df['local_max'] == 1].index
        min_idx = df[df['local_min'] == 1].index
        # The end-point rules compare against the first/last interior
        # extremum of each kind. Without both kinds (e.g. a monotonic
        # series) there is nothing to compare against, so skip them instead
        # of raising IndexError; the caller then skips top/bottom labelling.
        if len(max_idx) > 0 and len(min_idx) > 0:
            last_row = len(df) - 1
            # First row: becomes the opposite kind of the first interior
            # extremum when its value is consistent with that.
            if max_idx[0] < min_idx[0]:
                if df.loc[0, label_target] < df.loc[max_idx[0], label_target]:
                    df.loc[0, 'local_min'] = 1
            else:
                if df.loc[0, label_target] > df.loc[min_idx[0], label_target]:
                    df.loc[0, 'local_max'] = 1
            # Last row: same rule against the last interior extremum.
            if max_idx[-1] > min_idx[-1]:
                if df.loc[last_row, label_target] < df.loc[max_idx[-1], label_target]:
                    df.loc[last_row, 'local_min'] = 1
            else:
                if df.loc[last_row, label_target] > df.loc[min_idx[-1], label_target]:
                    df.loc[last_row, 'local_max'] = 1

        df = df.set_index('Date')
        return df

    def set_tb(df, label_target, RRPB, RFPT):
        '''Label tops / bottoms among the local extrema that satisfy RRPB and RFPT.'''
        top_column = 'top'
        bottom_column = 'bottom'

        df = df.reset_index()

        # Positions of every local maximum / minimum.
        local_max = df[df['local_max'] == 1].index
        local_min = df[df['local_min'] == 1].index

        # Current bottom candidate.
        cand_min = local_min[0]

        # w_flag drives the outer while loop; it is cleared once the last
        # local_min / local_max has been handled. f_flag leaves the
        # cand_max loop after the candidate bottom has been advanced.
        w_flag = 1
        # Labelling pass.
        while (w_flag == 1):
            f_flag = 1
            if cand_min == local_min[-1]:
                break

            for cand_max in local_max[local_max > cand_min]:
                for next_cand_min in local_min[local_min > cand_max]:

                    if core.is_RRPB(df, cand_min, cand_max, label_target, RRPB) == 1:
                        if core.is_RFPT(df, next_cand_min, cand_max, label_target, RFPT) == 1:
                            df.loc[cand_min, bottom_column] = 1
                            df.loc[cand_max, top_column] = 1
                            cand_min = next_cand_min
                            f_flag = 0
                            break
                        elif next_cand_min == local_min[-1]:
                            df.loc[cand_min, bottom_column] = 1
                            df.loc[cand_max, top_column] = 1
                            w_flag = 0
                            f_flag = 0
                            break
                        elif (df.loc[local_max[local_max > next_cand_min][0], label_target]
                              >= df.loc[cand_max, label_target]):
                            break
                        elif df.loc[next_cand_min, label_target] <= df.loc[cand_min, label_target]:
                            cand_min = next_cand_min
                            f_flag = 0
                            break
                    elif next_cand_min == local_min[-1]:
                        if df.loc[next_cand_min, label_target] < df.loc[cand_min, label_target]:
                            cand_min = next_cand_min
                        if local_max[-1] > local_min[-1]:
                            if core.is_RRPB(df, cand_min, local_max[-1], label_target, RRPB):
                                df.loc[cand_min, bottom_column] = 1
                                df.loc[local_max[-1], top_column] = 1
                        f_flag = 0
                        w_flag = 0
                        break
                    elif df.loc[next_cand_min, label_target] < df.loc[cand_min, label_target]:
                        cand_min = next_cand_min
                        f_flag = 0
                        break
                    elif next_cand_min < local_min[-1]:
                        if (df.loc[local_max[local_max > next_cand_min][0], label_target]
                                >= df.loc[cand_max, label_target]):
                            break
                # Reached the last local_max without a match: label the pair
                # if it satisfies RRPB, then stop.
                if cand_max == local_max[-1] and local_max[-1] > local_min[-1]:
                    if core.is_RRPB(df, cand_min, cand_max, label_target, RRPB) == 1:
                        df.loc[cand_min, bottom_column] = 1
                        df.loc[cand_max, top_column] = 1
                    w_flag = 0
                    break
                if f_flag == 0:
                    break

        if sum(df[top_column] == 1) == 0 or sum(df[bottom_column] == 1) == 0:
            df = df.set_index('Date')
            return df

        # When the first labelled bottom is not on row 0, look for a top in
        # the rows before it.
        top_list = df[df[top_column] == 1].index
        bottom_list = df[df[bottom_column] == 1].index
        if bottom_list[0] > 0:
            temp = bottom_list[0]
            for point in df[:bottom_list[0]].index:
                if (core.is_RFPT(df, bottom_list[0], point, label_target, RFPT) == 1
                        and df.loc[point, label_target] > df.loc[bottom_list[0], label_target]):
                    if df.loc[point, label_target] > df.loc[temp, label_target]:
                        temp = point
            if temp != bottom_list[0]:
                df.loc[temp, top_column] = 1
        # When the last label is a top, look for a closing bottom after it.
        if top_list[-1] > bottom_list[-1]:
            temp = top_list[-1]
            for point in df[top_list[-1]:].index:
                if (core.is_RFPT(df, point, top_list[-1], label_target, RFPT) == 1
                        and df.loc[point, label_target] < df.loc[top_list[-1], label_target]):
                    if df.loc[point, label_target] < df.loc[temp, label_target]:
                        temp = point
            if temp != top_list[-1]:
                df.loc[temp, bottom_column] = 1

        # Refinement (run twice): no point higher than a top and no point
        # lower than a bottom may remain in the stretch that label owns, so
        # move the label onto the more extreme point.
        for _ in range(2):
            top_list = df[df[top_column] == 1].index
            bottom_list = df[df[bottom_column] == 1].index
            for bottom in bottom_list:
                temp_bottom = bottom
                if bottom == bottom_list[-1] and bottom_list[-1] > top_list[-1]:
                    for point in df[top_list[top_list < bottom][-1]: bottom + 1].index:
                        if df.loc[point, label_target] <= df.loc[temp_bottom, label_target]:
                            df.loc[temp_bottom, bottom_column] = 0
                            df.loc[point, bottom_column] = 1
                            temp_bottom = point
                else:
                    for point in df[bottom + 1: top_list[top_list > bottom][0]].index:
                        if df.loc[point, label_target] <= df.loc[temp_bottom, label_target]:
                            df.loc[temp_bottom, bottom_column] = 0
                            df.loc[point, bottom_column] = 1
                            temp_bottom = point
            for top in top_list:
                temp_top = top
                if top == top_list[-1] and top_list[-1] > bottom_list[-1]:
                    for point in df[bottom_list[bottom_list < top][-1]:].index:
                        if df.loc[point, label_target] >= df.loc[temp_top, label_target]:
                            df.loc[temp_top, top_column] = 0
                            df.loc[point, top_column] = 1
                            temp_top = point
                else:
                    for point in df[top + 1: bottom_list[bottom_list > top][0]].index:
                        if df.loc[point, label_target] >= df.loc[temp_top, label_target]:
                            df.loc[temp_top, top_column] = 0
                            df.loc[point, top_column] = 1
                            temp_top = point
        # visual_charts(df, label_target)
        df = df.set_index('Date')
        return df

    def set_tbzone(df, label_target, TBR, BBR):
        '''Label the top_zone / bottom_zone rows around every top and bottom.'''
        df = df.reset_index()
        top_list = df[df['top'] == 1].index
        bottom_list = df[df['bottom'] == 1].index

        for top in top_list:
            top_target = df.loc[top, label_target]
            # Walk backwards from the top while the value stays in the zone.
            for pos in range(top, -1, -1):
                value = df.loc[pos, label_target]
                if value <= top_target and value >= top_target * (1 - TBR):
                    df.loc[pos, 'top_zone'] = 1
                else:
                    break
            # Walk forwards from the top while the value stays in the zone.
            for pos in range(top, len(df)):
                value = df.loc[pos, label_target]
                if value <= top_target and value >= top_target * (1 - TBR):
                    df.loc[pos, 'top_zone'] = 1
                else:
                    break

        for bottom in bottom_list:
            bottom_target = df.loc[bottom, label_target]
            # Walk backwards from the bottom while the value stays in the zone.
            for pos in range(bottom, -1, -1):
                value = df.loc[pos, label_target]
                if value >= bottom_target and value <= bottom_target * (1 + BBR):
                    df.loc[pos, 'bottom_zone'] = 1
                else:
                    break
            # Walk forwards from the bottom while the value stays in the zone.
            for pos in range(bottom, len(df)):
                value = df.loc[pos, label_target]
                if value >= bottom_target and value <= bottom_target * (1 + BBR):
                    df.loc[pos, 'bottom_zone'] = 1
                else:
                    break

        df = df.set_index('Date')
        return df

    # main
    # Work on a copy so the scratch columns below never leak onto the
    # caller's frame.
    df = df.copy()
    # Normalise `target` to a list.
    if not isinstance(target, list):
        target = [target]
    # Run the labelling pipeline once per target column.
    for label_target in target:
        # Initialise the scratch columns.
        df['top'] = 0
        df['bottom'] = 0
        df['top_zone'] = 0
        df['bottom_zone'] = 0
        df['local_max'] = 0
        df['local_min'] = 0

        # Identify local minima / maxima.
        df = set_minmax(df, label_target)
        if (df['local_max'] == 1).any() and (df['local_min'] == 1).any():
            # Label tops and bottoms.
            df = set_tb(df, label_target, RRPB, RFPT)
        if (df['top'] == 1).any() and (df['bottom'] == 1).any():
            # Label top zones and bottom zones.
            df = set_tbzone(df, label_target, TBR, BBR)
        df[f'{label_target}_top'] = df['top']
        df[f'{label_target}_bottom'] = df['bottom']
        df[f'{label_target}_top_zone'] = df['top_zone']
        df[f'{label_target}_bottom_zone'] = df['bottom_zone']
        df.drop(['local_max', 'local_min', 'top', 'bottom', 'top_zone', 'bottom_zone'], axis='columns', inplace=True)

    return df

In [22]:
# Load the sample price frame through init(); every labelling cell below works on this `df`.
df = init()

In [18]:
# Each labeller adds its column(s) to `df` in place and returns `df`, so only
# the return value of the last call is rendered as the cell output.
# roc() and dema_cross() are defined above but not called in this cell.
candle_type(df)
candle_shape(df)
three_blue(df)
three_red(df)
n_gap(df, 1)
sma_cross(df, 5, 20)
vwma_cross(df, 5, 20)
bbands(df, 20, 2)
macd(df, 9, 26)
macd_cross(df, 9, 26, 13)
stochf(df, 5, 3)
stoch(df, 5, 3, 3)

Unnamed: 0_level_0,open,high,low,close,volume,change,candle_type,candle_shape,three_blue,three_red,gap_1%,ma_cross_5_20(close),vwma_cross_5_20(close),bb_20_2(close),macd_9_26(close),macd_cross_9_26_13(close),stochf_5_3,stoch_5_3_3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2017-01-02,35980,36240,35880,36100,93012,0.001665,red,False,False,False,False,False,False,False,False,False,False,False
2017-01-03,36280,36620,36020,36480,147153,0.010526,red,False,False,False,False,False,False,False,False,False,False,False
2017-01-04,36500,36520,36100,36160,159435,-0.008772,blue,False,False,False,False,False,False,False,False,False,False,False
2017-01-05,36060,36060,35540,35560,219349,-0.016593,blue,False,False,False,False,False,False,False,False,False,False,False
2017-01-06,36180,36440,36040,36200,177619,0.017998,red,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-03-27,41200,41880,41180,41200,243553,-0.007229,False,False,False,False,False,False,False,between_center_up,plus,dead_cross,False,False
2017-03-28,41560,41840,41380,41480,164325,0.006796,blue,False,False,False,False,False,False,between_center_up,plus,False,golden_cross,False
2017-03-29,41740,41960,41580,41780,201865,0.007232,red,False,False,False,False,False,False,between_center_up,plus,False,False,golden_cross
2017-03-30,41880,42440,41880,41980,164080,0.004787,red,False,False,False,False,False,False,between_center_up,plus,False,False,False


In [35]:
# make_label() returns a labelled frame; it is only displayed here, not assigned to a name.
make_label(df)

Unnamed: 0_level_0,open,high,low,close,volume,change,dema_cross_5_20(close),close_top,close_bottom,close_top_zone,close_bottom_zone
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-01-02,35980,36240,35880,36100,93012,0.001665,False,0,0,0,0
2017-01-03,36280,36620,36020,36480,147153,0.010526,False,1,0,1,0
2017-01-04,36500,36520,36100,36160,159435,-0.008772,False,0,0,0,0
2017-01-05,36060,36060,35540,35560,219349,-0.016593,False,0,1,0,1
2017-01-06,36180,36440,36040,36200,177619,0.017998,False,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
2017-03-27,41200,41880,41180,41200,243553,-0.007229,False,0,1,0,1
2017-03-28,41560,41840,41380,41480,164325,0.006796,False,0,0,0,0
2017-03-29,41740,41960,41580,41780,201865,0.007232,False,0,0,0,0
2017-03-30,41880,42440,41880,41980,164080,0.004787,False,0,0,0,0
