In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import fmt
%matplotlib inline

In [2]:
def mark_obos(df_ti, overbought=70, oversold=30):
    """Flag overbought / oversold rows based on the ``"rsi"`` column.

    Two columns are written onto ``df_ti`` itself (the frame is modified in
    place and also returned):

    * ``"OB"`` -- 90 where ``rsi > overbought``, otherwise 0.
    * ``"OS"`` -- 90 where ``rsi < oversold`` (and the row is not already
      flagged as overbought), otherwise 0.

    Rows are processed by position, so the result does not depend on what
    kind of index the frame carries. Rows whose RSI is NaN get 0 in both
    columns because both comparisons are false for NaN.

    Input:  df_ti      - DataFrame with an "rsi" column
            overbought - upper RSI threshold (default 70)
            oversold   - lower RSI threshold (default 30)
    Output: the same DataFrame with the "OB" and "OS" columns added.
    """
    rsi = df_ti["rsi"].to_numpy()
    is_overbought = rsi > overbought
    # Overbought takes precedence, mirroring an if/elif decision per row.
    is_oversold = (rsi < oversold) & ~is_overbought
    df_ti["OB"] = np.where(is_overbought, 90, 0)
    df_ti["OS"] = np.where(is_oversold, 90, 0)
    return df_ti

def get_trigger_nums(df_tia, col):
    """Count the trigger events recorded in one column.

    A row counts as a trigger when its value in ``col`` is not equal to 0
    (NaN compares as "not equal to 0" and is therefore counted).

    The count is computed on the whole column at once, so it does not depend
    on the frame's index labels.

    Input:  df_tia - DataFrame
            col    - name of the event column (String)
    Output: number of events (Integer)
    """
    return int(df_tia[col].ne(0).sum())

def mark_crosses(df_ti, signal_1, signal_2, day1, day2):
    """Flag rows where ``signal_1`` crosses ``signal_2``.

    With ``diff = df_ti[signal_1] - df_ti[signal_2]`` a row is flagged as

    * cross up   (90) when ``diff >= 0`` and the previous row's diff was < 0;
    * cross down (90) when ``diff <= 0`` and the previous row's diff was > 0.

    The first row has no previous row, so it is never flagged. Rows are
    compared by position, independent of the frame's index labels.

    The results are written onto ``df_ti`` itself (modified in place and
    returned) in the columns ``"up_<signal_1>_<day1>_<day2>"`` and
    ``"down_<signal_1>_<day1>_<day2>"``; unflagged rows hold 0.

    Input:  df_ti      - DataFrame
            signal_1   - name of the first signal column (String)
            signal_2   - name of the second signal column (String)
            day1, day2 - labels used only to build the output column names
    Output: the same DataFrame with the two additional columns.
    """
    diff = (df_ti[signal_1].to_numpy(dtype=float)
            - df_ti[signal_2].to_numpy(dtype=float))

    # Previous-row difference; NaN for the first row makes every comparison
    # below false there, so row 0 never wraps around to the last row.
    diff_prev = np.full(diff.shape, np.nan)
    diff_prev[1:] = diff[:-1]

    crossed_up = (diff >= 0) & (diff_prev < 0)
    crossed_down = (diff <= 0) & (diff_prev > 0)

    suffix = "{}_{}_{}".format(signal_1, day1, day2)
    df_ti["up_" + suffix] = np.where(crossed_up, 90, 0)
    df_ti["down_" + suffix] = np.where(crossed_down, 90, 0)
    return df_ti

def mark_BBands_Breaks(df_ti, bot, top, close):
    """Flag rows where the close crosses the top or bottom band.

    With ``diff_top = top - close`` and ``diff_bot = bot - close``:

    * ``"bbands_break_up"`` is 90 when ``diff_top >= 0`` and the previous
      row's ``diff_top`` was < 0;
    * ``"bbands_break_down"`` is 90 when ``diff_bot <= 0`` and the previous
      row's ``diff_bot`` was > 0, unless the row was already flagged in
      ``"bbands_break_up"``.

    All other rows hold 0. The first row has no previous row and is never
    flagged. Rows are compared by position, independent of the index labels.
    The columns are written onto ``df_ti`` itself, which is also returned.

    NOTE(review): as written, these conditions fire when the close moves
    from outside a band back to the band or inside it, not when it leaves
    the band -- confirm this is the intended meaning of "break".

    Input:  df_ti - DataFrame
            bot   - name of the lower band column (String)
            top   - name of the upper band column (String)
            close - name of the close price column (String)
    Output: the same DataFrame with the two additional columns.
    """
    def lagged(values):
        # Previous-row values; NaN in the first slot so that comparisons
        # against it are false and row 0 never wraps around to the last row.
        previous = np.full(values.shape, np.nan)
        previous[1:] = values[:-1]
        return previous

    close_px = df_ti[close].to_numpy(dtype=float)
    diff_top = df_ti[top].to_numpy(dtype=float) - close_px
    diff_bot = df_ti[bot].to_numpy(dtype=float) - close_px

    top_event = (diff_top >= 0) & (lagged(diff_top) < 0)
    # The top-band event takes precedence (if/elif per row).
    bot_event = (diff_bot <= 0) & (lagged(diff_bot) > 0) & ~top_event

    df_ti["bbands_break_up"] = np.where(top_event, 90, 0)
    df_ti["bbands_break_down"] = np.where(bot_event, 90, 0)

    return df_ti



In [4]:
def _window_stats(window):
    """Return (min, mean, max) of a Series slice; all NaN when it is empty."""
    if len(window) == 0:
        return np.nan, np.nan, np.nan
    return np.min(window), np.average(window), np.max(window)


def get_stats_around_triggers(signal_col, target_col, data, n):
    """Get min, mean and max of the target in the n rows before and after
    each trigger event.

    A trigger is any row whose value in ``signal_col`` is not equal to 0.
    For a trigger at position ``i``:

    * the "before" window is rows ``max(i - n, 0) .. i - 1`` (shorter than
      ``n`` near the start of the data);
    * the "after" window is rows ``i + 1 .. i + n`` (shorter than ``n`` near
      the end of the data).

    Neither window includes the trigger row itself. An empty window yields
    NaN for all three statistics. Rows are addressed by position, so the
    frame's index labels do not affect which rows are read.

    Input:  signal_col - signal column name (String)
            target_col - target column name (String)
            data       - DataFrame
            n          - window length in rows (integer)

    Output: df_b    - DataFrame with columns [min, mean, max, target_col]
                      for the "before" windows, indexed by the trigger
                      rows' index labels; target_col holds the target value
                      on the trigger row
            df_a    - same layout for the "after" windows
            stats_b - one list per trigger: target values of the "before"
                      window followed by the trigger row's value
            stats_a - one list per trigger: the trigger row's value followed
                      by the target values of the "after" window
    """
    signal = data[signal_col].to_numpy()
    target = data[target_col]

    ind, val = [], []
    min_b, mean_b, max_b = [], [], []
    min_a, mean_a, max_a = [], [], []
    stats_b, stats_a = [], []

    for pos in np.flatnonzero(signal != 0):
        i = int(pos)
        # Clamp the lower bound so early triggers use the rows that exist
        # instead of a negative (wrapping) slice start.
        start = max(i - n, 0)

        lo, avg, hi = _window_stats(target.iloc[start:i])
        min_b.append(lo)
        mean_b.append(avg)
        max_b.append(hi)

        lo, avg, hi = _window_stats(target.iloc[(i + 1):(i + n + 1)])
        min_a.append(lo)
        mean_a.append(avg)
        max_a.append(hi)

        ind.append(data.index[i])
        val.append(target.iloc[i])
        # Exactly one entry per trigger in each list, so stats_a, stats_b
        # and the rows of df_b / df_a stay aligned.
        stats_b.append(target.iloc[start:(i + 1)].tolist())
        stats_a.append(target.iloc[i:(i + n + 1)].tolist())

    df_b = pd.DataFrame({'min': min_b, 'mean': mean_b, 'max': max_b, target_col: val},
                        index=ind, columns=["min", "mean", "max", target_col])
    df_a = pd.DataFrame({'min': min_a, 'mean': mean_a, 'max': max_a, target_col: val},
                        index=ind, columns=["min", "mean", "max", target_col])
    return df_b, df_a, stats_b, stats_a