In [None]:
def format_data(data):
    """
    Strategy agnostic formatting of the data. Adds two columns:
    - NDOpen: Next day open
    - NDDate: Next day date

    In daily strategies decisions are made on this price and this date: a
    decision taken at one bar's close is acted on at the next bar's open.

    Input:
        data: DataFrame with an "Open" column whose index holds the bar dates
    Returns:
        A new DataFrame with NDOpen and NDDate added and every row containing
        a NaN removed. The last row never has a next bar, so it is always
        among the rows removed. The frame passed in is not modified.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    data = data.copy()

    # next day open: the price on which trade decisions made at the prev close are taken
    # NOTE(review): an earlier comment mentioned a perturbation added via
    # modify_open(); nothing of the sort is applied here - confirm where it happens.
    data["NDOpen"] = data["Open"].shift(-1)

    # next day date: the date on which trade decisions made at prev close are taken.
    # Built as an index-aligned series and shifted, so every row keeps the date of
    # its own successor even when dropna() below removes rows other than the last.
    # index.values yields timezone-naive timestamps, as the previous code did.
    bar_dates = pd.to_datetime(pd.Series(data.index.values, index=data.index))
    data["NDDate"] = bar_dates.shift(-1)

    return data.dropna()
    
def find_extrema(s, bw='cv_ls'):
    """
    Locate local extrema of a price series with the help of a kernel
    regression smoother.

    Input:
        s: prices as pd.series
        bw: bandwidth as str or array like, forwarded to KernelReg
    Returns (in this order):
        extrema: extrema of prices as pd.series, sorted by bar number
        prices: prices with 0-based index as pd.series
        smooth_extrema: extrema of smooth_prices as pd.series
        smooth_prices: smoothed prices using kernel regression as pd.series
    """
    # Re-key a copy of the series on the bar number so the bar number can
    # serve as the regressor of the non-parametric kernel regression.
    frame = s.copy().reset_index()
    frame.columns = ['date', 'price']
    prices = frame['price']
    bars = prices.index

    kr = KernelReg(
        [prices.values],
        [bars.to_numpy()],
        var_type='c', bw=bw
    )
    fitted = kr.fit([bars])

    # Turning points of the smoothed curve
    smooth_prices = pd.Series(data=fitted[0], index=bars)
    smooth_local_max = argrelextrema(smooth_prices.values, np.greater)[0]
    smooth_local_min = argrelextrema(smooth_prices.values, np.less)[0]
    turning_points = np.sort(
        np.concatenate([smooth_local_max, smooth_local_min]))
    smooth_extrema = smooth_prices.loc[turning_points]

    # Map every smoothed turning point back onto the raw prices: take the
    # raw max (or min) inside the bars i-2 .. i+1 around it. Turning points
    # too close to either end of the series are skipped.
    n_bars = len(prices)
    price_local_max_dt = [
        prices.iloc[i-2:i+2].idxmax()
        for i in smooth_local_max
        if 1 < i < n_bars - 1
    ]
    price_local_min_dt = [
        prices.iloc[i-2:i+2].idxmin()
        for i in smooth_local_min
        if 1 < i < n_bars - 1
    ]

    maxima = pd.Series(prices.loc[price_local_max_dt])
    minima = pd.Series(prices.loc[price_local_min_dt])
    extrema = pd.concat([maxima, minima]).sort_index()

    # Every returned series is indexed by bar number
    return extrema, prices, smooth_extrema, smooth_prices

def head_and_shoulders(extrema, index, max_bars=35, shoulder_sensitivity=0.03):
    """
    Scan every run of five consecutive extrema for a head and shoulders pattern.

    Input:
        extrema: extrema as pd.series with bar number as index
        index: lookup from bar number to date; it is indexed with the bar
            numbers of the first and last extremum of a pattern
        max_bars: max bars for pattern to play out
        shoulder_sensitivity: largest allowed gap between the two shoulders
            (e1, e5) and between the two troughs (e2, e4), as a fraction of
            the average shoulder price
    Returns:
        dates: list of [start, end] pairs holding the index entries of the
            first and last bar of each detected pattern; empty when fewer
            than five extrema are passed or nothing matches
    """
    dates = []

    # Need to start at five extrema for pattern generation
    for end in range(5, len(extrema) + 1):
        window = extrema.iloc[end - 5:end]
        first_bar, last_bar = window.index[0], window.index[-1]

        # A pattern must play out within max_bars (default 35)
        if (last_bar - first_bar) > max_bars:
            continue

        # Using the notation from the paper to avoid mistakes
        e1, e2, e3, e4, e5 = window.to_numpy()

        # NOTE(review): the trough check below is scaled by the shoulder
        # average (e1, e5), not by the trough average (e2, e4) - confirm
        # this is intended.
        tolerance = shoulder_sensitivity * np.mean([e1, e5])

        # Head and Shoulders: e1 above e2, head e3 above both shoulders,
        # shoulders roughly level and troughs roughly level
        is_pattern = (
            (e1 > e2) and (e3 > e1) and (e3 > e5)
            and (abs(e1 - e5) <= tolerance)
            and (abs(e2 - e4) <= tolerance)
        )
        if is_pattern:
            dates.append([index[first_bar], index[last_bar]])

    return dates


def obtain_pattern_data(data, max_bars, shoulder_sensitivity):
    """
    Slide a window of max_bars bars over the closing prices and collect the
    head and shoulders patterns found in each window.

    Input:
        data: DataFrame with a "Close" column
        max_bars: window length in bars; also passed to head_and_shoulders
            as the max bars for a pattern to play out
        shoulder_sensitivity: passed through to head_and_shoulders
    Returns:
        list of [detection_date, dates, pattern_min, pattern_max] records:
            detection_date: index label of the last bar of the window in
                which the pattern was found
            dates: the list returned by head_and_shoulders for that window
            pattern_min: lowest close inside the first pattern of dates
                (the target entry point)
            pattern_max: highest close inside the first pattern of dates
                (the stoploss, i.e. the pattern's head)
    """
    closes = data["Close"]

    # Placeholder record so the duplicate check always has a predecessor to
    # compare against; it is stripped again before returning.
    pattern_data = [[0, 0]]

    for start in range(len(data)):

        # indexing the prices of interest (explicitly positional)
        prices = closes.iloc[start:start + max_bars]

        # finding the extrema
        extrema, _, _, _ = find_extrema(prices, bw=[0.85])

        # only run head and shoulders when there are 5 extrema
        if len(extrema) < 5:
            continue

        # detecting the head and shoulders pattern
        dates = head_and_shoulders(extrema, prices.index, max_bars, shoulder_sensitivity)

        # nothing found, or the same patterns as the previous record:
        # ensuring duplicate patterns are not saved
        if not dates or dates == pattern_data[-1][1]:
            continue

        # closes spanned by the first pattern of this window
        pattern_region = closes.loc[dates[0][0]:dates[0][1]]

        # the last two values are the target entry point and the stoploss - latter set to the pattern's head
        pattern_data.append([prices.index[-1], dates, pattern_region.min(), pattern_region.max()])

    return pattern_data[1:]


def obtain_entry_idx(df, detection_dates, pattern_data):
    """
    Find, for each detected pattern, the first bar on which the close falls
    to or below the pattern's target entry price.

    Only bars from the detection date up to 10 calendar days later (both
    ends included) are considered.

    Input:
        df: DataFrame with a "Close" column, indexed by date
        detection_dates: detection date of each pattern, parallel to pattern_data
        pattern_data: pattern records; element [2] of each record is the
            target entry price
    Returns:
        idx_activate: index labels of the bars on which an entry triggers
        pattern_data_idx: for each entry, the position in pattern_data of
            the pattern that triggered it
    """
    idx_activate = []
    pattern_data_idx = []

    for i, pattern in enumerate(pattern_data):
        detected_on = detection_dates[i]
        entry_target = pattern[2]

        window_close = df.loc[detected_on:detected_on + timedelta(days=10)].Close
        hits = window_close[window_close.le(entry_target)]

        # The close never reached the target inside the window: no entry
        # for this pattern.
        if hits.empty:
            continue

        idx_activate.append(hits.index[0])
        pattern_data_idx.append(i)

    return idx_activate, pattern_data_idx


def format_strategy_data(data, max_bars, target_profit, shoulder_sensitivity):
    """
    Add the head and shoulders strategy columns to data.

    Columns written (data is modified in place and also returned):
    - PatternDetected: 1 on dates on which a pattern was detected, else 0
    - Entry: 1 on dates on which a short position is initiated, else 0
    - StopLoss: on entry dates the stoploss of the triggering pattern, else 0.0
    - PosTarget: on entry dates NDOpen * (1 - target_profit), else 0.0

    Input:
        data: DataFrame with "Close" and "NDOpen" columns, indexed by date
        max_bars: passed to obtain_pattern_data
        target_profit: fraction below the next day open at which the
            position target is set
        shoulder_sensitivity: passed to obtain_pattern_data
    Returns:
        data, with the four columns above added
    """
    # obtaining pattern data
    pattern_data = obtain_pattern_data(data, max_bars, shoulder_sensitivity)

    # obtaining all potential patterns
    detection_dates = [pattern[0] for pattern in pattern_data]

    # obtaining indices to initiate positions, and idx of pattern_data list they correspond to
    idx_activate, sig_idx = obtain_entry_idx(data, detection_dates, pattern_data)

    # initialising unique strategy variables; the two price columns start as
    # floats so that writing prices into them needs no dtype upcast
    data["PatternDetected"] = 0
    data["Entry"] = 0
    data["StopLoss"] = 0.0
    data["PosTarget"] = 0.0

    # storing indicator on dates when the pattern was detected
    data.loc[detection_dates, "PatternDetected"] = 1

    # storing indicator on dates to initiate short position
    data.loc[idx_activate, "Entry"] = 1

    # storing stoploss and position targets for each short position; the
    # stoploss is element [3] of the pattern record that triggered the entry
    for entry_date, pattern_pos in zip(idx_activate, sig_idx):
        data.loc[entry_date, "StopLoss"] = pattern_data[pattern_pos][3]
        data.loc[entry_date, "PosTarget"] = data.loc[entry_date, "NDOpen"] * (1 - target_profit)

    return data

In [None]:
# Strategy parameters
max_bars = 35
target_profit = 0.05
shoulder_sensitivity = 0.03 # the lower this is, the closer the shoulder peaks and troughs need to be together

# Fetch the LITE price history, add the strategy agnostic columns, then the
# head and shoulders strategy columns
lite = format_data(yf.Ticker("LITE").history(period="30y"))
lite = format_strategy_data(
    lite,
    max_bars=max_bars,
    target_profit=target_profit,
    shoulder_sensitivity=shoulder_sensitivity,
)