In [1]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform
from scipy.signal import argrelextrema

In [65]:
df = pd.DataFrame({
    "vals":[2,4,5,6,5,4,2,6,5],
    "extremes":[-1,0,0,1,0,0,-1,1,0],
})

In [22]:
df

Unnamed: 0,vals,extremes
0,2,-1
1,4,0
2,5,0
3,6,1
4,5,0
5,4,0
6,2,-1
7,6,1
8,5,0


In [67]:
threshold = 0.3

def update_extremes(df):
    return (
        pd.concat(
            [
                df,
                df.mask(df["extremes"] == 0).fillna(method="ffill").rename(columns=lambda n: "prev_"+n),
            ],
            axis=1
        )
        .eval("within_threshold = abs(vals-prev_vals)/prev_vals < @threshold")
        .eval("mask = within_threshold and extremes == 0")
        .eval("is_new_extreme = within_threshold.mask(mask).fillna(method='ffill')")
        .eval("new_extremes = prev_extremes.where(is_new_extreme).fillna(0)")
        ["new_extremes"]  # comment out this to get all columns (which may help in understanding what is going on)
        .astype(int)
    )

df2 = update_extremes(df)
df2

0   -1
1    0
2    0
3    1
4    1
5    0
6   -1
7    1
8    1
Name: new_extremes, dtype: int64

In [69]:
df3 = update_extremes(df.sort_index(ascending=False)).sort_index()
df3

0   -1
1    0
2    1
3    1
4    0
5    0
6   -1
7    1
8    0
Name: new_extremes, dtype: int64

In [59]:
df2["vals"].plot()
df2["extremes"].plot()
df2["new_extremes"].plot()
df3["new_extremes"].plot()

KeyError: 'vals'

In [89]:
def nearby_extremes(df, threshold):
    
    # Forward Pass
    forward_df = (pd.concat(
            [
                df,
                df.mask(df["extremes"] == 0).fillna(method="ffill").rename(columns=lambda n: "prev_"+n),
            ],
            axis=1
        )
        .eval("within_threshold = abs(vals-prev_vals)/prev_vals < @threshold")
        .eval("mask = within_threshold and extremes == 0")
        .eval("is_new_extreme = within_threshold.mask(mask).fillna(method='ffill')")
        .eval("forward_extremes = prev_extremes.where(is_new_extreme).fillna(0)")
        ["forward_extremes"]  # comment out this to get all columns (which may help in understanding what is going on)
        .astype(int))

    # Reverse Pass
    df = df.sort_index(ascending=False)
    reverse_df = (pd.concat(
            [
                df,
                df.mask(df["extremes"] == 0).fillna(method="ffill").rename(columns=lambda n: "prev_"+n),
            ],
            axis=1
        )
        .eval("within_threshold = abs(vals-prev_vals)/prev_vals < @threshold")
        .eval("mask = within_threshold and extremes == 0")
        .eval("is_new_extreme = within_threshold.mask(mask).fillna(method='ffill')")
        .eval("reverse_extremes = prev_extremes.where(is_new_extreme).fillna(0)")
        ["reverse_extremes"]  # comment out this to get all columns (which may help in understanding what is going on)
        .astype(int)).sort_index()
        
    # Merging
    merged_df = pd.concat([df["extremes"].sort_index(), forward_df, reverse_df], axis=1)
    final_df = merged_df.sum(axis=1).clip(lower=-1, upper=1)
    
    return final_df
    

In [91]:
nb = nearby_extremes(df, threshold=0.2)
nb

0   -1
1    0
2    1
3    1
4    1
5    0
6   -1
7    1
8    1
dtype: int64

In [92]:
def nearby_extremes(df, threshold):
    """Identify values beside peaks/valleys that are within a threshold distance and re-label them"""
    
    # Forward Pass
    forward_df = (pd.concat(
        [
            df,
            df.mask(df["raw_peaks"] == 0).fillna(method="ffill").rename(columns=lambda n: "prev_" + n),
        ],
        axis=1
    )
                  .eval("within_threshold = abs(close-prev_close)/prev_close < @threshold")
                  .eval("mask = within_threshold and raw_peaks == 0")
                  .eval("is_new_extreme = within_threshold.mask(mask).fillna(method='ffill')")
                  .eval("forward_extremes = prev_raw_peaks.where(is_new_extreme).fillna(0)")
                  ["forward_extremes"]
                  .astype(int))

    # Reverse Pass
    df = df.sort_index(ascending=False)
    reverse_df = (pd.concat(
        [
            df,
            df.mask(df["raw_peaks"] == 0).fillna(method="ffill").rename(columns=lambda n: "prev_" + n),
        ],
        axis=1
    )
                  .eval("within_threshold = abs(close-prev_close)/prev_close < @threshold")
                  .eval("mask = within_threshold and raw_peaks == 0")
                  .eval("is_new_extreme = within_threshold.mask(mask).fillna(method='ffill')")
                  .eval("reverse_extremes = prev_raw_peaks.where(is_new_extreme).fillna(0)")
                  ["reverse_extremes"]  # comment out this to get all columns (which may help in understanding what is going on)
                  .astype(int)).sort_index()

    # Merging
    merged_df = pd.concat([df["raw_peaks"].sort_index(), forward_df, reverse_df], axis=1)
    final_df = merged_df.sum(axis=1).clip(lower=-1, upper=1)

    return final_df

In [94]:
df = pd.DataFrame({
    "close":[2,4,5,6,5,4,2,6,5],
    "raw_peaks":[-1,0,0,1,0,0,-1,1,0],
})

nb = nearby_extremes(df, threshold=0.2)
nb

0   -1
1    0
2    1
3    1
4    1
5    0
6   -1
7    1
8    1
dtype: int64

In [98]:
df

Unnamed: 0,close,raw_peaks
0,2,-1
1,4,0
2,5,0
3,6,1
4,5,0
5,4,0
6,2,-1
7,6,1
8,5,0


In [112]:
df[df["raw_peaks"] != 0].last_valid_index()

7

In [113]:
len(df)

9

In [115]:
len(df[3:6])

3