In [1]:
import pandas as pd

# --- prepare test data ---
data = [
    ('cust_1', '2020-05-11', 100),
    ('cust_1', '2020-05-12', 200),
    ('cust_1', '2020-05-14', 100),
    ('cust_1', '2020-05-16', 50),
    ('cust_1', '2020-05-17', 100),
]
df = pd.DataFrame(data, columns=['customer_id', 'order_date', 'price'])
df['order_date'] = pd.to_datetime(df['order_date'])

print("Test data:")
print(df, "\n")

# --- implement MATCH_RECOGNIZE (START DOWN+) in pandas ---
# A row qualifies as DOWN when its price is strictly below this threshold.
DOWN_THRESHOLD = 150


def find_start_down_matches(orders, down_threshold=DOWN_THRESHOLD):
    """Find START DOWN+ matches, scanning each customer separately.

    START is any row; DOWN is a row whose ``price`` is strictly below
    ``down_threshold``. A match is a START row followed immediately by one
    or more DOWN rows. After a match, scanning resumes on the row after the
    last DOWN row, so matches never overlap.

    Rows are ordered by ``order_date`` inside each ``customer_id`` and every
    customer is scanned on its own, so a match can never begin on one
    customer and continue into the next one.

    Parameters
    ----------
    orders : pd.DataFrame
        Must contain ``customer_id``, ``order_date`` and ``price`` columns.
    down_threshold : number, default DOWN_THRESHOLD
        Upper bound (exclusive) for a row to qualify as DOWN.

    Returns
    -------
    list of dict
        One dict per match with keys ``customer_id``, ``start_price``,
        ``final_price``, ``start_date`` and ``final_date``.
    """
    found = []
    ordered = orders.sort_values(['customer_id', 'order_date'])

    # sort=False keeps the groups in the (already sorted) order of appearance
    for _, part in ordered.groupby('customer_id', sort=False):
        part = part.reset_index(drop=True)
        size = len(part)
        pos = 0

        while pos < size:
            # extend `end` over the run of DOWN rows that follows the START row
            end = pos + 1
            while end < size and part.loc[end, 'price'] < down_threshold:
                end += 1

            if end == pos + 1:
                # no DOWN row directly after this START → try the next row
                pos += 1
                continue

            first_row = part.iloc[pos]
            last_down = part.iloc[end - 1]
            found.append({
                'customer_id': first_row['customer_id'],
                'start_price': first_row['price'],
                'final_price': last_down['price'],
                'start_date': first_row['order_date'],
                'final_date': last_down['order_date'],
            })
            # skip past the end of this match
            pos = end

    return found


results = find_start_down_matches(df)

# convert to DataFrame & display
matches = pd.DataFrame(results, columns=[
    'customer_id', 'start_price', 'final_price', 'start_date', 'final_date'
])

print("Matches found:")
print(matches)


Test data:
  customer_id order_date  price
0      cust_1 2020-05-11    100
1      cust_1 2020-05-12    200
2      cust_1 2020-05-14    100
3      cust_1 2020-05-16     50
4      cust_1 2020-05-17    100 

Matches found:
  customer_id  start_price  final_price start_date final_date
0      cust_1          200          100 2020-05-12 2020-05-17


In [3]:
import pandas as pd

# Sample orders for two customers, interleaved by date
data = [
    ('cust_1', '2020-05-11', 100),
    ('cust_1', '2020-05-12', 200),
    ('cust_2', '2020-05-13',   8),
    ('cust_1', '2020-05-14', 100),
    ('cust_2', '2020-05-15',   4),
    ('cust_1', '2020-05-16',  50),
    ('cust_1', '2020-05-17', 100),
    ('cust_2', '2020-05-18',   6),
]
df = pd.DataFrame(data, columns=['customer_id', 'order_date', 'price'])
df['order_date'] = pd.to_datetime(df['order_date'])

results = []

# One pass per customer partition, rows ordered by date
for cust, group in df.groupby('customer_id'):
    grp = group.sort_values('order_date').reset_index(drop=True)
    prices = grp['price']
    total = len(grp)
    pos = 0

    while pos < total:
        # DOWN+ : consecutive rows priced strictly below their predecessor
        down_end = pos + 1
        while down_end < total and prices.iloc[down_end] < prices.iloc[down_end - 1]:
            down_end += 1

        # UP+ : consecutive rows priced strictly above their predecessor
        up_end = down_end
        while up_end < total and prices.iloc[up_end] > prices.iloc[up_end - 1]:
            up_end += 1

        has_down = down_end > pos + 1
        has_up = up_end > down_end
        if not (has_down and has_up):
            # incomplete pattern at this start row → try the next row
            pos += 1
            continue

        first_row = grp.iloc[pos]
        bottom_row = grp.iloc[down_end - 1]
        last_row = grp.iloc[up_end - 1]

        # one output row per match
        results.append({
            'customer_id': cust,
            'start_price': first_row['price'],
            'bottom_price': bottom_row['price'],
            'final_price': last_row['price'],
            'start_date': first_row['order_date'],
            'final_date': last_row['order_date'],
        })

        # resume after the last UP row (AFTER MATCH SKIP PAST LAST ROW)
        pos = up_end

# collect the matches into a frame and print it
matches = pd.DataFrame(results, columns=[
    'customer_id', 'start_price', 'bottom_price', 'final_price', 'start_date', 'final_date'
])
print(matches)


  customer_id  start_price  bottom_price  final_price start_date final_date
0      cust_1          200            50          100 2020-05-12 2020-05-17
1      cust_2            8             4            6 2020-05-13 2020-05-18


In [2]:
import pandas as pd

# Four sequences of three events each, covering different orderings of
# start (A), middle (B) and end (C)
data = [
    {"id": 1,  "seq": 1, "step": 1, "event_type": "start",  "value": 100},  # A
    {"id": 2,  "seq": 1, "step": 2, "event_type": "middle", "value": 200},  # B
    {"id": 3,  "seq": 1, "step": 3, "event_type": "end",    "value": 300},  # C
    {"id": 4,  "seq": 2, "step": 1, "event_type": "middle", "value": 250},  # B
    {"id": 5,  "seq": 2, "step": 2, "event_type": "start",  "value": 150},  # A
    {"id": 6,  "seq": 2, "step": 3, "event_type": "end",    "value": 350},  # C
    {"id": 7,  "seq": 3, "step": 1, "event_type": "start",  "value": 175},  # A
    {"id": 8,  "seq": 3, "step": 2, "event_type": "end",    "value": 275},  # C
    {"id": 9,  "seq": 3, "step": 3, "event_type": "middle", "value": 375},  # B
    {"id": 10, "seq": 4, "step": 1, "event_type": "end",    "value": 225},  # C
    {"id": 11, "seq": 4, "step": 2, "event_type": "middle", "value": 325},  # B
    {"id": 12, "seq": 4, "step": 3, "event_type": "start",  "value": 425},  # A
]
df = pd.DataFrame(data)

# event_type → pattern variable name
var_map = {'start': 'A', 'middle': 'B', 'end': 'C'}

results = []

# each `seq` value is its own partition, ordered by `step`
for seq_val, group in df.groupby('seq'):
    ordered = group.sort_values('step').reset_index(drop=True)
    total = len(ordered)
    match_num = 1
    pos = 0

    # slide a three-row window over the partition
    while pos + 3 <= total:
        trio = ordered.iloc[pos:pos + 3]
        value_by_event = dict(zip(trio['event_type'], trio['value']))

        # three rows holding three distinct types means one of each A/B/C
        if set(value_by_event) != {'start', 'middle', 'end'}:
            pos += 1
            continue

        results.append({
            'seq': seq_val,
            # variable bound to the first row of the window
            'pattern_var': var_map[trio['event_type'].iloc[0]],
            'match_num': match_num,
            'a_value': value_by_event['start'],
            'b_value': value_by_event['middle'],
            'c_value': value_by_event['end'],
        })

        match_num += 1
        # continue after the matched window
        pos += 3

# collect the matches into a frame and print it
matches_df = pd.DataFrame(results, columns=[
    'seq', 'pattern_var', 'match_num', 'a_value', 'b_value', 'c_value'
])
print(matches_df)


   seq pattern_var  match_num  a_value  b_value  c_value
0    1           A          1      100      200      300
1    2           B          1      150      250      350
2    3           A          1      175      375      275
3    4           C          1      425      325      225


In [1]:
import pandas as pd

def match_recognize_abc_dip(
    df: pd.DataFrame,
    *,
    partition_col: str = "customer_id",
    date_col: str = "order_date",
    price_col: str = "price",
    overlap: bool = True,  # False ≈ SKIP PAST LAST ROW
) -> pd.DataFrame:
    """
    Find three consecutive rows A, B, C with B.price < A.price and
    C.price < B.price within each partition, ordered by date.

    Parameters
    ----------
    df : pd.DataFrame
        Input rows; it is copied, so the caller's frame is not modified.
    partition_col : str
        Column whose values define the partitions.
    date_col : str
        Column used for ordering; converted with ``pd.to_datetime``.
    price_col : str
        Column holding the compared prices.
    overlap : bool
        True reports every qualifying triple, even when triples share rows.
        False accepts triples greedily from the earliest one and skips any
        triple that starts before the row after the previous match's C.

    Returns
    -------
    pd.DataFrame
        Columns ``[partition_col, "start_date", "end_date", "bottom_price"]``
        where start_date is A's date, end_date is C's date and bottom_price
        is C's price. The same columns are present when nothing matches.
    """
    out_columns = [partition_col, "start_date", "end_date", "bottom_price"]

    # Work on a copy: parse dates and order rows inside each partition
    gdf = df.copy()
    gdf[date_col] = pd.to_datetime(gdf[date_col])
    gdf = gdf.sort_values([partition_col, date_col]).reset_index(drop=True)

    out_rows = []

    for key, g in gdf.groupby(partition_col, sort=False):
        g = g.reset_index(drop=True)

        # Align each row with its two successors
        a = g[price_col]
        b = a.shift(-1)
        c = a.shift(-2)

        # True at i if A(i) > B(i+1) > C(i+2). The shifted series are NaN
        # on the last rows and comparisons with NaN are False, so every
        # candidate i has rows i+1 and i+2 available.
        is_dip_start = (b < a) & (c < b)
        candidates = is_dip_start[is_dip_start].index.tolist()

        if overlap:
            use_idxs = candidates
        else:
            # Skip past last row of each accepted triple (next start >= i+3)
            use_idxs = []
            next_allowed = 0
            for i in candidates:
                if i >= next_allowed:
                    use_idxs.append(i)
                    next_allowed = i + 3

        for i in use_idxs:
            out_rows.append({
                partition_col: key,
                "start_date": g.loc[i, date_col],        # A's date
                "end_date": g.loc[i + 2, date_col],      # C's date
                "bottom_price": g.loc[i + 2, price_col], # C's price
            })

    # Passing the columns explicitly keeps the schema when out_rows is empty
    return pd.DataFrame(out_rows, columns=out_columns)


# ------------------ Example usage ------------------
# Orders for two customers, interleaved by date
data = [
    ('cust_1', '2020-05-11', 100),
    ('cust_1', '2020-05-12', 200),
    ('cust_2', '2020-05-13',   8),
    ('cust_1', '2020-05-14', 100),
    ('cust_2', '2020-05-15',   4),
    ('cust_1', '2020-05-16',  50),
    ('cust_1', '2020-05-17', 100),
    ('cust_2', '2020-05-18',   6),
]

df = pd.DataFrame(data, columns=['customer_id', 'order_date', 'price'])

# Every qualifying triple, including ones that share rows
overlapping_matches = match_recognize_abc_dip(df, overlap=True)
print("Overlapping matches:")
print(overlapping_matches, "\n")

# Triples that do not share rows with an earlier accepted triple
non_overlapping_matches = match_recognize_abc_dip(df, overlap=False)
print("Non-overlapping matches (≈ SKIP PAST LAST ROW):")
print(non_overlapping_matches)


Overlapping matches:
  customer_id start_date   end_date  bottom_price
0      cust_1 2020-05-12 2020-05-16            50 

Non-overlapping matches (≈ SKIP PAST LAST ROW):
  customer_id start_date   end_date  bottom_price
0      cust_1 2020-05-12 2020-05-16            50


In [2]:
import pandas as pd

def match_recognize_abc_dip(df: pd.DataFrame,
                            partition_col: str = "customer_id",
                            date_col: str = "order_date",
                            price_col: str = "price") -> pd.DataFrame:
    """
    Detect three consecutive rows A, B, C where B.price < A.price and
    C.price < B.price per partition (ordered by date), using
    non-overlapping matches.

    Parameters
    ----------
    df : pd.DataFrame
        Input rows; it is copied, so the caller's frame is not modified.
    partition_col : str
        Column whose values define the partitions.
    date_col : str
        Column used for ordering; converted with ``pd.to_datetime``.
    price_col : str
        Column holding the compared prices.

    Returns
    -------
    pd.DataFrame
        Columns ``[partition_col, "start_date", "end_date", "bottom_price"]``
        where start_date is A's date, end_date is C's date and bottom_price
        is C's price. The same columns are present when nothing matches.
    """
    out_columns = [partition_col, "start_date", "end_date", "bottom_price"]

    # Work on a copy: parse dates and order rows inside each partition
    gdf = df.copy()
    gdf[date_col] = pd.to_datetime(gdf[date_col])
    gdf = gdf.sort_values([partition_col, date_col]).reset_index(drop=True)

    out_rows = []

    for key, g in gdf.groupby(partition_col, sort=False):
        g = g.reset_index(drop=True)

        # Align each row with its two successors
        a = g[price_col]
        b = a.shift(-1)
        c = a.shift(-2)

        # i is a start if A(i) > B(i+1) > C(i+2). The shifted series are NaN
        # on the last rows and comparisons with NaN are False, so every
        # candidate i has rows i+1 and i+2 available.
        is_dip_start = (b < a) & (c < b)
        candidates = is_dip_start[is_dip_start].index.tolist()

        # Non-overlapping selection: after taking i, next allowed is i+3
        next_allowed = 0
        for i in candidates:
            if i < next_allowed:
                continue
            out_rows.append({
                partition_col: key,
                "start_date":   g.loc[i, date_col],       # A's date
                "end_date":     g.loc[i + 2, date_col],   # C's date
                "bottom_price": g.loc[i + 2, price_col],  # C's price
            })
            next_allowed = i + 3

    # Passing the columns explicitly keeps the schema when out_rows is empty
    return pd.DataFrame(out_rows, columns=out_columns)


# ------------------ Example ------------------
# Runs only when this code executes as the top-level module
if __name__ == "__main__":
    # Orders for two customers, interleaved by date
    data = [
        ('cust_1', '2020-05-11', 100),
        ('cust_1', '2020-05-12', 200),
        ('cust_2', '2020-05-13',   8),
        ('cust_1', '2020-05-14', 100),
        ('cust_2', '2020-05-15',   4),
        ('cust_1', '2020-05-16',  50),
        ('cust_1', '2020-05-17', 100),
        ('cust_2', '2020-05-18',   6),
    ]
    df = pd.DataFrame(data, columns=['customer_id', 'order_date', 'price'])

    dip_matches = match_recognize_abc_dip(df)
    print(dip_matches)


  customer_id start_date   end_date  bottom_price
0      cust_1 2020-05-12 2020-05-16            50


In [4]:
import pandas as pd
# NOTE: `df` is not created in this cell; it must already exist in the kernel.
# Convert the order dates to datetime values
df['order_date'] = pd.to_datetime(df['order_date'])
# Order rows by customer, then by date, and renumber the index from 0
sort_keys = ['customer_id', 'order_date']
df = df.sort_values(by=sort_keys).reset_index(drop=True)
def match_pattern_ABC(group: pd.DataFrame):
    """Return every run of three consecutive rows with strictly falling price.

    Rows are taken in the order they appear in ``group``. For each position
    the row (A) and its two successors (B, C) are compared; a match requires
    B.price < A.price and C.price < B.price. Matches may share rows.

    Returns a list of dicts with keys ``customer_id`` (from A),
    ``start_date`` (A's order_date), ``end_date`` (C's order_date) and
    ``bottom_price`` (C's price). Fewer than three rows yields an empty list.
    """
    found = []
    last_start = len(group) - 2  # exclusive bound: A needs two rows after it
    for start in range(last_start):
        first, second, third = (group.iloc[start + offset] for offset in range(3))

        falling = second['price'] < first['price'] and third['price'] < second['price']
        if not falling:
            continue

        found.append({
            'customer_id': first['customer_id'],
            'start_date': first['order_date'],
            'end_date': third['order_date'],
            'bottom_price': third['price'],
        })
    return found
# Run the matcher on each customer's rows and pool the matches
all_matches = [
    match
    for _, customer_rows in df.groupby('customer_id')
    for match in match_pattern_ABC(customer_rows)
]

result_df = pd.DataFrame(all_matches)
print(result_df)

  customer_id start_date   end_date  bottom_price
0      cust_1 2020-05-12 2020-05-16            50
