The goal is to isolate punch cycles in a time series of a punch signal, i.e. to group the series into contiguous non-zero segments, where:

1. A new "event" starts when force becomes non-zero
2. It ends when force returns to zero

In [None]:
import pandas as pd
from pathlib import Path 
import os
import numpy as np
import plotly.graph_objs as go

def find_latest_csv(base_dir):
    """
    Returns the path of the most recently modified CSV file under `base_dir`.

    The directory is searched recursively. Any file whose full path contains
    'data_archive' (case-insensitive) is skipped.

    Parameters:
        base_dir (str or Path): Root directory to search.

    Returns:
        str or None: Path of the newest CSV file, or None if no file qualifies.
    """
    candidates = []
    for csv_path in Path(base_dir).rglob('*.csv'):
        # Exclude 'data_archive' files
        if 'data_archive' in str(csv_path).lower():
            continue
        candidates.append(csv_path)

    # With no candidates, max() falls back to None instead of raising
    newest = max(candidates, key=os.path.getmtime, default=None)
    return None if newest is None else str(newest)

def parse_punch_data(df, force_type: str, num_punches: int = 8) -> dict:
    """
    Isolates each punch in the data and obtains the maximum force value of every punch.

    A punch segment is a run of consecutive rows that share the same non-zero
    punch number. Rows whose punch number is 0 (or missing) are treated as
    inactive readings and separate the segments. A change of punch number also
    starts a new segment, so two punches that follow each other without a zero
    gap are not merged into one.

    There are 3 types of punch force data:
        1. PRECOMPRESSION: precompData, precompPunchNo
        2. COMPRESSION: compr1Data, compr1PunchNo
        3. EJECTION: ejectData, ejectPunchNo

    Parameters:
        df (pd.DataFrame): Input DataFrame containing time series punch force data.
            Only the punch-number and force columns of the selected type are read.
        force_type (str): One of ['precomp', 'compr', 'eject'] indicating punch type.
        num_punches (int): Number of punches to report. Default is 8.

    Returns:
        dict: Keys are punch numbers (1 to num_punches), values are lists of max force
              values, one per punch segment, in the order the segments appear in `df`.
              Punches without any segment map to an empty list; punch numbers outside
              1..num_punches are ignored.

    Raises:
        ValueError: If `force_type` is not one of the supported types.
    """
    punch_map = {
        'precomp': ('precompPunchNo', 'precompData'),
        'compr': ('compr1PunchNo', 'compr1Data'),
        'eject': ('ejectPunchNo', 'ejectData'),
    }

    if force_type not in punch_map:
        raise ValueError("Invalid punch type. Choose from 'precomp', 'compr', or 'eject'.")

    punch_no_col, force_data_col = punch_map[force_type]

    # A missing punch number is handled like 0, i.e. as an inactive reading
    punch_no = df[punch_no_col].fillna(0)
    force = df[force_data_col]

    # Filter out zero punch numbers (inactive readings)
    active = punch_no != 0

    # A new segment id starts whenever the punch number changes (including to/from 0)
    segment_id = (punch_no != punch_no.shift()).cumsum()

    # maybe TODO Add noise filtering for short-duration segments if needed

    # One row per active segment: its (constant) punch number and its max force.
    # Both results are sorted by segment id, so they line up element by element.
    active_segments = segment_id[active]
    segment_punch = punch_no[active].groupby(active_segments).first()
    segment_peak = force[active].groupby(active_segments).max()

    # Start with an empty list for every punch so missing punches are still present
    max_dict = {i: [] for i in range(1, num_punches + 1)}
    for punch, peak in zip(segment_punch.tolist(), segment_peak.tolist()):
        if punch in max_dict:
            max_dict[punch].append(peak)

    return max_dict

def get_punch_stats(force_dict: dict) -> dict:
    """
    Summarises the stored maximum force values of every punch.

    Parameters:
        force_dict (dict): Maps punch number to a list of maximum force values.

    Returns:
        dict: Maps punch number to a tuple (most_recent, average of maximum).
              Both numbers are converted with int(), i.e. truncated.
              A punch without values maps to (0, 0).
    """
    summary = {}
    for punch_no, peaks in force_dict.items():
        if not peaks:
            summary[punch_no] = (0, 0)
            continue

        latest = int(peaks[-1])
        mean_peak = int(np.mean(peaks))
        summary[punch_no] = (latest, mean_peak)

    return summary

def generate_punch_figure(stats: dict, force_type: str) -> go.Figure:
    """
    Generates a grouped bar figure for punch statistics.

    Parameters:
        stats (dict): Maps punch number to (most_recent, average of maximum),
            as produced by get_punch_stats. The bars follow the key order of this dict.
        force_type (str): Type of punch ('precomp', 'compr', 'eject').

    Returns:
        go.Figure: Plotly figure object. An empty placeholder figure is returned when
            `stats` is empty or holds no non-zero value, or when `force_type` is invalid.
    """

    def default_figure(title="No data available"):
        return go.Figure(
            layout=go.Layout(
                title=title,
                plot_bgcolor="#1f2937",
                paper_bgcolor="#1f2937",
                font=dict(color="#d1d5db"),
                xaxis=dict(title="Punch Number", showgrid=True, gridcolor="rgba(255,255,255,0.1)"),
                yaxis=dict(title="Force (N)", showgrid=True, gridcolor="rgba(255,255,255,0.1)"),
                margin=dict(l=40, r=10, t=30, b=30),
            )
        )

    # No data: `stats` is empty, or every entry is empty / all zeros.
    # get_punch_stats reports (0, 0) for a punch without values, so checking
    # len(v) == 0 alone would never detect that case.
    if not stats or all(not any(v) for v in stats.values()):
        return default_figure()

    # Validate input
    if force_type not in {'precomp', 'compr', 'eject'}:
        return default_figure("Invalid force type specified.")

    colors = {
        "precomp": '#E91E63',  # Pink
        "compr": '#1E88E5',    # Blue
        "eject": '#4CAF50',    # Green
    }

    base_color = colors[force_type]

    # Derive the x-axis labels from the keys so labels and values always line up,
    # whatever number of punches `stats` contains
    x_labels = [str(p) for p in stats]

    avg_force = [s[1] for s in stats.values()]
    prev_force = [s[0] for s in stats.values()]

    # Create bar trace for average maximum force
    avg_bar = go.Bar(
        x=x_labels,
        y=avg_force,
        text=[f"{f:.0f}" for f in avg_force],
        textposition='outside',
        name="Avg of recent",
        marker=dict(color=base_color),
        hovertemplate="Punch %{x}<br>Avg: %{y:.1f} N<extra></extra>"
    )

    # Create bar trace for most recent maximum force.
    # The hover text shows this trace's own y value (not the average).
    prev_bar = go.Bar(
        x=x_labels,
        y=prev_force,
        text=[f"{f:.0f}" for f in prev_force],
        textposition='outside',
        name="Max of recent",
        marker=dict(color='#F59E0B'),  # Amber color
        hovertemplate="Punch %{x}<br>Max: %{y:.1f} N<extra></extra>",
    )

    # Leave 25% headroom above the tallest bar of either trace so that the
    # 'outside' text labels are not cut off
    tallest = max(prev_force + avg_force)

    # Create layout for the figure
    layout = go.Layout(
        title=dict(
            text='Force by Punch Number',
            font=dict(size=10)
        ),
        xaxis=dict(
            title=dict(
                text="Punch Number",
                font=dict(size=10),
                standoff=5,
            ),
            showgrid=True,
            gridcolor='rgba(255,255,255,0.1)',
            categoryorder='array',
            categoryarray=x_labels
        ),
        yaxis=dict(
            title=dict(
                text="Force (N)",
                font=dict(size=10),
                standoff=5,
            ),
            showgrid=True,
            gridcolor='rgba(255,255,255,0.1)',
            range=[0, max(tallest * 1.25, 1)]
        ),
        plot_bgcolor='#1f2937',
        paper_bgcolor='#1f2937',
        font=dict(color='#d1d5db'),
        # barmode='stack',
        uniformtext=dict(
            mode='hide',
            minsize=8
        ),
        margin=dict(l=40, r=10, t=40, b=25),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            font=dict(size=10)
        )
    )

    return go.Figure(data=[avg_bar, prev_bar], layout=layout)

def update_stats_store(old_stats: dict, addendum: dict, k=3, num_punches=8) -> dict:
    """
    Updates the stats store with new data, keeping at most the last `k` entries per key.

    Parameters:
        old_stats (dict): Existing stats dictionary. Keys may be strings ('1', '2', ...)
            or ints; None is treated as an empty store.
        addendum (dict): New stats to add. Keys may be ints or strings; None is
            treated as empty.
        k (int): Maximum number of entries to keep per key. Default is 3.
            A value of 0 or less keeps no entries.
        num_punches (int): Number of punches in the store. Default is 8.

    Returns:
        dict: Updated stats dictionary with int keys 1..num_punches and at most `k`
              entries per key.

    NOTE: The keys in `old_stats` are usually strings as dcc store serializes these as JSON.
    """
    old_stats = old_stats or {}
    addendum = addendum or {}

    def _entries(store, punch):
        # Accept both the JSON (string) form and the int form of the key
        values = store.get(str(punch))
        if values is None:
            values = store.get(punch)
        return list(values) if values else []

    updated = {}
    for punch in range(1, num_punches + 1):
        merged = _entries(old_stats, punch) + _entries(addendum, punch)
        # merged[-0:] would be the whole list, so k <= 0 is handled explicitly
        updated[punch] = merged[-k:] if k > 0 else []

    return updated

# find_latest_csv returns None when the folder holds no CSV file; fail with a
# clear message instead of handing None to pd.read_csv
latest_csv_path = find_latest_csv('./output/20250515')
if latest_csv_path is None:
    raise FileNotFoundError("No CSV files found under './output/20250515'.")

df = pd.read_csv(latest_csv_path)
parse_punch_data(df, 'precomp')

In [None]:
# Hand-written sample input: punch number -> list of maximum force values
xd = {
    1: [249, 204, 203],
    2: [204, 607, 4200],
    3: [500, 203, 522],
    4: [204, 5066, 600],
    5: [249],
    6: [249],
    7: [247],
    8: [247],
}

# Reduce the sample to per-punch statistics and draw them as a compression chart
stats = get_punch_stats(xd)
fig = generate_punch_figure(stats, 'compr')

fig.show()

### Test for punch graphs

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Output, Input, State
import plotly.graph_objs as go
import pandas as pd

from pathlib import Path
import os


# Module-level state
df = pd.DataFrame()
current_folder = ""
last_csv_file = None
last_file_size = last_row_count = last_db_save_time = 0

# Folder that is scanned for CSV files
chosen_folder = "./output/20250515"
# chosen_folder = "./output/20250602"

app = dash.Dash(__name__)

app.layout = html.Div([
    dcc.Graph(id='graph'),

    # dcc.Store(id="folder-path", data="./output/20250524")

    # Timer that triggers the callback; the interval is given in ms (11 s here)
    dcc.Interval(id='interval-component', interval=11_000, n_intervals=0),

    # One (initially empty) list of maximum forces per punch number 1..8
    dcc.Store(id='punch-max-store', data={punch: [] for punch in range(1, 9)}),
])

@app.callback(
    Output('punch-max-store', 'data'),
    Output('graph', 'figure'),
    Input('interval-component', 'n_intervals'),
    State('punch-max-store', 'data'),
)
def update_graph(n, current_data):
    """
    Interval callback: re-reads the latest CSV and refreshes the punch store and figure.

    Parameters:
        n (int): Value of the interval component's `n_intervals` (not used).
        current_data (dict or None): Current content of 'punch-max-store'.

    Returns:
        tuple: (updated store data, punch figure). When no CSV file is found the
               store is left untouched and a placeholder figure is returned.
    """
    # global df, last_db_save_time, last_csv_file, global_start_time, last_file_size, last_row_count
    global df

    print(f'Current punch max store data: {current_data}')

    latest_csv = find_latest_csv(chosen_folder)

    # find_latest_csv returns None when the folder holds no CSV file
    if not latest_csv:
        print("No CSV files found in the specified folder.")
        return dash.no_update, generate_punch_figure({}, 'precomp')

    # TODO: DEAL WITH UPDATING LATER. The whole file is re-read and re-parsed on
    # every tick, so already-seen maxima are appended to the store again. Track
    # the file name/size (last_csv_file, last_file_size) and only process new rows.

    # Read the data
    df = pd.read_csv(latest_csv)

    # Get the latest punch maxes
    new_maxes = parse_punch_data(df, 'precomp')

    # Update the store with the new max values (the store may still be None)
    updated = update_stats_store(current_data or {}, new_maxes, k=3)

    # Get the punch statistics (most recent, average of maximum)
    stats = get_punch_stats(updated)

    fig = generate_punch_figure(stats, 'precomp')

    print(f"Updated punch max store data: {updated}")

    return updated, fig

# Run the Dash app (with debug=True) only when this code is executed as the main module
if __name__ == '__main__':
    app.run(debug=True)

### Test for main graph

In [1]:
import dash
from dash import dcc, html
from dash.dependencies import Output, Input, State
import plotly.graph_objs as go
import pandas as pd

import plotly.express as px

from pathlib import Path
import os

# Module-level state
df = pd.DataFrame()
current_folder = ""
last_csv_file = None
last_file_size = last_row_count = last_db_save_time = 0

# Folder that is scanned for CSV files
chosen_folder = "./output/20250515"
# chosen_folder = "./output/20250602"

app = dash.Dash(__name__)

app.layout = html.Div([
    dcc.Graph(id='graph'),

    # Timer that triggers the callback; the interval is given in ms (11 s here)
    dcc.Interval(id='interval-component', interval=11_000, n_intervals=0),

    # Dropdown for the time window; the option values are window lengths in ms
    dcc.Dropdown(
        id="time-window-input",
        options=[
            {'label': label, 'value': window_ms}
            for label, window_ms in (
                ('1 sec', 1000),
                ('10 sec', 10000),
                ('1 min', 60000),
                ('5 min', 300000),
            )
        ],
        value=1000,
        clearable=False,
    ),
])

@app.callback(
    Output('graph', 'figure'),
    Input('interval-component', 'n_intervals'),
    Input('time-window-input', 'value'),
)
def update_graph(n, time_window):
    """
    Plots 'compr1Data' over the last `time_window` milliseconds of the latest CSV.

    Parameters:
        n (int): Value of the interval component's `n_intervals` (not used).
        time_window (int): Length of the plotted window in milliseconds.

    Returns:
        Plotly figure with the windowed force signal, or dash.no_update when no
        CSV file is found or the file has no usable timestamps.
    """
    # global df, last_db_save_time, last_csv_file, global_start_time, last_file_size, last_row_count
    global df

    latest_csv = find_latest_csv(chosen_folder)

    # find_latest_csv returns None when the folder holds no CSV file
    if not latest_csv:
        return dash.no_update

    # Read the data
    df = pd.read_csv(latest_csv)

    df['actualTime(ms)'] = pd.to_datetime(df['actualTime(ms)'])

    latest_time = df['actualTime(ms)'].max()

    # An empty file gives NaT here, which pd.date_range cannot handle
    if pd.isna(latest_time):
        return dash.no_update

    window_start = latest_time - pd.to_timedelta(time_window, unit='ms')

    full_time_range = pd.date_range(
        start=window_start,
        end=latest_time,
        freq='2ms'  # Adjust to your actual data frequency
    )

    # Filter to the window
    windowed = df.loc[df['actualTime(ms)'] >= window_start, ['actualTime(ms)', 'compr1Data']]

    # reindex() raises on duplicate index labels, so keep one row per timestamp
    windowed = windowed.drop_duplicates(subset='actualTime(ms)', keep='last')

    # Reindex onto the regular grid; grid points without a sample are filled with 0
    filtered_df = (
        windowed.set_index('actualTime(ms)')
        .reindex(full_time_range, fill_value=0)
        .rename_axis('actualTime(ms)')
        .reset_index()
    )

    # Plot filtered df
    fig = px.line(filtered_df, x='actualTime(ms)', y='compr1Data', title='Force over Time (Filtered)')

    fig.update_xaxes(
        tickformat='%H:%M:%S.%f',  # Format to show time with fractional seconds
        # dtick=1000  # Set the tick interval to 1 second
    )

    return fig

# Run the Dash app (with debug=True) only when this code is executed as the main module
if __name__ == '__main__':
    app.run(debug=True)

In [53]:
from pathlib import Path

# print(__file__)


# Show the absolute path that the relative './output' folder resolves to
Path('./output').resolve()



PosixPath('/Users/johnahn/Desktop/ESDev/testing/output')

In [58]:
# cwd() is a classmethod: it returns the current working directory and ignores
# the path it is called on, so call it on the class directly
Path.cwd()

PosixPath('/Users/johnahn/Desktop/ESDev/testing')

In [16]:
# Display `latest_time`. In the visible code this name is only assigned inside
# update_graph, so the value presumably comes from an earlier interactive run -- TODO confirm
latest_time

Timestamp('2025-05-15 15:11:59.470000')