### Movement criteria for each particle displacement

In [None]:
## import libraries and define helper functions
import os
import pandas as pd
import numpy as np

def reshape_events_long(df):
    """
    Convert the wide particle displacement table to long format.

    The wide table repeats the same block of measurement columns once per
    event; the first block has no suffix and later blocks carry the
    suffixes .1, .2, ... The number of blocks is taken from the number of
    columns whose name starts with "Date1".

    Returns a new DataFrame with one row per (input row, event), the
    measurement columns renamed to suffix-free names, and an "event"
    column labelled M1, M2, ... in block order.
    """
    id_cols = ["particleID", "gravelometer_size"]

    # Column names as they appear in the wide table (before the suffix) ...
    wide_names = [
        "Date1",
        "Date2",
        "DistanceD",
        "intersect?",
        "intersection_area",
        "MaxD",
        "MinD",
        "Radius1",
        "Radius2",
    ]
    # ... and the names they receive in the long table, in the same order
    long_names = [
        "Date1",
        "Date2",
        "DistanceD",
        "intersect",
        "intersect_area",
        "MaxD",
        "MinD",
        "Radius1",
        "Radius2",
    ]

    # One event block per "Date1*" column
    n_events = sum(1 for name in df.columns if name.startswith("Date1"))

    blocks = []
    for k in range(n_events):
        tag = f".{k}" if k else ""
        block = df[id_cols + [name + tag for name in wide_names]].copy()
        block.columns = id_cols + long_names
        block["event"] = f"M{k+1}"
        blocks.append(block)

    # Stack the per-event blocks on top of each other
    return pd.concat(blocks, ignore_index=True)

def valid_measurements(df):
    """
    Flag, per distance metric, whether a measurement is valid.

    A measurement is valid for a metric when that metric's distance is
    strictly greater than both Radius1 and Radius2. Three boolean columns
    are added to a copy of the input:
        - valid_euc  from the Euclidean distance (DistanceD)
        - valid_min  from the minimum distance (MinD)
        - valid_max  from the maximum distance (MaxD)
    Rows with a missing distance or radius compare as False.
    """
    flagged = df.copy()

    metric_by_flag = {
        "valid_euc": "DistanceD",
        "valid_min": "MinD",
        "valid_max": "MaxD",
    }
    for flag, metric in metric_by_flag.items():
        distance = flagged[metric]
        # Must clear both radii, not just the larger or smaller one
        flagged[flag] = distance.gt(flagged["Radius1"]) & distance.gt(flagged["Radius2"])

    return flagged

def define_movement(df, valid_col, distance_col="DistanceD"):
    """
    Flag a particle as moved when its measurement is valid AND the
    distance exceeds the particle diameter.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `valid_col`, `distance_col` and "gravelometer_size".
    valid_col : str
        Name of the validity flag column (e.g. "valid_euc"). The output
        column name is obtained by replacing "valid" with "moved".
    distance_col : str, default "DistanceD"
        Column holding the distance compared against the diameter.
        NOTE(review): the default compares the Euclidean distance for
        every metric, including valid_min / valid_max. Pass "MinD" or
        "MaxD" to test the same distance the validity flag was built
        from; confirm which is intended.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with the boolean moved column added. Rows with a
        missing distance or diameter are never flagged as moved.
    """
    df = df.copy()
    moved_col = valid_col.replace("valid", "moved")  # define moved column name

    # Diameter is divided by 1000 to put it on the distance scale
    # (per the original comment: converted to meters).
    diameter = df["gravelometer_size"] / 1000

    # Vectorised: no per-row df.at access, so it is fast and also works
    # when the frame's index contains duplicate labels.
    is_valid = df[valid_col].astype(bool)
    exceeds_diameter = df[distance_col] > diameter
    df[moved_col] = is_valid & exceeds_diameter
    return df

def apply_counting_with_history(df, moved_col):
    """
    Decide which movements are counted, using each particle's history.

    Rules, applied per particle in chronological event order:
    - first TRUE movement is NOT counted
    - second+ TRUE movements ARE counted
    - if particle disappears (DistanceD is NaN) the "memory" is reset

    Events are ordered by the number embedded in their label, so "M2"
    comes before "M10". A plain string sort would place "M10" before
    "M2" and walk the history out of order once there are ten or more
    events.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "particleID", "event", "DistanceD" and `moved_col`.
    moved_col : str
        Name of the boolean movement column (e.g. "moved_euc").

    Returns
    -------
    pandas.DataFrame
        Copy of `df`, sorted by particleID then event number (original
        index labels are kept), with a new boolean column
        "counted_<moved_col>".
    """
    df = df.copy()
    counted_col = f"counted_{moved_col}"

    # Temporary numeric sort key taken from the digits in the event label.
    # Labels without digits get NaN and fall back to the label itself
    # (third sort key), placed after the numbered events.
    order_col = "__event_order__"
    df[order_col] = pd.to_numeric(
        df["event"].astype(str).str.extract(r"(\d+)", expand=False),
        errors="coerce",
    )
    df = df.sort_values(["particleID", order_col, "event"]).drop(columns=order_col)

    counted = []
    has_moved_before = False  # memory of previous movement
    previous_pid = None

    # Rows of one particle are contiguous after the sort, so a single
    # pass suffices; the memory is cleared whenever the particle changes.
    rows = zip(df["particleID"], df["DistanceD"].notna(), df[moved_col])
    for position, (pid, present, moved_now) in enumerate(rows):
        if position == 0 or pid != previous_pid:
            has_moved_before = False
            previous_pid = pid

        is_counted = False
        if not present:
            # particle not present, reset memory
            has_moved_before = False
        elif moved_now:
            if has_moved_before:
                # counted movement
                is_counted = True
            else:
                # first movement (ignored)
                has_moved_before = True
        counted.append(is_counted)

    # Positional assignment as a plain boolean array (keeps bool dtype
    # even for an empty frame and does not rely on unique index labels).
    df[counted_col] = pd.Series(counted, dtype=bool).to_numpy()
    return df

Load and reshape data

In [2]:
# Read the wide displacement table and stack its event blocks into long format
df = reshape_events_long(pd.read_csv('../particle_displacement/displacement_results2.csv'))
# Order rows by particle, then by the numeric part of the event label (M1, M2, ...)
df = (
    df.assign(event_num=df["event"].str.replace("M","").astype(int))
    .sort_values(["particleID", "event_num"])
    .drop(columns="event_num")
    .reset_index(drop=True)
)
print(df.head())

   particleID  gravelometer_size       Date1       Date2  DistanceD intersect  \
0           1              128.0  03_28_2022  07_06_2022   0.223329     False   
1           1              128.0  07_06_2022  08_06_2022   0.086301     False   
2           1              128.0  08_06_2022  08_09_2022   0.027935     False   
3           1              128.0  08_09_2022    08_24_22   0.053109     False   
4           1              128.0    08_24_22  03_24_2023   0.128443     False   

   intersect_area      MaxD      MinD   Radius1   Radius2 event  
0             0.0  0.379837  0.066822  0.127602  0.028905    M1  
1             0.0  0.164265  0.008337  0.028905  0.049058    M2  
2             0.0  0.177992  0.000000  0.049058  0.100999    M3  
3             0.0  0.361647  0.000000  0.100999  0.207539    M4  
4             0.0  0.384113  0.000000  0.207539  0.048131    M5  


Validate measurements and determine movement

In [3]:
# Add the valid_euc / valid_min / valid_max flag columns, then preview the result
df = valid_measurements(df)
print(df.head())

   particleID  gravelometer_size       Date1       Date2  DistanceD intersect  \
0           1              128.0  03_28_2022  07_06_2022   0.223329     False   
1           1              128.0  07_06_2022  08_06_2022   0.086301     False   
2           1              128.0  08_06_2022  08_09_2022   0.027935     False   
3           1              128.0  08_09_2022    08_24_22   0.053109     False   
4           1              128.0    08_24_22  03_24_2023   0.128443     False   

   intersect_area      MaxD      MinD   Radius1   Radius2 event  valid_euc  \
0             0.0  0.379837  0.066822  0.127602  0.028905    M1       True   
1             0.0  0.164265  0.008337  0.028905  0.049058    M2       True   
2             0.0  0.177992  0.000000  0.049058  0.100999    M3      False   
3             0.0  0.361647  0.000000  0.100999  0.207539    M4      False   
4             0.0  0.384113  0.000000  0.207539  0.048131    M5      False   

   valid_min  valid_max  
0      False      

In [4]:
# Derive moved_euc, moved_min and moved_max from their matching validity flags
for validity_flag in ("valid_euc", "valid_min", "valid_max"):
    df = define_movement(df, validity_flag)

Counting particle movement (with particle continuity)

In [5]:
# Add counted_moved_euc, counted_moved_min and counted_moved_max in turn
for movement_flag in ("moved_euc", "moved_min", "moved_max"):
    df = apply_counting_with_history(df, movement_flag)


Movement Summary

In [15]:
def summarize_particle_movement(df, moved_cols=("moved_min", "moved_euc", "moved_max")):
    """
    Summarize particle movements per event and gravelometer size.

    For every event and every movement column, two rows are produced:
    - "Total surveyed, <moved_col>": number of unique particleIDs that are
      present (DistanceD not NaN), excluding particles experiencing their
      first movement (moved but not counted) and invalid measurements.
    - "Rocks moved, <moved_col>": sum of <moved_col> over those same
      eligible rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "event" (labels of the form "M<number>"),
        "gravelometer_size", "particleID", "DistanceD" and each column in
        `moved_cols`. The matching "valid_*" and "counted_moved_*" columns
        are used for exclusion when they exist.
    moved_cols : sequence of str
        Movement columns to summarize. The default is a tuple rather than
        a list so the default object cannot be mutated between calls.

    Returns
    -------
    pandas.DataFrame
        Columns: "event", "metric", one column per gravelometer size
        (ascending), and "Total". An input without rows yields an empty
        frame with these columns instead of raising KeyError.
    """
    sizes = sorted(df["gravelometer_size"].unique())
    output_columns = ["event", "metric"] + sizes + ["Total"]
    events = sorted(df["event"].unique(), key=lambda label: int(label.replace("M", "")))
    summary_rows = []

    for event in events:
        sub = df[df["event"] == event]
        present = sub["DistanceD"].notna()

        # Calculate eligible particles separately for each distance metric
        for moved_col in moved_cols:
            # Get corresponding validity / counted columns
            valid_col = moved_col.replace("moved", "valid")
            counted_col = f"counted_{moved_col}"

            # Exclusion mask specific to this distance metric
            exclude_mask = pd.Series(False, index=sub.index)

            # Exclude if this specific metric had first movement (moved but not counted)
            if counted_col in sub.columns:
                first_movement_mask = (sub[moved_col] == True) & (sub[counted_col] == False)
                exclude_mask = exclude_mask | first_movement_mask

            # Exclude if this specific metric is invalid
            if valid_col in sub.columns:
                invalid_mask = sub[valid_col] == False
                exclude_mask = exclude_mask | invalid_mask

            eligible_particles = sub[present & ~exclude_mask]
            by_size = eligible_particles.groupby("gravelometer_size")

            # Total surveyed for this metric
            total_surveyed = by_size["particleID"].nunique().reindex(sizes, fill_value=0)
            total_surveyed["Total"] = total_surveyed.sum()
            summary_rows.append({"event": event, "metric": f"Total surveyed, {moved_col}", **total_surveyed.to_dict()})

            # Count moved particles for this metric
            moved_counts = by_size[moved_col].sum().reindex(sizes, fill_value=0)
            moved_counts["Total"] = moved_counts.sum()
            summary_rows.append({"event": event, "metric": f"Rocks moved, {moved_col}", **moved_counts.to_dict()})

    # Passing the column list explicitly fixes the column order and keeps
    # the expected columns even when no rows were produced.
    return pd.DataFrame(summary_rows, columns=output_columns)

In [16]:
# Build the per-event, per-size summary for all three distance metrics and print it
summary = summarize_particle_movement(df, moved_cols=["moved_min", "moved_euc", "moved_max"])
print(summary)

   event                     metric  11.0  16.0  22.6  32.0  45.0  64.0  90.0  \
0     M1  Total surveyed, moved_min     0     0     0     0     0     0     0   
1     M1     Rocks moved, moved_min     0     0     0     0     0     0     0   
2     M1  Total surveyed, moved_euc     0     0     0     0     0     1     1   
3     M1     Rocks moved, moved_euc     0     0     0     0     0     0     0   
4     M1  Total surveyed, moved_max     0     0     0     1     3     5     2   
5     M1     Rocks moved, moved_max     0     0     0     0     0     0     0   
6     M2  Total surveyed, moved_min     0     0     0     0     1     1     0   
7     M2     Rocks moved, moved_min     0     0     0     0     1     0     0   
8     M2  Total surveyed, moved_euc     0     1     1     3     4     3     1   
9     M2     Rocks moved, moved_euc     0     1     1     3     4     2     1   
10    M2  Total surveyed, moved_max     0     2     5    12    11    11     3   
11    M2     Rocks moved, mo

Export to CSV

In [17]:
# Write the per-measurement table and the summary table to the working directory,
# without the DataFrame index
df.to_csv('particle_movement_detailed.csv', index=False)
summary.to_csv('particle_movement_summary.csv', index=False)