In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import os

import whisker_smart_merger as wsm

# Notebook-wide default figure size; cells that pass figsize= explicitly override it.
plt.rcParams.update({'figure.figsize': (14, 6)})

In [2]:
# --- Data locations (Windows-style absolute paths) ---------------------------
# Both files live in the same folder, so the folder is spelled out only once.
DATA_DIR = r"C:\Users\wanglab\Desktop\Mel\shortened"
INPUT_CSV = DATA_DIR + "\\lines_output.csv"
OUTPUT_CSV = DATA_DIR + "\\lines_output_smart_merged.csv"

# --- Merge parameters (forwarded to wsm.smart_merge_dataset) ------------------
# Frames with more rows than this are reported as "over target" below.
TARGET_WHISKER_COUNT = 5
# Endpoint distance limit in pixels; larger values merge whiskers further apart.
DISTANCE_THRESHOLD = 30.0
# Minimum score a candidate pair needs; higher values are more selective.
MIN_MERGE_SCORE = 0

In [3]:
# Load the raw tracking table and count how many rows (reported as whiskers)
# each frame contains.
df_original = pd.read_csv(INPUT_CSV)
original_counts = df_original.groupby('Frame').size()
original_distribution = Counter(original_counts)
total_original_frames = len(original_counts)

separator = "=" * 40
print("Original Data Whisker Distribution:")
print(separator)
# One line per distinct per-frame count, in ascending order of the count.
for whisker_count, num_frames in sorted(original_distribution.items()):
    percentage = num_frames / total_original_frames * 100
    print(f"{whisker_count} whiskers: {num_frames:5d} frames ({percentage:5.2f}%)")
print(separator)

# Frames exceeding the target count; reused later for the before/after comparison.
frames_over_target = int((original_counts > TARGET_WHISKER_COUNT).sum())
over_target_pct = frames_over_target / total_original_frames * 100
print(f"\nFrames with >{TARGET_WHISKER_COUNT} whiskers: {frames_over_target} ({over_target_pct:.2f}%)")

Original Data Whisker Distribution:
2 whiskers:   530 frames ( 0.45%)
3 whiskers:   603 frames ( 0.51%)
4 whiskers:  1037 frames ( 0.88%)
5 whiskers: 98715 frames (83.83%)
6 whiskers: 14564 frames (12.37%)
7 whiskers:  2094 frames ( 1.78%)
8 whiskers:   169 frames ( 0.14%)
9 whiskers:    28 frames ( 0.02%)
10 whiskers:     6 frames ( 0.01%)
11 whiskers:     4 frames ( 0.00%)

Frames with >5 whiskers: 16865 (14.32%)


In [4]:
# Run the merger on the input CSV with the parameters from the config cell.
# NOTE(review): verbose=True appears to log every processed frame (see the
# recorded output) — consider turning it off for full-dataset runs.
merge_settings = {
    'output_path': OUTPUT_CSV,
    'target_whisker_count': TARGET_WHISKER_COUNT,
    'distance_threshold': DISTANCE_THRESHOLD,
    'min_merge_score': MIN_MERGE_SCORE,
    'verbose': True,
}
result_df, statistics = wsm.smart_merge_dataset(INPUT_CSV, **merge_settings)

print("\nSmart merging completed!")

Loading: C:\Users\wanglab\Desktop\Mel\shortened\lines_output.csv
Total rows: 604342
Total frames: 117750

Parameters:
  Distance threshold: 30.0px
  Min merge score: 0
  Target whisker count: 5
  Identified 4 fixture points (protected bases)
  Identified 4 fixture points (protected bases)
  Identified 4 fixture points (protected bases)
  Identified 4 fixture points (protected bases)
  Identified 3 fixture points (protected bases)
  Identified 3 fixture points (protected bases)
  Identified 4 fixture points (protected bases)
  Identified 3 fixture points (protected bases)
  Identified 3 fixture points (protected bases)
  Identified 2 fixture points (protected bases)
  Identified 3 fixture points (protected bases)
  Identified 2 fixture points (protected bases)
  Frame 511: Merging whiskers 2 and 4
    Score: 120.0, Reason: collinear, moderate_endpoints, natural_connection, similar_length, very_straight
    Distance: 29.7px, Type: base1-tip2
Frame 511: 7 → 6 whiskers
  Merged: 1, Noise f

In [None]:
# Same per-frame count summary as for the raw data, now on the merged result.
merged_counts = result_df.groupby('Frame').size()
merged_distribution = Counter(merged_counts)
total_merged_frames = len(merged_counts)

separator = "=" * 40
print("Merged Data Whisker Distribution:")
print(separator)
# One line per distinct per-frame count, in ascending order of the count.
for whisker_count, num_frames in sorted(merged_distribution.items()):
    percentage = num_frames / total_merged_frames * 100
    print(f"{whisker_count} whiskers: {num_frames:5d} frames ({percentage:5.2f}%)")
print(separator)

# Frames still above the target after merging; compared with the raw figure later.
frames_over_target_merged = int((merged_counts > TARGET_WHISKER_COUNT).sum())
over_target_merged_pct = frames_over_target_merged / total_merged_frames * 100
print(f"\nFrames with >{TARGET_WHISKER_COUNT} whiskers: {frames_over_target_merged} ({over_target_merged_pct:.2f}%)")

In [None]:
# Before/after bar charts of the per-frame whisker-count distribution.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

counts_original = sorted(original_distribution)
freqs_original = [original_distribution[c] for c in counts_original]
counts_merged = sorted(merged_distribution)
freqs_merged = [merged_distribution[c] for c in counts_merged]

# (axis, x values, bar heights, bar colour, title) for each panel, left to right.
panels = (
    (axes[0], counts_original, freqs_original, 'steelblue', 'Original Distribution'),
    (axes[1], counts_merged, freqs_merged, 'green', 'After Smart Merging'),
)

for ax, counts, freqs, bar_color, panel_title in panels:
    ax.bar(counts, freqs, color=bar_color, alpha=0.7)
    # Dashed red line marks the target whisker count on both panels.
    ax.axvline(
        TARGET_WHISKER_COUNT,
        color='red',
        linestyle='--',
        linewidth=2,
        label=f'Target ({TARGET_WHISKER_COUNT})',
    )
    ax.set_xlabel('Number of Whiskers')
    ax.set_ylabel('Number of Frames')
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

# Reduction in the number of frames that exceed the target count.
frames_fixed = frames_over_target - frames_over_target_merged
print(f"\nImprovement: {frames_fixed} fewer frames with >{TARGET_WHISKER_COUNT} whiskers")

In [None]:
def _describe_whisker(idx, row):
    """Build the per-whisker dict passed to the wsm helpers, or None if unusable.

    Rows whose parsed X coordinates contain fewer than two points are skipped.
    """
    x_coords = wsm.parse_coordinates(row['X'])
    y_coords = wsm.parse_coordinates(row['Y'])

    if len(x_coords) < 2:
        return None

    base, tip = wsm.get_whisker_endpoints(x_coords, y_coords)
    return {
        'index': idx,
        'x_coords': x_coords,
        'y_coords': y_coords,
        'base': base,
        'tip': tip,
        'angle': wsm.calculate_whisker_angle(x_coords, y_coords),
        'length': wsm.calculate_whisker_length(x_coords, y_coords),
        'row': row,
    }


# Inspect a single frame in detail: the first frame value that appears in the data.
first_frame_value = df_original['Frame'].unique()[0]
test_frame = df_original[df_original['Frame'] == first_frame_value].copy()
frame_num = test_frame['Frame'].iloc[0]

print(f"Testing frame {frame_num} with {len(test_frame)} whiskers\n")

described_rows = (_describe_whisker(idx, row) for idx, row in test_frame.iterrows())
whiskers = [entry for entry in described_rows if entry is not None]

print("Whisker properties:")
for position, whisker in enumerate(whiskers):
    # A falsy angle (None or 0) is displayed as 0 degrees.
    angle_deg = np.degrees(whisker['angle']) if whisker['angle'] else 0
    print(f"Whisker {position}: length={whisker['length']:.1f}px, angle={angle_deg:.1f}°")
    if wsm.is_noise_whisker(whisker, whiskers):
        print("  ⚠️  LIKELY NOISE (too short)")

print(f"\nFinding merge candidates (distance threshold: {DISTANCE_THRESHOLD}px)...\n")

candidates = wsm.find_best_merge_candidates(whiskers, DISTANCE_THRESHOLD, min_score=MIN_MERGE_SCORE)

if not candidates:
    print("No merge candidates found with current threshold and score requirements")
else:
    print(f"Found {len(candidates)} merge candidates:\n")
    # Show at most the first ten candidates, in the order the helper returned them.
    for rank, (first, second, score_info) in enumerate(candidates[:10], start=1):
        print(f"Rank {rank}: Whiskers {first} ↔ {second}")
        print(f"  Score: {score_info['score']:.1f}")
        print(f"  Reason: {score_info['reason']}")
        print(f"  Distance: {score_info['distance']:.1f}px")
        print(f"  Merge type: {score_info['merge_type']}")
        print()

In [None]:
def _finish_panel(ax, title):
    """Apply the labelling shared by both panels (image-style, y axis flipped)."""
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.invert_yaxis()


# Left: the inspected frame before merging. Right: the same frame afterwards.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
before_ax, after_ax = axes[0], axes[1]

for label_idx, whisker in enumerate(whiskers):
    before_ax.plot(whisker['x_coords'], whisker['y_coords'], 'o-', linewidth=2, markersize=3, label=f'W{label_idx}')
    # Green marker = base, red marker = tip, as reported by wsm.get_whisker_endpoints.
    before_ax.plot(whisker['base'][0], whisker['base'][1], 'go', markersize=8)
    before_ax.plot(whisker['tip'][0], whisker['tip'][1], 'ro', markersize=8)

_finish_panel(before_ax, f'Original Frame {frame_num} ({len(whiskers)} whiskers)')

merged_frame = result_df[result_df['Frame'] == frame_num].copy()
for label_idx, (row_idx, row_data) in enumerate(merged_frame.iterrows()):
    xs = wsm.parse_coordinates(row_data['X'])
    ys = wsm.parse_coordinates(row_data['Y'])
    if xs and ys:
        after_ax.plot(xs, ys, 'o-', linewidth=2, markersize=3, label=f'W{label_idx}')
        # NOTE(review): here green/red mark the first/last point, not the
        # base/tip from wsm.get_whisker_endpoints used on the left — confirm
        # the two conventions agree.
        after_ax.plot(xs[0], ys[0], 'go', markersize=8)
        after_ax.plot(xs[-1], ys[-1], 'ro', markersize=8)

_finish_panel(after_ax, f'After Smart Merging ({len(merged_frame)} whiskers)')

plt.tight_layout()
plt.show()

In [None]:
# Human-readable cheat sheet for the two tunable parameters, printed in one go.
# The current values are interpolated so the guide always reflects the config cell.
guide_rule = "=" * 60
guide_lines = [
    "PARAMETER TUNING GUIDE:",
    guide_rule,
    f"\n1. DISTANCE_THRESHOLD (currently: {DISTANCE_THRESHOLD:.1f}px)",
    "   - Larger value: More liberal, will merge whiskers further apart",
    "   - Smaller value: More conservative, only merge very close whiskers",
    "   - Recommended: 20-50px for split whiskers with gaps",
    f"\n2. MIN_MERGE_SCORE (currently: {MIN_MERGE_SCORE})",
    "   - Higher value: More selective, only merge high-confidence pairs",
    "   - Lower/negative: More permissive, allow questionable merges",
    "   - Recommended: 0-30 for balanced approach",
    "   - Try -50 if being too conservative, +50 if merging wrong pairs",
    "\nScoring system breakdown:",
    "  +50: Collinear (same direction)",
    "  +30: Very close endpoints (<50% threshold)",
    "  +20: Natural end-to-end connection",
    "  +20: Similar lengths",
    "  -40: Base-to-base or tip-to-tip (U-shape penalty)",
    "  -100: Not collinear (different directions)",
    "\n3. Noise filtering:",
    "   - Automatically removes whiskers <30% of median length",
    "   - Modify min_length_ratio in is_noise_whisker() to adjust",
    guide_rule,
]
print("\n".join(guide_lines))