## 1. Import Required Libraries

In [6]:
import pandas as pd
import os
from collections import Counter
import whisker_counter as wc

## 2. Configuration Settings

**Edit these settings to match your data:**

In [7]:
# ===== CONFIGURATION =====

# Path to your CSV file containing frame and whisker line data
CSV_PATH = "C:\\Users\\wanglab\\Desktop\\Mel\\shortened\\lines_output.csv"

# Threshold settings
MIN_WHISKERS = 5  # Frames with LESS than this number will be filtered
MAX_WHISKERS = 5  # Frames with MORE than this number will be filtered

# Output directory for saved CSV files
OUTPUT_DIR = "./results"


def validate_thresholds(min_whiskers, max_whiskers):
    """Check that the lower whisker threshold does not exceed the upper one.

    Args:
        min_whiskers: Lower bound; counts strictly below it are filtered.
        max_whiskers: Upper bound; counts strictly above it are filtered.

    Raises:
        ValueError: If ``min_whiskers`` is greater than ``max_whiskers``.
    """
    # With an inverted pair, any count strictly between the two values is both
    # "less than min" and "more than max", so it would be reported twice.
    if min_whiskers > max_whiskers:
        raise ValueError(
            f"MIN_WHISKERS ({min_whiskers}) must not be greater than "
            f"MAX_WHISKERS ({max_whiskers})"
        )


# Fail fast on a bad configuration before touching the filesystem
validate_thresholds(MIN_WHISKERS, MAX_WHISKERS)

# Create output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Configuration loaded successfully!")
print(f"CSV Path: {CSV_PATH}")
print(f"Output Directory: {OUTPUT_DIR}")
print(f"Filtering for frames with < {MIN_WHISKERS} or > {MAX_WHISKERS} whiskers")

Configuration loaded successfully!
CSV Path: C:\Users\wanglab\Desktop\Mel\shortened\lines_output.csv
Output Directory: ./results
Filtering for frames with < 5 or > 5 whiskers


## 3. Load and Inspect Data

In [8]:
# Fail early with a clear message if the path is missing or is not a regular
# file (os.path.isfile is False for directories, which os.path.exists accepts)
if not os.path.isfile(CSV_PATH):
    raise FileNotFoundError(f"CSV file not found: {CSV_PATH}")

# Load the CSV file
df = pd.read_csv(CSV_PATH)

# The distribution analysis groups rows by 'Frame'; stop here with a readable
# error instead of a KeyError later if that column is absent
if 'Frame' not in df.columns:
    raise ValueError(
        f"Expected a 'Frame' column in {CSV_PATH}; found columns: {list(df.columns)}"
    )

print("Data loaded successfully!")
print(f"Total rows: {len(df)}")
print(f"\nColumn names: {list(df.columns)}")
print("\nFirst few rows:")
# Last expression of the cell: rendered as a rich table by the notebook
df.head()

Data loaded successfully!
Total rows: 604342

Column names: ['Frame', 'X', 'Y']

First few rows:


Unnamed: 0,Frame,X,Y
0,0,"300.0,251.0,210.9,175.9,140.9","212.9,217.2,225.8,239.8,260.8"
1,0,"291.0,220.0,193.0,157.0,102.0,51.2","200.2,196.5,196.6,199.6,209.0,225.5"
2,0,"443.1,519.1,573.8,597.9,627.8,654.8,669.1","210.5,221.6,236.5,246.1,261.3,281.3,293.9"
3,0,"455.0,539.0,600.0,629.0,653.2,678.9,701.1","197.0,200.2,207.0,212.8,219.5,229.1,239.9"
4,0,"283.0,198.0,134.0,79.0,25.0","183.9,167.2,157.3,152.2,152.2"


## 4. Analyze Whisker Distribution

Get an overview of whisker counts across all frames:

In [9]:
# Map each frame number to the number of rows (whiskers) recorded for it
whisker_counts = df.groupby('Frame').size().to_dict()

# Tally how many frames share each whisker count
distribution = Counter(whisker_counts.values())
total_frames = len(whisker_counts)

# One summary record per distinct whisker count, in ascending count order
summary_data = [
    {
        'Whisker_Count': n_whiskers,
        'Number_of_Frames': n_frames,
        'Percentage': f"{n_frames / total_frames * 100:.2f}%",
    }
    for n_whiskers, n_frames in sorted(distribution.items())
]
summary_df = pd.DataFrame(summary_data)

# Plain-text report framed by horizontal rules
banner = "=" * 60
print("\n" + banner)
print("WHISKER DISTRIBUTION SUMMARY")
print(banner)
print(f"Total frames analyzed: {total_frames}\n")
print(summary_df.to_string(index=False))
print(banner)


WHISKER DISTRIBUTION SUMMARY
Total frames analyzed: 117750

 Whisker_Count  Number_of_Frames Percentage
             2               530      0.45%
             3               603      0.51%
             4              1037      0.88%
             5             98715     83.83%
             6             14564     12.37%
             7              2094      1.78%
             8               169      0.14%
             9                28      0.02%
            10                 6      0.01%
            11                 4      0.00%


## 5. Filter Frames by Whisker Count

### 5A. Frames with LESS or MORE whiskers than the thresholds

In [10]:
# Frames whose whisker count is below MIN_WHISKERS, in ascending frame order
frames_less_than = sorted(
    frame for frame, count in whisker_counts.items() if count < MIN_WHISKERS
)

# Frames whose whisker count is above MAX_WHISKERS, in ascending frame order
frames_more_than = sorted(
    frame for frame, count in whisker_counts.items() if count > MAX_WHISKERS
)


def _percent_of_total(n_frames):
    """Return n_frames as a percentage of total_frames (0.0 if there are no frames)."""
    # Guard against an empty dataset, where total_frames is 0
    return n_frames / total_frames * 100 if total_frames else 0.0


print(f"\nFrames with LESS than {MIN_WHISKERS} whiskers:")
print(f"Total: {len(frames_less_than)} frames")
print(f"Percentage: {_percent_of_total(len(frames_less_than)):.2f}%")

print(f"\nFrames with MORE than {MAX_WHISKERS} whiskers:")
print(f"Total: {len(frames_more_than)} frames")
print(f"Percentage: {_percent_of_total(len(frames_more_than)):.2f}%")

# Labels follow the configured thresholds so they stay correct when the
# config changes (with the 5/5 defaults: 'Less_Than_5' / 'More_Than_5')
less_label = f"Less_Than_{MIN_WHISKERS}"
more_label = f"More_Than_{MAX_WHISKERS}"

# Combine both groups into a single list of records
all_filtered_frames = [
    {'Frame': frame, 'Whisker_Count': whisker_counts[frame], 'Filter_Type': less_label}
    for frame in frames_less_than
] + [
    {'Frame': frame, 'Whisker_Count': whisker_counts[frame], 'Filter_Type': more_label}
    for frame in frames_more_than
]

filtered_columns = ['Frame', 'Whisker_Count', 'Filter_Type']

if all_filtered_frames:
    frames_filtered_df = (
        pd.DataFrame(all_filtered_frames, columns=filtered_columns)
        .sort_values('Frame')
        .reset_index(drop=True)
    )

    print(f"\n{'='*60}")
    print("COMBINED FILTERED FRAMES")
    print(f"{'='*60}")
    print(f"Total filtered frames: {len(frames_filtered_df)}")
    print("\nSample of filtered frames:")
    display(frames_filtered_df.head(20))

    # File name reflects the configured thresholds; with equal thresholds it
    # keeps the original 'frames_not_equal_to_<n>_whiskers.csv' form
    if MIN_WHISKERS == MAX_WHISKERS:
        output_name = f"frames_not_equal_to_{MIN_WHISKERS}_whiskers.csv"
    else:
        output_name = f"frames_outside_{MIN_WHISKERS}_to_{MAX_WHISKERS}_whiskers.csv"

    # Save to CSV
    output_path = os.path.join(OUTPUT_DIR, output_name)
    frames_filtered_df.to_csv(output_path, index=False)
    print(f"\n✓ Saved {len(frames_filtered_df)} frames to: {output_path}")
    print(f"{'='*60}")
else:
    print("\nNo frames found matching the filter criteria.")
    # Keep the expected columns so later column access works on the empty frame
    frames_filtered_df = pd.DataFrame(columns=filtered_columns)


Frames with LESS than 5 whiskers:
Total: 2170 frames
Percentage: 1.84%

Frames with MORE than 5 whiskers:
Total: 16865 frames
Percentage: 14.32%

COMBINED FILTERED FRAMES
Total filtered frames: 19035

Sample of filtered frames:


Unnamed: 0,Frame,Whisker_Count,Filter_Type
0,491,6,More_Than_5
1,492,6,More_Than_5
2,493,6,More_Than_5
3,494,6,More_Than_5
4,495,6,More_Than_5
5,505,6,More_Than_5
6,506,6,More_Than_5
7,507,6,More_Than_5
8,508,6,More_Than_5
9,509,6,More_Than_5



✓ Saved 19035 frames to: ./results\frames_not_equal_to_5_whiskers.csv
