In [1]:
#cleaner.py
import pandas as pd
import numpy as np
import math

# Load the movement log
# Ensure your CSV has columns: frame, id, x, y
input_csv = 'movement_log.csv'
output_csv = 'movement_log_final1.csv'

# Parameters
merge_distance_threshold = 20      # pixels: merge based on overall-centroid proximity
outlier_distance_threshold = 50   # pixels: max allowed frame-to-frame jump

# Read data
orig_df = pd.read_csv(input_csv)
df = orig_df.copy()

# Phase 1: Reassign unknowns → nearest known centroid
df['old_id'] = df['id']
def compute_centroids(data):
    known = data[data['old_id'] != 'unknown']
    return known.groupby('old_id')[['x','y']].mean().to_dict('index')
centroids = compute_centroids(df)

total_unknown = (df['old_id'] == 'unknown').sum()
def assign_nearest(row, cents):
    if row['old_id'] == 'unknown' and cents:
        best, min_d = None, float('inf')
        for k, coord in cents.items():
            d = math.hypot(row['x']-coord['x'], row['y']-coord['y'])
            if d < min_d:
                best, min_d = k, d
        return best if best is not None else 'unknown'
    return row['old_id']

df['id_assigned'] = df.apply(lambda r: assign_nearest(r, centroids), axis=1)
df = df[df['id_assigned'] != 'unknown'].copy()

assigned_unknown = total_unknown - ((orig_df['id'] == 'unknown') & (df['id_assigned'] == 'unknown')).sum()
print(f"Phase 1: {total_unknown} unknown → {assigned_unknown} reassigned, {total_unknown - assigned_unknown} removed.")

# Phase 2: Continuity correction
all_frames = sorted(orig_df['frame'].unique())
x_min, x_max = df['x'].min(), df['x'].max()
y_min, y_max = df['y'].min(), df['y'].max()
continuity_rows = []
for pid, group in df.groupby('id_assigned'):
    positions = dict(zip(group['frame'], zip(group['x'], group['y'])))
    for f in all_frames:
        if f not in positions:
            prev_frames = [pf for pf in positions if pf < f]
            next_frames = [nf for nf in positions if nf > f]
            if prev_frames and next_frames:
                pf = max(prev_frames)
                nf = min(next_frames)
                px, py = positions[pf]
                nx, ny = positions[nf]
                if x_min <= px <= x_max and y_min <= py <= y_max and x_min <= nx <= x_max and y_min <= ny <= y_max:
                    continuity_rows.append({
                        'frame': f,
                        'old_id': pid,
                        'id_assigned': pid,
                        'x': px,
                        'y': py
                    })
inserted = len(continuity_rows)
if continuity_rows:
    df = pd.concat([df, pd.DataFrame(continuity_rows)], ignore_index=True)
print(f"Phase 2: Inserted {inserted} continuity rows.")

# Phase 3: Merge close ID clusters based on overall centroids
centroids2 = df.groupby('id_assigned')[['x','y']].mean().to_dict('index')
ids = list(centroids2.keys())
merge_map = {}
for i, id1 in enumerate(ids):
    for id2 in ids[i+1:]:
        d = math.hypot(centroids2[id1]['x'] - centroids2[id2]['x'], 
                       centroids2[id1]['y'] - centroids2[id2]['y'])
        if d < merge_distance_threshold:
            merge_map[id2] = id1

df['id_merged'] = df['id_assigned'].apply(lambda x: merge_map.get(x, x))
if merge_map:
    print("Phase 3: Merged the following IDs:")
    for src, dst in merge_map.items():
        print(f"  - ID {src} → ID {dst}")
else:
    print("Phase 3: No IDs merged.")

# Phase 4: Remove outliers by frame-to-frame jump
cleaned = []
removed_outliers = 0
for pid, group in df.groupby('id_merged'):
    grp = group.sort_values('frame')
    keep = [True] * len(grp)
    coords = list(zip(grp['x'], grp['y']))
    for i in range(1, len(coords)):
        if math.hypot(coords[i][0] - coords[i-1][0], coords[i][1] - coords[i-1][1]) > outlier_distance_threshold:
            keep[i] = False
            removed_outliers += 1
    cleaned.append(grp[keep])
clean_df = pd.concat(cleaned)
print(f"Phase 4: Removed {removed_outliers} outlier jumps.")

# Final output to (frame, old_id, id, x, y)
def try_int(v):
    try: return int(v)
    except: return v

final_df = clean_df.copy()
final_df['old_id'] = final_df['old_id'].apply(try_int)
final_df['id']     = final_df['id_merged'].apply(try_int)

final_df = ( final_df
    .loc[:, ['frame','old_id','id','x','y']]
    .sort_values(['frame','id'])
    .reset_index(drop=True)
)

final_df.to_csv(output_csv, index=False)
print(f"Final cleaned log saved to '{output_csv}'")


Phase 1: 0 unknown → 0 reassigned, 0 removed.
Phase 2: Inserted 0 continuity rows.
Phase 3: No IDs merged.


ValueError: No objects to concatenate

In [None]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import matplotlib.cm as cm

# Parameters
video_path = 'video/Ev1_2mp4.mp4'
csv_path = 'movement_log_final.csv'  # processed log with x,y coordinates

# Output filenames
output_frame_image = 'heatmap_overlay_frame.png'
output_white_image = 'heatmap_overlay_white.png'

# ✅ Increase these numbers to make grid smaller (finer)
grid_rows = 20  # number of vertical divisions
grid_cols = 20  # number of horizontal divisions

alpha = 0.6    # weight for background
beta = 0.4     # weight for heatmap


def create_heatmap_overlay():
    # Read first frame to get dimensions
    cap = cv2.VideoCapture(video_path)
    success, frame = cap.read()
    cap.release()
    if not success:
        raise IOError(f"Cannot read frame 1 from {video_path}")
    height, width = frame.shape[:2]

    # Grid cell dimensions
    zone_h = height // grid_rows
    zone_w = width // grid_cols

    # Read movement log and count visits per cell
    df = pd.read_csv(csv_path)
    counts = np.zeros((grid_rows, grid_cols), dtype=int)
    for _, row in df.iterrows():
        x, y = int(row['x']), int(row['y'])
        r = min(y // zone_h, grid_rows - 1)
        c = min(x // zone_w, grid_cols - 1)
        counts[r, c] += 1

    # Normalize counts to [0, 255] for heatmap intensity
    norm_counts = counts.astype(float)
    max_count = norm_counts.max()
    if max_count > 0:
        norm_counts = (norm_counts / max_count) * 255
    heatmap = norm_counts.astype(np.uint8)
    heatmap_color = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    heatmap_full = cv2.resize(heatmap_color, (width, height), interpolation=cv2.INTER_NEAREST)

    # Overlay heatmap on frame and white background
    overlay_frame = cv2.addWeighted(frame, alpha, heatmap_full, beta, 0)
    background_white = np.full((height, width, 3), 255, dtype=np.uint8)
    overlay_white = cv2.addWeighted(background_white, alpha, heatmap_full, beta, 0)

    # Draw grid + zone numbers
    for overlay in (overlay_frame, overlay_white):
        # Draw grid lines
        for i in range(grid_rows + 1):
            cv2.line(overlay, (0, i * zone_h), (width, i * zone_h), (255, 255, 255), 1)
        for j in range(grid_cols + 1):
            cv2.line(overlay, (j * zone_w, 0), (j * zone_w, height), (255, 255, 255), 1)

        # Dynamically set font scale based on cell size
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = max(min(zone_w, zone_h) / 60.0, 0.3)  # ensures readable but not too large
        thickness = 1 if min(zone_w, zone_h) > 25 else 1

        # Draw zone numbers at center of each cell
        for r in range(grid_rows):
            for c in range(grid_cols):
                zone_id = r * grid_cols + c + 1
                text = str(zone_id)
                text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
                tx = c * zone_w + (zone_w - text_size[0]) // 2
                ty = r * zone_h + (zone_h + text_size[1]) // 2
                color = (0, 0, 0) if overlay is overlay_white else (255, 255, 255)
                cv2.putText(overlay, text, (tx, ty), font, font_scale, color, thickness, cv2.LINE_AA)

    # Save overlays
    cv2.imwrite(output_frame_image, overlay_frame)
    cv2.imwrite(output_white_image, overlay_white)

    # Display with colorbar
    norm = Normalize(vmin=0, vmax=max_count)
    mapper = cm.ScalarMappable(norm=norm, cmap='jet')
    mapper.set_array([])
    for img, title in [(overlay_frame, 'Heatmap on Frame Background'),
                       (overlay_white, 'Heatmap on White Background')]:
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.figure(figsize=(10, 6))
        plt.imshow(img_rgb)
        plt.title(title)
        plt.axis('off')
        plt.colorbar(mapper, fraction=0.046, pad=0.04).set_label('Visit Frequency')
    plt.show()


def analyze_stalls_and_table():
    stall_zones = {
        'Stall 1': [51, 52, 61, 62, 71, 72],
        'Stall 2': [53, 63, 73],
        'Stall 3': [54, 64, 74],
        'Stall 4': [56, 57, 66, 67]
    }
    staff_zones = {51, 52, 53, 54, 56, 61, 62}

    df = pd.read_csv(csv_path)
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    success, frame = cap.read()
    cap.release()

    height, width = frame.shape[:2]
    zone_h = height // grid_rows
    zone_w = width // grid_cols

    # Assign zone ID for each log entry
    df['zone'] = df.apply(
        lambda row: (min(int(row['y']) // zone_h, grid_rows - 1)) * grid_cols +
                    min(int(row['x']) // zone_w, grid_cols - 1) + 1,
        axis=1
    )
    df['role'] = df['zone'].apply(lambda z: 'staff' if z in staff_zones else 'customer')

    # Table for each stall and role
    table = []
    for stall, zones in stall_zones.items():
        d = df[df['zone'].isin(zones)]
        for role in ['customer', 'staff']:
            grp = d[d['role'] == role]
            total = grp['id'].nunique()
            stops = grp.groupby('id').size()
            stops_cnt = (stops >= fps * 20).sum()
            table.append({'stall': stall, 'role': role, 'total': total, 'stops20s': stops_cnt})
    table_df = pd.DataFrame(table)
    print(table_df)

    # Bar chart
    stalls = list(stall_zones.keys())
    x = np.arange(len(stalls))
    width = 0.35
    fig, ax = plt.subplots(figsize=(8, 5))
    for i, role in enumerate(['customer', 'staff']):
        pos = x + (i * 2 - 1) * width / 2
        dfr = table_df[table_df['role'] == role]
        totals = dfr['total'].values
        stops = dfr['stops20s'].values
        ax.bar(pos, totals, width, label=f'{role} total',
               color=('skyblue' if role == 'customer' else 'lightcoral'))
        ax.bar(pos, stops, width, label=f'{role} ≥20s',
               color=('blue' if role == 'customer' else 'red'))
    ax.set_xticks(x)
    ax.set_xticklabels(stalls)
    ax.set_xlabel('Stall')
    ax.set_ylabel('Unique ID Count')
    ax.set_title('Visits per Stall by Role with ≥20s Overlay')
    ax.legend()
    plt.show()


if __name__ == '__main__':
    create_heatmap_overlay()
    analyze_stalls_and_table()
