In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.patches import Patch
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [None]:
# Read the landmark table from the parquet export and preview its first rows.
landmarks_path = "landmarks_2024-11-06.parquet"
df = pd.read_parquet(landmarks_path)
df.head()

In [None]:
# Look up the row(s) for pose landmark 16 at frame 22937.
pose_16_rows = (df["landmark_type"] == "pose") & (df["landmark_index"] == 16)
df[pose_16_rows & (df["frame"] == 22937)]

In [None]:
# Look up the row(s) for pose landmark 16 at frame 22940.
pose_16_rows = (df["landmark_type"] == "pose") & (df["landmark_index"] == 16)
df[pose_16_rows & (df["frame"] == 22940)]

In [None]:
# Look up the row(s) for pose landmark 16 at frame 22915.
pose_16_rows = (df["landmark_type"] == "pose") & (df["landmark_index"] == 16)
df[pose_16_rows & (df["frame"] == 22915)]

In [None]:
# Look up the row(s) for pose landmark 16 at frame 22902.
pose_16_rows = (df["landmark_type"] == "pose") & (df["landmark_index"] == 16)
df[pose_16_rows & (df["frame"] == 22902)]

In [None]:
# Drop the visibility column; only x/y/z are turned into features below.
# `errors='ignore'` keeps this cell re-runnable: because `df` is overwritten,
# a second execution would otherwise raise KeyError (the column is already gone).
df = df.drop(columns=['visibility'], errors='ignore')

# Melt to long format: one row per (frame, landmark_type, landmark_index,
# coordinate), with the coordinate name in `coordinate` and its number in `value`.
df_melted = df.melt(
    id_vars=["frame", "landmark_type", "landmark_index"],
    value_vars=["x", "y", "z"],
    var_name="coordinate",
    value_name="value"
)

# Build the feature name "<landmark_type>-<landmark_index>_<coordinate>",
# e.g. "pose-16_y".
df_melted["feature"] = (
    df_melted["landmark_type"]
    + "-"
    + df_melted["landmark_index"].astype(str)
    + "_"
    + df_melted["coordinate"]
)

# Pivot to wide format: one row per frame, one column per feature.
# `pivot` raises if a (frame, feature) pair occurs more than once.
df_transformed = df_melted.pivot(index="frame", columns="feature", values="value")

# Remove the "feature" label that pivot leaves on the column axis.
df_transformed.columns.name = None
df_transformed.head()

In [None]:
def rule_based_classification(row, threshold=0.9):
    """Classify one frame from the y coordinates of pose landmarks 15 and 16.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            ``"pose-15_y"`` and ``"pose-16_y"``.
        threshold: Value both y coordinates must exceed for the frame to be
            labelled 0. Defaults to 0.9, the value originally hard-coded here.

    Returns:
        0 when both y coordinates are strictly greater than ``threshold``,
        otherwise 1. The plots in this notebook colour class 0 as
        "Neutral Pose" and every other class as "Sign Language".

    Note:
        A NaN coordinate never compares greater than the threshold, so a frame
        with a missing value for either landmark is labelled 1.
    """
    # presumably landmarks 15/16 are the two wrists and a large y means the
    # hand is near the bottom of the image — TODO confirm against the extractor
    both_above = row["pose-15_y"] > threshold and row["pose-16_y"] > threshold
    return 0 if both_above else 1

# Run the rule on every row (one row per frame) and store the label as a column.
frame_labels = df_transformed.apply(rule_based_classification, axis=1)
df_transformed["class_name"] = frame_labels

# Show how many frames fall into each class.
df_transformed["class_name"].value_counts()

In [None]:
# Give every run of consecutive rows with the same class its own id: the
# comparison with the shifted column is True wherever the class changes, so its
# cumulative sum increments once per run.
df_transformed['group'] = (df_transformed['class_name'] != df_transformed['class_name'].shift()).cumsum()
grouped = df_transformed.groupby('group')

# For each run keep its class and its (first_frame, last_frame) range. Both
# ends are inclusive because they are the min and max frame index of the run.
classes = []
frame_ranges = []
for _, group in grouped:
    classes.append(group['class_name'].iloc[0])
    frame_ranges.append((group.index.min(), group.index.max()))

plt.figure(figsize=(15, 4))

# One horizontal bar per run, coloured by class (0 -> orange, otherwise blue).
for (start, end), class_name in zip(frame_ranges, classes):
    color = 'orange' if class_name == 0 else 'blue'
    # `end` is the last frame of the run, so the run spans end - start + 1
    # frames. A width of end - start would draw single-frame runs with zero
    # width and leave a one-frame gap before the following run.
    plt.barh([0], width=end - start + 1, left=start, color=color, height=0.8)

legend_elements = [
    Patch(facecolor='blue', label='Sign Language'),
    Patch(facecolor='orange', label='Neutral Pose')
]
plt.legend(handles=legend_elements, loc='upper right')

# Add labels and title
plt.title("Rule Based Classification Timeline")
plt.xlabel("Frame (Time)")
plt.yticks([], [])  # single-row timeline, the y axis carries no information
plt.xticks(np.linspace(0, df_transformed.index.max(), 10))  # 10 evenly spaced ticks
plt.tight_layout()
plt.show()

In [None]:
# Load the manually placed markers; the code below reads the `frame` and
# `signLanguage` fields of each marker.
timestamps_df = pd.read_json("timestamps.json")

# Put the markers in chronological order and renumber the rows from 0.
timestamps_df = timestamps_df.sort_values(by="frame").reset_index(drop=True)

# Prepare the plot
plt.figure(figsize=(15, 4))

# Every marker opens a segment that lasts until the next marker, so the markers
# are walked pairwise. The last marker only closes the final segment and gets
# no bar of its own.
marker_frames = timestamps_df['frame'].tolist()
marker_flags = timestamps_df['signLanguage'].tolist()
for seg_start, seg_end, is_signing in zip(marker_frames[:-1], marker_frames[1:], marker_flags[:-1]):
    bar_color = 'blue' if is_signing else 'orange'
    plt.barh([0], width=seg_end - seg_start, left=seg_start, color=bar_color, height=0.8)

# Legend: blue = signing segment, orange = neutral segment.
plt.legend(
    handles=[
        Patch(facecolor='blue', label='Sign Language'),
        Patch(facecolor='orange', label='Neutral Pose'),
    ],
    loc='upper right',
)

# Title, axis label and layout
plt.title("Manual Marker Timeline")
plt.xlabel("Frame (Time)")
plt.yticks([], [])  # single-row timeline, no y ticks needed
plt.tight_layout()
plt.show()

In [None]:
# Recompute the runs of consecutive rows that share a class, so this cell does
# not depend on the `group` column or lists produced by an earlier plotting cell.
df_transformed['group'] = (df_transformed['class_name'] != df_transformed['class_name'].shift()).cumsum()
grouped_classes = df_transformed.groupby('group')

# Class and inclusive (first_frame, last_frame) range of every run.
classes = []
frame_ranges = []
for _, group in grouped_classes:
    classes.append(group['class_name'].iloc[0])
    frame_ranges.append((group.index.min(), group.index.max()))

# Load the manual markers and order them chronologically.
timestamps_df = pd.read_json("timestamps.json")
timestamps_df = timestamps_df.sort_values(by="frame").reset_index(drop=True)

# Prepare the plot
plt.figure(figsize=(15, 6))

# Rule-based runs on row 1.
for (start, end), class_name in zip(frame_ranges, classes):
    color = 'orange' if class_name == 0 else 'blue'
    # `end` is inclusive, so a run spans end - start + 1 frames. A width of
    # end - start would hide single-frame runs and leave one-frame gaps.
    plt.barh([1], width=end - start + 1, left=start, color=color, height=0.8)

# Manual markers on row 0: each marker opens a segment that ends at the next
# marker's frame, so the last marker gets no bar of its own.
for i in range(len(timestamps_df) - 1):
    start_frame = timestamps_df.loc[i, 'frame']
    end_frame = timestamps_df.loc[i + 1, 'frame']
    color = 'blue' if timestamps_df.loc[i, 'signLanguage'] else 'orange'
    plt.barh([0], width=end_frame - start_frame, left=start_frame, color=color, height=0.8)

# Add legend
legend_elements = [
    Patch(facecolor='blue', label='Sign Language'),
    Patch(facecolor='orange', label='Neutral Pose')
]
plt.legend(handles=legend_elements, loc='upper right')

# Customize the plot
plt.title("Comparison of Rule Based Classification and Manual Marker Timelines")
plt.xlabel("Frame (Time)")
plt.yticks([0, 1], labels=["Manual Markers", "Rule Based Classification"])
plt.tight_layout()
plt.show()

In [None]:
# Allocate one label per frame for both timelines. The arrays are long enough
# for the highest frame number seen in either source and start out as all 0.
total_frames = max(df_transformed.index.max(), timestamps_df['frame'].max()) + 1
rule_based_labels = np.zeros(total_frames, dtype=int)
manual_labels = np.zeros(total_frames, dtype=int)

# Rule-based labels: each run covers its first through last frame (inclusive).
for (first, last), label in zip(frame_ranges, classes):
    rule_based_labels[first:last + 1] = label

# Manual labels: each marker's flag applies from its own frame up to, but not
# including, the next marker's frame.
marker_frames = timestamps_df['frame'].tolist()
marker_flags = timestamps_df['signLanguage'].tolist()
for seg_start, seg_end, flag in zip(marker_frames[:-1], marker_frames[1:], marker_flags[:-1]):
    manual_labels[seg_start:seg_end] = flag

# NOTE(review): the first 144 frames are excluded from scoring; the reason is
# not stated here — presumably annotation or landmarks start there, TODO confirm.
# NOTE(review): frames from the last marker onwards are never written above and
# keep the default manual label 0, yet they are still scored below.
y_true = manual_labels[144:]
y_pred = rule_based_labels[144:]

# Calculate metrics (manual markers are the reference, rule-based the prediction)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)

# Present the results
metrics = {
    "Precision": precision,
    "Recall": recall,
    "F1 Score": f1,
    "Accuracy": accuracy
}

for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value}")

In [None]:
# Load the subtitle data
subtitle_df = pd.read_csv("whisper_subtitle.csv")

# Convert start and end times to frame numbers; `.astype(int)` truncates
# towards zero.
# assumes `Start`/`End` are in seconds and the video runs at 25 fps — TODO confirm
framerate = 25
subtitle_df['Start_Frame'] = (subtitle_df['Start'] * framerate).astype(int)
subtitle_df['End_Frame'] = (subtitle_df['End'] * framerate).astype(int)

# `frame_ranges`, `classes` and `timestamps_df` are reused from the earlier
# cells of this notebook; run those first.

# Prepare the plot
plt.figure(figsize=(15, 8))

# Rule-based runs on row 1.
for (start, end), class_name in zip(frame_ranges, classes):
    color = 'orange' if class_name == 0 else 'blue'
    # `end` is the last frame of the run (inclusive), so the run spans
    # end - start + 1 frames. A width of end - start would hide single-frame
    # runs and leave one-frame gaps between adjacent runs.
    plt.barh([1], width=end - start + 1, left=start, color=color, height=0.8)

# Manual markers on row 0: each marker opens a segment that ends at the next
# marker's frame, so the last marker gets no bar of its own.
for i in range(len(timestamps_df) - 1):
    start_frame = timestamps_df.loc[i, 'frame']
    end_frame = timestamps_df.loc[i + 1, 'frame']
    color = 'blue' if timestamps_df.loc[i, 'signLanguage'] else 'orange'
    plt.barh([0], width=end_frame - start_frame, left=start_frame, color=color, height=0.8)

# Subtitle sentences on row 2, one bar from Start_Frame to End_Frame each.
for sentence_start, sentence_end in zip(subtitle_df['Start_Frame'], subtitle_df['End_Frame']):
    plt.barh([2], width=sentence_end - sentence_start,
             left=sentence_start, color='green', height=0.8)

# Add legend
legend_elements = [
    Patch(facecolor='blue', label='Sign Language'),
    Patch(facecolor='orange', label='Neutral Pose'),
    Patch(facecolor='green', label='Sentences')
]
plt.legend(handles=legend_elements, loc='upper right')

# Customize the plot
plt.title("Comparison of Rule Based Classification, Manual Marker Timelines, and Sentences")
plt.xlabel("Frame (Time)")
plt.yticks([0, 1, 2], labels=["Manual Markers", "Rule Based Classification", "Sentences"])
plt.tight_layout()
plt.show()