In [1]:
import pandas as pd
import numpy as np
import os
import re

In [6]:
def categorize_sharpness(sharpness):
    """Bucket a numeric sharpness score into 'Sharp', 'Medium' or 'Blunt'.

    Scores of 85 and above are 'Sharp', scores from 70 up to (but not
    including) 85 are 'Medium', and everything else is 'Blunt'.
    """
    # Lower bounds are checked from the highest band downwards; first hit wins.
    bands = ((85, 'Sharp'), (70, 'Medium'))
    for lower_bound, category in bands:
        if sharpness >= lower_bound:
            return category
    return 'Blunt'
    
# Readable activity names keyed by the integer code stored in the 'Label' column;
# the position in the tuple is the code (0 = "Idle", ..., 8 = "Dropping").
activity_labels = dict(enumerate((
    "Idle",
    "Walking",
    "Steeling",
    "Reaching",
    "Cutting",
    "Slicing",
    "Pulling",
    "Placing",
    "Dropping",
)))

def extract_and_categorize_sharpness(filename):
    """Read the sharpness score embedded in a filename and return its category.

    The score is the first all-digit segment enclosed by dashes, e.g. ``64``
    in ``MVN-J-Boning-64-001.csv``.

    Args:
        filename: Name containing a ``-<digits>-`` segment.

    Returns:
        The category string returned by ``categorize_sharpness`` for that score.

    Raises:
        ValueError: If ``filename`` contains no ``-<digits>-`` segment.
    """
    match = re.search(r'-([0-9]+)-', filename)
    if match is None:
        # Name the offending file instead of failing with an opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(
            f"Cannot find a sharpness value ('-<digits>-') in filename: {filename!r}"
        )
    return categorize_sharpness(int(match.group(1)))

def split_to_chunk(df, frame_size=60, step=5, knife_label="None"):
    """Cut a labelled recording into fixed-length windows and save each as a CSV.

    Consecutive rows that share the same ``Label`` value form one chunk. Every
    chunk with at least ``frame_size`` rows is swept with a sliding window of
    ``frame_size`` rows advancing ``step`` rows at a time, so a window never
    spans a label change. Each window is written to
    ``chunk_output/<activity>_chunk_<chunk_idx>_window_<start>_knife_<knife_label>.csv``,
    where ``<start>`` is the window's row offset inside its chunk. Including
    the offset keeps windows of the same chunk from overwriting one another.

    Note that ``chunk_idx`` restarts at 0 on every call, so callers that
    process several recordings must make ``knife_label`` (or the working
    directory) distinct per recording to avoid clobbering earlier output.

    Args:
        df: Recording with a ``Label`` column. The columns ``Unnamed: 0``,
            ``Marker`` and ``Frame_acce`` are dropped when present.
        frame_size: Number of rows per saved window.
        step: Row offset between the starts of successive windows.
        knife_label: Tag embedded in every output filename.

    Returns:
        None. The windows are written to disk.

    Raises:
        ValueError: If ``frame_size`` or ``step`` is not positive, or if
            ``df`` has no ``Label`` column.
        KeyError: If a label value is not a key of ``activity_labels``.
    """
    if frame_size < 1 or step < 1:
        raise ValueError("frame_size and step must be positive integers")

    # Check and remove any unnecessary columns
    df = df.drop(columns=['Unnamed: 0', 'Marker', 'Frame_acce'], errors='ignore')

    if 'Label' not in df.columns:
        raise ValueError("DataFrame must contain 'Label' column")

    if df.empty:
        # Nothing to split; avoids indexing into an empty frame.
        return

    # Number each run of identical consecutive labels 0, 1, 2, ... A row opens
    # a new run when its label differs from the previous row's label (the
    # first row always opens run 0).
    label_values = df['Label'].to_numpy()
    opens_run = np.ones(len(label_values), dtype=bool)
    opens_run[1:] = label_values[1:] != label_values[:-1]
    run_ids = np.cumsum(opens_run) - 1

    output_dir = "chunk_output"
    os.makedirs(output_dir, exist_ok=True)

    # Grouping by the run id slices the frame directly, which keeps the
    # original column dtypes (no row-by-row rebuild of each chunk).
    for chunk_idx, (_, chunk) in enumerate(df.groupby(run_ids, sort=False)):
        if len(chunk) < frame_size:
            continue  # too short to hold a single full window

        # All rows of a chunk share one label, so look its name up once.
        activity = activity_labels[chunk['Label'].iloc[0]]

        for start in range(0, len(chunk) - frame_size + 1, step):
            sample = chunk.iloc[start:start + frame_size]
            sample_filename = os.path.join(
                output_dir,
                f"{activity}_chunk_{chunk_idx}_window_{start}_knife_{knife_label}.csv",
            )
            sample.to_csv(sample_filename, index=False)


In [7]:
# Split every raw recording into windowed CSV samples.
# os.listdir returns entries in arbitrary order, so sort for a reproducible run.
for file in sorted(os.listdir("raw_data")):
    if not file.endswith(".csv"):
        continue
    df = pd.read_csv(os.path.join("raw_data", file))
    knife_label = extract_and_categorize_sharpness(file)
    print(file, " ", knife_label)
    # split_to_chunk numbers chunks from 0 on every call and names its output
    # files only by activity, chunk index and knife label, so recordings that
    # share a sharpness category would overwrite each other's files. Appending
    # the recording name to the label keeps every recording's output distinct.
    recording_id = os.path.splitext(file)[0]
    split_to_chunk(df, knife_label=f"{knife_label}_{recording_id}")

MVN-J-Boning-64-001.csv   Blunt
MVN-J-Boning-64-002.csv   Blunt
MVN-J-Boning-64-003.csv   Blunt
MVN-J-Boning-64-004.csv   Blunt
MVN-J-Boning-64-005.csv   Blunt
MVN-J-Boning-64-006.csv   Blunt
MVN-J-Boning-79-001.csv   Medium
MVN-J-Boning-90-001.csv   Sharp
MVN-J-Boning-90-002.csv   Sharp
MVN-J-Boning-90-003.csv   Sharp
MVN-J-Boning-90-004.csv   Sharp
MVN-J-Slicing-64-001.csv   Blunt
MVN-J-Slicing-73-001.csv   Medium
MVN-J-Slicing-87-001.csv   Sharp
MVN-S-Boning-63-001.csv   Blunt
MVN-S-Boning-63-002.csv   Blunt
MVN-S-Boning-63-003.csv   Blunt
MVN-S-Boning-76-001.csv   Medium
MVN-S-Boning-76-002.csv   Medium
MVN-S-Boning-89-001.csv   Sharp
MVN-S-Boning-89-002.csv   Sharp
MVN-S-Boning-89-003.csv   Sharp
MVN-S-Boning-89-004.csv   Sharp
MVN-S-Slicing-63-001.csv   Blunt
MVN-S-Slicing-73-001.csv   Medium
MVN-S-Slicing-87-001.csv   Sharp
