In [74]:
import pandas as pd
import numpy as np
from copy import deepcopy

In [75]:
# The two armband sides; these strings match the <side> part of the
# "myo_<side>_timestamps" / "myo_<side>_readings" column names used by
# data_augmentation. NOTE(review): not referenced by any of the visible cells.
SIDES=["left", "right"]
# Clip length constant, in the same unit as the "start"/"stop" columns
# (presumably seconds -- TODO confirm).
# NOTE(review): not referenced by the visible cells; data_augmentation takes
# its own `duration` argument and the driver cell passes a literal instead.
CLIP_DURATION = 5

In [76]:
def load_data(path):
    """Read a pickled object (a DataFrame in this notebook) from `path`.

    The file is opened in binary mode and handed to `pd.read_pickle`.

    Note: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.
    """
    with open(path, "rb") as handle:
        return pd.read_pickle(handle)

def save_data(data, path):
    """Pickle `data` to `path`, overwriting any existing file.

    The destination is opened in binary write mode and passed to
    `pd.to_pickle`. Returns None.
    """
    with open(path, "wb") as sink:
        pd.to_pickle(data, sink)

In [83]:
def _extract_clip(row, clip_start, clip_len, closed_end):
    """Return a copy of `row` cut down to the window starting at `clip_start`.

    The window spans `clip_start` to `clip_start + clip_len`. Its upper bound
    is inclusive when `closed_end` is true and exclusive otherwise. The
    "start"/"stop" fields are rewritten (stop is capped at the row's original
    stop) and the left/right timestamp and reading arrays are filtered with
    the same boolean mask so they stay aligned.
    """
    window_end = clip_start + clip_len
    clip = row.copy()
    clip["start"] = clip_start
    # Never report a stop later than the end of the original segment.
    clip["stop"] = min(window_end, row["stop"])
    for side in ("left", "right"):
        ts_key = f"myo_{side}_timestamps"
        rd_key = f"myo_{side}_readings"
        stamps = row[ts_key]
        if closed_end:
            below_end = stamps <= window_end
        else:
            below_end = stamps < window_end
        keep = (stamps >= clip_start) & below_end
        clip[ts_key] = stamps[keep]
        # Readings are indexed along their first axis with the timestamp mask.
        clip[rd_key] = row[rd_key][keep, :]
    return clip


def data_augmentation(data:pd.DataFrame, duration=5, num_clips=None):
    """Split every row of `data` into shorter clips.

    Each row must provide "start", "stop", "myo_left_timestamps",
    "myo_right_timestamps", "myo_left_readings" and "myo_right_readings".
    The timestamp/reading cells are indexed with boolean masks, so they are
    assumed to be numpy arrays (readings 2-D, first axis aligned with the
    timestamps) -- TODO confirm against the data producer.

    Two modes:

    * `num_clips is None`: rows longer than `duration` are cut into
      consecutive, non-overlapping windows of `duration` starting at
      "start"; samples are selected from the half-open window
      [c, c + duration). The last window may be shorter. Rows that are not
      longer than `duration` are passed through unchanged.
    * `num_clips` given: every row yields exactly `num_clips` clips whose
      start offsets are evenly spaced between "start" and the latest offset
      that still fits a full clip; samples are selected from the closed
      window [c, c + clip_len]. Clips may overlap.

    Parameters
    ----------
    data : pd.DataFrame
        Input segments, one per row.
    duration : float, default 5
        Target clip length, in the unit of the "start"/"stop" columns.
    num_clips : int or None, default None
        Number of clips per row; None selects the sliding-window mode.

    Returns
    -------
    pd.DataFrame
        One row per clip. Original index labels are kept, so they repeat for
        clips cut from the same source row. If no clip is produced, an empty
        frame with the input's columns is returned.
    """
    augmented = []
    for _, row in data.iterrows():
        tot_time = row["stop"] - row["start"]
        if num_clips is None:
            if tot_time > duration:
                for c in np.arange(row["start"], row["stop"], duration):
                    augmented.append(_extract_clip(row, c, duration, closed_end=False))
            else:
                augmented.append(row.copy())
        else:
            # Per-row clip length. This must NOT overwrite `duration`:
            # otherwise one short row would shrink the clips of every row
            # processed after it.
            clip_len = min(duration, tot_time)
            highest_offset = max(row["start"], row["stop"] - clip_len)
            for c in np.linspace(row["start"], highest_offset, num_clips):
                augmented.append(_extract_clip(row, c, clip_len, closed_end=True))
    if not augmented:
        # Keep the schema instead of returning a column-less frame.
        return data.iloc[:0].copy()
    return pd.DataFrame(augmented)

In [84]:
def save_augmented_data(path, duration, name, num_clips=None, out_dir="data/ActionNet"):
    """Load a pickled DataFrame, augment it, and pickle the result.

    Parameters
    ----------
    path : str
        Location of the input pickle, read with `load_data`.
    duration : float
        Clip length forwarded to `data_augmentation`.
    name : str
        Split label (e.g. "train" or "test"); used in the progress messages
        and in the output file name.
    num_clips : int or None, default None
        Forwarded to `data_augmentation`.
    out_dir : str, default "data/ActionNet"
        Directory that receives "<name>_EMG_augmented.pkl". The default
        reproduces the previously hard-coded location.
    """
    data = load_data(path)
    augmented = data_augmentation(data, duration, num_clips)
    # Label the shapes with the actual split; the messages used to say
    # "train" even when another split was being processed.
    print(f"Original {name} data:", data.shape)
    print(f"Augmented {name} data:", augmented.shape)
    print(augmented.head())
    print("Saving augmented data")
    save_data(augmented, f"{out_dir}/{name}_EMG_augmented.pkl")

In [88]:
# Augment the chosen split: 20 evenly spaced clips of (up to) 10 time units per segment.
split = "test"
save_augmented_data(
    path=f"data/ActionNet_EMG/{split}_EMG_filtered.pkl",
    duration=10,
    name=split,
    num_clips=20,
)

Original train data: (527, 9)
Augmented train data: (10540, 9)
                      description         start          stop  \
40  Spread jelly on a bread slice  1.657739e+09  1.657739e+09   
40  Spread jelly on a bread slice  1.657739e+09  1.657739e+09   
40  Spread jelly on a bread slice  1.657739e+09  1.657739e+09   
40  Spread jelly on a bread slice  1.657739e+09  1.657739e+09   
40  Spread jelly on a bread slice  1.657739e+09  1.657739e+09   

                                  myo_left_timestamps  \
40  [1657738827.4506874, 1657738827.458187, 165773...   
40  [1657738828.076678, 1657738828.080177, 1657738...   
40  [1657738828.710679, 1657738828.718178, 1657738...   
40  [1657738829.3326783, 1657738829.340179, 165773...   
40  [1657738829.963178, 1657738829.9706826, 165773...   

                                    myo_left_readings  \
40  [[4, 12, 21, -21, 3, -2, -5, 7], [-6, -5, -5, ...   
40  [[-5, -7, -6, -66, -24, -23, -6, -14], [-5, -2...   
40  [[-7, -12, -21, -38, 5, -10,