In [74]:
import pandas as pd
import numpy as np
from copy import deepcopy

In [75]:
# Sensor sides; these match the "myo_left_*" / "myo_right_*" column name infixes.
SIDES=["left", "right"]
# Clip length constant; presumably in the same units as the "start"/"stop"
# columns (looks like seconds) — TODO confirm.
CLIP_DURATION = 5

In [76]:
def load_data(path):
    """Read the pickle file at ``path`` and return the object stored in it.

    The file is opened in binary mode and the open handle (not the path) is
    handed to ``pd.read_pickle``.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, "rb") as handle:
        return pd.read_pickle(handle)

def save_data(data, path):
    """Pickle ``data`` into the file at ``path`` (overwriting it if present).

    The destination is opened in binary write mode and the open handle is
    passed to ``pd.to_pickle``. Returns ``None``.
    """
    with open(path, "wb") as handle:
        pd.to_pickle(data, handle)

In [83]:
def _extract_clip(row, clip_start, clip_duration, inclusive_end):
    """Return a copy of ``row`` restricted to the window starting at ``clip_start``.

    The window spans ``clip_duration`` from ``clip_start``. The stored "stop"
    is capped at the parent row's "stop". Both sides' timestamp and reading
    arrays are filtered with the same boolean mask, so they stay aligned.
    """
    clip = row.copy()
    window_end = clip_start + clip_duration
    clip["start"] = clip_start
    clip["stop"] = row["stop"] if window_end > row["stop"] else window_end
    for side in ("left", "right"):
        ts_key = f"myo_{side}_timestamps"
        readings_key = f"myo_{side}_readings"
        timestamps = row[ts_key]
        if inclusive_end:
            in_window = (timestamps >= clip_start) & (timestamps <= window_end)
        else:
            # Half-open window: a sample on a boundary belongs to one clip only.
            in_window = (timestamps >= clip_start) & (timestamps < window_end)
        clip[ts_key] = timestamps[in_window]
        clip[readings_key] = row[readings_key][in_window, :]
    return clip


def data_augmentation(data:pd.DataFrame, duration=5, num_clips=None):
    """Split every row of ``data`` into shorter clips.

    Each row must provide "start", "stop", and, for both "left" and "right",
    a "myo_<side>_timestamps" array plus a "myo_<side>_readings" array that is
    indexable as ``readings[mask, :]``.

    Modes:
      * ``num_clips is None``: rows longer than ``duration`` are cut into
        consecutive, non-overlapping windows of ``duration`` (the last one may
        be shorter). Rows that are not longer than ``duration`` are kept as-is.
      * ``num_clips`` given: every row yields ``num_clips`` windows whose start
        times are evenly spaced between "start" and the last offset that still
        fits a full window. Windows may overlap. A row shorter than
        ``duration`` uses its own length as the window length.

    Args:
        data: input frame, one segment per row.
        duration: requested clip length, in the units of "start"/"stop".
        num_clips: number of clips per row, or ``None`` for consecutive cuts.

    Returns:
        A new DataFrame with one row per clip. Clips keep the index label of
        the row they came from, so labels repeat. If no clip is produced, an
        empty frame with the columns of ``data`` is returned.
    """
    augmented = []
    for _, row in data.iterrows():
        tot_time = row["stop"] - row["start"]
        if num_clips is None:
            if tot_time > duration:
                for clip_start in np.arange(row["start"], row["stop"], duration):
                    augmented.append(
                        _extract_clip(row, clip_start, duration, inclusive_end=False)
                    )
            else:
                augmented.append(row.copy())
        else:
            # Per-row window length. This must not overwrite ``duration``:
            # otherwise one short row would shrink the clips of all later rows.
            clip_duration = min(duration, tot_time)
            highest_offset = max(row["start"], row["stop"] - clip_duration)
            for clip_start in np.linspace(row["start"], highest_offset, num_clips):
                augmented.append(
                    _extract_clip(row, clip_start, clip_duration, inclusive_end=True)
                )
    if not augmented:
        # Keep the input schema instead of returning a column-less frame.
        return data.iloc[0:0].copy()
    return pd.DataFrame(augmented)

In [84]:
def save_augmented_data(path, duration, name, num_clips=None):
    """Load a pickled frame, augment it, and save the result.

    Args:
        path: pickle file to read with ``load_data``.
        duration: clip length forwarded to ``data_augmentation``.
        name: split name; used in the log lines and in the output file name
            ``data/ActionNet/{name}_EMG_augmented.pkl``.
        num_clips: forwarded to ``data_augmentation`` (``None`` selects
            consecutive cuts).

    Prints the shapes before and after augmentation and a preview of the
    result. Returns ``None``.
    """
    data = load_data(path)
    augmented = data_augmentation(data, duration, num_clips)
    # Label the log lines with the actual split; they used to always say "train".
    print(f"Original {name} data:", data.shape)
    print(f"Augmented {name} data:", augmented.shape)
    print(augmented.head())
    print("Saving augmented data")
    save_data(augmented, f"data/ActionNet/{name}_EMG_augmented.pkl")

In [89]:
# Choose the split to process, then write its augmented pickle:
# 20 clips per input row, each at most 10 long (units of "start"/"stop").
split="test"
save_augmented_data(f"data/ActionNet_EMG/{split}_EMG_filtered.pkl", 10, split, num_clips=20)

Original train data: (59, 9)
Augmented train data: (1180, 9)
       description         start          stop  \
4  Peel a cucumber  1.655172e+09  1.655172e+09   
4  Peel a cucumber  1.655172e+09  1.655172e+09   
4  Peel a cucumber  1.655172e+09  1.655172e+09   
4  Peel a cucumber  1.655172e+09  1.655172e+09   
4  Peel a cucumber  1.655172e+09  1.655172e+09   

                                 myo_left_timestamps  \
4  [1655172364.999943, 1655172365.0039296, 165517...   
4  [1655172367.01022, 1655172367.0251698, 1655172...   
4  [1655172369.02747, 1655172369.0314565, 1655172...   
4  [1655172371.037739, 1655172371.045712, 1655172...   
4  [1655172373.048013, 1655172373.051503, 1655172...   

                                   myo_left_readings  \
4  [[-1, -46, 29, 37, -8, 25, -10, 18], [4, 33, -...   
4  [[-8, 14, 118, 115, 2, 14, -6, -7], [31, 23, -...   
4  [[3, -18, 2, 8, 20, 32, 5, 2], [8, 7, -1, 45, ...   
4  [[0, 12, 4, -15, -18, 8, 25, 21], [-31, -38, -...   
4  [[25, -3, 13, 4, 1