Run the following code to initialize the working environment.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from typing import Tuple
import matplotlib.ticker as ticker
import time
import datetime
import os
import shutil

def extract_header_info(filename: str, header_size: int = 5) -> Tuple[str, str, str, str, str]:
    """
    Read the commented metadata header at the top of a recording file.

    Each of the first ``header_size`` lines is expected to look like
    ``# <title>: <value>``. Every header line is echoed to stdout. Only the
    first ``:`` separates title from value, so values may themselves contain
    colons (for example a time of day inside the notes).

    :param filename: Path to recording file.
    :param header_size: The size of the header, defaults to 5.
    :returns: A 5-tuple containing the sensor type, activity type, activity subtype, subject id and any notes.
        Fields whose title does not appear in the header are returned as empty strings.
    :raises ValueError: If the file has fewer than ``header_size`` lines, or a header
        line lacks the ``'# '`` prefix or the ``':'`` separator.
    """
    fields = {
        "Sensor type": "",
        "Activity type": "",
        "Activity subtype": "",
        "Subject id": "",
        "Notes": "",
    }

    with open(filename) as f:
        head = []
        for _ in range(header_size):
            raw_line = f.readline()
            if not raw_line:
                raise ValueError(f"{filename}: expected {header_size} header lines")

            # Keep everything after the first '# ' so later '# ' sequences in the value survive.
            _, marker, content = raw_line.rstrip().partition('# ')
            if not marker:
                raise ValueError(f"{filename}: header line without '# ' prefix: {raw_line!r}")
            head.append(content)

    for line in head:
        print(line)

        # Split on the first colon only; the value may contain further colons.
        title, separator, value = line.partition(":")
        if not separator:
            raise ValueError(f"{filename}: header line without ':' separator: {line!r}")

        title = title.strip()
        if title in fields:
            fields[title] = value.strip()

    return (
        fields["Sensor type"],
        fields["Activity type"],
        fields["Activity subtype"],
        fields["Subject id"],
        fields["Notes"],
    )

def get_frequency(dataframe: pd.DataFrame, ts_column: str = 'timestamp') -> float:
    """
    Estimate the sampling frequency of a recording.

    The number of rows is divided by the span between the first and the last
    timestamp; the span is divided by 1000 before use (millisecond timestamps
    give a result in Hz).

    :param dataframe: Dataframe containing sensor data. It must contain the column named by ``ts_column``.
    :param ts_column: The name of the column containing the timestamps. Default is 'timestamp'.
    :returns: Frequency in Hz (samples per second)
    """
    timestamps = dataframe[ts_column]
    first_ts = timestamps.iloc[0]
    last_ts = timestamps.iloc[-1]

    elapsed_seconds = (last_ts - first_ts) / 1000
    sample_count = len(dataframe)

    return sample_count / elapsed_seconds

def get_recording_length(dataframe: pd.DataFrame, ts_column: str = 'timestamp') -> float:
    """
    Return the length of a recording, computed as the number of rows divided
    by the frequency reported by ``get_frequency``.

    :param dataframe: Dataframe containing sensor data.
    :param ts_column: The name of the column containing the timestamps, forwarded
        to ``get_frequency``. Default is 'timestamp'.
    :returns: Recording length in seconds.
    """
    return len(dataframe) / get_frequency(dataframe, ts_column)

def plot_data(dataframe: pd.DataFrame, plot_title):
    """
    Plot the accelerometer and gyroscope traces of a recording in two stacked subplots.

    The figure width grows with the number of rows (``len(dataframe) / 10``)
    and the height is 0.3 times the width, so long recordings stay readable.
    Only the six signal columns are read; no metadata columns are required.

    :param dataframe: Dataframe containing the columns 'accel_x', 'accel_y',
        'accel_z', 'gyro_x', 'gyro_y' and 'gyro_z'.
    :param plot_title: Title shown above the accelerometer subplot.
    """
    num_data_points = len(dataframe)

    # Scale the figure with the amount of data; adjust the divisor to control the size.
    figure_width = num_data_points / 10
    aspect_ratio = 0.3
    figure_height = figure_width * aspect_ratio

    fig, ax = plt.subplots(2, 1, figsize=(figure_width, figure_height))

    line_width = 6
    label_font_size = 60
    tick_font_size = 40
    num_xticks = num_data_points // 10

    # ax[0] shows the accelerometer axes, ax[1] the gyroscope axes.
    for axis, prefix in zip(ax, ("accel", "gyro")):
        for component in ("x", "y", "z"):
            column = f"{prefix}_{component}"
            axis.plot(dataframe[column], label=column, linewidth=line_width)
        axis.legend()

        axis.xaxis.set_major_locator(ticker.MaxNLocator(num_xticks))

        # Tick label size on both axes, x tick labels rotated by 45 degrees.
        axis.tick_params(axis='both', labelsize=tick_font_size)
        axis.tick_params(axis='x', labelrotation=45)

        # Vertical grid lines (gridlines along the x-axis).
        axis.grid(axis='x', linestyle='--', linewidth=line_width)

    ax[1].set_xlabel("Data point no", fontsize=label_font_size)
    ax[0].set_ylabel("Acceleration", fontsize=label_font_size)
    ax[1].set_ylabel("Gyroscope", fontsize=label_font_size)

    # The title comes from the caller only; the dataframe is not expected to
    # carry 'sensor_type' / 'activity_type' columns.
    ax[0].set_title(plot_title, size=label_font_size)

    plt.tight_layout()
    plt.show()
    
def generate_new_timestamps(starting_timestamp, number_of_timestamps, frequency_hz: float = 25.0):
    """
    Generate evenly spaced timestamps in milliseconds.

    :param starting_timestamp: The first timestamp, in milliseconds.
    :param number_of_timestamps: How many timestamps to generate.
    :param frequency_hz: Sampling frequency used for the spacing. Defaults to 25 Hz,
        i.e. 40 ms between consecutive timestamps.
    :returns: A list of ``number_of_timestamps`` timestamps, the first one being
        ``starting_timestamp``. The list is empty when ``number_of_timestamps`` is 0.
    :raises ValueError: If ``frequency_hz`` is not positive.
    """
    if frequency_hz <= 0:
        raise ValueError("frequency_hz must be positive")

    # Interval computed directly in milliseconds; multiplying the counter by a
    # single interval avoids accumulating rounding error across samples.
    interval_ms = 1000.0 / frequency_hz

    return [
        starting_timestamp + counter * interval_ms
        for counter in range(number_of_timestamps)
    ]

Run this cell to loop through the files in the source directory, trim each recording interactively, and save the result to the target directory.

In [None]:
# Interactive clean-up of recordings: for every file in `sourcedir` the
# recording is plotted, checked against the expected frequency and length,
# optionally trimmed, and finally saved to `targetdir` (the source file is
# removed once both copies are written).
sourcedir = 'Enter source directory'
targetdir = 'Enter target directory'
header_size = 5

# Number of rows kept by the beginning/end trims (760 rows / 25 Hz = 30.4 s).
target_rows = 760
# Spacing of regenerated timestamps after a custom trim (25 Hz -> 40 ms).
ms_per_sample = 1000 // 25

filenames = os.listdir(sourcedir)

for filename in filenames:
    # Pause until the user presses Enter so files are reviewed one at a time.
    input(f'\nFile: {filename}\n')

    file_path = os.path.join(sourcedir, filename)

    try:
        sensor_type, activity_type, activity_subtype, subject_id, notes = extract_header_info(
            filename=file_path, header_size=header_size
        )
    except (OSError, ValueError, IndexError, StopIteration):
        # Not a readable recording (e.g. a directory or a binary file such as .DS_Store).
        print('Error reading file header... Skipping file')
        continue

    df = pd.read_csv(file_path, header=header_size)

    # Taken from the last sample of the untouched recording; used only in the
    # output file names, so it must not be reused as a loop variable below.
    recording_timestamp = datetime.datetime.fromtimestamp(
        df.timestamp.iat[-1] / 1000
    ).strftime('%Y-%m-%d_%H-%M-%S')

    skip_file = False

    while True:
        plot_data(df, f"{sensor_type} - {activity_type} - {activity_subtype} - {subject_id}")

        frequency = get_frequency(df)
        length = get_recording_length(df)

        print(f'Frequency: {frequency}')
        print(f'Length: {length}')

        # Unusable recordings are skipped; re-running the loop on unchanged
        # data would never terminate.
        if frequency < 24 or frequency > 26:
            print('Frequency is off!')
            skip_file = True
            break

        if length < 28:
            print('Recording too short!')
            skip_file = True
            break

        if length <= 32:
            # Length is within the accepted 28-32 s window: nothing left to edit.
            break

        user_input = input('Recording too long! Trim? (y|n)')

        if user_input != 'y':
            print('No trimming.')
            break

        user_input = input('Trim beginning (b), end (e) or do custom (c) trim?')

        if user_input == 'b':
            # Drop rows from the beginning, keeping the last `target_rows` rows.
            df = df.iloc[-target_rows:]
        elif user_input == 'e':
            # Drop rows from the end, keeping the first `target_rows` rows.
            df = df.iloc[:target_rows]
        elif user_input == 'c':
            # Collect every range first so a typo does not leave the
            # recording half-trimmed.
            try:
                to_trim = int(input("How many data ranges would you like to trim? "))

                print("\n")

                ranges = []
                for i in range(to_trim):
                    print(i+1, "Specify the range of the indexes that you would like to delete ----------------")

                    range_trim_start = int(input("Starting at index: "))
                    range_trim_end = int(input("Ending at index: "))

                    ranges.append((range_trim_start, range_trim_end))
                    print("\n")
            except ValueError:
                print('invalid input')
                continue

            # Ranges are inclusive and refer to the index labels shown on the
            # plot's x-axis; filtering on the index avoids adding a helper
            # column that would end up in the saved CSV.
            for range_trim_start, range_trim_end in ranges:
                df = df[(df.index < range_trim_start) | (df.index > range_trim_end)]

            if df.empty:
                print('No data left after trimming... Skipping file')
                skip_file = True
                break

            # Removing rows leaves gaps in time, so rebuild evenly spaced
            # timestamps from the first remaining sample. Positional access is
            # required because index label 0 may have been trimmed away.
            start_timestamp_ms = df.timestamp.iloc[0]
            df = df.assign(
                timestamp=[start_timestamp_ms + i * ms_per_sample for i in range(len(df))]
            )
        else:
            print('invalid input')

    if skip_file:
        continue

    user_input = input('Save and remove source file? (y|n)')

    if user_input == 'y':
        filename_unprocessed = os.path.join(targetdir, f'{sensor_type}_{subject_id}_{activity_type}_{activity_subtype}_unprocessed_{recording_timestamp}.csv')
        filename_clean = os.path.join(targetdir, f'{sensor_type}_{subject_id}_{activity_type}_{activity_subtype}_clean_{recording_timestamp}.csv')

        # Write both copies before deleting the source so a failure cannot lose data.
        shutil.copy(file_path, filename_unprocessed)
        df.to_csv(filename_clean)

        os.remove(file_path)


File: .DS_Store

Error reading file header... Skipping file
