# Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from typing import Tuple
import matplotlib.ticker as ticker
import time
import datetime

In [None]:
# Mount Google Drive so files under /content/gdrive/ can be read.
# This is only needed (and only possible) on Google Colab: when the google.colab
# package cannot be imported, the mount is skipped instead of aborting "Run All".
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("google.colab is not available - skipping the Google Drive mount.")
else:
    drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


# Functions

In [None]:
def extract_header_info(filename: str, header_size: int = 5) -> Tuple[str, str, int, str, str]:
    """
    Read the "# key: value" metadata lines at the top of a recording file.

    Each of the first ``header_size`` lines is printed (without its leading '#')
    and, when its key is one of "Sensor type", "Activity type", "Activity code",
    "Subject id" or "Notes", its value is stored. Keys that never appear keep
    their defaults ("" for strings, -1 for the activity code).

    :param filename: Path to recording file.
    :param header_size: The size of the header, defaults to 5. Files with fewer
        lines are read up to the end without raising.
    :returns: A 5-tuple containing the sensor type, activity type, activity code, subject id and any notes.
    :raises ValueError: If the "Activity code" value is not an integer.
    """
    sensor_type = ""
    activity_type = ""
    activity_code = -1
    subject_id = ""
    notes = ""

    with open(filename) as f:
        # zip() with range first stops after header_size lines or at EOF,
        # whichever comes first, without consuming an extra line from the file.
        for _, raw_line in zip(range(header_size), f):
            line = raw_line.strip()
            if line.startswith('#'):
                line = line[1:].lstrip()
            print(line)

            # Split on the FIRST colon only, so values such as "21:26:26" survive intact.
            title, separator, value = line.partition(":")
            if not separator:
                # Not a "key: value" line - nothing to extract.
                continue

            title = title.strip()
            value = value.strip()

            if title == "Sensor type":
                sensor_type = value
            elif title == "Activity type":
                activity_type = value
            elif title == "Activity code":
                activity_code = int(value)
            elif title == "Subject id":
                subject_id = value
            elif title == "Notes":
                notes = value

    return sensor_type, activity_type, activity_code, subject_id, notes

In [None]:
def get_frequency(dataframe: pd.DataFrame, ts_column: str = 'timestamp') -> float:
    """
    Estimate the average sampling frequency of a recording.

    The frequency is the number of sampling intervals (samples - 1) divided by the
    time between the first and the last timestamp. Counting intervals rather than
    samples means evenly spaced 40 ms data reports exactly 25 Hz.

    :param dataframe: Dataframe containing sensor data. It needs to have a 'timestamp' column.
    :param ts_column: The name of the column containing the timestamps. Default is 'timestamp'.
        The values are divided by 1000 to obtain seconds, i.e. they are treated as milliseconds.
    :returns: Frequency in Hz (samples per second)
    :raises ValueError: If there are fewer than two samples, or the last timestamp is not
        later than the first one.
    """
    timestamps = dataframe[ts_column]
    sample_count = len(timestamps)
    if sample_count < 2:
        raise ValueError("At least two samples are required to compute a frequency.")

    duration_seconds = (timestamps.iloc[-1] - timestamps.iloc[0]) / 1000
    if duration_seconds <= 0:
        raise ValueError("The last timestamp must be later than the first timestamp.")

    # N samples are separated by N - 1 intervals.
    return float((sample_count - 1) / duration_seconds)

In [None]:
def get_recording_length(dataframe: pd.DataFrame, ts_column: str = 'timestamp') -> float:
  """
  Compute the length of a recording in seconds.

  The result is the number of rows divided by the sampling frequency that
  get_frequency reports for the same dataframe and timestamp column.

  :param dataframe: Dataframe containing sensor data.
  :param ts_column: The name of the column containing the timestamps. Default is 'timestamp'.
  :returns: Recording length in seconds.
  """
  return len(dataframe) / get_frequency(dataframe, ts_column)

In [None]:
def plot_data(dataframe: pd.DataFrame, plot_title) -> None:
  """
  Plot accelerometer and gyroscope traces of a recording in two stacked subplots.

  The top subplot shows the 'accel_x', 'accel_y' and 'accel_z' columns, the bottom one
  the 'gyro_x', 'gyro_y' and 'gyro_z' columns. The figure width grows with the number
  of rows (one inch per 10 rows) so individual data points stay readable, and the
  figure is displayed with plt.show().

  :param dataframe: Dataframe containing the six accel_* / gyro_* columns.
  :param plot_title: Title shown above the top subplot.
  :raises ValueError: If the dataframe is empty.
  """
  num_data_points = len(dataframe)
  if num_data_points == 0:
    raise ValueError("Cannot plot an empty dataframe.")

  # One inch per 10 data points; the lower bound keeps short recordings from
  # producing a figure too small for the large fonts used below.
  min_figure_width = 20
  figure_width = max(num_data_points / 10, min_figure_width)

  # Fixed height/width ratio of the figure.
  aspect_ratio = 0.3
  figure_height = figure_width * aspect_ratio

  fig, axes = plt.subplots(2, 1, figsize=(figure_width, figure_height))

  line_width = 6
  label_font_size = 60
  tick_font_size = 40

  # Roughly one major x tick per 10 data points, but never zero ticks.
  num_xticks = max(num_data_points // 10, 1)

  # (axis, column prefix, y-axis label) for each subplot.
  panels = (
      (axes[0], "accel", "Acceleration"),
      (axes[1], "gyro", "Gyroscope"),
  )

  for ax, prefix, y_label in panels:
    for component in ("x", "y", "z"):
      column = f"{prefix}_{component}"
      ax.plot(dataframe[column], label=column, linewidth=line_width)
    ax.legend()

    ax.xaxis.set_major_locator(ticker.MaxNLocator(num_xticks))
    ax.set_ylabel(y_label, fontsize=label_font_size)

    # Tick label size on both axes, x tick labels rotated by 45 degrees.
    ax.tick_params(axis='both', labelsize=tick_font_size)
    ax.tick_params(axis='x', labelrotation=45)

    # Vertical grid lines only.
    ax.grid(axis='x', linestyle='--', linewidth=line_width)

  axes[0].set_title(plot_title, size=label_font_size)
  axes[1].set_xlabel("Data point no", fontsize=label_font_size)

  plt.tight_layout()
  plt.show()

In [None]:
def generate_new_timestamps(starting_timestamp, number_of_timestamps, frequency_hz=25):
  """
  Generate evenly spaced millisecond timestamps.

  :param starting_timestamp: First timestamp of the series, in milliseconds.
  :param number_of_timestamps: How many timestamps to generate.
  :param frequency_hz: Sampling frequency of the generated series. Default is 25,
      i.e. one timestamp every 40 ms.
  :returns: A list of ``number_of_timestamps`` timestamps in milliseconds, starting at
      ``starting_timestamp``. The list is empty when ``number_of_timestamps`` is 0 or negative.
  :raises ValueError: If ``frequency_hz`` is not positive.
  """
  if frequency_hz <= 0:
    raise ValueError("frequency_hz must be positive.")

  # Each offset is computed from the counter (not accumulated), so rounding errors
  # do not build up for frequencies whose period is not a whole number of milliseconds.
  return [
      starting_timestamp + round(counter * 1000 / frequency_hz)
      for counter in range(number_of_timestamps)
  ]

# Filtering out gaps of inactivity in data

In [None]:
# Path of the Respeck recording to clean, located on the mounted Google Drive.
filename_respeck = "/content/gdrive/Shareddrives/Ink/PDIoT/Respeck_s2255740_Sitting_06-09-2023_21-26-26.csv"
# header=5 makes pandas take the column names from row index 5 and start the data after it.
# NOTE(review): the rows before it are presumably the five "# key: value" metadata lines
# that extract_header_info parses (header_size=5) - confirm against the file.
df_respeck = pd.read_csv(filename_respeck, header=5)

## Plotting raw and unprocessed data

**Note: Plotting the full recording may take a while!**

In [None]:
plot_data(df_respeck, "Respeck sitting and hyperventilating - accelerometer and gyroscope data")

[1;30;43mThis cell output is too large and can only be displayed while logged in.[0m


## Deleting gaps

In [None]:
# Remember every row's current index label in an 'ind' column; the ranges typed in
# below are matched against this column.
df_respeck['ind'] = df_respeck.index

to_trim = input("How many data ranges would you like to trim? ")
print("\n")

for range_number in range(1, int(to_trim) + 1):
  print(range_number, "Specify the range of the indexes that you would like to delete ----------------")

  range_trim_start = int(input("Starting at index: "))
  range_trim_end = int(input("Ending at index: "))

  # Keep only the rows that lie outside the inclusive [start, end] range.
  rows_to_keep = (df_respeck['ind'] < range_trim_start) | (df_respeck['ind'] > range_trim_end)
  df_respeck = df_respeck[rows_to_keep]
  print("\n")

How many data ranges would you like to trim? 5


1 Specify the range of the indexes that you would like to delete ----------------
Starting at index: 0
Ending at index: 15


2 Specify the range of the indexes that you would like to delete ----------------
Starting at index: 410
Ending at index: 490


3 Specify the range of the indexes that you would like to delete ----------------
Starting at index: 830
Ending at index: 945


4 Specify the range of the indexes that you would like to delete ----------------
Starting at index: 1250
Ending at index: 1350


5 Specify the range of the indexes that you would like to delete ----------------
Starting at index: 1470
Ending at index: 1500




## Plot resulting graph

In [None]:
print("RESULTING GRAPH -------------------------------------------------------------------")
# Renumber the remaining rows 0..n-1 (the old index is discarded) so the plot's
# x-axis has no holes where ranges were deleted.
df_respeck = df_respeck.reset_index(drop=True)
plot_data(df_respeck, "Respeck sitting and hyperventilating - accelerometer and gyroscope data")

[1;30;43mThis cell output is too large and can only be displayed while logged in.[0m


# Readjusting the timestamps to get 25 Hz frequency data

In [None]:
df_respeck

Unnamed: 0,timestamp,accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z,ind
0,1694028327585,-0.488525,-0.880676,0.276794,0.015625,-0.468750,0.000000,16
1,1694028327622,-0.489746,-0.872864,0.268494,-1.765625,0.656250,-0.484375,17
2,1694028327661,-0.491455,-0.867004,0.274109,-0.328125,0.312500,-0.453125,18
3,1694028327697,-0.482910,-0.870667,0.277527,-0.796875,2.281250,-0.281250,19
4,1694028327736,-0.498779,-0.882629,0.294617,-1.109375,3.437500,-0.078125,20
...,...,...,...,...,...,...,...,...
1151,1694028385147,-0.538574,-0.801331,0.325378,2.140625,9.359375,-0.078125,1465
1152,1694028385185,-0.579834,-0.848938,0.332458,0.312500,8.359375,0.125000,1466
1153,1694028385241,-0.490479,-0.829895,0.326843,-0.734375,3.171875,1.250000,1467
1154,1694028385278,-0.498291,-0.856506,0.294128,2.109375,0.109375,-0.890625,1468


In [None]:
get_frequency(df_respeck)

20.02390396840519

In [None]:
# Replace the recorded timestamps with an evenly spaced 25 Hz series (one every 40 ms)
# that starts at the first recorded timestamp.

# Starting timestamp in milliseconds. It is read by position (.iloc) so the cell does
# not depend on the index containing the label 0.
start_timestamp_ms = df_respeck['timestamp'].iloc[0]

# One new timestamp per row.
num_timestamps = len(df_respeck)

# Time interval between two timestamps in microseconds.
microseconds_per_timestamp = int(1e6 / 25)

# Offset of row i is i intervals, converted from microseconds to whole milliseconds.
timestamps = [
    start_timestamp_ms + i * microseconds_per_timestamp // 1000
    for i in range(num_timestamps)
]

df_respeck['timestamp'] = timestamps

In [None]:
get_frequency(df_respeck)

25.02164502164502

# Trimming data to 30 s

In [None]:
get_recording_length(df_respeck)

46.2

In [None]:
get_recording_length(df_respeck[:760])

30.360000000000003

In [None]:
# Keep only the first 760 rows (positional slice); the cell above reports a
# recording length of about 30.36 s for this slice.
df_respeck = df_respeck.iloc[:760]

In [None]:
plot_data(df_respeck, "Respeck sitting and hyperventilating - accelerometer and gyroscope data")

[1;30;43mThis cell output is too large and can only be displayed while logged in.[0m
