In [None]:
import os
from pathlib import Path
import pandas as pd
import csv
import numpy as np

## Settings

In [None]:
# Directory that holds the label file and the recording files read below.
SOURCE_DIRECTORY = "sample_data"
# File inside SOURCE_DIRECTORY that is read into the `file_name` / `label` table.
LABELS = "labels.csv"

## Import data

In [None]:
# Load the file-name -> label table.
# `names=` supplies the column names, so the first line of the file is treated
# as data (assumes labels.csv has no header row - TODO confirm).
# `file_name` is forced to str so it always compares equal to the on-disk file
# names it is merged with later, even if the names look numeric.
df_labels = pd.read_csv(
    Path(SOURCE_DIRECTORY) / LABELS,
    names=["file_name", "label"],
    dtype={"file_name": str},
)
df_labels.head()

In [None]:
# Collect the recording files: every regular file in SOURCE_DIRECTORY except
# the label file. Sub-directories (e.g. `.ipynb_checkpoints`) are skipped
# because they cannot be opened as recordings, and the names are sorted so the
# load order is the same on every run (directory listing order is arbitrary).
file_names = sorted(
    entry.name
    for entry in Path(SOURCE_DIRECTORY).iterdir()
    if entry.is_file() and entry.name != LABELS
)

In [None]:
# Read each recording file into a list of ints, taken from the first CSV
# column of every row. `data[i]` corresponds to `file_names[i]`.
data = []
for file_name in file_names:
    with open(Path(SOURCE_DIRECTORY) / file_name, newline="") as inputfile:
        # csv.reader yields an empty list for a blank line; skip those rows
        # instead of failing with IndexError on row[0].
        recording = [int(row[0]) for row in csv.reader(inputfile) if row]
    data.append(recording)

In [None]:
# One row per recording file: its name next to the list of samples read from it.
recording_columns = {"file_name": file_names, "data": data}
df_recordings = pd.DataFrame(recording_columns)
df_recordings.head()

In [None]:
# Attach each label to its recording. The inner join keeps only file names
# that appear in both tables.
df = pd.merge(df_labels, df_recordings, how="inner", on="file_name")
df.head()

## Slice data into smaller samples

In [None]:
def create_overlapping_windows(data, window_size=200, overlap=100):
    """
    Create overlapping windows from time series data.

    Windows start at 0 and advance by ``window_size - overlap`` points. Only
    complete windows are kept, so trailing points that do not fill a whole
    window are dropped.

    Parameters:
    data (list or np.array): The input time series data.
    window_size (int): Number of data points per window. Must be positive.
    overlap (int): Number of overlapping data points between consecutive
        windows. Must be smaller than window_size.

    Returns:
    np.ndarray: The windows stacked along the first axis. Empty when data is
        shorter than window_size.

    Raises:
    ValueError: If window_size is not positive, or if overlap >= window_size
        (the window would never advance).
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")

    step = window_size - overlap
    # A non-positive step would revisit the same (or earlier) start forever.
    if step <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than window_size ({window_size})"
        )

    # Last index at which a full window still fits; negative when data is too short.
    last_start = len(data) - window_size
    windows = [
        data[start:start + window_size]
        for start in range(0, last_start + 1, step)
    ]

    return np.array(windows)

def slice_dataframe(df, window_size=200, overlap=100):
    """
    Slice the data column of the DataFrame into overlapping windows,
    and keep the file_name and label the same for each slice.

    Parameters:
    df (pd.DataFrame): The input DataFrame with columns ['file_name', 'label', 'data'].
    window_size (int): Number of data points per window.
    overlap (int): Number of overlapping data points between consecutive windows.

    Returns:
    pd.DataFrame: A new DataFrame with one row per window and columns
        ['file_name', 'label', 'data']. The columns are present even when no
        window could be produced (empty input or recordings shorter than
        window_size), so callers can always select them.
    """
    columns = ['file_name', 'label', 'data']

    # Walk the three columns in lockstep rather than building a Series per row.
    new_rows = [
        {'file_name': file_name, 'label': label, 'data': window}
        for file_name, label, data in zip(df['file_name'], df['label'], df['data'])
        for window in create_overlapping_windows(data, window_size, overlap)
    ]

    # Passing `columns` keeps the schema stable when new_rows is empty.
    return pd.DataFrame(new_rows, columns=columns)

In [None]:
# Window length and overlap (in data points) used to cut each recording.
WINDOW_SIZE = 200
OVERLAP = 100

df_resliced = slice_dataframe(df, window_size=WINDOW_SIZE, overlap=OVERLAP)
df_resliced.head()

## Add metrics

In [None]:
# Summary statistics of each row's `data`, one new column per statistic.
# NOTE(review): these are computed on `df` (whole recordings); `df_resliced`
# is not used in the cells visible here - confirm that is intended.
summary_functions = {
    "mean": np.mean,
    "median": np.median,
    "std": np.std,
    "min": np.min,
    "max": np.max,
}
for column_name, reducer in summary_functions.items():
    df[column_name] = df["data"].apply(reducer)
df.head()

In [None]:
def normalize(data: list) -> np.ndarray:
    """
    Min-max scale the values into the range [0, 1], rounded to 3 decimals.

    Parameters:
    data (list or np.array): Numeric values to rescale.

    Returns:
    np.ndarray: The rescaled values. A constant input (max == min) yields all
        zeros rather than NaN from a division by zero.

    Raises:
    ValueError: If data is empty (min/max of an empty array is undefined).
    """
    values = np.asarray(data)
    min_val = values.min()
    value_range = values.max() - min_val

    # A flat signal has no spread to scale by; map it to 0 instead of 0/0.
    if value_range == 0:
        return np.zeros(values.shape, dtype=float)

    return ((values - min_val) / value_range).round(3)

In [None]:
# Store the min-max scaled version of each row's `data` in its own column.
df["data_norm"] = df["data"].map(normalize)
df.head()

In [None]:
# Summary statistics of the normalized data, one new column per statistic.
normalized_summary_functions = {
    "mean_norm": np.mean,
    "median_norm": np.median,
    "std_norm": np.std,
}
for column_name, reducer in normalized_summary_functions.items():
    df[column_name] = df["data_norm"].apply(reducer)
df.head()