In [1]:
import os
import pandas as pd
from preprocess import PreprocessAccel

pre_acc = PreprocessAccel(sampling_rate=50)

In [87]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

def compute_fft(df, record_index, sampling_rate=50):
    """
    Computes the FFT of a single record from a DataFrame and plots the time and frequency domain representations.
    
    Parameters:
        df (pd.DataFrame): DataFrame containing the signal data.
        record_index (int): Index of the record to analyze.
        sampling_rate (int, optional): Sampling rate of the signal in Hz. Default is 50 Hz.
    
    Returns:
        tuple: (frequencies, magnitude_spectrum) after FFT computation.
    """
    # Extract signal values (x1 to x250) and ensure they are numeric
    signal = df.iloc[record_index, 2:252]  # Extract x1 to x250
    
    # Convert to numeric
    signal = pd.to_numeric(signal, errors='coerce').values.astype(float)
    
    # Check for NaN values
    if np.isnan(signal).any():
        raise ValueError("Signal contains NaN values. Check the data.")

    # Compute FFT
    N = len(signal)  # Should be 250
    fft_values = np.fft.fft(signal)  # Compute FFT
    fft_values = np.abs(fft_values[:126])  # Keep first 126 components

    # Compute frequency bins
    freq = np.fft.fftfreq(N, d=1/sampling_rate)[:126]  # Adjusted to len 126

    # Plot Time-Domain Signal
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(np.arange(N) / sampling_rate, signal, label="Original Signal")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.title("Time-Domain Signal")
    plt.legend()

    # Plot Frequency-Domain (FFT) Signal with Fill
    plt.subplot(1, 2, 2)
    plt.fill_between(freq, fft_values, min(fft_values), color='red', alpha=0.4)  # Fill without touching 0
    plt.plot(freq, fft_values, label="FFT Magnitude", color='r')
    # plt.plot(freq, 2.0/N * np.abs(fft_values[0:N//2+1]))
    plt.xlabel("Frequency (Hz)")
    plt.ylabel("Magnitude")
    plt.title("Frequency-Domain (FFT) Representation")
    plt.xlim(0,)
    plt.legend()


    plt.tight_layout()
    plt.show()

    # return freq, fft_values



In [None]:
root_path = r'D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets'
window_size = 3 # window size in second
overlap = 0.35

In [None]:
root_path = r'D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets'
window_size = 3 # window size in second
overlap = 0.35

file_path = os.path.join(root_path, "hand", "walking.csv")
data = pd.read_csv(file_path)
walking = pre_acc.segment_and_flatten_magnitude(
    label='walking', magnitude_column_name='magnitude', 
    df=data, window_size_sec=window_size, overlap=overlap)

file_path = os.path.join(root_path, "hand", "sitting_standing_transition.csv")
data = pd.read_csv(file_path)
sitting_standing = pre_acc.segment_and_flatten_magnitude(
    label='sitting_standing_transition', magnitude_column_name='magnitude', 
    df=data, window_size_sec=window_size, overlap=overlap)

file_path = r"D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets\hand\running.csv"
data = pd.read_csv(file_path)
running = pre_acc.segment_and_flatten_magnitude(
    label='running', magnitude_column_name='magnitude', 
    df=data, window_size_sec=window_size, overlap=overlap)

file_path = r"D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets\hand\descending_stairs.csv"
data = pd.read_csv(file_path)
desen_stairs = pre_acc.segment_and_flatten_magnitude(
    label='descending_stairs', magnitude_column_name='magnitude', 
    df=data, window_size_sec=window_size, overlap=overlap)

file_path = r"D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets\hand\climbing_stairs.csv"
data = pd.read_csv(file_path)
climb_stairs = pre_acc.segment_and_flatten_magnitude(
    label='climbing_stairs', magnitude_column_name='magnitude', 
    df=data, window_size_sec=window_size, overlap=overlap)

merge_df = pd.concat([walking, sitting_standing, running, desen_stairs, climb_stairs])
merge_df['label'].value_counts()

In [None]:
file_path = r"D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets\pocket\walking.csv"
data = pd.read_csv(file_path)
walking = pre_acc.segment_and_flatten_magnitude(
    label='walking', magnitude_column_name='magnitude', 
    df=data, window_size_sec=window_size, overlap=overlap)

file_path = r"D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets\pocket\sitting_standing_transition.csv"
data = pd.read_csv(file_path)
sitting_standing = pre_acc.segment_and_flatten_magnitude(
    label='sitting_standing', magnitude_column_name='magnitude', 
    df=data, window_size_sec=window_size, overlap=overlap)

file_path = r"D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets\pocket\running.csv"
data = pd.read_csv(file_path)
running = pre_acc.segment_and_flatten_magnitude(
    label='running', magnitude_column_name='magnitude', 
    df=data, window_size_sec=window_size, overlap=overlap)

file_path = r"D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets\pocket\descending_stairs.csv"
data = pd.read_csv(file_path)
desen_stairs = pre_acc.segment_and_flatten_magnitude(
    label='descending_stairs', magnitude_column_name='magnitude', 
    df=data, window_size_sec=window_size, overlap=overlap)

file_path = r"D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets\pocket\climbing_stairs.csv"
data = pd.read_csv(file_path)
climb_stairs = pre_acc.segment_and_flatten_magnitude(
    label='climbing_stairs', magnitude_column_name='magnitude', 
    df=data, window_size_sec=window_size, overlap=overlap)

In [5]:
merge_df = pd.concat([merge_df, walking, sitting_standing, running, desen_stairs, climb_stairs])
merge_df['label'].value_counts()

label
walking              270
running              176
sitting_standing     157
climbing_stairs      139
descending_stairs    133
Name: count, dtype: int64

In [6]:
merge_df.shape

(875, 252)

In [8]:
merge_df.to_csv('cleaned_combine_all_class_with_label_no_fft.csv', index=False)

In [8]:
merge_df.head()

Unnamed: 0,start_time,label,x1,x2,x3,x4,x5,x6,x7,x8,...,x241,x242,x243,x244,x245,x246,x247,x248,x249,x250
0,29,walking,9.391768,9.095416,9.116803,9.038429,8.920207,9.119304,9.344565,9.368447,...,9.120713,9.209533,9.354902,9.285413,9.279229,9.3565,9.339347,9.163902,9.274815,9.37103
1,2791,walking,9.339839,9.334993,9.27488,9.153142,9.202418,9.314295,9.340177,9.35054,...,10.843081,9.170971,7.791438,6.965673,6.593102,7.27407,8.719662,8.712233,8.784947,7.975563
2,5603,walking,9.394136,9.231343,9.252686,9.257068,9.281385,9.2941,9.152972,9.336755,...,10.808816,10.393205,9.804173,9.059288,7.812298,7.749445,7.859224,8.034451,8.413953,9.333868
3,8420,walking,7.183794,7.151433,6.730669,6.369309,7.059695,7.495365,8.232169,8.581987,...,6.619653,7.44616,7.250531,10.119066,15.669209,15.674033,13.891112,12.759346,14.375291,14.512705
4,11241,walking,9.832197,10.361916,10.715456,10.929579,11.233561,11.865534,13.391049,14.774566,...,8.623207,7.681159,7.276771,7.860083,8.214317,7.785602,7.446644,7.064347,7.367401,7.866696


In [2]:
fft_df = pre_acc.compute_fft_on_flattened_data(merge_df)
fft_df.sample(10)

NameError: name 'merge_df' is not defined

In [11]:
fft_df.to_csv('cleaned_combine_all_class_with_label_applied_fft.csv', index=False)

In [2]:
import os
import pandas as pd
import re
from glob import glob
from preprocess import PreprocessAccel

pre_acc = PreprocessAccel(sampling_rate=50)

root_path = r"C:\Git-Repository\dsp_project\ignore_dir\datasets\striped_data_many_position"
# root_path = r"D:\git_repository\dsp_project\ignore_dir\datasets\clean_datasets"
# magnitude_column_name = "filtered_magnitude"  # Adjust if needed
magnitude_column_name = "magnitude"  # raw magnitude
window_size = 2  # Window size in seconds
overlap = 0.5

# Function to extract label from filename
def extract_label(filename):
    match = re.match(r'([a-zA-Z_]+)', filename)
    return match.group(1) if match else filename

# Process all CSV files in the directory and subdirectories
all_data = []
for dirpath, _, filenames in os.walk(root_path):
    for filename in filenames:
        if filename.endswith(".csv"):  # Process only CSV files
            file_path = os.path.join(dirpath, filename)
            label = extract_label(filename)

            data = pd.read_csv(file_path)
            processed_data = pre_acc.segment_and_flatten_magnitude(
                label=label, magnitude_column_name=None, 
                df=data, window_size_sec=window_size, overlap=overlap
            )
            all_data.append(processed_data)

# Combine all processed data
if all_data:
    merged_df = pd.concat(all_data, ignore_index=True)
    print("Merged DataFrame shape:", merged_df.shape)
else:
    print("No CSV files found.")


Merged DataFrame shape: (5966, 102)


In [4]:
merged_df.sample(10)

Unnamed: 0,start_time,label,x1,x2,x3,x4,x5,x6,x7,x8,...,x91,x92,x93,x94,x95,x96,x97,x98,x99,x100
82,131473,climbing_stairs,2.915664,3.69423,2.908512,1.667282,1.453553,1.084724,-0.98746,-2.964544,...,-0.576627,-2.516471,-2.56668,-2.162824,-2.198854,-2.171382,-2.088249,-2.164957,-2.089909,-1.67193
478,112224,nothing,-0.008564,-0.081049,-0.195536,-0.147027,0.025362,0.061215,-0.008319,0.006356,...,-0.023733,0.041649,0.040232,-0.002481,-0.020346,-0.00503,0.022421,0.036057,0.023292,0.02432
306,47123,nothing,-0.033369,-0.035563,-0.034921,-0.042354,-0.039381,-0.02769,-0.00968,0.032644,...,0.004117,0.009043,0.036199,0.038887,-0.01951,-0.071182,-0.032022,0.032053,0.020614,-0.002381
399,21916,nothing,0.085236,0.074143,0.058147,0.073854,0.100123,0.115654,0.119608,0.116534,...,-0.121912,-0.003858,0.034426,-0.015052,-0.00762,0.052376,0.077578,0.036103,-0.082761,-0.187961
695,248051,running,-6.78023,-5.289771,-4.005602,-2.415397,2.509662,11.977529,22.172856,24.718154,...,-1.25456,2.956296,11.092984,18.322175,17.987515,11.152429,2.353432,-3.920217,-4.453871,-4.031208
3379,219054,walking,3.326794,5.635793,4.955017,2.073601,1.097996,2.268256,1.588729,-1.701885,...,-3.276664,-3.521195,-2.604746,-0.70455,0.285606,0.841624,3.088638,7.2043,9.236662,5.036732
2158,49383,walking,-1.28657,-0.539514,0.312915,1.069944,1.666558,2.047088,2.006797,1.581251,...,2.395253,2.301854,2.36065,2.053293,1.293044,0.699269,0.989187,1.560828,0.437855,-2.351071
65,109350,climbing_stairs,-0.527423,-0.366898,-0.432734,-0.421793,-0.23437,-0.009544,0.467705,1.26268,...,0.328692,0.123263,-0.132125,0.0009,0.413974,0.864609,1.458105,2.357757,3.042894,2.719555
4150,168212,running,-4.069708,-6.056291,-8.187987,-7.883551,-6.951133,-7.058612,-6.677206,-4.972519,...,-5.920713,-8.593138,-8.326306,-6.799063,-7.070263,-6.825028,-4.934778,-4.375583,-4.793312,-2.805708
2768,35900,nothing,0.002986,0.015881,0.004549,-0.004365,0.011822,0.031302,0.026118,-0.006992,...,-0.016089,-0.019407,0.015963,0.018967,0.008945,0.025215,0.032301,-0.001216,-0.039601,-0.038469


In [5]:
merged_df['label'].value_counts()

label
sitting_standing_transition    1147
nothing                        1127
running                        1068
walking                        1018
climbing_stairs                 839
descending_stairs               767
Name: count, dtype: int64

In [6]:
# Find the minimum count of all labels
min_count = merged_df['label'].value_counts().min()

# Downsample each class to match the smallest class count
balanced_df = (
    merged_df.groupby('label', group_keys=False)
    .apply(lambda x: x.sample(n=min_count, random_state=42))  # Ensures reproducibility
)

# Display the new class distribution
balanced_df['label'].value_counts()

  .apply(lambda x: x.sample(n=min_count, random_state=42))  # Ensures reproducibility


label
climbing_stairs                767
descending_stairs              767
nothing                        767
running                        767
sitting_standing_transition    767
walking                        767
Name: count, dtype: int64

In [7]:
balanced_df.sample(5)

Unnamed: 0,start_time,label,x1,x2,x3,x4,x5,x6,x7,x8,...,x91,x92,x93,x94,x95,x96,x97,x98,x99,x100
5285,32456,running,17.074559,23.138192,16.251149,3.470616,-4.372644,-6.590715,-6.397299,-4.388288,...,9.669316,19.399998,18.901546,8.178964,0.779977,0.233738,-0.883344,-3.023124,-3.790427,-5.441693
3058,63777,sitting_standing_transition,-0.513454,-0.647933,-0.628355,-0.572427,-0.216695,0.836463,1.596009,0.506383,...,-0.004324,-0.01964,-0.004229,-0.005666,-0.032473,-0.025606,-0.001229,-0.024823,-0.062238,-0.05435
3086,98197,sitting_standing_transition,0.708494,0.375895,1.038483,1.286714,0.095561,-1.013535,-0.643082,-0.183781,...,0.014966,-0.020493,-0.005673,0.008587,-0.032319,-0.059379,-0.02991,-0.006914,-0.033109,-0.065594
2262,166423,walking,3.371239,4.262099,3.926279,3.057438,3.070827,3.952003,4.472129,3.466503,...,0.732252,0.746007,1.298063,1.620333,1.399754,-0.071793,-2.122821,-2.487393,-1.781096,-2.23693
721,30933,sitting_standing_transition,-0.293095,-0.323806,-0.351813,-0.398044,-0.446466,-0.446412,-0.404218,-0.387244,...,-0.551914,-0.642384,-0.660758,-0.623668,-0.568633,-0.504608,-0.452699,-0.42126,-0.392261,-0.353623


In [8]:
root_path = r'C:\Git-Repository\dsp_project\ignore_dir\datasets\ready_to_train'
filename = f"flattened_no_fft_win{str(window_size)}_lab{str(int(overlap*100))}.csv"
balanced_df.to_csv(os.path.join(root_path, filename), index=False)

print(filename)

flattened_no_fft_win2_lab50.csv


In [5]:
balanced_df.shape[1]-2

250

In [11]:
fft_df = pre_acc.compute_fft_on_flattened_data(balanced_df, num_samples=balanced_df.shape[1]-2, remove_flatten=False)
fft_df.sample(4)

Unnamed: 0,start_time,label,x1,x2,x3,x4,x5,x6,x7,x8,...,x143,x144,x145,x146,x147,x148,x149,x150,x151,x152
4253,264530,walking,-0.941908,-2.167121,-2.057321,-1.771626,-1.608515,-1.172086,-0.779351,-0.492837,...,0.752406,0.806823,0.786409,0.756084,0.729455,0.707609,0.695642,0.687434,0.682316,0.680618
3395,183818,sitting_standing_transition,1.013632,1.664101,0.741127,-0.646344,-0.866288,-1.125345,-1.437734,-0.892248,...,0.156617,0.195688,0.215923,0.230652,0.241507,0.249331,0.254933,0.258677,0.260821,0.261521
65,113904,climbing_stairs,0.89215,-0.712305,-1.982475,-2.668303,-2.569149,-1.802824,-0.849595,-0.235119,...,2.067423,2.050286,2.032311,2.01516,2.002911,1.991811,1.983892,1.978124,1.974681,1.973534
4290,49136,walking,-2.01276,-1.776864,-1.347914,-0.853741,-0.276082,0.164304,0.530664,1.322477,...,0.131148,0.079901,0.067848,0.058966,0.044398,0.035601,0.026994,0.01878,0.011795,0.008346


In [12]:
root_path = r'C:\Git-Repository\dsp_project\ignore_dir\datasets\ready_to_train'
# _format = 'flattened_with_fft' 
_format = 'combine_time_feq' 
# _format = 'NO_FILTER' 
filename = f"{_format}_win{str(window_size)}_lab{str(int(overlap*100))}.csv"
fft_df.to_csv(os.path.join(root_path, filename), index=False)
filename

'combine_time_feq_win2_lab50.csv'