In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from torch.utils.data import DataLoader, Dataset, TensorDataset
from sklearn.model_selection import LeaveOneGroupOut, KFold
from keras.utils import to_categorical
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Define folders containing the CSV files
folder = '/content/drive/MyDrive/'

In [4]:
num_files_to_load = 20  # Adjust this number to load the desired amount

# Initialize a list to store loaded DataFrames
dataframes = []

# Counter to keep track of loaded files
count = 0

for filename in os.listdir(folder):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder, filename)
        print(file_path)
        df = pd.read_csv(file_path, engine='python')

        columns_to_drop = ['datetime', 'user yes/no', 'compulsive', 'urge', 'tense']
        #df = df[(df['ignore' == 0])]
        df = df.drop(columns=columns_to_drop)
        dataframes.append(df)
        count += 1

        if count >= num_files_to_load:
            break

/content/drive/MyDrive/OCDetect_01_recording_22_cd9083e3-083b-46b1-9e0d-ed0441149ae7.csv
/content/drive/MyDrive/OCDetect_01_recording_23_ca3338e3-5685-43fc-8917-28abafc1885a.csv
/content/drive/MyDrive/OCDetect_01_recording_24_31debe60-483c-43b0-9b3f-09cd398fb63b.csv
/content/drive/MyDrive/OCDetect_03_recording_06_c076109d-651c-46c4-a745-5df8b383bec3.csv
/content/drive/MyDrive/OCDetect_03_recording_07_1764e99b-066a-43fc-a9f4-cce4583da909.csv
/content/drive/MyDrive/OCDetect_03_recording_08_f08133be-a401-412e-bc35-d9a04d0d7744.csv
/content/drive/MyDrive/OCDetect_03_recording_09_ca585847-6ab5-4121-a1bf-87ca67bf0dfa.csv
/content/drive/MyDrive/OCDetect_03_recording_10_62a7d4a4-6e46-471e-8b86-c79a4b495368.csv
/content/drive/MyDrive/OCDetect_03_recording_11_2877c8f5-29ee-4b8e-a0b2-12e116579073.csv
/content/drive/MyDrive/OCDetect_03_recording_13_4b27c776-7013-4adb-8710-0fedc5b5104a.csv
/content/drive/MyDrive/OCDetect_03_recording_14_d37d0b86-feef-47b4-8668-3bd6966fafb9.csv
/content/drive/MyDriv

In [5]:
filtered_dfs = [df[df['ignore'] == 0].copy() for df in dataframes if 'ignore' in df.columns and (df['ignore'] == 0).any()]
two_class_df = [df[df['relabeled'].isin([1, 2])] for df in dataframes]
two_class_comb = pd.concat(two_class_df)

In [6]:
scaler=MinMaxScaler()
two_class_comb[["acc x","acc y","acc z","gyro x","gyro y","gyro z"]]=scaler.fit_transform(two_class_comb[["acc x","acc y","acc z","gyro x","gyro y","gyro z"]])

In [7]:
label_counts = two_class_comb['relabeled'].value_counts()
count_label_1 = label_counts.get(1, 0)  # Count of label 1, default to 0 if not found
count_label_2 = label_counts.get(2, 0)  # Count of label 2, default to 0 if not found
if(count_label_1 & count_label_2):
    print(count_label_1)
    print(count_label_2)

161585
213207


In [8]:
# Split data into train and test sets
train_data, test_data, y_train, y_test = train_test_split(two_class_comb.drop(columns=['relabeled', 'timestamp', 'ignore']), two_class_comb['relabeled'], test_size=0.4)
print(f"values of train labels: {len(np.unique(y_train))}")
print(f"values of test labels: {len(np.unique(y_test))}")

values of train labels: 2
values of test labels: 2


In [9]:
# Define window size
window_size = 150
step_size = 150
X_train = train_data.values
# Initialize lists to store windowed data and labels
train_windows = []
train_labels = []

for i in range(0,(len(X_train)-window_size+1),step_size):
    window = X_train[i:i+window_size]  # Select only sensor axes columns
    train_windows.append(window)
    label_window = y_train[i:i+window_size]  # Select the label column for majority voting
    majority_label = np.bincount(label_window).argmax()  # Majority voting
    train_labels.append(majority_label)

train_windows = np.array(train_windows)
train_labels = np.array(train_labels)

In [11]:
# Save both arrays to a single .npz file
np.savez('training_data.npz', train_windows=train_windows, train_labels=train_labels)


In [None]:
# Download the file
from google.colab import files
files.download('training_data.npz')

In [12]:
# Creating test windows and labels
test_windows = []
test_labels = []

X_test = test_data.values

# Create sliding windows and labels for testing
for i in range(0, len(X_test) - window_size + 1, step_size):
    window = X_test[i:i + window_size]  # Exclude the last column (label)
    test_windows.append(window)
    label_window = y_test[i:i + window_size]  # Label column in the window
    # Majority voting for label
    majority_label = np.bincount(label_window).argmax()
    test_labels.append(majority_label)

# Convert test windows and labels to numpy arrays
test_windows = np.array(test_windows)
test_labels = np.array(test_labels)