### Step 1 Data Subset Creation

In [None]:
# src/har_binary_classification.py

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load the dataset
def load_data(data_dir='../data/UCI HAR Dataset'):
    """Load the train and test splits of the UCI HAR dataset.

    Args:
        data_dir: Root directory of the dataset. It must contain ``train/``
            and ``test/`` sub-directories holding ``X_<split>.txt`` and
            ``y_<split>.txt``. The default is the path that used to be
            hard-coded here, so zero-argument calls behave as before.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)`` of DataFrames. The
        files are read without a header row, so columns are numbered from 0.
    """
    def read_table(split, prefix):
        # Parsed as whitespace-delimited text with no header line.
        return pd.read_csv(f'{data_dir}/{split}/{prefix}_{split}.txt',
                           sep=r'\s+', header=None)

    # Load training data
    X_train = read_table('train', 'X')
    y_train = read_table('train', 'y')

    # Load test data
    X_test = read_table('test', 'X')
    y_test = read_table('test', 'y')

    return X_train, y_train, X_test, y_test

# Relabel the data for binary classification (Walking vs. Not Walking); no rows are dropped
def filter_binary_classification(X, y):
    """Collapse the activity codes into a Walking / Not Walking target.

    Args:
        X: Feature table; handed back unchanged.
        y: Label DataFrame whose column ``0`` holds the activity codes.

    Returns:
        ``(X, y_binary)`` where ``y_binary`` is a Series holding 1 wherever
        the code equals 1 (Walking) and 0 everywhere else.
    """
    def to_binary(code):
        # Code 1 is Walking; every other code counts as Not Walking.
        if code == 1:
            return 1
        return 0

    return X, y[0].map(to_binary)

# Load both splits, then swap each split's activity codes for the binary
# Walking (1) / Not Walking (0) target; the feature tables pass through as-is.
X_train, y_train, X_test, y_test = load_data()
X_train, y_train = filter_binary_classification(X_train, y_train)
X_test, y_test = filter_binary_classification(X_test, y_test)

### Step 2 Data Preprocessing

In [None]:
# src/har_binary_classification.py

# Normalize the data: rescale every feature column to the range [-1, 1].
# The scaler is fitted on the training split only and then reused for the
# test split, so the test data does not influence the scaling parameters.
scaler = MinMaxScaler(feature_range=(-1, 1))
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Segment the data into fixed-size windows
def segment_data(X, window_size=128):
    """Split the rows of ``X`` into consecutive, non-overlapping windows.

    Args:
        X: Array-like whose first axis indexes rows, e.g. a 2-D
            ``(rows, features)`` array or DataFrame.
        window_size: Number of rows per window; must be positive.

    Returns:
        A new array of shape ``(n_windows, window_size, *X.shape[1:])`` with
        ``n_windows = len(X) // window_size``. Trailing rows that do not fill
        a complete window are dropped. When there are too few rows for even
        one window, the result still carries the window and feature
        dimensions (with ``n_windows == 0``), so callers can keep reading
        ``.shape[1]`` and ``.shape[2]``.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    rows = np.asarray(X)
    n_windows = len(rows) // window_size
    # Drop the incomplete tail, then fold each run of `window_size` rows into
    # its own leading-axis entry. Copy so the result never aliases the input.
    trimmed = rows[:n_windows * window_size]
    return trimmed.reshape((n_windows, window_size) + rows.shape[1:]).copy()

# Cut the scaled train and test matrices into windows of 128 consecutive rows
# (the default window_size); rows left over after the last full window are
# discarded.
# NOTE(review): each row of the UCI HAR X_*.txt files is presumably already a
# feature vector computed over one sensor window — confirm that grouping 128
# such rows into a new window is intended.
X_train_segments = segment_data(X_train_scaled)
X_test_segments = segment_data(X_test_scaled)

### Step 3 Model Building & Training

In [None]:
# src/har_binary_classification.py

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout

# Build a simple CNN model with Dropout layers
def build_model(input_shape):
    """Assemble and compile the 1-D convolutional binary classifier.

    Args:
        input_shape: Shape of a single input sample, forwarded to ``Input``.

    Returns:
        The compiled ``Sequential`` model: Adam optimizer with learning rate
        0.001, binary cross-entropy loss, accuracy as the tracked metric.
    """
    # Convolution + pooling stage, regularized with dropout.
    feature_extractor = [
        Input(shape=input_shape),
        Conv1D(64, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Dropout(0.5),
    ]
    # Dense head ending in one sigmoid unit (a single probability output).
    classifier_head = [
        Flatten(),
        Dense(100, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(feature_extractor + classifier_head)

    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    network.compile(optimizer=adam,
                    loss='binary_crossentropy',
                    metrics=['accuracy'])
    return network

def segment_labels(y, window_size=128):
    """Return one label per non-overlapping window of ``y``.

    The label of a window is the label of its first row. Windows are laid
    out like the ones ``segment_data`` produces: consecutive, non-overlapping,
    with an incomplete trailing window dropped.

    Args:
        y: Labels as a pandas Series, NumPy array or plain sequence; rows are
            addressed by position, not by index label.
        window_size: Number of rows per window; must be positive.

    Returns:
        A new NumPy array holding ``len(y) // window_size`` labels.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    labels = np.asarray(y)
    n_windows = len(labels) // window_size
    # A strided slice picks the first row of every complete window; the stop
    # bound keeps the start of an incomplete trailing window out.
    return labels[:n_windows * window_size:window_size].copy()

# One label per training window, using the same default window size as
# X_train_segments so labels and windows line up one-to-one.
y_train_segments = segment_labels(y_train)

# A single sample is one window: (rows per window, features per row).
input_shape = (X_train_segments.shape[1], X_train_segments.shape[2])
model = build_model(input_shape)

# Train the model for 10 epochs in batches of 32; validation_split=0.2 holds
# back 20% of the training windows for validation.
history = model.fit(X_train_segments, y_train_segments, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 2s 283ms/step - accuracy: 0.5563 - loss: 0.8029 - val_accuracy: 0.9167 - val_loss: 0.9109
Epoch 2/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 94ms/step - accuracy: 0.8546 - loss: 1.5083 - val_accuracy: 0.9167 - val_loss: 0.2506
Epoch 3/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 83ms/step - accuracy: 0.7553 - loss: 0.4477 - val_accuracy: 1.0000 - val_loss: 0.4039
Epoch 4/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 98ms/step - accuracy: 0.5519 - loss: 0.9250 - val_accuracy: 0.9167 - val_loss: 0.2236
Epoch 5/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 106ms/step - accuracy: 0.8190 - loss: 0.4672 - val_accuracy: 0.9167 - val_loss: 0.2157
Epoch 6/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 109ms/step - accuracy: 0.8546 - loss: 0.3698 - val_accuracy: 0.9167 - val_loss: 0.1641
Epoch 7/10
2/2 ━━━━━━━━━━━━━━━

### Step 4 Evaluation

In [6]:
# src/har_binary_classification.py

from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Segment the labels
def segment_labels(y, window_size=128):
    """Return one label per non-overlapping window of ``y``.

    The label of a window is the label of its first row. Windows are laid
    out like the ones ``segment_data`` produces: consecutive, non-overlapping,
    with an incomplete trailing window dropped.

    Args:
        y: Labels as a pandas Series, NumPy array or plain sequence; rows are
            addressed by position, not by index label.
        window_size: Number of rows per window; must be positive.

    Returns:
        A new NumPy array holding ``len(y) // window_size`` labels.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    labels = np.asarray(y)
    n_windows = len(labels) // window_size
    # A strided slice picks the first row of every complete window; the stop
    # bound keeps the start of an incomplete trailing window out.
    return labels[:n_windows * window_size:window_size].copy()

# One label per test window, using the same default window size as
# X_test_segments so labels and windows line up one-to-one.
y_test_segments = segment_labels(y_test)

# Evaluate the model
# The network ends in a single sigmoid unit, so each prediction is a
# probability; values above 0.5 are counted as class 1 (Walking).
y_pred = model.predict(X_test_segments)
y_pred_binary = (y_pred > 0.5).astype(int)

accuracy = accuracy_score(y_test_segments, y_pred_binary)
# zero_division=0 reports a precision of 0 when no window is predicted
# positive. NOTE(review): recall_score below does not pass zero_division —
# confirm whether that asymmetry is intentional.
precision = precision_score(y_test_segments, y_pred_binary,zero_division=0)
recall = recall_score(y_test_segments, y_pred_binary)
conf_matrix = confusion_matrix(y_test_segments, y_pred_binary)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'Confusion Matrix:\n{conf_matrix}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
Accuracy: 0.8260869565217391
Precision: 0.0
Recall: 0.0
Confusion Matrix:
[[19  0]
 [ 4  0]]
