### Step 1 Data Subset Creation

In [1]:
# src/har_ternary_classification.py

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load the dataset
def load_data():
    # Load training data
    X_train = pd.read_csv('../data/UCI HAR Dataset/train/X_train.txt', sep='\\s+', header=None)
    y_train = pd.read_csv('../data/UCI HAR Dataset/train/y_train.txt', sep='\\s+', header=None)
    
    # Load test data
    X_test = pd.read_csv('../data/UCI HAR Dataset/test/X_test.txt', sep='\\s+', header=None)
    y_test = pd.read_csv('../data/UCI HAR Dataset/test/y_test.txt', sep='\\s+', header=None)
    
    return X_train, y_train, X_test, y_test


# Update the filtering function to remap labels (1->0, 4->1, 5->2)
def filter_ternary_classification(X, y):
    mapping = {1: 0, 4: 1, 5: 2}
    mask = y[0].isin(mapping.keys())
    y_ternary = y[mask].replace(mapping)
    X = X[mask]
    return X, y_ternary

# X_test, y_test = filter_ternary_classification(X_test, y_test)

X_train, y_train, X_test, y_test = load_data()
X_train, y_train = filter_ternary_classification(X_train, y_train)
X_test, y_test = filter_ternary_classification(X_test, y_test)

### Step 2 Data preprocessing

In [3]:
# src/har_ternary_classification.py
from sklearn.utils.class_weight import compute_class_weight
# Normalize the data
scaler = MinMaxScaler(feature_range=(-1, 1))
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Segment the data into fixed-size windows
def segment_data(X, window_size=128):
    segments = []
    for start in range(0, len(X) - window_size + 1, window_size):
        segment = X[start:start + window_size]
        segments.append(segment)
    return np.array(segments)

# Updated segment_labels to extract a scalar label for each segment
def segment_labels(y, window_size=128):
    segments = []
    for start in range(0, len(y) - window_size + 1, window_size):
        segment = y[start:start + window_size]
        # If y is a DataFrame use y.iloc[start, 0]; if a Series, use int(y.iloc[start])
        segments.append(int(segment.iloc[0]))
    return np.array(segments)

X_train_segments = segment_data(X_train_scaled)
X_test_segments = segment_data(X_test_scaled)
y_train_segments = segment_labels(y_train)
y_test_segments = segment_labels(y_test)

X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(X_train_segments, y_train_segments, test_size=0.2, random_state=42)

class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train_segments), y=y_train_segments)
class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}

  segments.append(int(segment.iloc[0]))


### Step 3 Model Building & Training

In [4]:
# src/har_ternary_classification.py

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Convert the labels to categorical
y_train_split = to_categorical(y_train_split-1, num_classes=3)
y_val_split = to_categorical(y_val_split-1,num_classes=3)
y_test_segments = to_categorical(y_test_segments-1,num_classes=3)


# Build a simple CNN model with Dropout layers
def build_model(input_shape):
    model = Sequential([
        Input(shape=input_shape),
        Conv1D(64, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Dropout(0.5),
        Conv1D(128, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Dropout(0.5),
        Flatten(),
        Dense(100, activation='relu'),
        Dropout(0.5),
        Dense(3, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


input_shape = (X_train_segments.shape[1], X_train_segments.shape[2])
model = build_model(input_shape)

# class_weights = {0: 1.0, 1: 2.0}  # Class weights for ternary classification
# Train the model
history = model.fit(X_train_split, y_train_split, epochs=20, batch_size=32, validation_data=(X_val_split, y_val_split),class_weight=class_weights_dict)

Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.2500 - loss: 1.7415 - val_accuracy: 0.5000 - val_loss: 0.9389
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step - accuracy: 0.3750 - loss: 2.3856 - val_accuracy: 0.6667 - val_loss: 0.8848
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step - accuracy: 0.2917 - loss: 1.4663 - val_accuracy: 0.1667 - val_loss: 1.0006
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step - accuracy: 0.4167 - loss: 1.4183 - val_accuracy: 0.1667 - val_loss: 1.1597
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step - accuracy: 0.5417 - loss: 1.2993 - val_accuracy: 0.1667 - val_loss: 1.1482
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step - accuracy: 0.5000 - loss: 1.1610 - val_accuracy: 0.6667 - val_loss: 0.9535
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━

### Step 4 Evaluation

In [5]:
# src/har_ternary_classification.py

from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, f1_score
import json
# Evaluate the model
y_pred = model.predict(X_test_segments)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test_segments, axis=1)

accuracy = accuracy_score(y_true_classes, y_pred_classes)
precision = precision_score(y_true_classes, y_pred_classes, average='weighted')
recall = recall_score(y_true_classes, y_pred_classes, average='weighted')
f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')
conf_matrix = confusion_matrix(y_true_classes, y_pred_classes)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print(f'Confusion Matrix:\n{conf_matrix}')

# # Save the evaluation metrics
# metrics = {
#     'accuracy': accuracy,
#     'precision': precision,
#     'recall': recall,
#     'f1_score': f1,
#     'confusion_matrix': conf_matrix.tolist()  # Convert numpy array to list for JSON serialization
# }

# with open('../outputs/har_ternary_classifier_metrics.json', 'w') as f:
#     json.dump(metrics, f)

# # Save the trained model
# model.save('../outputs/har_ternary_classifier.h5')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
Accuracy: 0.7272727272727273
Precision: 0.8636363636363636
Recall: 0.7272727272727273
F1 Score: 0.7359307359307359
Confusion Matrix:
[[2 0 2]
 [0 3 1]
 [0 0 3]]


In [6]:
import json

# Save the evaluation metrics
metrics = {
    'accuracy': accuracy,
    'precision': precision,
    'recall': recall,
    'f1_score': f1,
    'confusion_matrix': conf_matrix.tolist()  # Convert numpy array to list for JSON serialization
}

with open('../outputs/har_ternary_classifier_metrics.json', 'w') as f:
    json.dump(metrics, f)
    

In [7]:
model.save('../outputs/har_ternary_classifier.h5')

