In [2]:
import os
import zipfile
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import joblib
import shutil  
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# --- 32-feature extractor ---
def extract_32_features(acc_window: np.ndarray, gyro_window: np.ndarray) -> np.ndarray:
    """Summarise one accelerometer/gyroscope window as a 32-value vector.

    Layout of the result (each group is mean, std, min, max):
      * 12 values: columns 0..2 of ``acc_window``
      * 12 values: columns 0..2 of ``gyro_window``
      *  4 values: per-row Euclidean norm of ``acc_window``
      *  4 values: per-row Euclidean norm of ``gyro_window``

    Args:
        acc_window: 2-D array; columns 0..2 are read as the x, y, z axes.
        gyro_window: 2-D array; columns 0..2 are read as the x, y, z axes.

    Returns:
        1-D ``float32`` array of length 32.
    """
    def summarize(signal):
        # The four summary statistics used for every channel.
        return [signal.mean(), signal.std(), signal.min(), signal.max()]

    features = []
    # Per-axis stats: accelerometer first, then gyroscope.
    for window in (acc_window, gyro_window):
        for axis in range(3):
            features += summarize(window[:, axis])

    # Magnitude stats (norm taken across the columns of each row).
    acc_magnitude = np.linalg.norm(acc_window, axis=1)
    gyro_magnitude = np.linalg.norm(gyro_window, axis=1)
    features += summarize(acc_magnitude)
    features += summarize(gyro_magnitude)

    return np.array(features, dtype=np.float32)

# 1. Extract dataset
# Extraction is disabled here; the snippet below is kept for reference and
# unpacks the archive when uncommented (paths point at /content/).
# zip_path = "/content/UCI HAR Dataset.zip"  # adjust if needed
# extract_path = "/content/"
# with zipfile.ZipFile(zip_path, 'r') as z:
#     z.extractall(extract_path)
# Root directory of the dataset: the per-split folders and
# activity_labels.txt are resolved relative to this path.
DATASET_PATH = 'UCI HAR Dataset'

# 2. Dataset class
class HARDataset(Dataset):
    """Feature/label dataset built from the raw inertial signals of one split.

    On construction the six signal files (``total_acc_{x,y,z}`` and
    ``body_gyro_{x,y,z}``) of the requested split are read, the three axes of
    each sensor are stacked along a new last axis, and every window is turned
    into a feature vector with ``extract_32_features``.

    Attributes:
        dataset_path: root directory given by the caller.
        X: array with one feature row per window.
        y: ``int64`` labels, shifted to be zero-based.
    """

    def __init__(self, dataset_path, split='train'):
        self.dataset_path = dataset_path
        split_dir = os.path.join(dataset_path, split)
        signals_dir = os.path.join(split_dir, 'Inertial Signals')

        def read_axes(prefix):
            # Load the x/y/z files of one sensor and stack them on axis 2.
            per_axis = [
                np.loadtxt(os.path.join(signals_dir, f'{prefix}_{axis}_{split}.txt'))
                for axis in ('x', 'y', 'z')
            ]
            return np.stack(per_axis, axis=2)

        acc = read_axes('total_acc')
        gyro = read_axes('body_gyro')

        # One feature vector per window (first axis of the stacked signals).
        n_windows = acc.shape[0]
        self.X = np.array(
            [extract_32_features(acc[k], gyro[k]) for k in range(n_windows)]
        )

        # Labels in the file start at 1; shift them to start at 0.
        labels = np.loadtxt(os.path.join(split_dir, f'y_{split}.txt'), dtype=int)
        self.y = (labels - 1).astype(np.int64)

    def __len__(self):
        """Number of samples (length of the label vector)."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

# 3. Preprocessing
train_dataset = HARDataset(DATASET_PATH, 'train')
test_dataset = HARDataset(DATASET_PATH, 'test')

# Scaler and label encoder are fitted on the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler().fit(train_dataset.X)
train_X = scaler.transform(train_dataset.X)
test_X = scaler.transform(test_dataset.X)
label_enc = LabelEncoder().fit(train_dataset.y)
train_y = label_enc.transform(train_dataset.y)
test_y = label_enc.transform(test_dataset.y)

# Hold out 20% of the scaled training data for validation, stratified by label.
X_tr, X_val, y_tr, y_val = train_test_split(
    train_X,
    train_y,
    test_size=0.2,
    stratify=train_y,
    random_state=42,
)

# Store tensors back on the dataset objects; validation gets its own dataset.
train_dataset.X, train_dataset.y = torch.from_numpy(X_tr), torch.from_numpy(y_tr)
test_dataset.X, test_dataset.y = torch.from_numpy(test_X), torch.from_numpy(test_y)
val_dataset = torch.utils.data.TensorDataset(
    torch.from_numpy(X_val), torch.from_numpy(y_val)
)

# 4. DataLoaders (only the training loader shuffles)
batch_size = 1024
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 5. Model definition
class HARNet(nn.Module):
    """Fully connected classifier over fixed-length feature vectors.

    Three hidden stages of widths 256, 128 and 64, each made of
    Linear -> ReLU -> BatchNorm1d -> Dropout (p = 0.5, 0.4, 0.3), followed by
    a final Linear layer that outputs ``num_classes`` logits.

    Args:
        input_dim: number of input features per sample.
        num_classes: number of output logits.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        # (hidden width, dropout probability) for each stage, in order.
        stages = ((256, 0.5), (128, 0.4), (64, 0.3))

        layers = []
        width_in = input_dim
        for width_out, drop_p in stages:
            layers += [
                nn.Linear(width_in, width_out),
                nn.ReLU(),
                nn.BatchNorm1d(width_out),
                nn.Dropout(drop_p),
            ]
            width_in = width_out
        layers.append(nn.Linear(width_in, num_classes))

        # Kept under the attribute name `net` so state_dict keys stay the same.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch ``x``."""
        return self.net(x)

# initialize
input_dim = train_dataset.X.shape[1]
num_classes = len(label_enc.classes_)
model = HARNet(input_dim, num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# 6. Training loop with validation & checkpoint
best_val_loss = float('inf')
epochs, patience = 200, 15
patience_counter = 0
checkpoint_path = 'artifacts32/best_model.pth'
# The checkpoint is written from inside the loop, so its directory has to
# exist before training starts; torch.save does not create it.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

for epoch in range(1, epochs+1):
    # ---- training pass ----
    model.train()
    train_loss, correct, total = 0, 0, 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        logits = model(X_batch)
        loss = criterion(logits, y_batch)
        loss.backward()
        optimizer.step()
        # criterion averages over the batch; weight by batch size so the
        # epoch figure is a per-sample mean even with a smaller last batch.
        train_loss += loss.item() * X_batch.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == y_batch).sum().item()
        total += X_batch.size(0)
    train_loss /= total
    train_acc = correct/total

    # ---- validation pass (no gradients, eval-mode dropout/batch-norm) ----
    model.eval()
    val_loss, val_correct, val_total = 0, 0, 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            logits = model(X_batch)
            loss = criterion(logits, y_batch)
            val_loss += loss.item() * X_batch.size(0)
            preds = logits.argmax(dim=1)
            val_correct += (preds == y_batch).sum().item()
            val_total += X_batch.size(0)
    val_loss /= val_total
    val_acc = val_correct/val_total
    print(f"Epoch {epoch}: Train loss {train_loss:.4f}, acc {train_acc:.4f} | Val loss {val_loss:.4f}, acc {val_acc:.4f}")

    # early stopping: checkpoint on every improvement of the validation loss,
    # stop after `patience` consecutive epochs without one.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), checkpoint_path)
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping.")
            break

# 7. Evaluation on test set
from sklearn.metrics import classification_report

# Restore the best checkpoint written during training. map_location places the
# stored tensors on the current device, so a checkpoint saved on CUDA can still
# be loaded when only the CPU is available.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()

# Collect predictions and targets for the whole test split.
all_preds, all_targets = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        logits = model(X_batch)
        # Predicted class = index of the largest logit; moved to CPU for sklearn.
        all_preds.extend(logits.argmax(dim=1).cpu().numpy())
        all_targets.extend(y_batch.numpy())

# Per-class precision/recall/F1, labelled with the encoder's class values.
print(classification_report(all_targets, all_preds, target_names=label_enc.classes_.astype(str)))

# 8. Save artifacts
# Every file below is written into this directory, so this is the directory
# that must be created (the previous code created 'artifacts' instead).
artifacts_dir = 'artifacts32'
os.makedirs(artifacts_dir, exist_ok=True)

# Model weights plus the fitted preprocessing objects needed to reuse them.
torch.save(model.state_dict(), os.path.join(artifacts_dir, 'best_model.pth'))
joblib.dump(scaler, os.path.join(artifacts_dir, 'scaler.joblib'))
joblib.dump(label_enc, os.path.join(artifacts_dir, 'label_encoder.joblib'))
# Keep the dataset's label file next to the model.
shutil.copy(
    os.path.join(DATASET_PATH, 'activity_labels.txt'),
    os.path.join(artifacts_dir, 'activity_labels.txt')
)

print("Artifacts saved to ./artifacts32/")


Epoch 1: Train loss 1.3937, acc 0.4882 | Val loss 1.5314, acc 0.5452
Epoch 2: Train loss 0.8464, acc 0.7269 | Val loss 1.0891, acc 0.7845
Epoch 3: Train loss 0.6630, acc 0.7800 | Val loss 0.7267, acc 0.8083
Epoch 4: Train loss 0.5526, acc 0.8158 | Val loss 0.5191, acc 0.8470
Epoch 5: Train loss 0.4848, acc 0.8332 | Val loss 0.4155, acc 0.8647
Epoch 6: Train loss 0.4408, acc 0.8507 | Val loss 0.3612, acc 0.8783
Epoch 7: Train loss 0.4053, acc 0.8629 | Val loss 0.3260, acc 0.8838
Epoch 8: Train loss 0.3753, acc 0.8689 | Val loss 0.3044, acc 0.8851
Epoch 9: Train loss 0.3476, acc 0.8791 | Val loss 0.2863, acc 0.8899
Epoch 10: Train loss 0.3377, acc 0.8822 | Val loss 0.2737, acc 0.8967
Epoch 11: Train loss 0.3261, acc 0.8796 | Val loss 0.2634, acc 0.9014
Epoch 12: Train loss 0.3181, acc 0.8825 | Val loss 0.2604, acc 0.8926
Epoch 13: Train loss 0.2982, acc 0.8925 | Val loss 0.2485, acc 0.9062
Epoch 14: Train loss 0.2870, acc 0.8934 | Val loss 0.2430, acc 0.9021
Epoch 15: Train loss 0.2829, 

In [5]:
import os
import zipfile
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import joblib
import shutil
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# --- 32-feature extractor (axis statistics + magnitude statistics) ---
def extract_32_features(acc_window: np.ndarray, gyro_window: np.ndarray) -> np.ndarray:
    """Build the 32-value statistical feature vector for one sensor window.

    Eight channels are summarised, in this order: accelerometer columns
    0, 1, 2; gyroscope columns 0, 1, 2; accelerometer magnitude; gyroscope
    magnitude. Each channel contributes mean, std, min and max.

    Args:
        acc_window: 2-D array; columns 0..2 are read as the x, y, z axes.
        gyro_window: 2-D array; columns 0..2 are read as the x, y, z axes.

    Returns:
        1-D ``float32`` array of length 32.
    """
    def four_stats(series):
        return (series.mean(), series.std(), series.min(), series.max())

    # Assemble the channels in output order.
    channels = [acc_window[:, axis] for axis in range(3)]
    channels += [gyro_window[:, axis] for axis in range(3)]
    # Magnitude = Euclidean norm across the columns of each row.
    channels.append(np.linalg.norm(acc_window, axis=1))
    channels.append(np.linalg.norm(gyro_window, axis=1))

    values = [stat for series in channels for stat in four_stats(series)]
    return np.array(values, dtype=np.float32)

# 1. Extract dataset
# Extraction is disabled here; the snippet below is kept for reference and
# unpacks the archive when uncommented (paths point at /content/).
# zip_path = "/content/UCI HAR Dataset.zip"  # adjust if needed
# extract_path = "/content/"
# with zipfile.ZipFile(zip_path, 'r') as z:
#     z.extractall(extract_path)
# Root directory of the dataset: the per-split folders and
# activity_labels.txt are resolved relative to this path.
DATASET_PATH = 'UCI HAR Dataset'

# 2. Dataset class
class HARDataset(Dataset):
    """Feature/label dataset built from the raw inertial signals of one split.

    Attributes:
        dataset_path: root directory given by the caller.
        split: name of the split sub-directory (also the file-name suffix).
        X: array with one feature row per window.
        y: ``int64`` labels, shifted to be zero-based.
    """

    def __init__(self, dataset_path, split='train'):
        self.dataset_path = dataset_path
        self.split = split
        features, labels = self._load_data()
        self.X = features
        self.y = labels

    def _load_data(self):
        """Read the split's signal and label files and return ``(X, y)``."""
        split_dir = os.path.join(self.dataset_path, self.split)
        signals_dir = os.path.join(split_dir, 'Inertial Signals')

        def stack_axes(prefix):
            # Load the x/y/z files of one sensor and stack them on axis 2.
            per_axis = []
            for axis in ('x', 'y', 'z'):
                path = os.path.join(signals_dir, f'{prefix}_{axis}_{self.split}.txt')
                per_axis.append(np.loadtxt(path))
            return np.stack(per_axis, axis=2)

        acc = stack_axes('total_acc')
        gyro = stack_axes('body_gyro')

        # One feature vector per window (first axis of the stacked signals).
        n_windows = acc.shape[0]
        X = np.array([extract_32_features(acc[k], gyro[k]) for k in range(n_windows)])

        # Labels in the file start at 1; shift them to start at 0.
        raw_labels = np.loadtxt(os.path.join(split_dir, f'y_{self.split}.txt'), dtype=int)
        y = raw_labels - 1
        return X, y.astype(np.int64)

    def __len__(self):
        """Number of samples (length of the label vector)."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

# 3. Prepare datasets and scalers
train_ds = HARDataset(DATASET_PATH, 'train')
test_ds = HARDataset(DATASET_PATH, 'test')

# Scaler and label encoder learn from the training split only; the test
# split is transformed with the already-fitted objects.
scaler = StandardScaler()
train_X = scaler.fit_transform(train_ds.X)
test_X = scaler.transform(test_ds.X)
label_enc = LabelEncoder()
train_y = label_enc.fit_transform(train_ds.y)
test_y = label_enc.transform(test_ds.y)

# Split validation: 20% of the scaled training data, stratified by label.
X_tr, X_val, y_tr, y_val = train_test_split(
    train_X,
    train_y,
    test_size=0.2,
    stratify=train_y,
    random_state=42,
)

# Convert to tensors and store them back on the dataset objects.
train_ds.X = torch.from_numpy(X_tr)
train_ds.y = torch.from_numpy(y_tr)
test_ds.X = torch.from_numpy(test_X)
test_ds.y = torch.from_numpy(test_y)
val_ds = torch.utils.data.TensorDataset(
    torch.from_numpy(X_val), torch.from_numpy(y_val)
)

# DataLoaders (only the training loader shuffles)
batch_size = 512  # reduced for more gradient updates
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size)
test_loader = DataLoader(test_ds, batch_size=batch_size)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 4. Improved Model with additional layer and LeakyReLU
class HARNet(nn.Module):
    """Fully connected classifier over fixed-length feature vectors.

    Four hidden stages of widths 512, 256, 128 and 64, each made of
    Linear -> LeakyReLU -> BatchNorm1d -> Dropout (p = 0.5, 0.4, 0.3, 0.2),
    followed by a final Linear layer that outputs ``num_classes`` logits.

    Args:
        input_dim: number of input features per sample.
        num_classes: number of output logits.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        # (hidden width, dropout probability) for each stage, in order.
        stages = ((512, 0.5), (256, 0.4), (128, 0.3), (64, 0.2))

        modules = []
        fan_in = input_dim
        for fan_out, drop_p in stages:
            modules.extend((
                nn.Linear(fan_in, fan_out),
                nn.LeakyReLU(),
                nn.BatchNorm1d(fan_out),
                nn.Dropout(drop_p),
            ))
            fan_in = fan_out
        modules.append(nn.Linear(fan_in, num_classes))

        # Kept under the attribute name `net` so state_dict keys stay the same.
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Return the class logits for a batch ``x``."""
        return self.net(x)

# Initialize
model = HARNet(train_ds.X.shape[1], len(label_enc.classes_)).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()
# Halve the learning rate after 5 epochs without a lower validation loss.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

# 5. Training loop
best_val_loss = float('inf')
epochs, patience = 100, 10
patience_counter = 0
checkpoint = 'artifacts/best_model.pth'
# The checkpoint is written from inside the loop, so its directory has to
# exist before training starts; torch.save does not create it.
os.makedirs(os.path.dirname(checkpoint), exist_ok=True)

for epoch in range(1, epochs+1):
    # ---- training pass ----
    model.train()
    total_loss, total_correct, total_samples = 0, 0, 0
    for Xb, yb in train_loader:
        Xb, yb = Xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(Xb)
        loss = criterion(out, yb)
        loss.backward()
        # Cap the global gradient norm at 1.0 before the optimizer step.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        # criterion averages over the batch; weight by batch size so the
        # epoch figure is a per-sample mean even with a smaller last batch.
        total_loss += loss.item() * Xb.size(0)
        preds = out.argmax(dim=1)
        total_correct += (preds == yb).sum().item()
        total_samples += Xb.size(0)
    train_loss = total_loss/total_samples
    train_acc = total_correct/total_samples

    # ---- validation pass (no gradients, eval-mode dropout/batch-norm) ----
    model.eval()
    val_loss, val_correct, val_samples = 0, 0, 0
    with torch.no_grad():
        for Xb, yb in val_loader:
            Xb, yb = Xb.to(device), yb.to(device)
            out = model(Xb)
            loss = criterion(out, yb)
            val_loss += loss.item() * Xb.size(0)
            preds = out.argmax(dim=1)
            val_correct += (preds == yb).sum().item()
            val_samples += Xb.size(0)
    val_loss /= val_samples
    val_acc = val_correct/val_samples
    # The scheduler watches the epoch's validation loss.
    scheduler.step(val_loss)
    print(f"Epoch {epoch}: Train {train_loss:.4f}/{train_acc:.4f} | Val {val_loss:.4f}/{val_acc:.4f}")

    # Early stopping: checkpoint on every improvement of the validation loss,
    # stop after `patience` consecutive epochs without one.
    if val_loss < best_val_loss:
        best_val_loss, patience_counter = val_loss, 0
        torch.save(model.state_dict(), checkpoint)
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping")
            break

# 6. Test evaluation
from sklearn.metrics import classification_report

# Restore the best checkpoint written during training. map_location places the
# stored tensors on the current device, so a checkpoint saved on CUDA can still
# be loaded when only the CPU is available.
model.load_state_dict(torch.load(checkpoint, map_location=device))
model.eval()

# Collect predictions and targets for the whole test split.
all_preds, all_targets = [], []
with torch.no_grad():
    for Xb, yb in test_loader:
        Xb = Xb.to(device)
        out = model(Xb)
        # Predicted class = index of the largest logit; moved to CPU for sklearn.
        all_preds.extend(out.argmax(dim=1).cpu().numpy())
        all_targets.extend(yb.numpy())

# Per-class precision/recall/F1, labelled with the encoder's class values.
print(classification_report(all_targets, all_preds, target_names=label_enc.classes_.astype(str)))

# 7. Save artifacts
# All files go into the relative 'artifacts' directory.
os.makedirs('artifacts', exist_ok=True)
# Model weights plus the fitted preprocessing objects needed to reuse them.
torch.save(model.state_dict(), checkpoint)
joblib.dump(scaler, 'artifacts/scaler.joblib')
joblib.dump(label_enc, 'artifacts/label_encoder.joblib')
# Keep the dataset's label file next to the model.
shutil.copy(
    os.path.join(DATASET_PATH, 'activity_labels.txt'),
    'artifacts/activity_labels.txt'
)
# Report the relative location actually written to; the old message named the
# filesystem root ('/artifacts/').
print("Artifacts saved to ./artifacts/")

Epoch 1: Train 0.9838/0.6419 | Val 0.9893/0.7648
Epoch 2: Train 0.5163/0.8240 | Val 0.4162/0.8532
Epoch 3: Train 0.3868/0.8653 | Val 0.3000/0.8702
Epoch 4: Train 0.3236/0.8832 | Val 0.2553/0.8906
Epoch 5: Train 0.2844/0.8954 | Val 0.2434/0.8994
Epoch 6: Train 0.2684/0.9015 | Val 0.2145/0.9157
Epoch 7: Train 0.2461/0.9089 | Val 0.2074/0.9177
Epoch 8: Train 0.2445/0.9124 | Val 0.2097/0.9225
Epoch 9: Train 0.2250/0.9167 | Val 0.2086/0.9232
Epoch 10: Train 0.2105/0.9235 | Val 0.1870/0.9273
Epoch 11: Train 0.2086/0.9189 | Val 0.1803/0.9320
Epoch 12: Train 0.2047/0.9247 | Val 0.1930/0.9252
Epoch 13: Train 0.1966/0.9259 | Val 0.1815/0.9252
Epoch 14: Train 0.1941/0.9259 | Val 0.1745/0.9327
Epoch 15: Train 0.1828/0.9325 | Val 0.1668/0.9361
Epoch 16: Train 0.1723/0.9342 | Val 0.1624/0.9341
Epoch 17: Train 0.1711/0.9345 | Val 0.1657/0.9354
Epoch 18: Train 0.1803/0.9305 | Val 0.1518/0.9388
Epoch 19: Train 0.1628/0.9366 | Val 0.1640/0.9341
Epoch 20: Train 0.1697/0.9339 | Val 0.1464/0.9395
Epoch 21: