In [159]:
# First, let's import some modules.
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from torch.utils.data import TensorDataset, DataLoader
import torch

Let's define some helper functions to segment the timeseries data to segments of 25 datapoints, with each having a 10-datapoint overlap with the previous and next segments. All the segments have purely the same activity label.

In [170]:
import numpy as np

def segment_data(df, window_size=25, step_size=15):
    segments = []
    labels = []
    for start in range(0, len(df) - window_size, step_size):
        end = start + window_size
        segment = df.iloc[start:end]
        if len(segment) == window_size:
            segment_data = segment[["x", "y", "z"]].values
            segments.append(segment_data)
            label = segment["activity"].mode()[0]
            labels.append(label)
    return np.array(segments), np.array(labels)

def segment_each_activity(df, window_size=25, step_size=15):
    all_segments = []
    all_labels = []
    for activity in df["activity"].unique():
        activity_data = df[df["activity"] == activity]
        
        segments, labels = segment_data(activity_data, window_size, step_size)
        
        all_segments.append(segments)
        all_labels.append(labels)

    all_segments = np.concatenate(all_segments, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    return all_segments, all_labels


Read the data and encode the activity label. Also, let's create a dictionary to create human-readable labels for the activities according to the [dataset description](https://archive.ics.uci.edu/ml/machine-learning-databases/00507/WISDM-dataset-description.pdf).

In [161]:
activity_dict = {1: "Walking", 2: "Jogging", 3: "Stairs", 4: "Sitting", 5: "Standing",
                 6: "Typing", 7: "Brushing Teeth", 8: "Eating Soup", 9: "Eating Chips",
                 10: "Eating Pasta", 11: "Drinking from Cup", 12: "Eating Sandwich",
                 13: "Kicking (Soccer Ball)", 14: "Playing Catch w/Tennis Ball",
                 15: "Dribbling (Basketball)", 16: "Writing", 17: "Clapping", 18: "Folding Clothes"
                 }

phone_accel = pd.read_csv("./raw/phone/accel/data_1600_accel_phone.txt", header=None, names=["user", "activity", "timestamp", "x", "y", "z"])
phone_gyro = pd.read_csv("./raw/phone/gyro/data_1600_gyro_phone.txt", header=None, names=["user", "activity", "timestamp", "x", "y", "z"])
watch_accel = pd.read_csv("./raw/watch/accel/data_1600_accel_watch.txt", header=None, names=["user", "activity", "timestamp", "x", "y", "z"])
watch_gyro = pd.read_csv("./raw/watch/gyro/data_1600_gyro_watch.txt", header=None, names=["user", "activity", "timestamp", "x", "y", "z"])

lencoder = LabelEncoder()
phone_accel["activity"] = lencoder.fit_transform(phone_accel["activity"])
phone_accel["z"] = phone_accel["z"].astype(str).str.replace(";", "", regex=False).astype(float)


In [162]:
for activity in phone_accel["activity"].unique():
    activity_data = phone_accel[phone_accel["activity"] == activity]
    print(f"Activity {activity} shape: {activity_data.shape}")

Activity 0 shape: (3574, 6)
Activity 1 shape: (3572, 6)
Activity 2 shape: (3573, 6)
Activity 3 shape: (3572, 6)
Activity 4 shape: (3572, 6)
Activity 5 shape: (3573, 6)
Activity 6 shape: (3573, 6)
Activity 7 shape: (3572, 6)
Activity 8 shape: (3571, 6)
Activity 9 shape: (3573, 6)
Activity 10 shape: (3573, 6)
Activity 11 shape: (3576, 6)
Activity 12 shape: (3573, 6)
Activity 13 shape: (3572, 6)
Activity 14 shape: (3574, 6)
Activity 15 shape: (3573, 6)
Activity 16 shape: (3573, 6)
Activity 17 shape: (3572, 6)


In [163]:
X, y = segment_each_activity(phone_accel)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)


In [164]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_ds = TensorDataset(X_train_tensor, y_train_tensor)
test_ds = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=64)

In [165]:
class CNNModel(nn.Module):
    def __init__(self, input_dim=3, output_dim=6):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=64, kernel_size=5)
        self.pool = nn.MaxPool1d(2)
        self.fc1 = nn.Linear(64 * ((X.shape[1] - 4) // 2), output_dim)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # [batch, channels, seq_len]
        x = self.pool(torch.relu(self.conv1(x)))
        x = x.view(x.size(0), -1)
        return self.fc1(x)


In [166]:
def train(model, dataloader, criterion, optimizer, device):
    model.train()
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()


In [167]:
def evaluate(model, dataloader, device):
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total


In [168]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CNNModel(output_dim=len(np.unique(y))).to(device)  # or CNNModel(...)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    train(model, train_loader, criterion, optimizer, device)
    acc = evaluate(model, test_loader, device)
    print(f"Epoch {epoch+1}, Accuracy: {acc:.2f}")


Epoch 1, Accuracy: 0.69
Epoch 2, Accuracy: 0.85
Epoch 3, Accuracy: 0.81
Epoch 4, Accuracy: 0.90
Epoch 5, Accuracy: 0.88
Epoch 6, Accuracy: 0.90
Epoch 7, Accuracy: 0.91
Epoch 8, Accuracy: 0.91
Epoch 9, Accuracy: 0.90
Epoch 10, Accuracy: 0.92


In [169]:
test_seg_X = X[1000]
test_seg_y = y[1000]
segment_tensor = torch.tensor(test_seg_X, dtype=torch.float32).unsqueeze(0)  

model.eval()
with torch.no_grad():
    output = model(segment_tensor.to(device))  # shape: (1, num_classes)
    predicted_label = torch.argmax(output, dim=1).item()
    print("Predicted activity:", predicted_label)


Predicted activity: 4
