In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import StandardScaler
import os
import numpy as np

class ActivityDataset(Dataset):
    def __init__(self, root, scaler=None):
        self.root = root
        self.scaler = scaler
        self.data = []
        self.label = []
        
        for file in os.listdir(self.root):
            df = pd.read_csv(os.path.join(self.root, file))
            try:
                df.dropna(inplace=True)  # Remove rows with NaN values
                self.data.append(df.drop(columns=['activity_label', 'Unnamed: 0']).values)
                self.label.append(df['activity_label'].values)
            except:
                print(f"Error processing file: {file}")
        
        self.data = np.vstack(self.data)
        self.label = np.hstack(self.label)
        
        if self.scaler is not None:
            self.data = self.scaler.transform(self.data)
        else:
            self.scaler = StandardScaler()
            self.data = self.scaler.fit_transform(self.data)
    
    def __len__(self):
        return len(self.label)
    
    def __getitem__(self, idx):
        features = torch.tensor(self.data[idx], dtype=torch.float32)
        label = torch.tensor(self.label[idx], dtype=torch.long)
        return features, label


In [None]:
df = pd.read_csv("./final_data/MVN-J-Boning-90-001.csv")
df.columns

In [None]:
df.shape

In [None]:
dataset = ActivityDataset(root="final_data")
len(dataset)

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import numpy as np

# # Define the deep learning model
# class ActivityClassifier(nn.Module):
#     def __init__(self, input_dim, hidden_dim, output_dim):
#         super(ActivityClassifier, self).__init__()
        
#         # Increase the depth by adding more layers and units
#         self.fc1 = nn.Linear(input_dim, hidden_dim * 2)   # Double the hidden units
#         self.relu1 = nn.ReLU()
#         self.dropout1 = nn.Dropout(0.3)
        
#         self.fc2 = nn.Linear(hidden_dim * 2, hidden_dim * 2)
#         self.relu2 = nn.ReLU()
#         self.dropout2 = nn.Dropout(0.3)
        
#         self.fc3 = nn.Linear(hidden_dim * 2, hidden_dim * 2)
#         self.relu3 = nn.ReLU()
#         self.dropout3 = nn.Dropout(0.3)

#         self.fc4 = nn.Linear(hidden_dim * 2, hidden_dim)  # Add more depth with a larger hidden layer
#         self.relu4 = nn.ReLU()
#         self.dropout4 = nn.Dropout(0.3)
        
#         self.fc5 = nn.Linear(hidden_dim, hidden_dim)
#         self.relu5 = nn.ReLU()
#         self.dropout5 = nn.Dropout(0.3)
        
#         self.fc6 = nn.Linear(hidden_dim, output_dim)
    
#     def forward(self, x):
#         x = self.fc1(x)
#         x = self.relu1(x)
#         x = self.dropout1(x)
        
#         x = self.fc2(x)
#         x = self.relu2(x)
#         x = self.dropout2(x)
        
#         x = self.fc3(x)
#         x = self.relu3(x)
#         x = self.dropout3(x)

#         x = self.fc4(x)
#         x = self.relu4(x)
#         x = self.dropout4(x)
        
#         x = self.fc5(x)
#         x = self.relu5(x)
#         x = self.dropout5(x)
        
#         x = self.fc6(x)
#         return x

# # Parameters
# input_dim = 828        # Number of input features
# hidden_dim = 1024       # Increased hidden units
# output_dim = 9         # Number of activity classes
# learning_rate = 0.0001
# num_epochs = 20

# Define a Transformer-based activity classifier model
class ActivityClassifierTransformer(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_layers):
        super(ActivityClassifierTransformer, self).__init__()
        
        self.embedding = nn.Linear(input_dim, hidden_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=num_heads)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        # Final fully connected layer
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        # Embed the input features
        x = self.embedding(x)
        
        # Apply the transformer encoder
        x = self.transformer(x)
        
        # Take the mean of the transformer output (pooling) for classification
        x = x.mean(dim=1)
        
        # Output layer
        x = self.fc(x)
        return x

# Parameters
input_dim = 828        # Number of input features
hidden_dim = 256       # Hidden dimension for transformer
output_dim = 9         # Number of activity classes
num_heads = 8          # Number of attention heads in transformer
num_layers = 4         # Number of transformer layers
learning_rate = 0.001
num_epochs = 20

# Initialize the model, loss function, and optimizer
model = ActivityClassifierTransformer(input_dim, hidden_dim, output_dim, num_heads, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Initialize dataset and split into training and validation sets
dataset = ActivityDataset(root="final_data")
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# # # Initialize the model, loss function, and optimizer
# model = ActivityClassifier(input_dim, hidden_dim, output_dim)
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# # Training loop
# for epoch in range(num_epochs):
#     model.train()
#     for features, labels in train_loader:
#         optimizer.zero_grad()
#         outputs = model(features)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
    
#     # Validation step
#     model.eval()
#     val_loss = 0.0
#     correct = 0
#     total = 0
#     with torch.no_grad():
#         for features, labels in val_loader:
#             outputs = model(features)
#             loss = criterion(outputs, labels)
#             val_loss += loss.item()
            
#             _, predicted = torch.max(outputs, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
    
#     val_accuracy = correct / total
#     print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Validation Loss: {val_loss/len(val_loader):.4f}, Validation Accuracy: {val_accuracy:.4f}")

# Training loop
for epoch in range(num_epochs):
    model.train()
    for features, labels in train_loader:
        # Ensure features have the correct shape for the transformer
        features = features.view(features.size(0), 1, input_dim)  # Reshape to [batch_size, seq_length, feature_dim]
        
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    # Validation step
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for features, labels in val_loader:
            features = features.view(features.size(0), 1, input_dim)  # Reshape for validation as well
            outputs = model(features)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    val_accuracy = correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Validation Loss: {val_loss/len(val_loader):.4f}, Validation Accuracy: {val_accuracy:.4f}")

# After training, evaluate the model on the validation set
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Print classification report
from sklearn.metrics import classification_report
print("\nClassification Report:\n", classification_report(all_labels, all_preds))




Epoch 1/20, Loss: 2.1519, Validation Loss: 1.2953, Validation Accuracy: 0.5464
Epoch 2/20, Loss: 2.0044, Validation Loss: 1.3417, Validation Accuracy: 0.5623
Epoch 3/20, Loss: 2.0550, Validation Loss: 1.4045, Validation Accuracy: 0.5000
Epoch 4/20, Loss: 1.9899, Validation Loss: 1.4157, Validation Accuracy: 0.4989
Epoch 5/20, Loss: 1.8943, Validation Loss: 1.4829, Validation Accuracy: 0.4779
Epoch 6/20, Loss: 2.0288, Validation Loss: 1.6297, Validation Accuracy: 0.4779
Epoch 7/20, Loss: 1.9525, Validation Loss: 1.6171, Validation Accuracy: 0.4779
Epoch 8/20, Loss: 1.9157, Validation Loss: 1.6292, Validation Accuracy: 0.4779


KeyboardInterrupt: 