## Data Pre-Processing

In [1]:
import pdm_functions as fns
import torch 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

In [2]:
f_normal = pd.read_csv('../dataset/5528_drop_imbalance_normal.csv')
f_error = pd.read_csv('../dataset/5528_drop_imbalance_error.csv')

In [3]:
f_normal['created_at'] = pd.to_datetime(f_normal['created_at'], unit='s')
f_normal = f_normal.sort_values(by='created_at')
 
f_error['created_at'] = pd.to_datetime(f_error['created_at'], unit='s')
f_error = f_error.sort_values(by='created_at')

f_normal = f_normal.drop(columns=['asset_id', 'created_at', 'created_at_datetime', 'looseness_health', 'time','misalignment_health', 'bearing_health', 'imbalance_health'])
f_error = f_error.drop(columns=['asset_id', 'created_at', 'created_at_datetime', 'looseness_health', 'time','misalignment_health', 'bearing_health', 'imbalance_health'])

In [4]:
f_normal_tensor = torch.tensor(f_normal.values, dtype=torch.float32)
f_error_tensor = torch.tensor(f_error.values, dtype=torch.float32)

f_normal_tensor.shape, f_error_tensor.shape

(torch.Size([2220, 38]), torch.Size([192, 38]))

In [5]:
f_normal_test = f_normal_tensor[:1200]
f_normal_train = f_normal_tensor[1200:]

f_normal_train.shape, f_normal_test.shape

(torch.Size([1020, 38]), torch.Size([1200, 38]))

In [6]:
f_normal_label = torch.ones((1200))
f_error_label = torch.zeros((192))

In [7]:
f_normal_label.shape

torch.Size([1200])

In [8]:
f_test = torch.concat((f_normal_test, f_error_tensor), dim = 0) 
f_test_label = torch.concat((f_normal_label, f_error_label), dim = 0)

In [9]:
f_test.shape, f_test_label.shape

(torch.Size([1392, 38]), torch.Size([1392]))

In [10]:
len(f_test_label[f_test_label != 1])

192

In [11]:
f_normal_train.shape

torch.Size([1020, 38])

In [12]:
f_normal_train = f_normal_train.reshape((-1, 1, 12, 38))
f_normal_train = fns.multi_datasets_stacks(f_normal_train, multi_dim = 13, num_groups = 12 )
f_normal_train.shape

100%|█████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 342.11it/s]


torch.Size([1105, 1, 12, 38])

In [13]:
f_normal_train = f_normal_train.reshape(-1, 38)
f_normal_train.shape

torch.Size([13260, 38])

In [14]:
torch.save({'train' : f_normal_train, 'test' : f_test, 'test_label': f_test_label}, 'datasets/auto_encoder_supervised.pt')

In [15]:
data = torch.load('datasets/auto_encoder_supervised.pt')

  data = torch.load('datasets/auto_encoder_supervised.pt')


In [16]:
train_data = data['train']
test_data =data['test']
test_label = data['test_label']

In [17]:
scaler = MinMaxScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.fit_transform(test_data)

In [18]:
train_data = torch.tensor(train_data, dtype = torch.float32)
test_data = torch.tensor(test_data, dtype = torch.float32)

In [19]:
train_data = train_data.reshape((-1, 456))
test_data = test_data.reshape((-1, 456))
train_data.shape, test_data.shape

(torch.Size([1105, 456]), torch.Size([116, 456]))

In [20]:
test_label = test_label.reshape((-1,12))[:,0]
test_label.shape

torch.Size([116])

## <span style='color:white'> ================================================================================================================================================================= </span>

## Model Running

In [21]:
import torch.nn as nn

class AnomalyDetector(nn.Module):
    def __init__(self):
        super(AnomalyDetector, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(456, 228),
            nn.ReLU(),
            nn.Linear(228, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 4),
            nn.ReLU(),
            nn.Linear(4, 2),
            nn.ReLU()
        )

        self.decoder = nn.Sequential(
            nn.Linear(2, 4),
            nn.ReLU(),
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 228),
            nn.ReLU(),
            nn.Linear(228, 456),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Initialize model
autoencoder = AnomalyDetector()

In [22]:
criterion = nn.L1Loss()  # MAE in PyTorch
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.001)

In [23]:
train_dataset = TensorDataset(train_data)
test_dataset = TensorDataset(test_data, test_label)

In [24]:
dataloader_train = DataLoader(train_dataset, batch_size = 512, shuffle = True)
dataloader_test = DataLoader(test_dataset, batch_size = 512, shuffle = True)

In [25]:
num_epochs = 20

for epoch in range(num_epochs):
    autoencoder.train()
    running_loss = 0.0
    for x_batch in dataloader_train:
        # inputs = x_batch
        outputs = autoencoder(x_batch[0])
        loss = criterion(outputs, x_batch[0])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader_train)}")

Epoch [1/20], Loss: 0.32682206233342487
Epoch [2/20], Loss: 0.3228279451529185
Epoch [3/20], Loss: 0.31604917844136554
Epoch [4/20], Loss: 0.3058888912200928
Epoch [5/20], Loss: 0.28914694984753925
Epoch [6/20], Loss: 0.26719648639361065
Epoch [7/20], Loss: 0.2352629005908966
Epoch [8/20], Loss: 0.1932392716407776
Epoch [9/20], Loss: 0.149220272898674
Epoch [10/20], Loss: 0.12060920894145966
Epoch [11/20], Loss: 0.11775658776362737
Epoch [12/20], Loss: 0.12220300237337749
Epoch [13/20], Loss: 0.1216493546962738
Epoch [14/20], Loss: 0.11789658665657043
Epoch [15/20], Loss: 0.11205101509888966
Epoch [16/20], Loss: 0.10610700647036235
Epoch [17/20], Loss: 0.10250098258256912
Epoch [18/20], Loss: 0.10433249175548553
Epoch [19/20], Loss: 0.10323866705099742
Epoch [20/20], Loss: 0.10107635706663132


In [26]:
from sklearn.metrics import confusion_matrix

In [50]:
autoencoder.eval()  # Set the model to evaluation mode
total_loss = 0.0
correct = 0
# error_correct = 0
total = 0
correct_cnt = 0
neg_pred_cnt = 0

threshold = 0.048

all_labels = []
all_predictions = []

with torch.no_grad():  # No gradients needed for evaluation
    for x_batch, labels in dataloader_test:  # Assuming labels are included in your test loader
        # print(x_batch.shape)
        outputs = autoencoder(x_batch)
        loss = criterion(outputs, x_batch)  # Calculate reconstruction loss
        total_loss += loss.item()
        # Calculate reconstruction error
        reconstruction_error = torch.mean((outputs - x_batch) ** 2, dim = 1 )
        print(reconstruction_error)
        # Identify anomalies based on the threshold
        predictions = (reconstruction_error > threshold).float() 

        # Compare predictions with actual labels
        # error_mask = (labels == 0)
        # error_correct += ((predictions[error_mask] == 0).sum()).item()  
        # correct += (predictions == labels).sum().item()
        total += labels.size(0)
        all_labels.extend(labels.cpu().numpy())  # Move to CPU and convert to numpy
        all_predictions.extend(predictions.cpu().numpy())

# Calculate average loss and accuracy
conf_matrix = confusion_matrix(all_labels, all_predictions)
average_loss = total_loss / len(dataloader_test)
accuracy = correct / total

TN = conf_matrix[0, 0]  
FP = conf_matrix[0, 1] 
FN = conf_matrix[1, 0]  
TP = conf_matrix[1, 1]  
Neg_precision = TN/(FN+TN)
Neg_recall =TN/(FP+TN)

for i in range(len(all_labels)) :
    if all_labels[i] == 0 and all_predictions[i] == 0:
        correct_cnt += 1
for pred in all_predictions:
    if pred == 0:
        neg_pred_cnt += 1

print(f"Test Loss: {average_loss:.4f}, Accuracy: {accuracy:.4f},")
print(f'Neg_Precision: {Neg_precision:.4f} = {TN}/{FN + TN}, Neg_Recall: {Neg_recall:.4f} = {TN}/{FP + TN}, Neg_F1: {(Neg_precision * Neg_recall)/(Neg_precision + Neg_recall):.4f}') 
print(f'correct_cnt {correct_cnt}, neg_pred_cnt {neg_pred_cnt}')



tensor([0.0204, 0.0512, 0.0191, 0.0124, 0.0216, 0.0261, 0.0114, 0.0349, 0.0200,
        0.0492, 0.0308, 0.0304, 0.0709, 0.0337, 0.0139, 0.0375, 0.0409, 0.0530,
        0.0693, 0.0365, 0.0528, 0.0146, 0.0210, 0.0346, 0.0685, 0.0368, 0.0688,
        0.0306, 0.0292, 0.0436, 0.0676, 0.0278, 0.0319, 0.0346, 0.0370, 0.0294,
        0.0690, 0.0311, 0.0217, 0.0574, 0.0298, 0.0609, 0.0148, 0.0539, 0.0474,
        0.0367, 0.0361, 0.0274, 0.0500, 0.0229, 0.0285, 0.0225, 0.0299, 0.0382,
        0.0269, 0.0225, 0.0408, 0.0221, 0.0183, 0.0240, 0.0419, 0.0353, 0.0386,
        0.0277, 0.0337, 0.0333, 0.0258, 0.0451, 0.0115, 0.0303, 0.0236, 0.0251,
        0.0193, 0.0470, 0.0130, 0.0254, 0.0669, 0.0474, 0.0434, 0.0175, 0.0171,
        0.0398, 0.0770, 0.0420, 0.0497, 0.0545, 0.0367, 0.0423, 0.0349, 0.0174,
        0.0649, 0.0482, 0.0364, 0.0530, 0.0367, 0.0162, 0.0222, 0.0357, 0.0316,
        0.0352, 0.0784, 0.0588, 0.0205, 0.0286, 0.0190, 0.0356, 0.0438, 0.0342,
        0.0250, 0.0123, 0.0165, 0.0510, 