## Data Pre-Processing

In [5]:
import pdm_functions as fns
import torch 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

In [6]:
f_normal = pd.read_csv('../dataset/5528_drop_imbalance_normal.csv')
f_error = pd.read_csv('../dataset/5528_drop_imbalance_error.csv')

In [7]:
f_normal['created_at'] = pd.to_datetime(f_normal['created_at'], unit='s')
f_normal = f_normal.sort_values(by='created_at')
 
f_error['created_at'] = pd.to_datetime(f_error['created_at'], unit='s')
f_error = f_error.sort_values(by='created_at')

f_normal = f_normal.drop(columns=['asset_id', 'created_at', 'created_at_datetime', 'looseness_health', 'time','misalignment_health', 'bearing_health', 'imbalance_health'])
f_error = f_error.drop(columns=['asset_id', 'created_at', 'created_at_datetime', 'looseness_health', 'time','misalignment_health', 'bearing_health', 'imbalance_health'])

In [8]:
f_normal_tensor = torch.tensor(f_normal.values, dtype=torch.float32)
f_error_tensor = torch.tensor(f_error.values, dtype=torch.float32)

f_normal_tensor.shape, f_error_tensor.shape

(torch.Size([2220, 38]), torch.Size([192, 38]))

In [9]:
f_normal_test = f_normal_tensor[:1200]
f_normal_train = f_normal_tensor[1200:]

f_normal_train.shape, f_normal_test.shape

(torch.Size([1020, 38]), torch.Size([1200, 38]))

In [14]:
f_normal_label = torch.ones((1200))
f_error_label = torch.zeros((192))

In [15]:
f_normal_label.shape

torch.Size([1200])

In [16]:
f_test = torch.concat((f_normal_test, f_error_tensor), dim = 0) 
f_test_label = torch.concat((f_normal_label, f_error_label), dim = 0)

In [17]:
f_test.shape, f_test_label.shape

(torch.Size([1392, 38]), torch.Size([1392]))

In [21]:
len(f_test_label[f_test_label != 1])

192

In [24]:
f_normal_train.shape

torch.Size([1020, 38])

In [25]:
f_normal_train = f_normal_train.reshape((-1, 1, 12, 38))
f_normal_train = fns.multi_datasets_stacks(f_normal_train, multi_dim = 13, num_groups = 12 )
f_normal_train.shape

100%|█████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 406.28it/s]


torch.Size([1105, 1, 12, 38])

In [26]:
f_normal_train = f_normal_train.reshape(-1, 38)
f_normal_train.shape

torch.Size([13260, 38])

In [54]:
torch.save({'train' : f_normal_train, 'test' : f_test, 'test_label': f_test_label}, 'datasets/auto_encoder_supervised.pt')

## <span style='color:white'> ================================================================================================================================================================= </span>

## Model Running

In [55]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset

In [56]:
data = torch.load('datasets/auto_encoder_supervised.pt')

  data = torch.load('datasets/auto_encoder_supervised.pt')


In [57]:
train_data = data['train']
test_data =data['test']
test_label = data['test_label']

In [58]:
scaler = MinMaxScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.fit_transform(test_data)

In [59]:
train_data = torch.tensor(train_data, dtype = torch.float32)
test_data = torch.tensor(test_data, dtype = torch.float32)

In [60]:
import torch.nn as nn

class AnomalyDetector(nn.Module):
    def __init__(self):
        super(AnomalyDetector, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(38, 16),
            nn.ReLU(),
            nn.Linear(16, 4),
            nn.ReLU(),
            nn.Linear(4, 2),
            nn.ReLU()
        )

        self.decoder = nn.Sequential(
            nn.Linear(2, 4),
            nn.ReLU(),
            nn.Linear(4, 16),
            nn.ReLU(),
            nn.Linear(16, 38),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Initialize model
autoencoder = AnomalyDetector()

In [40]:
criterion = nn.L1Loss()  # MAE in PyTorch
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.001)

In [61]:
# train_dataset = TensorDataset(train_data)
test_dataset = TensorDataset(test_data, test_label)

In [62]:
# dataloader_train = DataLoader(train_dataset, batch_size = 512, shuffle = True)
dataloader_test = DataLoader(test_dataset, batch_size = 512, shuffle = True)

In [63]:
num_epochs = 20

for epoch in range(num_epochs):
    autoencoder.train()
    running_loss = 0.0
    for x_batch in dataloader_train:
        # inputs = x_batch
        outputs = autoencoder(x_batch[0])
        loss = criterion(outputs, x_batch[0])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader_train)}")

Epoch [1/20], Loss: 0.31725832246817076
Epoch [2/20], Loss: 0.31728509297737706
Epoch [3/20], Loss: 0.3172836578809298
Epoch [4/20], Loss: 0.31725933803961825
Epoch [5/20], Loss: 0.3172763150471907
Epoch [6/20], Loss: 0.3172508397927651
Epoch [7/20], Loss: 0.31729149245298827
Epoch [8/20], Loss: 0.3172851949930191
Epoch [9/20], Loss: 0.3172650704017052
Epoch [10/20], Loss: 0.31728053322205174
Epoch [11/20], Loss: 0.31727377611857194
Epoch [12/20], Loss: 0.317266576565229
Epoch [13/20], Loss: 0.3172551015248665
Epoch [14/20], Loss: 0.31726726774985975
Epoch [15/20], Loss: 0.3172501279757573
Epoch [16/20], Loss: 0.3172668573948053
Epoch [17/20], Loss: 0.31728356503523314
Epoch [18/20], Loss: 0.31728410376952243
Epoch [19/20], Loss: 0.3172891277533311
Epoch [20/20], Loss: 0.3172932817385747


In [95]:
from sklearn.metrics import confusion_matrix

In [94]:
autoencoder.eval()  # Set the model to evaluation mode
total_loss = 0.0
# correct = 0
# error_correct = 0
# total = 0
threshold = 0.1

all_labels = []
all_predictions = []

with torch.no_grad():  # No gradients needed for evaluation
    for x_batch, labels in dataloader_test:  # Assuming labels are included in your test loader
        # print(x_batch.shape)
        outputs = autoencoder(x_batch)
        loss = criterion(outputs, x_batch)  # Calculate reconstruction loss
        total_loss += loss.item()
        # Calculate reconstruction error
        reconstruction_error = torch.mean((outputs - x_batch) ** 2, dim = 1 )
        # print(reconstruction_error)
        # Identify anomalies based on the threshold
        predictions = (reconstruction_error > threshold).float()        

        # Compare predictions with actual labels
        # error_mask = (labels == 0)
        # error_correct += ((predictions[error_mask] == 0).sum()).item()  
        # correct += (predictions == labels).sum().item()
        total += labels.size(0)
        all_labels.extend(labels.cpu().numpy())  # Move to CPU and convert to numpy
        all_predictions.extend(predictions.cpu().numpy())

# Calculate average loss and accuracy
conf_matrix = confusion_matrix(all_labels, all_predictions)
average_loss = total_loss / len(dataloader_test)
accuracy = correct / total

print(f"Test Loss: {average_loss:.4f}, Accuracy: {accuracy:.4f},")
print(conf_matrix)



Test Loss: 0.3377, Accuracy: 0.6509,
