In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

import matplotlib.pyplot as plt
import seaborn as sns


import sys
sys.path.append('src')
import src.VAE_LSTM_CNN as vae

onBody = pd.read_pickle('dataset/onBody.pkl')
onBody_val = pd.read_pickle('dataset/onBody_Val.pkl')
anomoly = pd.read_pickle('dataset/offBody.pkl')

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAMPLE_CHOPPED = 2000

criterion = nn.MSELoss()

In [2]:
class TripletDataset(Dataset):
    def __init__(self, normal_df, anomaly_df):
        self.normal_samples = normal_df
        self.anomaly_samples = anomaly_df

        self.class_labels = normal_df['dvc'].unique()

        self.data_by_class = {}
        for class_label in self.class_labels:
        # Filter samples by class and store them
            class_samples = normal_df[normal_df['dvc'] == class_label]
            self.data_by_class[class_label] = np.array(class_samples['freq_dev'])




        self.anchor_indices = []
        for class_label, samples in self.data_by_class.items():
            n = len(samples)
            self.anchor_indices.extend([(class_label, i) for i in range(n)])

        # # Split the normal samples into two halves for anchors and positives
        # x = train_test_split(self.normal_samples, test_size=0.5, random_state=42)
        # self.anchor_samples =  x[0].reset_index(drop=True)
        # self.positive_samples = x[1].reset_index(drop=True)
        
    def __len__(self):
        # The dataset length will be the number of normal samples divided by 2, 
        # since we're using half for anchors and half for positives
        return len(self.anchor_indices)
        # return len(self.anchor_samples)

    def __getitem__(self, idx):
        class_label, anchor_idx = self.anchor_indices[idx]
        n = len(self.data_by_class[class_label]) 
        anchor = self.data_by_class[class_label][anchor_idx]
        positive_idx = (anchor_idx + np.random.randint(1, n)) % n 
        positive = self.data_by_class[class_label][positive_idx]
        

        # choose the other class_labels randomly
        other_class_label = class_label
        while other_class_label == class_label:
            other_class_label = self.class_labels[np.random.randint(len(self.class_labels))]
        
        # Randomly select a negative sample from the other class
        negative1 = self.data_by_class[other_class_label][np.random.randint(len(self.data_by_class[other_class_label]))]
        
        # Randomly select a negative sample from the anomaly samples
        negative2 = self.anomaly_samples[np.random.randint(len(self.anomaly_samples))]

        #randomly select the negative between negative1 and negative2
        negative = negative1 if np.random.random() > 0.5 else negative2

        # negative = negative2

        # anchor = self.anchor_samples.iloc[idx]['freq_dev']
        # positive = self.positive_samples.iloc[idx]['freq_dev']
        # negative = self.anomaly_samples[np.random.randint(len(self.anomaly_samples))]


        # Convert to PyTorch tensors
        anchor = torch.tensor(anchor[1500:1700], dtype=torch.float).float().unsqueeze(0)
        positive = torch.tensor(positive[1500:1700], dtype=torch.float).float().unsqueeze(0)
        negative = torch.tensor(negative[1500:1700], dtype=torch.float).unsqueeze(0)
        
        return anchor, positive, negative

In [4]:
class TripletLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(TripletLoss, self).__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        distance_positive = (anchor - positive).pow(2).sum(1)
        distance_negative = (anchor - negative).pow(2).sum(1)
        losses = torch.relu(distance_positive - distance_negative + self.margin)
        return losses.mean()

In [5]:
#free up the GPU memory
torch.cuda.empty_cache()
# Assuming model is your neural network for embedding
batch_size = 16
margin  = 1

latent_dim = 3    # Latent space dimension


triplet_dataset = TripletDataset(onBody, anomoly)
triplet_dataloader = DataLoader(triplet_dataset, batch_size=batch_size, shuffle=True)
#validation
triplet_dataset_val = TripletDataset(onBody_val, anomoly)
triplet_dataloader_val = DataLoader(triplet_dataset_val, batch_size=batch_size, shuffle=True)

triplet_dataloader_plot = DataLoader(triplet_dataset, batch_size=1, shuffle=True)
triplet_dataloader_val_plot = DataLoader(triplet_dataset_val, batch_size=1, shuffle=True)

loss_function = TripletLoss(margin =margin).to(DEVICE) 


ONBODY_model = vae.CNNLSTMEmbeddingNet(input_length=200, num_channels=1 ,embedding_dim=latent_dim).to(DEVICE)
optimizer = optim.Adam(ONBODY_model.parameters(), lr=1e-4)

In [6]:
num_epochs = 100

for epoch in range(num_epochs):
    total_loss = 0
    for anchor, positive, negative in triplet_dataloader:
        anchor, positive, negative = anchor.to(DEVICE), positive.to(DEVICE), negative.to(DEVICE)
        print(anchor, positive, negative)
        optimizer.zero_grad()
        anchor_embed = ONBODY_model(anchor.view(16,200,1))
        positive_embed = ONBODY_model(positive.view(16,200,1))
        negative_embed = ONBODY_model(negative.view(16,200,1))
        print(anchor_embed, positive_embed, negative_embed)
        loss = loss_function(anchor_embed, positive_embed, negative_embed)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    # scheduler.step()
    # model.eval()
    with torch.no_grad():
        val_loss = 0
        for anchor, positive, negative in triplet_dataloader_val:
            anchor, positive, negative = anchor.to(DEVICE), positive.to(DEVICE), negative.to(DEVICE)
            anchor_embed = ONBODY_model(anchor.view(16,200,1))
            positive_embed = ONBODY_model(positive.view(16,200,1))
            negative_embed = ONBODY_model(negative.view(16,200,1))
            val_loss += loss_function(anchor_embed, positive_embed, negative_embed)
        print(f"Epoch {epoch+1}, Loss: {total_loss}, Val Loss: {val_loss.item()}") 


#save the model
torch.save(ONBODY_model.state_dict(), 'Models/TripletLoss.pth')

tensor([[[0.6098, 0.6313, 0.6512,  ..., 0.5715, 0.5852, 0.5982]],

        [[0.7751, 0.7675, 0.7567,  ..., 0.8413, 0.8278, 0.8108]],

        [[0.6741, 0.6628, 0.6474,  ..., 0.4722, 0.4672, 0.4633]],

        ...,

        [[0.7789, 0.7927, 0.8070,  ..., 0.7960, 0.8066, 0.8171]],

        [[0.8392, 0.8394, 0.8397,  ..., 0.8565, 0.8555, 0.8540]],

        [[0.6049, 0.6203, 0.6354,  ..., 0.5457, 0.5620, 0.5781]]],
       device='cuda:0') tensor([[[0.6281, 0.6122, 0.5954,  ..., 0.6602, 0.6446, 0.6291]],

        [[0.6271, 0.6220, 0.6182,  ..., 0.7364, 0.7358, 0.7349]],

        [[0.8787, 0.8797, 0.8800,  ..., 0.9118, 0.8990, 0.8823]],

        ...,

        [[0.1754, 0.1612, 0.1506,  ..., 0.2371, 0.2578, 0.2776]],

        [[0.0731, 0.0666, 0.0602,  ..., 0.1477, 0.1384, 0.1263]],

        [[0.1321, 0.1058, 0.0819,  ..., 0.6506, 0.6397, 0.6283]]],
       device='cuda:0') tensor([[[0.6807, 0.6604, 0.6412,  ..., 0.5806, 0.5754, 0.5746]],

        [[0.5771, 0.5954, 0.6125,  ..., 0.4933, 0.511

TypeError: unsupported operand type(s) for -: 'NoneType' and 'NoneType'