In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.preprocessing import StandardScaler

class Autoencoder(nn.Module):
    """Fully connected autoencoder with an 8-unit bottleneck.

    The encoder maps ``input_dim -> 16 -> 8`` and the decoder mirrors it
    back ``8 -> 16 -> input_dim``. A ReLU follows every linear layer except
    the last decoder layer, so reconstructions are not range-limited.
    """

    def __init__(self, input_dim):
        super().__init__()
        hidden_dim, latent_dim = 16, 8

        # Layers are built encoder-first and kept inside nn.Sequential so the
        # parameter names stay encoder.0 / encoder.2 / decoder.0 / decoder.2,
        # which is what previously saved state_dicts are keyed by.
        encoder_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
            nn.ReLU(),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Compress ``x`` through the encoder and return its reconstruction."""
        return self.decoder(self.encoder(x))

def build_autoencoder(input_dim):
    """Create a fresh, untrained ``Autoencoder`` for ``input_dim`` features."""
    model = Autoencoder(input_dim)
    return model

def train_model(path, epochs=2, batch_size=32, lr=0.001, percentile=95,
                return_scaler=False):
    """Train an autoencoder on a CSV file and derive a reconstruction threshold.

    Every column of the CSV is standardised with a ``StandardScaler`` fitted
    on this file and fed to the model as a feature (assumes all columns are
    numeric -- TODO confirm against the actual CSV schema).

    Parameters
    ----------
    path : str
        Location of the training CSV, passed to ``pandas.read_csv``.
    epochs : int, default 2
        Number of full passes over the training data.
    batch_size : int, default 32
        Mini-batch size used by the shuffled ``DataLoader``.
    lr : float, default 0.001
        Learning rate for the Adam optimiser.
    percentile : float, default 95
        Percentile of the per-row training reconstruction error that is
        returned as the threshold.
    return_scaler : bool, default False
        When True, the fitted scaler is returned as a third element so the
        caller can apply the *training* scaling to other data.

    Returns
    -------
    (autoencoder, reconstruction_threshold)
        or ``(autoencoder, reconstruction_threshold, scaler)`` when
        ``return_scaler`` is True.
    """
    # Imported locally: the cell that defines this function imports torch,
    # numpy and sklearn but not pandas, so relying on a global ``pd`` only
    # works if some later cell happened to run first.
    import pandas as pd

    data = pd.read_csv(path).to_numpy()

    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(data)
    input_dim = scaled_data.shape[1]

    tensor_data = torch.as_tensor(scaled_data, dtype=torch.float32)

    autoencoder = build_autoencoder(input_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(autoencoder.parameters(), lr=lr)

    # The tensor itself serves as the dataset: each item is one feature row.
    train_loader = torch.utils.data.DataLoader(
        tensor_data, batch_size=batch_size, shuffle=True
    )

    for _ in range(epochs):
        for batch in train_loader:
            optimizer.zero_grad()
            # The target is the input itself: the model learns to reconstruct.
            loss = criterion(autoencoder(batch), batch)
            loss.backward()
            optimizer.step()

    # Score the whole training set in one pass; no gradients are needed.
    with torch.no_grad():
        reconstructed = autoencoder(tensor_data)

    # Mean squared error per row, measured in the scaled feature space.
    reconstruction_error = np.mean(
        (scaled_data - reconstructed.numpy()) ** 2, axis=1
    )
    reconstruction_threshold = np.percentile(reconstruction_error, percentile)

    if return_scaler:
        return autoencoder, reconstruction_threshold, scaler
    return autoencoder, reconstruction_threshold

In [None]:
import os
import pandas as pd
import numpy as np

# Directory holding the CSV datasets. Defaults to the original location; set
# the KEYLOGGER_DATA_DIR environment variable to run against another copy.
DATA_DIR = os.environ.get(
    'KEYLOGGER_DATA_DIR',
    r'/home/UTTU/NewProj/linux-kernel-examples/kernelKeyLogger/OS_zip_file',
)

Sam_train = os.path.join(DATA_DIR, 'Sam_train.csv')
Sam_2 = os.path.join(DATA_DIR, 'Sam_test.csv')
Utkarsh_train = os.path.join(DATA_DIR, 'Utkarsh.csv')

# Same files under their older names, kept so existing cells keep working.
train_path = Sam_train
test_path = Sam_2

In [20]:
# Train on the Utkarsh CSV, then persist the model weights and the
# reconstruction threshold next to the data.
ut_model_path = r'/home/UTTU/NewProj/linux-kernel-examples/kernelKeyLogger/OS_zip_file/current_model_ut.torch'
ut_threshold_path = r'/home/UTTU/NewProj/linux-kernel-examples/kernelKeyLogger/OS_zip_file/reconstruction_threshold_ut.csv'

autoencoder, reconstruction_threshold = train_model(Utkarsh_train)

torch.save(autoencoder.state_dict(), ut_model_path)

# The threshold is written as a one-cell DataFrame (default index and header).
threshold_frame = pd.DataFrame([reconstruction_threshold])
threshold_frame.to_csv(ut_threshold_path)

In [6]:
# Restore saved weights into the existing in-memory `autoencoder`.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
# NOTE(review): this reads current_model.torch, not the current_model_ut.torch
# written by the training cell -- confirm which checkpoint is intended.
saved_state = torch.load(r'/home/UTTU/NewProj/linux-kernel-examples/kernelKeyLogger/OS_zip_file/current_model.torch')
autoencoder.load_state_dict(saved_state)

<All keys matched successfully>

In [17]:
# Score the Sam_2 data with the current `autoencoder` and report the fraction
# of rows whose reconstruction error stays within `reconstruction_threshold`.
current_data = prepare_data(Sam_2)

# NOTE(review): the scaler is refit on the evaluation data because train_model
# does not hand back the scaler it fitted. The threshold was computed in the
# training data's scaled space, so the two scalings may not be comparable --
# confirm this is intended.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(current_data)
tensor_data = torch.Tensor(scaled_data)

# Inference only: skip autograd bookkeeping instead of detaching afterwards.
with torch.no_grad():
    reconstructed = autoencoder(tensor_data)

# Mean squared error per row, in the scaled feature space.
reconstruction_error = np.mean((scaled_data - reconstructed.numpy()) ** 2, axis=1)

# 1 = error within the threshold, 0 = error above it.
pred_labels = [
    0 if err > reconstruction_threshold else 1
    for err in reconstruction_error
]
print(sum(pred_labels) / len(pred_labels))

0.9428612344264643
