# Notebook to Train the Network

In [None]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
import torch.nn.utils.rnn as rnn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from tqdm.notebook import tqdm
from os import listdir
from os.path import isfile, join
import pandas as pd
import sys
from torch.utils.data import random_split

In [None]:
def process_data_file(path, data, d_type):
    """Split one CSV recording into labelled sequences and append them to ``data``.

    Rows are consumed in file order. A row whose last column equals ``True``
    starts a new sequence; every following row belongs to that sequence until
    the next such row. Rows that appear before the first marker are kept as
    part of the first sequence. The last three columns of each row are
    dropped before the row is stored.

    Args:
        path: Path of the CSV file to read.
        data: List that receives one ``(rows, d_type)`` tuple per sequence.
            It is extended in place.
        d_type: Label stored next to every sequence taken from this file.

    Returns:
        The same ``data`` list that was passed in.
    """
    data_file = pd.read_csv(path)
    sample = []
    seen_start = False
    for val in data_file.values:
        # Keep the explicit comparison: the flag may be a NumPy bool or a 0/1
        # number, and plain truthiness would also accept other values.
        if val[-1] == True:  # noqa: E712
            if seen_start:
                # A new marker closes the sequence collected so far.
                data.append((sample, d_type))
                sample = []
            seen_start = True
        sample.append(val[:-3])

    # Flush the trailing sequence; without this the last sequence of every
    # file is silently discarded because no further marker follows it.
    if sample:
        data.append((sample, d_type))
    return data


def process_directory(path, data, d_type):
    """Run ``process_data_file`` on every regular file directly inside ``path``.

    Args:
        path: Directory to scan (sub-directories are not entered).
        data: List that accumulates the ``(rows, d_type)`` tuples.
        d_type: Label passed through for every file in this directory.

    Returns:
        The ``data`` list returned by the last ``process_data_file`` call, or
        the unchanged input list when the directory holds no files.
    """
    # listdir() returns entries in arbitrary order; sorting makes the sample
    # order, and therefore any seeded shuffle applied later, reproducible.
    files = sorted(f for f in listdir(path) if isfile(join(path, f)))
    with tqdm(total=len(files), file=sys.stdout) as pbar:
        pbar.set_description(f'Processing: {path}')
        for file in files:
            data = process_data_file(join(path, file), data, d_type)
            pbar.update(1)
    return data


# Collect every recording into one list; the cheater directory is loaded with
# label 1 and the legit directory with label 0, and the running sample count
# is printed after each directory.
data = []
for directory, label in (("./processed_data/cheater", 1), ("./processed_data/legit", 0)):
    data = process_directory(directory, data, d_type=label)
    print("Total Samples:", len(data))

In [None]:
import random

random.seed(88)
np.random.seed(88)

# Ensure dataset is shuffled randomly
random.shuffle(data)
train_n = round(len(data)*0.8)
subset_train = data[:100000]

# Z-score scaling statistics: the average of the per-sample means and the
# average of the per-sample standard deviations over the selected subset.
mean = 0.0
std = 1.0
if subset_train:
    mean = sum(np.mean(s[0]) for s in subset_train) / len(subset_train)
    std = sum(np.std(s[0]) for s in subset_train) / len(subset_train)
if std == 0:
    # Constant data: avoid dividing by zero and leave the values centred only.
    std = 1.0

# Normalise EVERY row of every sample in place. (A stray `break` previously
# stopped after the first row, leaving the rest of each sequence unscaled.)
for s in subset_train:
    rows = s[0]
    for j, row in enumerate(rows):
        rows[j] = (row - mean) / std

element_length = 12

In [None]:
torch.manual_seed(0)
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Sequences are cut to at most this many timesteps before entering the LSTM.
max_seq_length = 35

class MinecraftAimbotDataset(Dataset):
    """Dataset of variable-length feature sequences with a binary label.

    Every entry of ``data`` is indexed as ``entry[0]`` (a sequence of feature
    rows) and ``entry[1]`` (the label). Items are returned as fixed-size
    tensors: sequences are truncated or zero-padded to ``max_seq_length``
    rows of ``element_length`` features each.

    Args:
        data: Sequence of ``(rows, label)`` entries.
        element_length: Number of features per row.
        max_seq_length: Number of rows in every returned tensor.
        device: Device the returned tensors are moved to.
    """

    def __init__(self, data, element_length, max_seq_length, device):
        self.device = device
        self.element_length = element_length
        self.max_seq_length = max_seq_length

        self.data = data

        # default=0 keeps construction from failing on an empty dataset.
        self.longest_sent = max((len(e[0]) for e in self.data), default=0)

    @property
    def max_length(self):
        """Length of the longest (untruncated) sequence in the dataset."""
        return self.longest_sent

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(x, y, length)`` for the sample at ``index``.

        ``x`` is a float32 tensor of shape ``(max_seq_length, element_length)``,
        ``y`` a scalar int64 tensor holding the label, and ``length`` the
        number of valid (non-padding) rows in ``x``.
        """
        rows = self.data[index][0]
        label = self.data[index][1]
        # Valid length after truncation, so it always matches the rows in x.
        length = min(len(rows), self.max_seq_length)

        # Zero-pad up to max_seq_length; longer sequences are truncated.
        padded_x = np.zeros((self.max_seq_length, self.element_length), dtype=np.float32)
        if length > 0:
            padded_x[:length] = np.asarray(rows[:length], dtype=np.float32)

        # Features stay floating point: casting them to an integer type would
        # truncate the normalised values and destroy most of the signal.
        x = torch.as_tensor(padded_x, dtype=torch.float32)
        y = torch.as_tensor(label, dtype=torch.long)

        x = x.to(self.device)
        y = y.to(self.device)

        return x, y, length
    

# Wrap the prepared samples in a Dataset, then split it at random into a
# training part (80% of the items, rounded down) and a test part (the rest).
dataset = MinecraftAimbotDataset(
    data=subset_train,
    element_length=element_length,
    max_seq_length=max_seq_length,
    device=device,
)

train_len = int(len(dataset)*0.8)
test_len = len(dataset) - train_len
train_set, test_set = random_split(dataset, [train_len, test_len])

# Both loaders yield batches of 128 items.
dataloader = DataLoader(dataset=train_set, batch_size=128)
test_dataloader = DataLoader(dataset=test_set, batch_size=128)

In [None]:
class AimbotDetector(nn.Module):
    """Stacked LSTM followed by a linear layer and a sigmoid.

    The LSTM has 4 layers with hidden size 200 and dropout 0.3. Its initial
    hidden and cell states are learned parameters shared by all sequences.
    The output is one probability per sequence, computed from the LSTM output
    at each sequence's last valid timestep.

    Args:
        input_size: Number of features per timestep. Defaults to the
            module-level ``element_length``.
        max_length: Upper bound applied to the sequence lengths. Defaults to
            the module-level ``max_seq_length``.
    """

    def __init__(self, input_size=None, max_length=None):
        super().__init__()
        lstm_hidden_size = 200
        lstm_layers = 4

        # Fall back to the notebook-level settings so AimbotDetector() keeps
        # working exactly as before.
        self.input_size = element_length if input_size is None else input_size
        self.max_seq_length = max_seq_length if max_length is None else max_length

        self.lstm = nn.LSTM(
            input_size=self.input_size,
            hidden_size=lstm_hidden_size,
            num_layers=lstm_layers,
            batch_first=True,
            dropout=0.3
        )

        # Learned initial hidden state, one vector per layer.
        lstm_h = torch.empty(lstm_layers, lstm_hidden_size)  # pylint: disable=no-member
        nn.init.uniform_(lstm_h, -1., 1.)
        self.lstm_h = nn.Parameter(lstm_h)

        # Learned initial cell state, one vector per layer.
        lstm_c = torch.empty(lstm_layers, lstm_hidden_size)  # pylint: disable=no-member
        nn.init.uniform_(lstm_c, -1., 1.)
        self.lstm_c = nn.Parameter(lstm_c)

        self.output = nn.Linear(lstm_hidden_size, 1)

    def forward(self, x, l):
        """Return a ``(batch, 1)`` tensor of probabilities.

        Args:
            x: Padded input of shape ``(batch, seq, features)``.
            l: 1-D integer tensor with the valid length of each sequence.
        """
        batch_size = l.size(0)

        # pack_padded_sequence expects the lengths as a CPU int64 tensor.
        lengths = l.clamp(max=self.max_seq_length).long().cpu()

        packed = rnn.pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        # expand() returns a non-contiguous view; make it contiguous before
        # handing it to the LSTM.
        lstm_h = self.lstm_h[:, None].expand(-1, batch_size, -1).contiguous()
        lstm_c = self.lstm_c[:, None].expand(-1, batch_size, -1).contiguous()
        packed_out, _ = self.lstm(packed, (lstm_h, lstm_c))

        out, _ = rnn.pad_packed_sequence(packed_out, batch_first=True)

        # Pick each sequence's last VALID step. Indexing with -1 would read
        # zero padding for every sequence shorter than the longest in the batch.
        last_step = (lengths - 1).to(out.device)
        batch_idx = torch.arange(batch_size, device=out.device)
        last_out = out[batch_idx, last_step]

        return torch.sigmoid(self.output(last_out))

In [None]:
# Train the detector with binary cross-entropy and evaluate it on the
# held-out loader after every epoch.
torch.manual_seed(0)

model = AimbotDetector()

model.to(device)
model.train()

loss_function = nn.BCELoss()
loss_function.to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001)

epoch_count = 150

# Per-epoch history, consumed by the plotting cell.
losses = []
test_losses = []
test_acc = []

for epoch in range(epoch_count):
    model.train()
    avg_loss = 0.
    iters = 0

    # len(dataloader) also counts the final partial batch, so the bar ends at 100%.
    with tqdm(total=len(dataloader), file=sys.stdout) as pbar:
        pbar.set_description(f'Epoch {epoch} | Avg Loss: {avg_loss}')
        for x, y, l in dataloader:
            optimizer.zero_grad()
            x = model(x.float(), l)
            loss = loss_function(x, y[:, None].float())
            loss.backward()
            optimizer.step()

            avg_loss += loss.item()
            iters += 1

            pbar.set_description(f'Epoch {epoch} | Avg Loss: {avg_loss / iters:.10f}')
            pbar.update(1)

    # Switch to eval mode so dropout is disabled while measuring test metrics.
    model.eval()
    avg_test_loss = 0.0
    test_iters = 0
    correct = 0
    threshold = 0.5
    with torch.no_grad():
        for x, y, l in test_dataloader:
            x = model(x.float(), l)
            loss = loss_function(x, y[:, None].float())
            avg_test_loss += loss.item()
            # Count each batch exactly once; counting it twice halves the
            # reported test loss.
            test_iters += 1

            y_pred = (x > threshold).int()
            correct += (y_pred == y[:, None]).sum().item()

    # max(..., 1) keeps the bookkeeping from dividing by zero on empty loaders.
    train_loss = avg_loss / max(iters, 1)
    test_loss = avg_test_loss / max(test_iters, 1)
    accuracy = correct / max(len(test_set), 1)

    losses.append(train_loss)
    test_losses.append(test_loss)
    test_acc.append(accuracy)
    print(f'Epoch [{epoch+1}/{epoch_count}], Accuracy: {100 * accuracy:.2f}% [{correct}/{len(test_set)}], Test loss: {test_loss}, Train loss: {train_loss}')


In [None]:
import matplotlib.pyplot as plt

# Draw one figure per recorded metric, each against the epoch index:
# training loss first, then test loss, then test accuracy.
epoch_axis = np.arange(0, epoch_count, 1)
for series, title in (
    (losses, "train loss"),
    (test_losses, "test loss"),
    (test_acc, "accuracy"),
):
    plt.plot(epoch_axis, series)
    plt.title(title)
    plt.show()