In [None]:
# GRU Model for ID alone
# To predict the next ID in the sequence

In [None]:
# Importing necessary libraries
import os
import sys
import numpy as np
from libraries.utils import get_paths, read_traces, read_json, mapint2var, is_consistent

In [None]:
# Configuration
# Selects which application's traces are used and how they were collected.
CODE = 'lora_ducy'               ### application (code) theft_protection, mamba2, lora_ducy
BEHAVIOUR_FAULTY = 'faulty_data'        ### normal, faulty_data
BEHAVIOUR_NORMAL = 'normal'             ### normal, faulty_data
THREAD = 'single'                       ### single, multi
VER = 4                                 ### format of data collection

base_dir = './trace_data'               ### can be replaced with 'csv', 'exe_plot', 'histogram'

# Both behaviours live under the same <base>/<code>/<thread>_thread/version_<n> folder,
# so build that shared prefix once and append the behaviour name.
version_root = f'{base_dir}/{CODE}/{THREAD}_thread/version_{VER}'
normalbase_path = f'{version_root}/{BEHAVIOUR_NORMAL}'
faultybase_path = f'{version_root}/{BEHAVIOUR_FAULTY}'

print("Normal base path:", normalbase_path)
print("Faulty base path:", faultybase_path)

In [None]:
def _clean_sorted(paths):
    """Return the given paths sorted, without any entry containing '.DS_Store'."""
    return sorted(p for p in paths if '.DS_Store' not in p)


# Training inputs come from the normal-behaviour folder.
# os.listdir returns entries in arbitrary order, so these lists are sorted too;
# otherwise the file order (and hence the seeded train/validation split and the
# choice of train_varlist_path[0]) could differ between machines.
train_base_path = os.path.join(normalbase_path, 'train_data')
train_data_path = _clean_sorted(
    os.path.join(train_base_path, x) for x in os.listdir(train_base_path)
)
train_varlist_path = _clean_sorted(
    os.path.join(normalbase_path, x) for x in os.listdir(normalbase_path) if 'varlist' in x
)

######### get paths #######################
# get_paths yields four path collections for the faulty-behaviour folder;
# each one is filtered and sorted the same way as the training lists.
paths_log, paths_traces, varlist_path, paths_label = (
    _clean_sorted(paths) for paths in get_paths(faultybase_path)
)

test_data_path = paths_traces
test_label_path = paths_label

In [None]:
# Check consistency
# The variable maps below are only built for data-format versions 3 and 4.
# Fail here with a clear message instead of hitting a NameError on
# `from_number` further down when another version is configured.
if VER not in (3, 4):
    raise ValueError(f"Unsupported data format version VER={VER}; expected 3 or 4")

check_con, _ = is_consistent([train_varlist_path[0]] + varlist_path)

# When every variable list agrees, the first faulty-run list is used;
# otherwise fall back to the training variable list.
chosen_varlist = varlist_path[0] if check_con else train_varlist_path[0]
to_number = read_json(chosen_varlist)
# presumably the inverse (number -> variable) mapping of to_number; verify in libraries.utils
from_number = mapint2var(to_number)

# Values of from_number, ordered by ascending key.
sorted_keys = sorted(from_number.keys())
var_list = [from_number[key] for key in sorted_keys]

In [None]:
# Load training data
def load_data(file_paths):
    """Collect the ID sequence of every trace file.

    Args:
        file_paths: iterable of paths; each one is handed to ``read_traces``.

    Returns:
        A list with one inner list per file whose ``read_traces`` result is a
        list. Each inner list holds ``int(trace[0])`` for every trace in that
        file, in order. Files whose result is not a list are skipped.
    """
    return [
        [int(trace[0]) for trace in traces]
        for traces in map(read_traces, file_paths)
        if isinstance(traces, list)
    ]

train_data = load_data(train_data_path)

# Stop early with a clear message if nothing was loaded; the windowing and
# split steps further down need at least one sequence.
assert train_data, f"No training traces loaded from {len(train_data_path)} path(s)"

# Print a short summary rather than every ID of every file, which would
# flood the notebook output.
print(f"Loaded {len(train_data)} training file(s); first IDs of file 1: {train_data[0][:10]}")

In [None]:
# Report the shape of each loaded ID sequence (datasets are numbered from 1).
for file_no, id_sequence in enumerate(train_data, start=1):
    sequence_shape = np.array(id_sequence).shape
    print(f"Dataset {file_no}: shape = {sequence_shape}")

In [None]:
from sklearn.model_selection import train_test_split

# Prepare GRU training data
# Each sample is a window of `sequence_length` consecutive IDs from one file;
# its target is the ID that immediately follows the window.
sequence_length = 10       # Already tuned parameter value
X_train, y_train = [], []
for id_sequence in train_data:
    # Files with no more than `sequence_length` IDs contribute no windows.
    window_count = len(id_sequence) - sequence_length
    for start in range(window_count):
        stop = start + sequence_length
        X_train.append(id_sequence[start:stop])
        y_train.append(id_sequence[stop])

X_train = np.array(X_train)
y_train = np.array(y_train)

# Hold out 20% of the windows; the fixed random_state makes the split
# repeatable for a given input order.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Remember the window-array shapes so the scaled data can be restored to them.
train_shape = X_train.shape
val_shape = X_val.shape

# Collapse to 2-D (rows x last axis) before handing the data to the scaler.
X_train_new = X_train.reshape(-1, train_shape[-1])
X_val_new = X_val.reshape(-1, val_shape[-1])

# Fit on the training windows only; validation windows reuse that fit.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train_new)
X_val_scaled = scaler.transform(X_val_new)

# NOTE: X_train / X_val are rebound to their scaled versions here, so running
# this cell a second time would refit the scaler on already-scaled data.
X_train = X_train_scaled.reshape(train_shape)
X_val = X_val_scaled.reshape(val_shape)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
import psutil

# Define GRU model
# Three stacked GRU layers followed by one linear output unit.
# Layers 128, 64 and 32 are chosen by parameter tuning
model = Sequential()

# First two GRU layers return the full sequence so the next GRU can consume it.
model.add(GRU(128, activation='relu', return_sequences=True,
              input_shape=(sequence_length, 1), kernel_regularizer=l2(0.001)))
model.add(Dropout(0.1))
model.add(GRU(64, activation='relu', return_sequences=True,
              kernel_regularizer=l2(0.001)))
model.add(Dropout(0.1))

# Last GRU layer emits only its final state, which feeds the single-value output.
model.add(GRU(32, activation='relu', return_sequences=False,
              kernel_regularizer=l2(0.001)))
model.add(Dense(1, activation='linear'))


In [None]:
# MAE is used both as the training loss and as the reported metric.
model.compile(optimizer=Adam(), loss='mae', metrics=['mae'])

# Stop once val_loss has not improved for 10 epochs and restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# NOTE(review): validation_split holds back a further 20% of X_train for
# monitoring; X_val / y_val from the earlier split are not passed to fit().
# Confirm this is intended (it leaves X_val untouched until evaluation).
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)


# Finding RAM usage
# Resident set size of the current Python process, converted from bytes to MB.
rss_bytes = psutil.Process().memory_info().rss
ram_usage = rss_bytes / (1024 ** 2)
print(f"Total RAM usage: {ram_usage:.2f} MB")


In [None]:
# Calculating the MAE and Accuracy
from sklearn.metrics import mean_absolute_error

# Regression error of the raw (unrounded) predictions on the held-out windows.
pred = model.predict(X_val)
mae = mean_absolute_error(y_val, pred)
print(f"Mean Absolute Error (MAE): {mae}")

# Round each prediction to the nearest integer ID. A prediction counts as
# correct when its absolute difference from the true ID is below 1.
pred = np.round(pred).astype(int)
is_hit = np.abs(y_val - pred.reshape(-1)) < 1

# Keep the true IDs of the hits and misses, as separate lists.
correct = list(y_val[is_hit])
incorrect = list(y_val[~is_hit])


accuracy = len(correct) / len(y_val)
print(f"Accuracy: {accuracy}")

In [None]:
# # # Saving the Model for future use
# model_path = './trained_models'
# if not os.path.exists(model_path):
#     os.makedirs(model_path)

# model_path = f'{model_path}/gru_v4_{CODE}.keras'
# if not os.path.exists(model_path): 
#     model.save(model_path)
#     print("Model saved successfully")
# else:
#     print(f"Model {model_path} exists")

In [None]:
# Saving the scaler
import joblib

scaler_path = './scalers'
# exist_ok=True creates the folder when missing and is a no-op when it is
# already there, replacing the separate exists() check.
os.makedirs(scaler_path, exist_ok=True)

# Name the file after the configured application (CODE) so that switching to
# another application does not overwrite this scaler. With CODE = 'lora_ducy'
# the path is the same as before.
joblib.dump(scaler, f'{scaler_path}/scaler_gru_id_{CODE}.pkl')