In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score



In [2]:
data = pd.read_csv('../data/data_10.csv')

In [3]:
X = data.drop('malware', axis=1)
y = data['malware']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
def build_model(activation_function):
    model = Sequential()
    model.add(Dense(128, input_shape=(100,), activation=activation_function))
    for _ in range(9):
        model.add(Dense(64, activation=activation_function))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
results = {}

activation_functions = ['relu', 'sigmoid', 'tanh']

early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

for activation in activation_functions:
    print(f"Training model with {activation} activation function...")
    model = build_model(activation)
    
    history = model.fit(X_train_scaled, y_train, epochs=10, validation_split=0.2, callbacks=[early_stopping], verbose=1, batch_size=128)
    
    y_pred = (model.predict(X_test_scaled) > 0.5).astype("int32")
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    # Store the results
    results[activation] = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    }
    
    print(f"Finished training with {activation} activation function.")

In [8]:
results_df = pd.DataFrame(results).T
print(results_df)

         accuracy  precision    recall  f1_score
relu     0.984959   0.985131  0.999649  0.992337
sigmoid  0.974248   0.974248  1.000000  0.986956
tanh     0.984389   0.986582  0.997544  0.992033


To evaluate the performance of a Multi-Layer Perceptron (MLP) model with 10 hidden layers, I tested three activation functions: ReLU, sigmoid, and tanh, using the Adam solver on the "data.csv" dataset. The model's performance metrics included accuracy, precision, recall, and F1 score. ReLU yielded the best performance with an accuracy of 0.984959, precision of 0.985131, recall of 0.999649, and an F1 score of 0.992337. Tanh was slightly behind, while sigmoid performed the least effectively. ReLU's superior performance is attributed to its ability to mitigate the vanishing gradient problem, facilitating deeper network training and faster convergence. These results suggest that ReLU is the most effective activation function for this MLP configuration.