In [32]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [33]:
# Load the benign and malware feature tables.
benign_dataset = pd.read_csv("../data/benign.csv")
malware_dataset = pd.read_csv("../data/malware.csv")

print(benign_dataset.shape, malware_dataset.shape)

# Add the ground-truth label column: 0 = benign, 1 = malware.
benign_dataset["ground_truth"] = 0
malware_dataset["ground_truth"] = 1

# Stack both classes and shuffle the rows. The shuffle is seeded so the row
# order is identical on every run; without a seed, the downstream seeded
# train/test split would still receive differently ordered input each time.
SHUFFLE_SEED = 42
combined_dataset = (
    pd.concat([benign_dataset, malware_dataset], ignore_index=True)
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .reset_index(drop=True)
)

# Drop the first column by position.
# NOTE(review): presumably the row-index column written by an earlier
# to_csv() call -- confirm against the CSV files.
combined_dataset = combined_dataset.drop(columns=combined_dataset.columns[0])
print(combined_dataset.shape)

(15166, 2382) (8970, 2382)
(24136, 2382)


In [34]:
# Preview the first five rows of the combined table (head() defaults to n=5).
combined_dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2372,2373,2374,2375,2376,2377,2378,2379,2380,ground_truth
0,0.658894,0.006651,0.006222,0.005062,0.004886,0.004337,0.00431,0.004201,0.004017,0.003433,...,0.0,0.0,0.0,8.0,8192.0,0.0,0.0,72.0,8200.0,0
1,0.176794,0.028287,0.022401,0.016022,0.017895,0.010846,0.022094,0.009408,0.008462,0.006019,...,0.0,0.0,0.0,8.0,8192.0,0.0,0.0,72.0,8200.0,0
2,0.44307,0.013716,0.009648,0.007923,0.007454,0.004515,0.009137,0.003791,0.00607,0.003834,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,72.0,8192.0,0
3,0.302648,0.01387,0.010625,0.004711,0.005253,0.002154,0.00537,0.003102,0.002382,0.001039,...,0.0,0.0,0.0,8.0,8192.0,0.0,0.0,72.0,8200.0,0
4,0.344308,0.033575,0.015625,0.006789,0.010138,0.005487,0.008278,0.00558,0.007906,0.00372,...,0.0,0.0,0.0,8.0,8192.0,0.0,0.0,72.0,8200.0,0


In [35]:
# Separate the feature matrix from the label column.
X = combined_dataset.drop(columns="ground_truth")
y = combined_dataset["ground_truth"]
print(X.shape, y.shape)

# Hold out 20% of the rows for testing. stratify=y keeps the benign/malware
# ratio the same in both partitions (the two classes are not the same size),
# and the fixed random_state makes the split repeatable for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(24136, 2381) (24136,)
(19308, 2381) (4828, 2381) (19308,) (4828,)


In [36]:
def make_model(feature_size=2381, learning_rate=0.0001):
    """Build and compile the binary (benign vs. malware) classifier.

    Architecture, in order: Dropout(0.2) -> Dense(1500, relu) ->
    Dropout(0.5) -> Dense(1, sigmoid). The input layer is declared with
    shape ``(1, feature_size)``, so callers must feed arrays shaped
    ``(n_samples, 1, feature_size)``.

    Args:
        feature_size: Number of features per sample. Defaults to 2381, the
            value this notebook was originally hard-coded to.
        learning_rate: Learning rate passed to the Adam optimizer.
            Defaults to 0.0001.

    Returns:
        A compiled ``Sequential`` model using binary cross-entropy loss and
        reporting accuracy, AUC and precision.

    Raises:
        ValueError: If ``feature_size`` is smaller than 1 or
            ``learning_rate`` is not strictly positive.
    """
    # Validate the cheap arguments first so bad input fails fast, before the
    # TensorFlow import is paid for.
    if feature_size < 1:
        raise ValueError(f"feature_size must be >= 1, got {feature_size!r}")
    if learning_rate <= 0:
        raise ValueError(f"learning_rate must be > 0, got {learning_rate!r}")

    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Dropout, InputLayer
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.metrics import AUC, Precision

    model = Sequential()
    # NOTE(review): newer Keras releases prefer `shape=` over `input_shape=`
    # here -- confirm against the installed version before changing it.
    model.add(InputLayer(input_shape=(1, feature_size)))
    model.add(Dropout(0.2))
    model.add(Dense(1500, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', AUC(), Precision()],
    )
    return model

In [37]:
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and backend random generators before the model is
# built, so weight initialisation (and later dropout / batch shuffling) is
# repeatable across runs as far as the backend allows.
set_random_seed(42)

model = make_model()
model.summary()



In [38]:
from sklearn.preprocessing import StandardScaler

# Standardise each feature using statistics fitted on the training rows only.
# NOTE(review): despite the name `mms`, this is a StandardScaler, not a
# MinMaxScaler; the name is kept so any other cell that refers to it still works.
# This cell overwrites X_train / y_train, so re-run the split cell before
# executing it a second time.
mms = StandardScaler()
X_train = mms.fit_transform(X_train)

# The model's input layer is declared as (1, n_features), so insert a
# singleton middle axis. The feature count is read from the data instead of
# being hard-coded.
X_train = np.reshape(X_train, (-1, 1, X_train.shape[1]))

# Turn the labels into a plain (n_samples, 1) ndarray; converting explicitly
# avoids depending on how np.reshape treats a pandas Series.
y_train = np.asarray(y_train).reshape(-1, 1)

In [39]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has failed to improve for 10 consecutive epochs, and roll
# the model back to the weights of the best epoch so that whatever is saved
# afterwards is the best-validating model rather than the last one trained.
callback = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# validation_split=0.2 holds out 20% of the training rows for validation
# (Keras takes them from the end of the arrays, before any shuffling).
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=128,
    validation_split=0.2,
    callbacks=[callback],
)


Epoch 1/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 206ms/step - accuracy: 0.9382 - auc_3: 0.9792 - loss: 0.1750 - precision_3: 0.8844 - val_accuracy: 0.9925 - val_auc_3: 0.9973 - val_loss: 0.0390 - val_precision_3: 0.9911
Epoch 2/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 205ms/step - accuracy: 0.9951 - auc_3: 0.9985 - loss: 0.0220 - precision_3: 0.9938 - val_accuracy: 0.9946 - val_auc_3: 0.9981 - val_loss: 0.0307 - val_precision_3: 0.9932
Epoch 3/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 205ms/step - accuracy: 0.9974 - auc_3: 0.9991 - loss: 0.0184 - precision_3: 0.9966 - val_accuracy: 0.9948 - val_auc_3: 0.9975 - val_loss: 0.0320 - val_precision_3: 0.9939
Epoch 4/30
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 218ms/step - accuracy: 0.9985 - auc_3: 0.9995 - loss: 0.0091 - precision_3: 0.9984 - val_accuracy: 0.9948 - val_auc_3: 0.9975 - val_loss: 0.0313 - val_precision_3: 0.9939
Epoc

In [42]:
weights_path = "../models/weights.weights.h5"
architecture_path = "../models/model.json"

# Write the learned parameters to disk.
model.save_weights(weights_path)

# Write the model architecture as a JSON string.
# NOTE(review): the fitted scaler (`mms`) is not written out in this cell;
# confirm it is persisted elsewhere if the saved model is used for inference.
with open(architecture_path, "w") as json_file:
    json_file.write(model.to_json())

print("Model Saved Successfully")

Model Saved Successfully
