# CIS-545 Malware vs Benign InceptionV3

## Import statements

In [None]:
from tensorflow import keras
from keras.applications.inception_v3 import InceptionV3
from keras import models, layers, optimizers
from keras.utils import image_dataset_from_directory,load_img,img_to_array
from keras.models import load_model
import numpy as np
import os
import random

## Loading InceptionV3 with pre-trained ImageNet weights

In [None]:
# Convolutional base: InceptionV3 initialised with ImageNet weights and built
# without its classification head (include_top=False), for 250x250 RGB inputs.
model_conv = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(250, 250, 3),
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Print the layer-by-layer architecture (output shapes, parameter counts and
# connections) of the pre-trained convolutional base.
model_conv.summary()

Model: "inception_v3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 250, 250, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 124, 124, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 124, 124, 32  96         ['conv2d[0][0]']                 
 alization)                     )                                                      

## Making the InceptionV3 model untrainable

In [None]:
# Freeze the pre-trained InceptionV3 base so its weights are not updated during
# training; only layers added on top of it will be trained.
model_conv.trainable = False

## Adding Flatten, Dense, and output layers on top of the InceptionV3 model

In [None]:
# Transfer-learning classifier: frozen InceptionV3 feature extractor followed by
# a small fully connected head with a single sigmoid unit for binary output.
model = models.Sequential()
model.add(keras.Input(shape=(250, 250, 3)))
# The ImageNet weights of InceptionV3 expect pixels scaled to [-1, 1], whereas
# image_dataset_from_directory and load_img/img_to_array yield raw [0, 255]
# values. Rescaling inside the model applies the same mapping during fit,
# evaluate and predict, and is saved together with the model.
model.add(layers.Rescaling(scale=1.0 / 127.5, offset=-1.0))
model.add(model_conv)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
# Dropout regularises the dense head, which holds all trainable parameters.
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the full classifier: layer stack, output shapes, and the split between
# trainable and non-trainable (frozen) parameters.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inception_v3 (Functional)   (None, 6, 6, 2048)        21802784  
                                                                 
 flatten (Flatten)           (None, 73728)             0         
                                                                 
 dense (Dense)               (None, 256)               18874624  
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 1)                 257       
                                                                 
Total params: 40,677,665
Trainable params: 18,874,881
Non-trainable params: 21,802,784
_________________________________________________________________


## Loading the training and validation datasets and training the model

In [None]:
# Load the training and validation images; class labels are inferred from the
# sub-directory names, and every image is resized to the model's 250x250 input.
train_dataset = image_dataset_from_directory("dataset/train", image_size=(250, 250), batch_size=32)
validation_dataset = image_dataset_from_directory("dataset/validation", image_size=(250, 250), batch_size=32)

callbacks = [
    # Keep only the checkpoint with the lowest validation loss. With
    # save_best_only=False the `monitor` setting had no effect and every epoch
    # overwrote the previous checkpoint.
    keras.callbacks.ModelCheckpoint(
        filepath="malware_vs_benign_inceptionv3",
        save_best_only=True,
        monitor="val_loss",
    ),
    # Stop once validation loss has not improved for 10 epochs instead of
    # always running all 100; when training is stopped early, the weights of
    # the best epoch are restored so the .h5 saved below is not an overfit
    # late-epoch model.
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=10,
        restore_best_weights=True,
    ),
]

# epochs=100 is now an upper bound; EarlyStopping may end training sooner.
model.fit(train_dataset, validation_data=validation_dataset, epochs=100, callbacks=callbacks)
model.save("malware_vs_benign_inceptionv3.h5", save_format='h5')

Found 6000 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Epoch 1/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 2/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 3/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 4/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 5/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 6/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 7/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 8/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 9/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 10/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 11/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 12/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 13/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 14/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 15/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 16/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 17/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 18/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 19/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 20/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 21/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 22/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 23/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 24/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 25/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 26/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 27/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 28/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 29/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 30/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 31/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 32/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 33/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 34/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 35/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 36/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 37/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 38/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 39/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 40/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 41/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 42/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 43/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 44/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 45/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 46/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 47/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 48/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 49/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 50/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 51/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 52/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 53/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 54/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 55/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 56/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 57/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 58/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 59/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 60/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 61/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 62/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 63/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 64/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 65/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 66/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 67/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 68/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 69/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 70/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 71/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 72/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 73/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 74/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 75/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 76/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 77/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 78/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 79/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 80/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 81/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 82/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 83/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 84/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 85/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 86/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 87/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 88/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 89/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 90/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 91/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 92/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 93/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 94/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 95/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 96/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 97/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 98/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 99/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets


Epoch 100/100



INFO:tensorflow:Assets written to: malware_vs_benign\assets


INFO:tensorflow:Assets written to: malware_vs_benign\assets




## Evaluating the model's performance on the test dataset

In [None]:
# Held-out test split, resized to the same 250x250 input the model was trained on.
test_dataset = image_dataset_from_directory(
    "dataset/test",
    image_size=(250, 250),
    batch_size=32,
)

# Reload the trained classifier from the HDF5 file written by the training cell.
model = load_model("malware_vs_benign_inceptionv3.h5")

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
evaluation_result = model.evaluate(test_dataset)

# Report the first two values with their labels.
for metric_label, metric_value in zip(("Loss:", "Accuracy:"), evaluation_result):
    print(metric_label, metric_value)

Found 1000 files belonging to 2 classes.
Loss: 0.25065019726753235
Accuracy: 0.9710000157356262


## Predicting the class of 10 randomly selected benign EXE images from the test dataset

In [None]:
# Spot-check the classifier on a random handful of benign test images.
path = "dataset/test/benign/"
all_files = os.listdir(path)

# Only PNG files are treated as EXE images.
image_files = [file for file in all_files if file.endswith('.png')]

# random.sample raises ValueError when asked for more items than are available,
# so cap the sample size at the number of images actually present.
selected_files = random.sample(image_files, min(10, len(image_files)))

if selected_files:
    # Load every selected image at the model's input size and stack them into a
    # single (N, 250, 250, 3) batch so the model is invoked once, not per image.
    batch = np.stack([
        img_to_array(load_img(os.path.join(path, file), target_size=(250, 250)))
        for file in selected_files
    ])
    predictions = model.predict(batch, verbose=0)

    for file, prediction in zip(selected_files, predictions):
        score = prediction[0]  # sigmoid output of the single output unit
        print(f"Filename: {file}")
        # Scores above 0.5 are reported as malware, all others as benign.
        if score > 0.5:
            print("Given EXE image is of a Malware file")
        else:
            print("Given EXE image is of a Benign file")
        print(f"Prediction: {score}\n")


Filename: Sourceforge_2514.png
Given EXE image is of a Benign file
Prediction: 6.017712486543675e-17

Filename: 2162.png
Given EXE image is of a Benign file
Prediction: 0.43861204385757446

Filename: 4c5a4bfbd6de5221c044b95c96fabc094fd854508471f9d7cb3890853ea318c3.png
Given EXE image is of a Benign file
Prediction: 7.40964134493538e-09

Filename: 2060.png
Given EXE image is of a Benign file
Prediction: 3.126108971280181e-31

Filename: d3dcb6b7c05940a0507e5ec8138a44a03c76bf2da34c757cb207c6eb4ab22b9b.png
Given EXE image is of a Benign file
Prediction: 3.426343653245567e-10

Filename: Sourceforge_6045.png
Given EXE image is of a Benign file
Prediction: 6.994156122562086e-32

Filename: CNET_247690.png
Given EXE image is of a Benign file
Prediction: 4.854671081366141e-14

Filename: CNET_19740.png
Given EXE image is of a Benign file
Prediction: 4.221456285569967e-18

Filename: Sourceforge_5349.png
Given EXE image is of a Benign file
Prediction: 1.751785048087722e-36

Filename: b493bf6d7085eb

## Predicting the class of 10 randomly selected malware EXE images from the test dataset

In [None]:
# Spot-check the classifier on a random handful of malware test images.
path = "dataset/test/malware/"
all_files = os.listdir(path)

# Only PNG files are treated as EXE images.
image_files = [file for file in all_files if file.endswith('.png')]

# random.sample raises ValueError when asked for more items than are available,
# so cap the sample size at the number of images actually present.
selected_files = random.sample(image_files, min(10, len(image_files)))

if selected_files:
    # Load every selected image at the model's input size and stack them into a
    # single (N, 250, 250, 3) batch so the model is invoked once, not per image.
    batch = np.stack([
        img_to_array(load_img(os.path.join(path, file), target_size=(250, 250)))
        for file in selected_files
    ])
    predictions = model.predict(batch, verbose=0)

    for file, prediction in zip(selected_files, predictions):
        score = prediction[0]  # sigmoid output of the single output unit
        print(f"Filename: {file}")
        # Scores above 0.5 are reported as malware, all others as benign.
        if score > 0.5:
            print("Given EXE image is of a Malware file")
        else:
            print("Given EXE image is of a Benign file")
        print(f"Prediction: {score}\n")

Filename: 494b1f3f7e3a2666dca7faffa85141bb7b7388094463cc45789c3e667a1437da.png
Given EXE image is of a Malware file
Prediction: 0.9990041851997375

Filename: 8ba7c267c1a89be94f8d966f67e68d7ee3de0a9266ce233ac8d87d7d45d5c9fe.png
Given EXE image is of a Malware file
Prediction: 0.9906071424484253

Filename: 04b7f03fc58b3015ad225eb057cf621c9da739bf722fa2a1f7bd8cee85928115.png
Given EXE image is of a Malware file
Prediction: 0.9507190585136414

Filename: 86aeb6e4392adb533d35a73871fec6ed3107e5dcccd1834314abdcf396b290da.png
Given EXE image is of a Malware file
Prediction: 0.9025408029556274

Filename: 6de580e7807b7c4d50b5541863af4a72430c866d256c66b183568ecae2ae8bf6.png
Given EXE image is of a Malware file
Prediction: 0.9916883111000061

Filename: 8e538b14db8de2230c908e7e7d56112675762fa9edacfbc79e165e436df9a02e.png
Given EXE image is of a Malware file
Prediction: 0.9651892185211182

Filename: 4fba51b0be53793387c15617115bbf3351e5cd11bdcf8b3bf29815882a14ab7e.png
Given EXE image is of a Malware f