In [2]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, BatchNormalization, Conv2D,MaxPool2D,Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
from tensorflow.python.client import device_lib
import itertools
import os 
import shutil
import random
import glob
import matplotlib.pyplot as plt
import warnings
from tensorflow.python.client import device_lib
import sklearn
import wandb
from wandb.keras import WandbCallback
import glob
import math
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, Callback, EarlyStopping

In [3]:
# Environment sanity check: report whether this TensorFlow build has CUDA
# support, then list the GPU devices TensorFlow can see.
cuda_build = tf.test.is_built_with_cuda()
print(cuda_build)
visible_gpus = tf.config.list_physical_devices('GPU')
print(visible_gpus)

True
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
# Hyperparameters and run metadata recorded with the Weights & Biases run.
# Later cells read learning_rate, epochs, batch_size and loss_function back
# through `config`.
run_settings = {
    "learning_rate": 0.0001,
    "epochs": 40,
    "batch_size": 2,
    "loss_function": "categorical_crossentropy",
    "architecture": "CNN",
    "dataset": "malwareDB",
}
run = wandb.init(project='vgg16', config=run_settings)
config = wandb.config

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mratkac99[0m (use `wandb login --relogin` to force relogin)
  warn("The `IPython.html` package has been deprecated since IPython 4.0. "


In [5]:
# Root folder holding the train/valid/test image directories.
# Set the MALWARE_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset the original location is used unchanged.
data_root = os.environ.get(
    'MALWARE_DATA_DIR',
    'C:/Users/Kacper/Desktop/inzynier/code/data/legit_vs_malware',
).rstrip('/\\')

train_path = f'{data_root}/train'
valid_path = f'{data_root}/valid'
test_path = f'{data_root}/test'

In [6]:
# Class sub-directory names; their order fixes the class indices (0..4).
CLASS_NAMES = ['cerberus', 'hydra', 'alien', 'other', 'legit']


def _make_batches(directory, shuffle=True):
    """Build a VGG16-preprocessed directory iterator over `directory`.

    Parameters
    ----------
    directory : str
        Folder containing one sub-folder per entry of CLASS_NAMES.
    shuffle : bool, default True
        Whether the iterator shuffles the sample order.

    Returns
    -------
    The iterator returned by ImageDataGenerator.flow_from_directory, yielding
    224x224 images in batches of `config.batch_size`.
    """
    generator = ImageDataGenerator(
        preprocessing_function=tf.keras.applications.vgg16.preprocess_input
    )
    return generator.flow_from_directory(
        directory=directory,
        target_size=(224, 224),
        classes=list(CLASS_NAMES),
        # Use the batch size logged to W&B instead of a second hard-coded value.
        batch_size=config.batch_size,
        shuffle=shuffle,
    )


train_batches = _make_batches(train_path)
valid_batches = _make_batches(valid_path)
# The test iterator must not shuffle: test_batches.classes is later compared
# position-by-position with the output of model.predict.
test_batches = _make_batches(test_path, shuffle=False)


Found 994 images belonging to 5 classes.
Found 141 images belonging to 5 classes.
Found 151 images belonging to 5 classes.


In [7]:
# Guard against a changed or incomplete dataset: check the image count of each
# split and that every split exposes the same five classes.
for split_batches, expected_images in (
    (train_batches, 994),
    (valid_batches, 141),
    (test_batches, 151),
):
    assert split_batches.n == expected_images
assert all(
    split_batches.num_classes == 5
    for split_batches in (train_batches, valid_batches, test_batches)
)

In [8]:
# Display names for the confusion-matrix axes, listed in class-index order.
cm_plot_labels = [
    'cerberus',
    'hydra',
    'alien',
    'other',
    'legit',
]

In [9]:
def calculateF1(cm, positive_class=4):
    """Compute the one-vs-rest F1 score of a single class from a confusion matrix.

    Parameters
    ----------
    cm : square 2-D sequence or array
        Confusion matrix indexed as ``cm[true_class][predicted_class]``
        (the layout produced by ``sklearn.metrics.confusion_matrix``).
    positive_class : int, default 4
        Index of the class treated as "positive"; every other class is
        pooled as "negative". Negative indices count from the end.

    Returns
    -------
    float
        ``2 * precision * recall / (precision + recall)`` for the positive
        class, or ``0.0`` when the class has no true positives (precision
        and/or recall would otherwise be zero or undefined).

    Raises
    ------
    IndexError
        If `positive_class` is not a valid index into `cm`.
    """
    n_classes = len(cm)
    # Indexing a range validates the index and normalises negative values.
    positive = range(n_classes)[positive_class]
    others = [idx for idx in range(n_classes) if idx != positive]

    TP = cm[positive][positive]
    # Column of the positive class: other classes predicted as positive.
    FP = sum(cm[idx][positive] for idx in others)
    # Row of the positive class: positive samples predicted as something else.
    FN = sum(cm[positive][idx] for idx in others)

    # With no true positives the harmonic mean is 0 by convention; returning
    # early also avoids the 0/0 divisions below.
    if TP == 0:
        return 0.0

    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    return 2 * ((precision * recall) / (precision + recall))

In [10]:
# Transfer learning: reuse VGG16 (constructed with its default arguments) as a
# frozen feature extractor and train only a new classification layer.
vgg16_model = tf.keras.applications.vgg16.VGG16()

# Copy every layer except the last one (the original output layer) into a
# Sequential model.
model = Sequential()
for layer in vgg16_model.layers[:-1]:
    model.add(layer)

# Freeze everything copied so far; the Dense head added below stays trainable.
for layer in model.layers:
    layer.trainable = False

# Size the softmax head from the data instead of a hard-coded 5 so it always
# matches the number of classes the generators produce.
model.add(Dense(units=train_batches.num_classes, activation='softmax'))

In [11]:
# Adam optimizer using the learning rate recorded in the W&B run config.
optimizer = Adam(learning_rate=config.learning_rate)

In [12]:
# Compile with the loss named in the W&B config. `metrics` is passed as a list,
# the form documented by Keras, rather than as a bare string.
model.compile(
    optimizer=optimizer,
    loss=config.loss_function,
    metrics=["accuracy"],
)

In [13]:
# Train the new head. `batch_size` is intentionally not passed: the generators
# already yield batches, and Keras documents that batch_size must not be given
# for generator / Sequence inputs.
# The returned History is kept so per-epoch metrics remain available locally.
history = model.fit(
    x=train_batches,
    validation_data=valid_batches,
    epochs=config.epochs,
    verbose=2,
    callbacks=[
        WandbCallback(
            data_type="image",
            validation_data=valid_batches,
            labels=cm_plot_labels,
        )
    ],
)

Epoch 1/40
497/497 - 16s - loss: 0.7247 - accuracy: 0.7495 - val_loss: 0.6780 - val_accuracy: 0.7092 - 16s/epoch - 32ms/step
Epoch 2/40
497/497 - 13s - loss: 0.3912 - accuracy: 0.8602 - val_loss: 0.5788 - val_accuracy: 0.7730 - 13s/epoch - 25ms/step
Epoch 3/40
497/497 - 12s - loss: 0.3106 - accuracy: 0.8883 - val_loss: 0.5262 - val_accuracy: 0.8014 - 12s/epoch - 25ms/step
Epoch 4/40
497/497 - 13s - loss: 0.2579 - accuracy: 0.9155 - val_loss: 0.4318 - val_accuracy: 0.8014 - 13s/epoch - 25ms/step
Epoch 5/40
497/497 - 12s - loss: 0.2261 - accuracy: 0.9256 - val_loss: 0.4364 - val_accuracy: 0.8511 - 12s/epoch - 24ms/step
Epoch 6/40
497/497 - 13s - loss: 0.1965 - accuracy: 0.9336 - val_loss: 0.4275 - val_accuracy: 0.8582 - 13s/epoch - 25ms/step
Epoch 7/40
497/497 - 12s - loss: 0.1731 - accuracy: 0.9447 - val_loss: 0.4920 - val_accuracy: 0.8369 - 12s/epoch - 24ms/step
Epoch 8/40
497/497 - 12s - loss: 0.1612 - accuracy: 0.9467 - val_loss: 0.4345 - val_accuracy: 0.8298 - 12s/epoch - 24ms/step


<keras.callbacks.History at 0x20b13e5b3d0>

In [14]:
# Run the trained model over the test iterator; the per-class scores are
# reduced to class indices with argmax in the cells below.
predictions = model.predict(
    x=test_batches,
    verbose=0,
)

In [15]:
# Predicted class index for each test sample (highest softmax score).
predicted_classes = np.argmax(predictions, axis=-1)

# Pass `labels` explicitly so the matrix always has one row/column per class,
# even if a class is absent from both the test labels and the predictions;
# calculateF1 indexes the matrix by fixed class position.
cm = confusion_matrix(
    y_true=test_batches.classes,
    y_pred=predicted_classes,
    labels=list(range(test_batches.num_classes)),
)

In [16]:
# Log the test-set F1 score and a confusion-matrix plot to the W&B run.
f1_value = calculateF1(cm)
confusion_plot = wandb.sklearn.plot_confusion_matrix(
    test_batches.classes,
    np.argmax(predictions, axis=-1),
    cm_plot_labels,
)
wandb.log({"F1_score": f1_value, "confusion_matrix": confusion_plot})


In [17]:
# Close the W&B run started by wandb.init above; the run summary shown in the
# cell output is printed by this call.
wandb.finish()

0,1
F1_score,▁
accuracy,▁▄▅▆▆▆▆▇▇▇▇▇▇▇▇█████████████████████████
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▅▇█▇▆▇▆█▆▇▆▇▆▇▇▇▆▇▆▆▇▇▆▆▆▇▇▇▇▇▆▇▇▇▇▇▇
val_loss,█▅▄▁▁▁▃▁▃▁▂▂▂▂▂▃▁▁▂▃▂▂▃▃▃▃▃▄▃▆▅▄▄▆▅▆▄▅▅▆

0,1
F1_score,0.83333
accuracy,0.99899
best_epoch,5.0
best_val_loss,0.42753
epoch,39.0
loss,0.0106
val_accuracy,0.85106
val_loss,0.60047
