In [None]:
import torch
import torch.nn as nn
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
import time
import sys
import os
import struct
import hashlib
import json
import math
from functools import reduce
from collections import Counter

print(torch.cuda.is_available())

In [2]:
# sample info (SHA1)
! sha1sum Malware/*

08b9f5874ad1dc3ee1093c9cd08737645f33f13f  Malware/Artemis
8397c1e1f0b9d53a114850f6b3ae8c1f2b2d1590  Malware/Destover
c7d6151d7831d8b75ae6760c3006de58ae2d05e5  Malware/Electro
f24b619f8938a49402e1ca615d9326e4142de598  Malware/en.py
61fab1b8451275c7fd580895d9c68e152ff46417  Malware/EquationDrug
04cf187ca093140f4c72ba0838dc8a25e059023d  Malware/EternalRock
27d99fbca067f478bb91cdbcb92f13a828b00859  Malware/Jigsaw
06d9f835efd1c05323f6a3abdf66e6be334e47c4  Malware/Lazarus
4080bb3a28c2946fd9b72f6b51fe15de74cbb1e1  Malware/Mamba
28976d0de5260fcdc620240bbad78424addd6232  Malware/Mirai
2a72d49cfa0e5a497cd957b0365f7d472f626c26  Malware/Nimda
1468417788f4e006b8983add7ab339e2f661b620  Malware/NSIS
b491c14d8cfb48636f6095b7b16555e9a575d57f  Malware/OnionDuke
a11c17e11abf11722efedc67eee143596d55b008  Malware/Pay2Key
d1c62ac62e68875085b62fa651fb17d4d7313887  Malware/Petya
d7ac7f44c67b894c3ecc244ff810e636e9449f63  Malware/sha1sum
b3074b26b346cb76605171ba19616baf821acf66  Malware/Stuxnet
52c8cbd0545caab7

# original model performance on ImageNet

In [3]:
# Per-channel normalisation with the mean / std values conventionally used
# for ImageNet-trained torchvision models.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Evaluation pipeline applied to every image:
# resize -> 224 centre crop -> tensor conversion -> normalisation.
preprocessing = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]
)

def evaluate(test_iter, net):
    """Return the top-1 accuracy of ``net`` over all batches of ``test_iter``.

    Args:
        test_iter: iterable yielding ``(inputs, labels)`` tensor pairs. The
            first dimension of ``labels`` is taken as the batch size.
        net: model to evaluate. It is switched to eval mode and left there.
            When CUDA is available every batch is moved to the GPU, so the
            model is expected to already live on the GPU in that case.

    Returns:
        float: fraction of samples whose arg-max prediction (over dim 1)
        equals the label.

    Raises:
        ValueError: if ``test_iter`` yields no samples (previously this ended
            in a bare ``ZeroDivisionError``).
    """
    use_cuda = torch.cuda.is_available()
    correct, total = 0, 0
    # Loop-invariant: inference mode only has to be selected once.
    net.eval()
    with torch.no_grad():
        for x, y in test_iter:
            if use_cuda:
                x = x.cuda()
                y = y.cuda()
            y_pred = net(x)
            # .item() already copies the scalar to the host.
            correct += (y_pred.argmax(dim=1) == y).sum().item()
            total += y.shape[0]
    if total == 0:
        raise ValueError('evaluate() received no samples from test_iter')
    return correct / total

In [4]:
# ImageNet validation split, read from the local 'Imagenet' directory and
# passed through the evaluation preprocessing pipeline.
imagenet_data = torchvision.datasets.ImageNet(
    'Imagenet',
    split='val',
    transform=preprocessing,
)

# Fixed-order loader shared by every accuracy measurement in this notebook.
test_iter = torch.utils.data.DataLoader(
    imagenet_data,
    batch_size=256,
    shuffle=False,
    num_workers=1,
)

In [5]:
# ResNet model
# Build ResNet-50 and load its weights from a local checkpoint file rather
# than through the `pretrained=True` download path.
resnet_net = torchvision.models.resnet50()  # pretrained=True)
# NOTE(review): torch.load unpickles the file, which can execute code stored
# in it -- only load checkpoints obtained from a trusted source.
# load_state_dict(...) is the last expression of the cell, so its result is
# what the cell displays.
resnet_net.load_state_dict(torch.load('models/resnet50-19c8e357.pth'))
# resnet_model = resnet_net.state_dict()
# print(resnet_net)  # see model structure

<All keys matched successfully>

In [6]:
# time0 = time.time()
# Baseline accuracy of the unmodified model.
# The isinstance guard keeps this cell idempotent: without it, re-running the
# cell wraps an already wrapped model in another nn.DataParallel.
if torch.cuda.is_available() and not isinstance(resnet_net, nn.DataParallel):
    resnet_net = nn.DataParallel(resnet_net).cuda()
print(evaluate(test_iter, resnet_net))
# print(time.time() - time0)

0.76132


# Embed malware in the model

In [7]:
# number of neurons in a layer
def mul(nums):
    """Return the product of all values in ``nums``.

    Used to turn a tensor shape into its element count.

    Args:
        nums: iterable of numbers, e.g. ``list(tensor.shape)``.

    Returns:
        The product of the values. An empty iterable yields 1, which is the
        element count of a 0-dim (scalar) tensor; without the initial value
        ``reduce`` raises ``TypeError`` on empty input.
    """
    return reduce(lambda x, y: x * y, nums, 1)

In [8]:
# Recursively traverse the neurons and parameters in the layer, 
# and replace 2 bytes of each parameter
def layer_op(layer):
    """Overwrite the last two bytes of each scalar in ``layer`` with payload bytes.

    Walks ``layer`` recursively along its first dimension until the elements
    are scalars. Each scalar is packed as a big-endian float32 (``'!f'``);
    its first two bytes are kept and its last two bytes are replaced by the
    next two bytes of the module-level ``malware_bytes``.

    Module-level state used:
        malware_bytes: the payload, sliced two bytes at a time.
        count: number of scalars rewritten so far; ``count * 2`` is the
            current payload offset. Incremented here.

    Args:
        layer: tensor to rewrite in place. It is iterated directly, so it
            needs at least one dimension.

    Returns:
        tuple: ``(layer, count)`` -- the tensor that was passed in and the
        updated global counter.
    """
    global malware_bytes
    global count        # parameters replaced in the net
#     global neurons      # parameters in each layer
    # Position of the next scalar to rewrite at this nesting level.
    i = 0
    for sub_layer in layer:
        if sub_layer.dim():
            # Not a scalar yet: descend one dimension. The return value is
            # ignored, so this relies on sub_layer sharing storage with
            # `layer` -- TODO confirm for the torch version in use.
            layer_op(sub_layer)
        else:
            # Next two payload bytes; shorter (or empty) at the end of the payload.
            mb = malware_bytes[count*2:count*2+2]
            if mb:
                # Odd-length payload: pad the final chunk with a zero byte.
                while len(mb) < 2:
                    mb += b'\x00'
                parameter = struct.pack('!f', layer[i].data.item())
                # Big-endian layout: the first two bytes carry the sign, the
                # exponent and the top mantissa bits; only the low-order
                # mantissa bytes are replaced.
                new_parameter = parameter[:2] + mb
                layer[i] = torch.tensor(struct.unpack('!f', new_parameter))
                i += 1
                count += 1
            else:
                # Payload exhausted. This only leaves the loop of the current
                # level; enclosing levels keep iterating and each further
                # recursive call ends at its first element.
                break
    return layer, count


In [11]:
# embedding

# Module-level state shared by embed_malware() and layer_op().
# Initialised as bytes (not a list) so the placeholder has the same type as
# the file contents that embed_malware() stores here.
malware_bytes = b''
# Number of parameters rewritten so far.
count = 0
# neurons = 0

def embed_malware(model, path, malware):
    '''Embed the bytes of the file ``malware`` into a saved model's parameters.

    usage: embed_malware('resnet50', 'resnet50-19c8e357.pth', 'malware_path')

    The parameter names of ``torchvision.models.<model>`` are visited in
    reverse order and each tensor found in the checkpoint is passed to
    layer_op() until the whole file has been consumed. The modified
    state dict is then written to ``models_evil/<model>_embedded.pth``.

    Args:
        model: name of a constructor inside ``torchvision.models``
            (dotted names such as ``'segmentation.fcn_resnet50'`` are
            resolved attribute by attribute).
        path: path of the checkpoint (state dict) to modify.
        malware: path of the file whose bytes are embedded.

    Returns:
        None. Progress is reported with print().

    Raises:
        AssertionError: if ``path`` or ``malware`` is not an existing file.
        AttributeError: if ``model`` is not found in ``torchvision.models``.
        Any error raised by torch.save() now propagates; it used to be
        swallowed, leaving no file and no message.

    Module-level state: overwrites ``malware_bytes`` and resets/advances
    ``count``, both of which layer_op() reads.
    '''
    global malware_bytes
    global count        # parameters replaced in the net

    assert os.path.isfile(path)
    assert os.path.isfile(malware)

    # Context manager closes the handle even if read() fails.
    with open(malware, 'rb') as payload_file:
        malware_bytes = payload_file.read()

    # Plain attribute lookup instead of eval(): the model name can no longer
    # be used to execute arbitrary expressions.
    constructor = torchvision.models
    for attr in model.split('.'):
        constructor = getattr(constructor, attr)
    sub_net = constructor()

    # NOTE(review): torch.load unpickles `path`, which can execute code stored
    # in the file -- only use checkpoints from a trusted source.
    sub_model = torch.load(path)
    count = 0
    layer_names = list(sub_net.state_dict())
    layer_names.reverse()

    embedded = False
    with torch.no_grad():
        for layer in layer_names:
            # Keys the architecture defines but the checkpoint lacks (e.g.
            # `num_batches_tracked` in older files) and 0-dim tensors, which
            # cannot be iterated, are skipped explicitly.
            if layer not in sub_model or sub_model[layer].dim() == 0:
                continue
            try:
                sub_model[layer], count = layer_op(sub_model[layer])
            except Exception as e:
                # Best effort, as before, but no longer silent.
                print(f'skipping {layer}: {e!r}')
                continue
            if count * 2 >= len(malware_bytes):
                embedded = True
                break

    if not embedded:
        print(f'model too small: only {count * 2} of {len(malware_bytes)} bytes fit, nothing saved')
        return

    # Saving happens outside the try block so that I/O errors (for example a
    # missing models_evil/ directory) are reported instead of swallowed.
    torch.save(sub_model, f'models_evil/{model}_embedded.pth')
    print(f'evil model saved at models_evil/{model}_embedded.pth')
    print(f'neurons replaced in the model: {count}')

In [10]:
# Embed the Lazarus sample into the ResNet-50 checkpoint.
# NOTE(review): the output recorded under this cell reports a `_malware.pth`
# path and per-layer messages that the embed_malware() defined in this notebook
# does not print, and this cell's execution count precedes the definition's --
# the output appears to come from an earlier version; re-run to refresh it.
embed_malware('resnet50', 'models/resnet50-19c8e357.pth', 'Malware/Lazarus')

layer4.2.bn3.num_batches_tracked 'layer4.2.bn3.num_batches_tracked'
layer4.2.bn2.num_batches_tracked 'layer4.2.bn2.num_batches_tracked'
layer4.2.bn1.num_batches_tracked 'layer4.2.bn1.num_batches_tracked'
layer4.1.bn3.num_batches_tracked 'layer4.1.bn3.num_batches_tracked'
layer4.1.bn2.num_batches_tracked 'layer4.1.bn2.num_batches_tracked'
layer4.1.bn1.num_batches_tracked 'layer4.1.bn1.num_batches_tracked'
evil model saved at models_evil/resnet50_malware.pth
neurons replaced in the model: 10455831


In [12]:
# evaluate the performance of the malware-embedded models
# sub_model: state_dict (parameters)
# sub_net: structure

def embed_evaluate(model, path):
    '''Evaluate the accuracy of an embedded model and print it.

    usage: embed_evaluate('resnet50', 'resnet50_embedded.pth')

    Args:
        model: name of a constructor inside ``torchvision.models``
            (dotted names are resolved attribute by attribute).
        path: path of the state dict to load into that architecture.

    Returns:
        None. Prints ``"<model>, <accuracy>"``.

    Raises:
        AssertionError: if ``path`` is not an existing file.
        AttributeError: if ``model`` is not found in ``torchvision.models``.

    Reads the module-level ``test_iter`` as the evaluation data.
    '''
    assert os.path.isfile(path)
    # NOTE(review): torch.load unpickles `path`, which can execute code stored
    # in the file -- only use checkpoints from a trusted source.
    sub_model = torch.load(path)

    # Plain attribute lookup instead of eval(): the model name can no longer
    # be used to execute arbitrary expressions.
    constructor = torchvision.models
    for attr in model.split('.'):
        constructor = getattr(constructor, attr)
    sub_net = constructor()

    sub_net.load_state_dict(sub_model)
    if torch.cuda.is_available():
        sub_net = nn.DataParallel(sub_net).cuda()
    acc = evaluate(test_iter, sub_net)
    print(f"{model}, {acc}")


In [13]:
# Accuracy of the modified checkpoint on the same validation loader; compare
# the printed value with the baseline printed by the earlier evaluation cell.
embed_evaluate('resnet50', 'models_evil/resnet50_embedded.pth')

resnet50, 0.76136


In [14]:
# extract malware from model
def get_bytes(layer):
    """Collect the last two bytes of each scalar in ``layer``.

    Walks ``layer`` recursively along its first dimension until the elements
    are scalars. Each scalar is packed as a big-endian float32 (``'!f'``) and
    its last two bytes are appended to the module-level list
    ``malware_bytes_ext``.

    Module-level state used:
        malware_bytes_ext: list of 2-byte chunks, appended to here.
        count: number of scalars read so far, incremented here.
        leng_mal: target length in bytes; reading stops once the collected
            chunks hold more than this many bytes.

    Args:
        layer: tensor to read. It is iterated directly, so it needs at least
            one dimension.

    Returns:
        int: the updated global ``count``.
    """
    global malware_bytes_ext
    global count
    global leng_mal
    # Position of the next scalar to read at this nesting level.
    i = 0
    for sub_layer in layer:
        if sub_layer.dim():
            # Not a scalar yet: descend one dimension.
            get_bytes(sub_layer)
        else:
            parameter = struct.pack('!f', layer[i].data.item())
            # Keep only the two low-order bytes of the big-endian encoding.
            malware_bytes_ext.append(parameter[2:])
            count += 1
            i += 1
        # Each list entry holds two bytes, hence the factor 2. Checked after
        # every element and after every recursive call, so all nesting
        # levels stop once enough bytes have been collected.
        if len(malware_bytes_ext) * 2 > leng_mal:
            break
    return count

In [15]:
# extract Lazarus sample from ResNet
sub_net = torchvision.models.resnet50()
# NOTE(review): torch.load unpickles the file, which can execute code stored
# in it -- only load checkpoints from a trusted source.
sub_model = torch.load('models_evil/resnet50_embedded.pth')
# Number of layers (counted from the end of the network) to skip before reading.
layer_protect = 0
# Length of the embedded file in bytes.
# TODO(review): hard-coded; presumably the size of Malware/Lazarus -- consider
# deriving it with os.path.getsize().
leng_mal = 20911661
malware_bytes_ext = []
count = 0

# Same traversal order as the embedding step: parameter names, last one first.
layer_names = list(sub_net.state_dict())
layer_names.reverse()

for layer in layer_names:
    if layer_protect:
        layer_protect -= 1
        continue
    # Keys missing from the checkpoint and 0-dim tensors (which cannot be
    # iterated) are skipped explicitly instead of through a blanket except.
    if layer not in sub_model or sub_model[layer].dim() == 0:
        continue
    try:
        count = get_bytes(sub_model[layer])
    except Exception as e:
        # Best effort, as before, but no longer silent.
        print(f'skipping {layer}: {e!r}')
        continue
    # malware_bytes_ext holds 2-byte chunks, so convert to bytes before
    # comparing with leng_mal. The previous chunk-count comparison never
    # triggered, so every remaining layer was still visited.
    if len(malware_bytes_ext) * 2 >= leng_mal:
        break

# Drop the padding byte (if any) by truncating to the known length.
mb = b''.join(malware_bytes_ext)[:leng_mal]
sha256sum = hashlib.sha256(mb).hexdigest()
print("SHA256 of sample extracted from model:", sha256sum)
with open('extracted_malware', 'wb') as w:
    w.write(mb)

# Hash of the original file, for comparison with the value printed above.
with open('Malware/Lazarus', 'rb') as f:
    mb0 = f.read()
sha256sum = hashlib.sha256(mb0).hexdigest()
print("SHA256 of original sample:", sha256sum)

SHA256 of sample extracted from model: 2ab58b7ce583402bf4cbc90bee643ba5f9503461f91574845264d4f7e3ccb390
SHA256 of original sample: 2ab58b7ce583402bf4cbc90bee643ba5f9503461f91574845264d4f7e3ccb390
