## Fashion Dataset Download

In [None]:
# Google Drive
!gdown --id '1OqmmCIz1uwbj5RNkjpN_9Fnk6rdjJRES' --output fashion.zip

# Unzip the dataset.
# This may take some time.
!unzip -q fashion.zip


In [66]:
# Import necessary packages.
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import torchvision.models as model
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision.datasets import DatasetFolder
import struct

import pandas as pd
import torch.hub
from typing import Any

from tqdm.auto import tqdm

## 原始模型表現


In [31]:
train_csv = pd.read_csv("fashion-mnist_train.csv")
test_csv = pd.read_csv("fashion-mnist_test.csv")

In [32]:
train_csv.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,pixel11,pixel12,pixel13,pixel14,pixel15,pixel16,pixel17,pixel18,pixel19,pixel20,pixel21,pixel22,pixel23,pixel24,pixel25,pixel26,pixel27,pixel28,pixel29,pixel30,pixel31,pixel32,pixel33,pixel34,pixel35,pixel36,pixel37,pixel38,pixel39,...,pixel745,pixel746,pixel747,pixel748,pixel749,pixel750,pixel751,pixel752,pixel753,pixel754,pixel755,pixel756,pixel757,pixel758,pixel759,pixel760,pixel761,pixel762,pixel763,pixel764,pixel765,pixel766,pixel767,pixel768,pixel769,pixel770,pixel771,pixel772,pixel773,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,0,0,105,92,101,107,100,132,0,0,2,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,150,...,211,220,214,74,0,255,222,128,0,0,0,0,0,0,0,0,0,44,12,0,0,40,134,162,191,214,163,146,165,79,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,0,114,183,112,55,23,72,102,165,160,28,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,24,188,163,93,...,171,249,207,197,202,45,0,3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,22,21,25,69,52,45,74,39,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,0,0,0,46,0,21,68,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25,187,189,...,230,237,229,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,116,112,136,147,144,121,102,63,0,0,0,0,0,0,0,0,0,0


In [33]:
class FashionDataset(Dataset):
  def __init__(self, data, transform = None):
    """Method to initilaize variables.""" 
    self.fashion_MNIST = list(data.values)
    self.transform = transform
    
    label = []
    image = []
    
    for i in self.fashion_MNIST:
      label.append(i[0])
      image.append(i[1:])
    self.labels = np.asarray(label)
    print(self.labels)
    self.images = np.asarray(image).reshape(-1, 28, 28, 1).astype('float32')

  def __getitem__(self, index):
    label = self.labels[index]
    image = self.images[index]
    
    if self.transform is not None:
      image = self.transform(image)

    return image, label

  def __len__(self):
    return len(self.images)

In [34]:
batch_size = 100
train_set = FashionDataset(train_csv, transform=transforms.Compose([transforms.ToTensor()]))
test_set = FashionDataset(test_csv, transform=transforms.Compose([transforms.ToTensor()]))

train_loader = DataLoader(train_set, batch_size=batch_size, num_workers=8, pin_memory=True)
test_loader = DataLoader(train_set, batch_size=batch_size, num_workers=8, pin_memory=True)

[2 9 6 ... 8 8 7]
[0 1 2 ... 8 8 1]


  cpuset_checked))


In [35]:
def output_label(label):
  output_mapping = {
    0: "T-shirt/Top",
    1: "Trouser",
    2: "Pullover",
    3: "Dress",
    4: "Coat", 
    5: "Sandal", 
    6: "Shirt",
    7: "Sneaker",
    8: "Bag",
    9: "Ankle Boot"
    }
  input = (label.item() if type(label) == torch.Tensor else label)
  return output_mapping[input]

In [115]:
class AlexNet(nn.Module):
  def __init__(self, num_classes):
    super(AlexNet, self).__init__()
    self.features = nn.Sequential(
      nn.Conv2d(1, 32, kernel_size=3, padding=2),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=2, stride=2),
      nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=2, stride=2),
      nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
      nn.ReLU(inplace=True),
      nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
      nn.ReLU(inplace=True),
      nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=3, stride=2),
    )
    self.avgpool = nn.AdaptiveAvgPool2d((3, 3))
    self.classifier = nn.Sequential(
      nn.Dropout(),
      nn.Linear(256 * 3 * 3, 1024),
      nn.ReLU(inplace=True),
      nn.Dropout(),
      nn.Linear(1024, 512),
      nn.ReLU(inplace=True),
      nn.Linear(512, num_classes),
    )

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    x = self.features(x)
    x = self.avgpool(x)
    x = torch.flatten(x, 1)
    x = self.classifier(x)
    return x


In [116]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AlexNet(num_classes=10).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(dataloader, model, loss_fn, optimizer):
  size = len(dataloader.dataset)
  for batch, (x,y) in enumerate(dataloader):
    x, y = x.to(device), y.to(device)
    pred = model(x)
    loss = loss_fn(pred,y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  if batch%100==0:
    loss, current = loss.item(), batch * len(x)
    print(f"loss: {loss: >7f}[{curret: >5d}/{size: >5d}]")

def _test(dataloader, model):
  size = len(dataloader.dataset)
  model.eval()
  test_loss, correct = 0,0
  with torch.no_grad():
    for x, y in dataloader:
      x, y = x.to(device), y.to(device)
      pred = model(x)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1)==y).type(torch.float).sum().item()
  test_loss /= size
  correct /= size
  print(f"Test error: \n Acc: {correct}, Avg Loss:{test_loss}")

epoch = 10
for t in range(epoch):
  print(f"Epoch {t+1}\n-----------------")
  train(train_loader, model, loss_fn, optimizer)
  _test(test_loader, model)

Epoch 1
-----------------


  cpuset_checked))


Test error: 
 Acc: 0.7200166666666666, Avg Loss:0.007641817227005959
Epoch 2
-----------------
Test error: 
 Acc: 0.8028333333333333, Avg Loss:0.005572507152954737
Epoch 3
-----------------
Test error: 
 Acc: 0.8359166666666666, Avg Loss:0.004719647353390853
Epoch 4
-----------------
Test error: 
 Acc: 0.85215, Avg Loss:0.004239880166699489
Epoch 5
-----------------
Test error: 
 Acc: 0.8630666666666666, Avg Loss:0.0039046267439921695
Epoch 6
-----------------
Test error: 
 Acc: 0.8693333333333333, Avg Loss:0.003669848461449146
Epoch 7
-----------------
Test error: 
 Acc: 0.8754, Avg Loss:0.0034875017593304315
Epoch 8
-----------------
Test error: 
 Acc: 0.8809333333333333, Avg Loss:0.003330680879453818
Epoch 9
-----------------
Test error: 
 Acc: 0.8850666666666667, Avg Loss:0.003209066800524791
Epoch 10
-----------------
Test error: 
 Acc: 0.8889, Avg Loss:0.0031022661859790484


## 取得目標malware執行檔轉浮點數

In [117]:
def toFloat(file):
    byteArray = []
    floatArray = []
    with open(file, "rb") as f:
        while f.read(3):
          byte = f.read(3)
          byte =  bytes([60]) + byte
          byte = byte[::-1]
          byteArray.append(byte)
    
    for byte in byteArray:
        unpackBytes = struct.unpack('f', byte)[0]
        floatArray.append(unpackBytes)
        
    return floatArray

In [118]:
file = 'Virus.vbs'
malFloat = toFloat(file)

## 將parameter換成malware

In [119]:
model_ft = AlexNet(num_classes=10).to(device)
model_ft.parameters()
model_dict = model_ft.state_dict()
model_dict.keys()

odict_keys(['features.0.weight', 'features.0.bias', 'features.3.weight', 'features.3.bias', 'features.6.weight', 'features.6.bias', 'features.8.weight', 'features.8.bias', 'features.10.weight', 'features.10.bias', 'classifier.1.weight', 'classifier.1.bias', 'classifier.4.weight', 'classifier.4.bias', 'classifier.6.weight', 'classifier.6.bias'])

In [120]:
fullyConn = []
for i in model_dict.keys():
    if 'classifier' in i and 'weight' in i:
        fullyConn.append(i)
fullyConn

['classifier.1.weight', 'classifier.4.weight', 'classifier.6.weight']

In [132]:
layer = []
contaminated = []
i = 0
tensor_count = 0
done_check = 0
for tensor in nn.Linear(512, 10).weight:
  par_count = 0
  tensor_count += 1
  for par in tensor:
    par_count += 1
    if i>=len(malFloat):
      done_check = 1
      break
    if abs(par - malFloat[i]) < 0.001:
      layer.append("fc1")
      par = malFloat[i]
      i += 1
      contaminated.append([tensor_count, par_count])

if done_check!=1:
  tensor_count = 0
  for tensor in nn.Linear(1024, 512).weight:
    par_count = 0
    tensor_count += 1
    for par in tensor:
      par_count += 1
      if i>=len(malFloat):
        break
      if abs(par - malFloat[i]) < 0.001:
        layer.append("fc0")
        par = malFloat[i]
        i += 1
        contaminated.append([tensor_count, par_count])



In [133]:
Infected_Neurons = pd.DataFrame({
    'layer':layer,
    'loc':contaminated
})
Infected_Neurons.to_csv('Infected_Neurons.csv',index = None)

### 測試替換後模型表現

In [129]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AlexNet(num_classes=10).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(dataloader, model, loss_fn, optimizer):
  size = len(dataloader.dataset)
  for batch, (x,y) in enumerate(dataloader):
    x, y = x.to(device), y.to(device)
    pred = model(x)
    loss = loss_fn(pred,y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  if batch%100==0:
    loss, current = loss.item(), batch * len(x)
    print(f"loss: {loss: >7f}[{curret: >5d}/{size: >5d}]")

def _test(dataloader, model):
  size = len(dataloader.dataset)
  model.eval()
  test_loss, correct = 0,0
  with torch.no_grad():
    for x, y in dataloader:
      x, y = x.to(device), y.to(device)
      pred = model(x)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1)==y).type(torch.float).sum().item()
  test_loss /= size
  correct /= size
  print(f"Test error: \n Acc: {correct}, Avg Loss:{test_loss}")

epoch = 5
for t in range(epoch):
  print(f"Epoch {t+1}\n-----------------")
  train(train_loader, model, loss_fn, optimizer)
  _test(test_loader, model)

Epoch 1
-----------------


  cpuset_checked))


Test error: 
 Acc: 0.70345, Avg Loss:0.00797329451640447
Epoch 2
-----------------
Test error: 
 Acc: 0.7935333333333333, Avg Loss:0.005691644882162412
Epoch 3
-----------------
Test error: 
 Acc: 0.82835, Avg Loss:0.004877896945178509
Epoch 4
-----------------
Test error: 
 Acc: 0.8460666666666666, Avg Loss:0.00437354266444842
Epoch 5
-----------------
Test error: 
 Acc: 0.8582333333333333, Avg Loss:0.004017074384788672


In [135]:
torch.save(model,'Infected_Model.pt')