In [None]:
# Install OpenFL from its GitHub repository, then the workflow-interface
# requirements (expects requirements_workflow_interface.txt in the working directory).
!pip install git+https://github.com/intel/openfl.git
!pip install -r requirements_workflow_interface.txt

# Google Colab setup: install the requirements straight from the OpenFL repository
# and set the USERNAME environment variable.
# NOTE(review): these lines are currently active; comment them out when not on Colab.
!pip install -r https://raw.githubusercontent.com/intel/openfl/develop/openfl-tutorials/experimental/requirements_workflow_interface.txt
import os
os.environ["USERNAME"] = "colab"

In [None]:
# Clone the repository that contains the image archive (Images/64x64.rar) used
# below, and install rarfile, which is needed to unpack it.
!git clone 'https://github.com/Luis-P-Duarte/Projeto-IA-22-23-Malware-Android.git'
!pip install rarfile


Cloning into 'Projeto-IA-22-23-Malware-Android'...
remote: Enumerating objects: 227, done.
remote: Counting objects: 100% (39/39), done.
remote: Compressing objects: 100% (26/26), done.
remote: Total 227 (delta 26), reused 14 (delta 12), pack-reused 188
Receiving objects: 100% (227/227), 130.71 MiB | 37.49 MiB/s, done.
Resolving deltas: 100% (100/100), done.
Collecting rarfile
  Downloading rarfile-4.1-py3-none-any.whl (28 kB)
Installing collected packages: rarfile
Successfully installed rarfile-4.1


In [None]:
import rarfile

# Path to the .rar archive inside the cloned repository
rar_path = '/content/Projeto-IA-22-23-Malware-Android/Images/64x64.rar'

# Extract the .rar archive into /content/data
with rarfile.RarFile(rar_path, 'r') as rf:
    rf.extractall('/content/data')

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder
from sklearn.model_selection import train_test_split

# n_epochs = 3
# Mini-batch sizes (number of samples per DataLoader batch).
batch_size_train = 32
batch_size_test = 32
# Training progress is printed every `log_interval` batches.
log_interval = 2


# Global RNG seeding is left disabled (the two seed lines are commented out).
# random_seed = 1
# Turn cuDNN off for all subsequent torch operations.
torch.backends.cudnn.enabled = False
# torch.manual_seed(random_seed)

# Custom dataset class
class CustomDataset(Dataset):
    """Image dataset that delegates everything to a torchvision ``ImageFolder``.

    Args:
        root_dir: Directory handed to ``ImageFolder``.
        transform: Optional transform forwarded unchanged to ``ImageFolder``.
    """

    def __init__(self, root_dir, transform=None):
        image_folder = ImageFolder(root_dir, transform=transform)
        self.dataset = image_folder

    def __len__(self):
        """Return the number of items held by the wrapped dataset."""
        wrapped = self.dataset
        return len(wrapped)

    def __getitem__(self, idx):
        """Return whatever the wrapped dataset yields at position ``idx``."""
        wrapped = self.dataset
        return wrapped[idx]
# Define transformations: resize to 64x64, convert to a tensor, then normalise
# each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load dataset
datasetfull = CustomDataset(root_dir='/content/data/64x64', transform=transform)

# Dedicated, seeded generator for the two random splits below, so that the kept
# subset and the train/validation partition are identical on every run. The
# global torch RNG is left untouched.
split_generator = torch.Generator().manual_seed(0)

# Size of the portion of the data that is kept (20%)
desired_size = int(0.2 * len(datasetfull))

# Size of the remaining portion, which is discarded
remaining_size = len(datasetfull) - desired_size

# Draw the portion of the data that is kept
dataset, _ = random_split(datasetfull, [desired_size, remaining_size],
                          generator=split_generator)

# Split dataset into train (70%) and validation (30%) sets
train_size = int(0.7 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size],
                                          generator=split_generator)


In [None]:
# Define your model
class MyModel(nn.Module):
    """Small CNN classifier: 5x5 convolution -> max-pool -> two linear layers.

    The defaults reproduce the original fixed architecture (3-channel 64x64
    inputs, 5 output logits). ``forward`` returns raw logits; no softmax is
    applied.

    Args:
        num_classes: Number of output logits. Defaults to 5.
        in_channels: Number of channels of the input images. Defaults to 3.
        image_size: Height and width of the (square) input images. Defaults to 64.

    Raises:
        ValueError: If ``image_size`` is too small for the convolution and
            pooling layers to produce at least one output pixel.
    """

    def __init__(self, num_classes=5, in_channels=3, image_size=64):
        super().__init__()
        # The unpadded 5x5 convolution (stride 1) removes 4 pixels per side
        # length and the 2x2 pool with stride 1 removes one more.
        feature_size = image_size - 5
        if feature_size < 1:
            raise ValueError(
                'image_size must be at least 6, got {}'.format(image_size))
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=5, stride=1)
        self.pool = nn.MaxPool2d(2, stride=1)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * feature_size * feature_size, 500)
        self.fc2 = nn.Linear(500, num_classes)

    def forward(self, x):
        """Map a batch of images to a batch of ``num_classes`` logits."""
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


def inference(network, test_loader):
    """Evaluate ``network`` on ``test_loader``, print a summary, return accuracy.

    The network is expected to output raw class logits, so the loss is computed
    with cross-entropy (``nll_loss`` on logits yields meaningless negative
    values). Per-sample losses are summed and divided by the number of samples,
    so the printed value is a true per-sample average.

    Args:
        network: Model mapping a batch of inputs to a batch of class logits.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Fraction of correctly classified samples, or ``0.0`` when the loader
        yields no samples.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        network = network.to('cuda:0')
    network.eval()
    test_loss = 0.0
    correct = 0
    total_samples_val = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            if use_cuda:
                inputs = inputs.to('cuda:0')
                labels = labels.to('cuda:0')
            outputs = network(inputs)
            # Sum (not mean) per batch so dividing by the sample count is correct.
            test_loss += F.cross_entropy(outputs, labels, reduction='sum').item()
            _, predicted_val = torch.max(outputs, 1)
            total_samples_val += labels.size(0)
            correct += (predicted_val == labels).sum().item()

    if total_samples_val == 0:
        # Nothing was evaluated; report zeros instead of dividing by zero.
        test_loss = 0.0
        accuracy = 0.0
    else:
        test_loss /= total_samples_val
        accuracy = correct / total_samples_val

    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, total_samples_val, 100. * accuracy))

    return accuracy


In [None]:
from copy import deepcopy

from openfl.experimental.interface import FLSpec, Aggregator, Collaborator
from openfl.experimental.runtime import LocalRuntime
from openfl.experimental.placement import aggregator, collaborator


def FedAvg(models, weights=None):
    """Return a new model whose parameters are the average of ``models``.

    Every entry of the state dict is averaged element-wise across the models and
    cast back to the dtype it has in the first model. The result is a copy of the
    first model, so none of the input models has its parameters overwritten
    (the inputs are still moved to the CPU).

    Args:
        models: Non-empty sequence of models with identical state-dict layouts.
        weights: Optional per-model weights, forwarded to ``numpy.average``.

    Returns:
        A new model on the CPU holding the averaged state.

    Raises:
        ValueError: If ``models`` is empty.
    """
    if not models:
        raise ValueError('FedAvg requires at least one model')
    models = [model.to('cpu') for model in models]
    state_dicts = [model.state_dict() for model in models]
    # Copy so that the first collaborator's model is not aliased by the result.
    new_model = deepcopy(models[0])
    averaged_state = {}
    for key, reference in state_dicts[0].items():
        stacked = np.stack([state[key].numpy() for state in state_dicts])
        mean = np.average(stacked, axis=0, weights=weights)
        # np.average may promote to float64 (or return a scalar); restore dtype.
        averaged_state[key] = torch.from_numpy(np.asarray(mean)).to(reference.dtype)
    new_model.load_state_dict(averaged_state)
    return new_model

In [None]:
class FederatedFlow(FLSpec):
    """Federated-averaging flow executed for ``rounds`` rounds.

    In each round every collaborator validates the aggregated model, trains it
    for one pass over its ``train_loader``, and validates the locally trained
    model; the aggregator then averages the collaborator models with ``FedAvg``.

    Args:
        model: Initial model. When ``None`` a fresh ``MyModel`` is created
            together with a default optimizer and criterion.
        optimizer: Optimizer used with a supplied ``model``; defaults to Adam
            (lr=0.01) over the model parameters when omitted.
        criterion: Loss used with a supplied ``model``; defaults to
            ``nn.CrossEntropyLoss`` when omitted.
        rounds: Number of federated rounds to run.
        **kwargs: Forwarded to ``FLSpec.__init__``.
    """

    def __init__(self, model = None, optimizer = None, criterion = None, rounds=3, **kwargs):
        super().__init__(**kwargs)
        if model is not None:
            self.model = model
            # Fall back to the defaults instead of storing None for missing parts.
            self.optimizer = (optimizer if optimizer is not None
                              else optim.Adam(self.model.parameters(), lr=0.01))
            self.criterion = criterion if criterion is not None else nn.CrossEntropyLoss()
        else:
            self.model = MyModel()
            self.optimizer = optim.Adam(self.model.parameters(), lr=0.01)
            self.criterion = nn.CrossEntropyLoss()
        self.rounds = rounds

    @aggregator
    def start(self):
        """Initialise round bookkeeping and fan out to every collaborator."""
        print('Performing initialization for model')
        self.collaborators = self.runtime.collaborators
        # `private` is excluded from the state handed to the collaborators below.
        self.private = 10
        self.current_round = 0
        self.next(self.aggregated_model_validation,foreach='collaborators',exclude=['private'])

    @collaborator
    def aggregated_model_validation(self):
        """Score the aggregated model on this collaborator's ``test_loader``."""
        print(f'Performing aggregated model validation for collaborator {self.input}')
        self.agg_validation_score = inference(self.model,self.test_loader)
        print(f'{self.input} value of {self.agg_validation_score}')
        self.next(self.train)

    @collaborator
    def train(self):
        """Train the model for one pass over this collaborator's ``train_loader``.

        Records the loss of the last processed batch in ``self.loss`` and writes
        the final model and optimizer state to ``model.pth`` / ``optimizer.pth``.
        """
        if torch.cuda.is_available():
            self.model = self.model.to('cuda:0')
        self.model.train()
        # Optimizer and criterion are rebuilt at the start of every training step.
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.01)
        self.criterion = nn.CrossEntropyLoss()
        total_correct = 0
        total_samples = 0
        # Defined up front so `join` can read it even if the loader is empty.
        self.loss = 0.0

        for batch_idx, (inputs, labels) in enumerate(self.train_loader):
            if torch.cuda.is_available():
                inputs = inputs.to("cuda:0")
                labels = labels.to("cuda:0")
            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels)
            loss.backward()
            self.optimizer.step()

            batch_loss = loss.item()
            self.loss = batch_loss
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()
            if batch_idx % log_interval == 0:
                print('Train Epoch: 1 [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    batch_idx * len(inputs), len(self.train_loader.dataset),
                    100. * batch_idx / len(self.train_loader), batch_loss))

        # Checkpoint once per training step rather than on every logged batch:
        # the files are overwritten each time, so only the final state matters.
        torch.save(self.model.state_dict(), 'model.pth')
        torch.save(self.optimizer.state_dict(), 'optimizer.pth')

        train_accuracy = total_correct / total_samples if total_samples else 0.0
        print ('Accuracy trainning: {}'.format(train_accuracy))
        self.training_completed = True
        self.next(self.local_model_validation)

    @collaborator
    def local_model_validation(self):
        """Score the locally trained model on this collaborator's ``test_loader``."""
        self.local_validation_score = inference(self.model,self.test_loader)
        print(f'Doing local model validation for collaborator {self.input}: {self.local_validation_score}')
        self.next(self.join, exclude=['training_completed'])

    @aggregator
    def join(self,inputs):
        """Average the collaborators' metrics and models, then loop or finish.

        Args:
            inputs: Per-collaborator flow states produced by the preceding steps.
        """
        participants = list(inputs)
        count = len(participants)
        self.average_loss = sum(state.loss for state in participants)/count
        self.aggregated_model_accuracy = sum(state.agg_validation_score for state in participants)/count
        self.local_model_accuracy = sum(state.local_validation_score for state in participants)/count
        print(f'Average aggregated model validation values = {self.aggregated_model_accuracy}')
        print(f'Average training loss = {self.average_loss}')
        print(f'Average local model validation values = {self.local_model_accuracy}')
        self.model = FedAvg([state.model for state in participants])
        # The optimizer of the first collaborator is carried over as-is.
        self.optimizer = [state.optimizer for state in participants][0]
        self.current_round += 1
        if self.current_round < self.rounds:
            self.next(self.aggregated_model_validation, foreach='collaborators', exclude=['private'])
        else:
            self.next(self.end)

    @aggregator
    def end(self):
        """Final step of the flow; only prints a message."""
        print('This is the end of the flow')

Aggregator step "start" registered
Collaborator step "aggregated_model_validation" registered
Collaborator step "train" registered
Collaborator step "local_model_validation" registered
Aggregator step "join" registered
Aggregator step "end" registered


In [None]:
# Setup participants
# The local names deliberately differ from `aggregator` / `collaborator`: those
# names are the placement decorators imported from openfl, and rebinding them
# here would break a later re-run of the cell that defines the flow class.
aggregator_node = Aggregator()
aggregator_node.private_attributes = {}

# Setup collaborators with private attributes
collaborator_names = ['Portland', 'Seattle']
collaborators = [Collaborator(name=name) for name in collaborator_names]
for idx, participant in enumerate(collaborators):
    # Interleaved shards: collaborator `idx` takes every len(collaborators)-th
    # sample starting at offset `idx`, so the shards are disjoint.
    indices_train = list(range(idx, len(train_dataset), len(collaborators)))
    local_train = torch.utils.data.Subset(train_dataset, indices_train)
    indices_val = list(range(idx, len(val_dataset), len(collaborators)))
    local_test = torch.utils.data.Subset(val_dataset, indices_val)
    participant.private_attributes = {
            'train_loader': torch.utils.data.DataLoader(local_train,batch_size=batch_size_train, shuffle=True),
            # Evaluation uses its own batch size and does not need shuffling.
            'test_loader': torch.utils.data.DataLoader(local_test,batch_size=batch_size_test, shuffle=False)
    }

local_runtime = LocalRuntime(aggregator=aggregator_node, collaborators=collaborators, backend='single_process')
print(f'Local runtime collaborators = {local_runtime.collaborators}')

Local runtime collaborators = ['Portland', 'Seattle']


In [None]:
# No pre-built components are supplied: when `model` is None the flow builds its
# own MyModel, Adam optimizer and cross-entropy criterion.
model = None
best_model = None
optimizer = None
criterion = None
# NOTE(review): `criterion` and `best_model` are assigned above but are not passed
# to the flow here.
flflow = FederatedFlow(model,optimizer)
# Attach the local runtime and execute the flow (`rounds` keeps its default of 3).
flflow.runtime = local_runtime
flflow.run()

Creating local datastore in current directory (/content/.metaflow)

Calling start
Performing initialization for model
Sending state from aggregator to collaborators

Calling aggregated_model_validation
Performing aggregated model validation for collaborator Portland

Test set: Avg. loss: -0.0001, Accuracy: 73/499 (14.63%)

Portland value of 0.1462925851703407

Calling train
Accuracy trainning: 0.411006018916595

Calling local_model_validation

Test set: Avg. loss: -0.4709, Accuracy: 335/499 (67.13%)

Doing local model validation for collaborator Portland: 0.6713426853707415
Should transfer from local_model_validation to join

Calling aggregated_model_validation
Performing aggregated model validation for collaborator Seattle

Test set: Avg. loss: -0.0001, Accuracy: 77/499 (15.43%)

Seattle value of 0.15430861723446893

Calling train
Accuracy trainning: 0.3482373172828891

Calling local_model_validation

Test set: Avg. loss: -0.3759, Accuracy: 289/499 (57.92%)

Doing local model validati

In [None]:
# Show the first entry of the `conv1.weight` tensor of the final model.
print(f'Sample of the final model weights: {flflow.model.state_dict()["conv1.weight"][0]}')

# Report the mean aggregated-model validation score computed in the last `join` step.
print(f'\nFinal aggregated model accuracy for {flflow.rounds} rounds of training: {flflow.aggregated_model_accuracy}')

Sample of the final model weights: tensor([[[ 1.0152e-01, -6.0065e-02,  9.1386e-02,  2.6104e-02, -5.5867e-02],
         [-2.0925e-02, -2.9945e-03,  9.9730e-03,  1.0478e-01,  1.1841e-01],
         [ 9.1765e-02, -6.8904e-02, -3.2087e-02,  6.9336e-02,  1.2650e-04],
         [-4.4866e-02,  7.9381e-02, -5.0584e-02, -3.3520e-02, -4.6605e-02],
         [-3.4433e-02,  4.0261e-03,  1.1540e-02, -4.8760e-02,  8.3956e-02]],

        [[ 3.0238e-01,  1.7149e-01,  2.7495e-01,  3.2557e-01,  2.3936e-01],
         [ 1.4912e-01,  2.4339e-01,  1.1730e-01,  2.4477e-01,  2.0551e-01],
         [ 1.5003e-01,  2.2926e-01,  1.5484e-01,  2.9847e-01,  2.4506e-01],
         [ 2.4677e-01,  1.0762e-01,  1.9120e-01,  2.0563e-01,  2.1524e-01],
         [ 1.4816e-01,  3.6062e-01,  1.8258e-01,  2.4615e-01,  1.0399e-01]],

        [[-3.5534e-03,  1.3340e-03,  3.0253e-02,  1.0872e-01,  1.1768e-01],
         [ 8.0828e-02,  9.7216e-02,  6.9078e-02,  1.1640e-01,  7.5406e-02],
         [-2.5519e-02,  1.1671e-01,  6.7137e-04, 