# Federated Learning Practice on the MNIST dataset

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
import syft as sy
import copy
# Hook PySyft into torch; this hook object is what every VirtualWorker
# created further down is constructed with.
hook = sy.TorchHook(torch)

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Extra keyword arguments forwarded to the data loaders; only populated
# on CUDA machines.
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}

In [None]:
# Create the four virtual workers used by this notebook, all sharing the
# same PySyft hook.
bob, alice, secure_worker_a, secure_worker_b = (
    sy.VirtualWorker(hook, id=worker_id)
    for worker_id in ("bob", "alice", "secure_worker_a", "secure_worker_b")
)

In [None]:
# Preprocessing pipelines. No augmentation is applied: images are converted
# to tensors and normalised with mean 0.5 and std 0.5 (single channel).
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
])

test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
])

# Training split: the MNIST dataset is federated across bob and alice, so
# the loader iterates over a FederatedDataset instead of a local one.
federated_train_loader = sy.FederatedDataLoader(
    datasets.MNIST(
        '/home/zhaojia-raoxy/data',
        train=True,
        download=True,
        transform=train_transforms,
    ).federate((bob, alice)),
    batch_size=20,
    shuffle=True,
    **kwargs
)

# Test split: federated across the two secure workers, iterated in order.
federated_test_loader = sy.FederatedDataLoader(
    datasets.MNIST(
        '/home/zhaojia-raoxy/data',
        train=False,
        download=True,
        transform=test_transforms,
    ).federate((secure_worker_a, secure_worker_b)),
    batch_size=20,
    shuffle=False,
    **kwargs
)

# 训练模型
## Net

In [None]:
class LeNet(nn.Module):
    """Small convolutional classifier: three 5x5 convolutions with sigmoid
    activations followed by a single linear layer.

    Args:
        channel: number of channels of the input images.
        hideen: size of the flattened convolutional output fed to the
            linear layer (parameter name kept as-is for callers). The
            default 768 equals 12 * 8 * 8, which is what 32x32 inputs
            produce; 28x28 inputs produce 12 * 7 * 7 = 588.
        num_classes: number of output logits.
    """

    def __init__(self, channel=3, hideen=768, num_classes=10):
        super().__init__()
        # Every convolution shares the same kernel size and padding; only
        # the stride differs (2, 2, then 1).
        conv_kwargs = dict(kernel_size=5, padding=2)
        self.body = nn.Sequential(
            nn.Conv2d(channel, 12, stride=2, **conv_kwargs),
            nn.Sigmoid(),
            nn.Conv2d(12, 12, stride=2, **conv_kwargs),
            nn.Sigmoid(),
            nn.Conv2d(12, 12, stride=1, **conv_kwargs),
            nn.Sigmoid(),
        )
        self.fc = nn.Sequential(nn.Linear(hideen, num_classes))

    def forward(self, x):
        """Return raw class logits for a batch of images."""
        features = self.body(x)
        # Collapse everything except the batch dimension.
        flat = features.view(features.shape[0], -1)
        return self.fc(flat)

# `Net` is not defined anywhere in this notebook; LeNet is the model class
# above. MNIST images are 1x28x28: the two stride-2 convolutions shrink
# 28 -> 14 -> 7, so the flattened feature size is 12 * 7 * 7 = 588.
model = LeNet(channel=1, hideen=12 * 7 * 7, num_classes=10).to(device)
# Plain SGD over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.03)  # TODO momentum is not supported at the moment

## CNN

In [None]:
# class CNN(nn.Module):
#     def __init__(self, num_class):
#         super(CNN, self).__init__()
#         self.block1 = nn.Sequential(
#             nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5),
#             nn.ReLU(),
#             nn.AdaptiveAvgPool2d(5),
#             nn.Dropout(p=0.5)

#         )
#         self.block2 = nn.Sequential(
#             nn.Conv2d(64, 32, 5, padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(2),
#         )
#         self.block3 = nn.Sequential(
#             nn.Linear(32, num_class),
#             nn.LogSoftmax(dim=1)
#         )

#     def forward(self, x):
#         x = self.block1(x)
#         x = self.block2(x)
#         x = x.view(x.shape[0], -1)  # torch.Size([128, 32])
#         x = self.block3(x)
#         return x

# class CNN(nn.Module):
#     def __init__(self, num_class=10):
#         super(CNN, self).__init__()
#         self.block1 = nn.Sequential(
#             nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3),
#             nn.ReLU(),
#             nn.MaxPool2d(2),
#             nn.Dropout(p=0.5)

#         )
#         self.block2 = nn.Sequential(
#             nn.Conv2d(64, 32, 3, padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(2),
#         )
#         self.block3 = nn.Sequential(
#             nn.Linear(32, num_class),
#             nn.LogSoftmax(dim=1)
#         )

#     def forward(self, x):
#         x = self.block1(x)
#         x = self.block2(x)
#         x = x.view(x.shape[0], -1)  # torch.Size([128, 32])
#         x = self.block3(x)
#         return x
    
# model = CNN(10).to(device)
# optimizer = optim.SGD(model.parameters(), lr=0.03) # TODO momentum is not supported at the moment

## resnet18

In [None]:
# model_name = '/home/zhaojia-raoxy/model/resnet18-5c106cde.pth'

# def ResNet_s(model_name):
#     ''':cvar
#     返回修改好的模型，和冻结好的参数
#     '''
#     from torchvision.models import resnet18
#     pretrain_model = resnet18(pretrained=False)
#     pretrain_model.fc = nn.Linear(pretrain_model.fc.in_features, 10)  # 将全连接层改为自己想要的分类输出
#     pretrained_dict = torch.load(model_name)

#     pretrained_dict.pop('fc.weight')
#     pretrained_dict.pop('fc.bias')

#     model_dict = pretrain_model.state_dict()
#     pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}

#     model_dict.update(pretrained_dict)  # 模型参数列表进行参数更新，加载参数
#     pretrain_model.load_state_dict(model_dict)  # 将满足条件的参数的 requires_grad 属性设置为False

# #     for name, value in pretrain_model.named_parameters():
# #         if (name != 'fc.weight') and (name != 'fc.bias'):
# #             value.requires_grad = False
#     params_conv = filter(lambda p: p.requires_grad, pretrain_model.parameters())  # 要更新的参数在parms_conv当中
#     return pretrain_model, params_conv

# model, params_conv = ResNet_s(model_name)
# model=model.to(device)
# optimizer = optim.SGD(params_conv, lr=0.03) # TODO momentum is not supported at the moment

## LeNet

In [None]:
class LeNet(nn.Module):
    """LeNet-style classifier built from three sigmoid-activated 5x5
    convolutions and one linear output layer.

    Args:
        channel: number of input image channels.
        hideen: flattened size of the convolutional output (parameter name
            kept as-is for callers). 768 = 12 * 8 * 8 corresponds to 32x32
            inputs; 28x28 inputs need 12 * 7 * 7 = 588.
        num_classes: number of output logits.
    """

    def __init__(self, channel=3, hideen=768, num_classes=10):
        super().__init__()
        layers = []
        in_channels = channel
        # Two down-sampling convolutions (stride 2) followed by one
        # stride-1 convolution, each with 12 output channels.
        for stride in (2, 2, 1):
            layers.append(
                nn.Conv2d(in_channels, 12, kernel_size=5, padding=2, stride=stride)
            )
            layers.append(nn.Sigmoid())
            in_channels = 12
        self.body = nn.Sequential(*layers)
        self.fc = nn.Sequential(nn.Linear(hideen, num_classes))

    def forward(self, x):
        """Compute class logits for the image batch `x`."""
        conv_out = self.body(x)
        batch = conv_out.size(0)
        return self.fc(conv_out.view(batch, -1))
    
# Flattened feature size for 1x28x28 MNIST input: the two stride-2
# convolutions shrink 28 -> 14 -> 7 and the last layer has 12 channels.
hidden = 12 * 7 * 7
model = LeNet(channel=1, hideen=hidden, num_classes=10).to(device)
# Optimise this model's own parameters; `params_conv` only exists in the
# commented-out ResNet cell and is undefined here.
optimizer = optim.SGD(model.parameters(), lr=0.03)  # TODO momentum is not supported at the moment

# 训练

In [None]:
def train(model, device, federated_train_loader, optimizer, epoch, batch_size,
          log_interval=2000):
    """Run one epoch of federated training.

    For each batch the model is sent to the location that holds the batch,
    one optimisation step is taken on the cross-entropy loss, and the model
    is fetched back.

    Args:
        model: module to train; must support ``send(location)`` and ``get()``.
        device: device the batch tensors are moved to.
        federated_train_loader: sized iterable of ``(data, target)`` batches
            whose ``data`` exposes a ``location`` attribute.
        optimizer: optimizer bound to ``model``'s parameters.
        epoch: epoch index, used only in the progress message.
        batch_size: batch size, used only to compute the sample counts shown
            in the progress message.
        log_interval: print progress every this many batches (batch 0
            included). A falsy value (0 or None) disables logging.
    """
    model.train()
    num_batches = len(federated_train_loader)

    for batch_idx, (data, target) in enumerate(federated_train_loader):
        # Move the model to wherever this batch lives before touching it.
        model.send(data.location)
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # The model returns raw logits, hence cross_entropy rather than nll_loss.
        loss = F.cross_entropy(output, target.long())
        loss.backward()
        optimizer.step()
        # Bring the updated model back before the next batch.
        model.get()

        if log_interval and batch_idx % log_interval == 0:
            # The loss is still remote at this point; fetch it for printing.
            loss = loss.get()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * batch_size, num_batches * batch_size,
                100. * batch_idx / num_batches, loss.item()))

In [None]:
def test(model, device, federated_test_loader, batch_size):
    """Evaluate the model on the federated test set and print a summary.

    Args:
        model: module to evaluate; must support ``send(location)`` and ``get()``.
        device: device the batch tensors are moved to.
        federated_test_loader: sized iterable of ``(data, target)`` batches
            whose ``data`` exposes a ``location`` attribute.
        batch_size: batch size of the loader, used to derive the number of
            evaluated samples.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in federated_test_loader:
            # Send the model to the worker holding this batch.
            model.send(data.location)
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Same criterion as training: the model emits raw logits, so
            # cross_entropy (not nll_loss) is the matching loss. Summed per
            # batch so it can be averaged over samples below.
            loss = F.cross_entropy(output, target.long(), reduction='sum')
            model.get()
            test_loss += loss.get()  # accumulate the summed batch loss
            pred = output.argmax(1, keepdim=True)  # index of the largest logit
            correct += pred.eq(target.view_as(pred)).sum().get()

    # Sample count assumes every batch is full; this is the same figure the
    # accuracy line has always used as its denominator.
    num_samples = len(federated_test_loader) * batch_size
    # The losses were summed per sample, so average over samples, not batches.
    test_loss /= num_samples

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))

In [None]:
# Alternate one training pass and one evaluation pass for 50 epochs
# (epoch indices 0..49).
for epoch in range(0, 50):
    train(
        model=model,
        device=device,
        federated_train_loader=federated_train_loader,
        optimizer=optimizer,
        epoch=epoch,
        batch_size=20,
    )
    test(
        model=model,
        device=device,
        federated_test_loader=federated_test_loader,
        batch_size=20,
    )

# 保存

In [None]:
# Base file name (without extension) used when saving the model.
# NOTE(review): the active model in this notebook is LeNet, not ResNet18 —
# confirm the intended file name.
save_name = "ResNet18"

In [6]:
# Save only the model weights (its state_dict).
weights_path = "/home/zhaojia-raoxy/model/out/{}.pt".format(save_name)
torch.save(model.state_dict(), weights_path)
print("保存文件：", weights_path)

NameError: name 'model' is not defined

In [None]:
# Save the entire model object (not just its weights).
# NOTE(review): the file is written by torch.save, so despite the .h5
# extension it is not an HDF5 file.
model_path = "/home/zhaojia-raoxy/model/out/{}.h5".format(save_name)
torch.save(model, model_path)
print("保存文件：", model_path)