# 腦部腫瘤分類（Pytorch）

## 準備動作（導入模組）

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torchsummary import summary

from sklearn.metrics import accuracy_score,classification_report

import pandas as pd
from tqdm import tqdm_notebook as tqdm
import time
import warnings
from collections import OrderedDict
# 資料預處理套件
from torch.utils.data import Dataset, DataLoader
import os
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
import numpy as np
warnings.simplefilter("ignore")
# download the pretrained model
import torchvision.models as models

## 定義數據集處理的方法

In [2]:
def images_transforms(phase, image_size):
    if phase == 'training':
        data_transformation = transforms.Compose([
            transforms.Resize(image_size),
#             transforms.RandomEqualize(10),
            transforms.RandomRotation(degrees=(-25,20)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
        ])
    else:
        data_transformation=transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
        ])

    return data_transformation

## 設置客戶端模型及參數配置
### 負責訓練，傳回訓練後梯度給中央端

In [3]:
class client:
    def __init__(self):
        self.name = ''
        self.net = models.resnet18(pretrained = True)
        # Early-stopping 階段參數設置
        self.batch_size = 64
        self.epochs = 20
        self.learning_rate = 0.001
        self.criterion=nn.CrossEntropyLoss()
        # 只訓練分類器
        for param in self.net.parameters():
            param.requires_grad = False
        self.optimizer = torch.optim.Adamax(self.net.parameters(), lr=self.learning_rate)

def training_dataset(self, training_path, image_size = (128,128), label = {}):
    trainset = datasets.ImageFolder(training_path, transform=images_transforms('training', image_size))
    # 定義標籤並更新
    label_dict = {k: v for k, v in label.items() if k in trainset.class_to_idx}
    trainset.class_to_idx.update(label_dict)
    self.train_loader = DataLoader(trainset, batch_size=self.batch_size, shuffle=True, num_workers=2)
client.training_dataset = training_dataset

# 建構全連結層神經網路
def net_linear(self, num_classes=4):
    self.input = self.net.fc.in_features
    self.net.fc = nn.Sequential(OrderedDict([
        ('fc1', nn.Linear(self.input,1024)),
        ('relu1', nn.ReLU(inplace=True)),
        #('dropout1', nn.Dropout(p=0.5)),
        ('fc2', nn.Linear(1024,num_classes)),
    ]))
client.net_linear = net_linear

## 設置中央服務端模型及參數配置
### 負責訓練後梯度交換計算，以及測試

In [4]:
class server:
    def __init__(self):
        self.global_model = models.resnet18(pretrained = True)
        self.testing_path = 'data/Testing'
        self.batch_size = 64
        # 定義各病徵標籤
        self.label = {'no_tumor': 0, 'meningioma_tumor': 1, 'glioma_tumor': 2, 'pituitary_tumor': 3}
        # 只訓練分類器
        for param in self.global_model.parameters():
            param.requires_grad = False
    # model aggregate
    def FedAvg(self, clients):
        # 聚合
        weight_accumulator = dict()
        # 形狀構建
        for name, params in self.global_model.state_dict().items():
            weight_accumulator[name] = torch.zeros_like(params)

        # 將不同模型之神經層參數與中央server模型取差值
        for client in clients:
            # 差值
            diff = dict()
            for name, data in client.net.state_dict().items():
                diff[name] = (data - self.global_model.state_dict()[name])
            for name, params in self.global_model.state_dict().items():
                weight_accumulator[name].add_((diff[name]/len(clients)).long())

        # 將不同模型之神經層參數權重平均後聚合
        for name, params in self.global_model.state_dict().items():
            params.add_(weight_accumulator[name])

        # 將參數權重平均後總和的server端模型更新到各client端模型之參數
        for client in clients:
            for name, params in self.global_model.state_dict().items():
                client.net.state_dict()[name].copy_(params)
# testset
def testing_dataset(self, image_size = (128,128)):
    testset = datasets.ImageFolder(self.testing_path, transform=images_transforms('test', image_size))
    # 定義標籤並更新
    label_dict = {k: v for k, v in self.label.items() if k in testset.class_to_idx}
    testset.class_to_idx.update(label_dict)
    testset, valset = torch.utils.data.random_split(testset, [150, 244])
    self.test_loader = DataLoader(testset, batch_size=self.batch_size, shuffle=True, num_workers=2)
    self.val_loader = DataLoader(valset, batch_size=self.batch_size, shuffle=True, num_workers=2)
server.testing_dataset = testing_dataset
# 建構全連結層神經網路
def net_linear(self, num_classes=4):
    self.input = self.global_model.fc.in_features
    self.global_model.fc = nn.Sequential(OrderedDict([
        ('fc1', nn.Linear(self.input,1024)),
        ('relu1', nn.ReLU(inplace=True)),
        #('dropout1', nn.Dropout(p=0.5)),
        ('fc2', nn.Linear(1024,num_classes)),
    ]))
server.net_linear = net_linear

## 初始化模型
### 設置參數

In [5]:
# model A
training_path_A = 'data/Training_A/'
# model B
training_path_B = 'data/Training_B/'
# model C
training_path_C = 'data/Training_C/'

# 定義各病徵標籤
label = {'no_tumor': 0, 'meningioma_tumor': 1, 'glioma_tumor': 2, 'pituitary_tumor': 3}

exchange_epochs = 30 # 交換資訊次數

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### 模型建置及資料集設置

In [6]:
A = client()
A.name = 'meningioma_tumor'
A.training_dataset(training_path=training_path_A, label=label)
A.net_linear(num_classes=len(label))
A.net = A.net.to(device)

In [7]:
B = client()
B.name = 'glioma_tumor'
B.training_dataset(training_path=training_path_B, label=label)
B.net_linear(num_classes=len(label))
B.net = B.net.to(device)

In [8]:
C = client()
C.name = 'pituitary_tumor'
C.training_dataset(training_path=training_path_C, label=label)
C.net_linear(num_classes=len(label))
C.net = C.net.to(device)

In [9]:
# 中央計算建置
main = server()
main.testing_dataset()
main.net_linear()
main.global_model = main.global_model.to(device)

In [18]:
for i, (images, labels) in enumerate(A.train_loader):
    images = images.to(device)
    labels = labels.to(device)
    outputs = A.net(images)
    print(outputs.shape, labels)
    break

torch.Size([64, 4]) tensor([0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,
        1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1,
        0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1], device='cuda:0')


## 訓練模型，并印出損失函數

In [10]:
def local_train(model, train_loader, criterion, optimizer, epochs=10):
    for e in range(epochs):
        # Train the model
        model.train()
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [11]:
# models : list
def test(models, test_loader):
    with torch.no_grad():
        acc = {}
        # 每個模型測試
        for model in models:
            y_pred = []
            y_actual = []
            model.net.eval()
            for i, (images,labels) in enumerate(test_loader):
                images = images.to(device)
                labels = labels.to(device)

                outputs = model.net(images)

                y_actual += list(np.array(labels.detach().to('cpu')).flatten())
                predictes = torch.max(outputs, 1)[1]
                y_pred += list(np.array(predictes.detach().to('cpu')).flatten())

            y_actual = np.array(y_actual).flatten()
            y_pred = np.array(y_pred).flatten()
            acc[model.name]=accuracy_score(y_actual, y_pred)

        return acc

In [None]:
models = [A, B, C]
# log before exchange
before_epoch = []
before_model = {f'{model.name}':[] for model in models}
before_acc = {}
# log after exchange
after_epoch = []
after_model = {f'{model.name}':[] for model in models}
after_acc = {}
for exchange_epoch in range(exchange_epochs):
    # federated train
    for model in models:
        local_train(model.net,model.train_loader,model.criterion,model.optimizer,model.epochs)

    # testing before exchange
    print(f'testing before exchange:')
    before_acc = test(models, main.test_loader)

    before_epoch.append(str(exchange_epoch+1))
    for model in models:
        before_model[model.name].append(str(before_acc[model.name]))
    # 列出準確率
    print(f'exchange_epoch : {exchange_epoch+1}')
    for model in models:
        print(f'{model.name} : {before_acc[model.name]}')

    # exchange
    main.FedAvg(models)

    # testing after exchange
    print(f'testing after exchange:')
    after_acc = test(models, main.test_loader)

    after_epoch.append(str(exchange_epoch+1))
    for model in models:
        after_model[model.name].append(str(after_acc[model.name]))
    # 列出準確率
    print(f'exchange_epoch : {exchange_epoch+1}')
    for model in models:
        print(f'{model.name} : {after_acc[model.name]}')

# save before log
df = pd.DataFrame(before_epoch,columns=['epoch'])
for model in models:
    df[model.name] = before_model[model.name]

df.to_csv('outputs/fedavg_model_before.csv', index = False, header=True)
# save after log
df = pd.DataFrame(after_epoch,columns=['epoch'])
for model in models:
    df[model.name] = after_model[model.name]

df.to_csv('outputs/fedavg_model_after.csv', index = False, header=True)

testing before exchange:
exchange_epoch : 1
meningioma_tumor : 0.31333333333333335
glioma_tumor : 0.26
pituitary_tumor : 0.22666666666666666
testing after exchange:
exchange_epoch : 1
meningioma_tumor : 0.26
glioma_tumor : 0.26
pituitary_tumor : 0.26
testing before exchange:
exchange_epoch : 2
meningioma_tumor : 0.29333333333333333
glioma_tumor : 0.26
pituitary_tumor : 0.32666666666666666
testing after exchange:
exchange_epoch : 2
meningioma_tumor : 0.26666666666666666
glioma_tumor : 0.26666666666666666
pituitary_tumor : 0.26666666666666666
testing before exchange:
exchange_epoch : 3
meningioma_tumor : 0.28
glioma_tumor : 0.26666666666666666
pituitary_tumor : 0.32666666666666666
testing after exchange:
exchange_epoch : 3
meningioma_tumor : 0.26666666666666666
glioma_tumor : 0.26666666666666666
pituitary_tumor : 0.26666666666666666
testing before exchange:
exchange_epoch : 4
meningioma_tumor : 0.30666666666666664
glioma_tumor : 0.26666666666666666
pituitary_tumor : 0.30666666666666664
t