In [1]:
import datetime

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import copy
import numpy as np
from torchvision import datasets, transforms, models
import torch
from torch.utils.data import DataLoader
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from skimage import color, transform   # lib: scikit-image
import keras

from utils.sampling import mnist_iid, mnist_noniid, cifar_iid, cifar_noniid, fashion_mnist_iid, fashion_mnist_noniid
from utils.lsh import LSHAlgo
from models.Fed import FedAvg
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

Using TensorFlow backend.


In [2]:
# Module-level cache of per-user feature vectors. run_fed() appends to it the
# first time a clustering experiment runs and reuses it on later calls.
user_feats = []

print('Loading dataset...')

# Preprocessing applied to every MNIST image: convert to a tensor, then
# normalise with mean 0.1307 and std 0.3081.
trans_mnist = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

dataset_train = datasets.MNIST(
    '../data/mnist/', train=True, download=True, transform=trans_mnist
)
dataset_test = datasets.MNIST(
    '../data/mnist/', train=False, download=True, transform=trans_mnist
)

# Split the training set across 100 users with the project's non-IID sampler.
dict_users = mnist_noniid(dataset_train, 100, case=1)

Loading dataset...


In [3]:
class CNNMnist(nn.Module):
    """Two-convolution CNN that maps single-channel images to 10 class scores.

    ``fc1`` expects 800 flattened features, which is what the two
    conv(5x5) + max-pool(2) stages produce for a 1x28x28 input (50 * 4 * 4).
    The network returns raw scores; no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # Layer attributes keep their names and creation order so that
        # state_dict keys and seeded initialisation are unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(in_features=800, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=10)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        # Stage 1: convolution -> 2x2 max-pool -> ReLU.
        stage1 = F.relu(F.max_pool2d(self.conv1(x), 2))

        # Stage 2: convolution -> channel dropout -> 2x2 max-pool -> ReLU.
        stage2 = self.conv2_drop(self.conv2(stage1))
        stage2 = F.relu(F.max_pool2d(stage2, 2))

        # Collapse channel and spatial dimensions into one feature axis.
        feature_count = stage2.shape[1] * stage2.shape[2] * stage2.shape[3]
        flat = stage2.view(-1, feature_count)

        # Classifier head; dropout is only active in training mode.
        hidden = F.relu(self.fc1(flat))
        hidden = F.dropout(hidden, training=self.training)
        return self.fc2(hidden)

In [4]:

def run_fed(dataset_train, dataset_test, dict_users, type_exp = 'base'):
    """Run one federated-learning experiment and return its averaged metrics.

    For ``type_exp`` equal to 'cluster' or 'lsh-cluster', users are first
    grouped with KMeans. Each user is described by the mean ResNet50 feature
    vector of that user's images, which is then either reduced with PCA
    ('cluster') or hashed with ``LSHAlgo`` ('lsh-cluster'). Any other value
    skips clustering. A fresh ``CNNMnist`` is then built and handed to
    ``batch_train``.

    Args:
        dataset_train: Indexable dataset; ``dataset_train[i][0]`` must be an
            image tensor with a ``.shape`` and a ``.numpy()`` method.
        dataset_test: Test dataset, passed through to ``batch_train``.
        dict_users: Mapping from user id to the sample indices of that user.
        type_exp: Experiment variant: 'base', 'cluster' or 'lsh-cluster'.

    Returns:
        The value returned by ``batch_train``:
        ``(loss_train_avg, acc_test_avg, loss_train_std, acc_test_std)``.

    Side effects:
        Appends to the module-level ``user_feats`` cache the first time a
        clustering experiment runs. Later calls reuse the cached features
        regardless of the ``dict_users`` passed in.
    """
    img_size = dataset_train[0][0].shape

    dict_clusters = {}
    if type_exp == 'cluster' or type_exp == 'lsh-cluster':
        # feature map
        print('Featuring...')

        if not user_feats:
            # Build the ImageNet feature extractor only on a cache miss;
            # constructing it (and fetching its weights) is wasted work when
            # the features were already computed by an earlier call.
            input_shape = (max(img_size[1], 32), max(img_size[2], 32), max(img_size[0], 3))
            model1 = keras.applications.resnet.ResNet50(include_top=False, weights="imagenet", input_shape=input_shape)

            for idx_user in dict_users:
                print('User', idx_user, 'featuring...')
                user_images = []
                for idx in dict_users[idx_user]:
                    image = dataset_train[idx][0].numpy()
                    # gray2rgb appends an RGB axis; [0] keeps the first
                    # channel plane of the original image.
                    image = color.gray2rgb(image)[0]
                    # Resize to the spatial size the extractor was built for
                    # (32x32 for MNIST) instead of a separate hard-coded size.
                    image = transform.resize(image, input_shape[:2])
                    user_images.append(image)

                pred = model1.predict([user_images])
                # One feature vector per user: the mean over that user's images.
                feats = np.mean([data[0][0] for data in pred], axis=0)
                user_feats.append(feats)

        if type_exp == 'lsh-cluster':
            # Locality-sensitive hashing
            print('LSH...')
            lsh = LSHAlgo(feat_dim=len(user_feats[0]), code_dim=512) # code_dim: output dimension
            user_feats1 = lsh.run(user_feats)
        else:
            # Plain dimensionality reduction
            print('PCA...')
            pca = PCA(n_components=50, random_state=728)
            user_feats1 = pca.fit_transform(user_feats)

        # Cluster the users
        print('Clustering...')
        kmeans = KMeans(n_clusters=10, random_state=728)
        kmeans.fit(user_feats1)

        # NOTE(review): the position in user_feats is used as the user id, which
        # assumes dict_users iterates as 0..n-1 - confirm against the sampler.
        for idx_user, label in enumerate(kmeans.labels_):
            dict_clusters.setdefault(label, []).append(idx_user)
        print('Clustering finished.')
        print('Dict of cluster - users: ', dict_clusters)


    # build model (fall back to CPU when no GPU is available)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net_glob = CNNMnist().to(device)
    print(net_glob)
    net_glob.train()


    # batch training

    return batch_train(type_exp, net_glob, dataset_train, dataset_test, dict_users, dict_clusters)


In [5]:
def batch_train(type_exp, net_glob, dataset_train, dataset_test, dict_users, dict_clusters, num_repeats=5, num_rounds=5):
    """Repeat a short federated training run and aggregate its metrics.

    Every repetition starts from a deep copy of ``net_glob``, so ``net_glob``
    itself is never modified and all repetitions share the same initial
    weights. Within a repetition, ``train_one_round`` is called ``num_rounds``
    times on that copy.

    Args:
        type_exp: Experiment variant, forwarded to ``train_one_round``.
        net_glob: Initial global model.
        dataset_train: Training dataset, forwarded to ``train_one_round``.
        dataset_test: Test dataset, forwarded to ``train_one_round``.
        dict_users: Mapping from user id to that user's sample indices.
        dict_clusters: Mapping from cluster id to user ids (may be empty).
        num_repeats: Number of independent repetitions (default 5).
        num_rounds: Number of communication rounds per repetition (default 5).

    Returns:
        ``(loss_train_avg, acc_test_avg, loss_train_std, acc_test_std)``.
        Each is computed over the repetition axis, giving one value per round.
    """
    loss_train_batch = []
    acc_test_batch = []

    for big_iter in range(num_repeats):
        print('Iteration ', big_iter)

        # Restart from the untouched initial weights for this repetition.
        net_glob_copy = copy.deepcopy(net_glob)

        loss_train = []
        acc_test = []

        # `round_idx` rather than `iter`, so the builtin is not shadowed.
        for round_idx in range(num_rounds):
            one_loss_train, one_acc_test = train_one_round(round_idx, type_exp, net_glob_copy, dataset_train, dataset_test, dict_users, dict_clusters)
            loss_train.append(one_loss_train)
            acc_test.append(one_acc_test)

        loss_train_batch.append(loss_train)
        acc_test_batch.append(acc_test)

    # axis=0 aggregates across repetitions, keeping one entry per round.
    loss_train_avg = np.mean(loss_train_batch, axis=0)
    acc_test_avg = np.mean(acc_test_batch, axis=0)

    loss_train_std = np.std(loss_train_batch, axis=0)
    acc_test_std = np.std(acc_test_batch, axis=0)

    return loss_train_avg, acc_test_avg, loss_train_std, acc_test_std

In [6]:
def test_img(net_g, datatest):
    net_g.eval()
    # testing
    test_loss = 0
    correct = 0
    data_loader = DataLoader(datatest, batch_size=128)
    l = len(data_loader)
    for idx, (data, target) in enumerate(data_loader):
        data, target = data.cuda(), target.cuda()
        log_probs = net_g(data)
        # sum up batch loss
        test_loss += F.cross_entropy(log_probs, target, reduction='sum').item()
        # get the index of the max log-probability
        y_pred = log_probs.data.max(1, keepdim=True)[1]
        correct += y_pred.eq(target.data.view_as(y_pred)).long().cpu().sum()

    test_loss /= len(data_loader.dataset)
    accuracy = 100.00 * correct / len(data_loader.dataset)
    if False:
        print('\nTest set: Average loss: {:.4f} \nAccuracy: {}/{} ({:.2f}%)\n'.format(
            test_loss, correct, len(data_loader.dataset), accuracy))
    return accuracy, test_loss

In [7]:
def train_one_round(iter, type_exp, net_glob, dataset_train, dataset_test, dict_users, dict_clusters):
    """Run one federated round: local training, FedAvg, then evaluation.

    Args:
        iter: Round number, used only in the progress message.
        type_exp: 'cluster' / 'lsh-cluster' pick one random user per cluster;
            any other value samples 10% of all users (at least one).
        net_glob: Global model; its weights are replaced in place with the
            averaged local weights.
        dataset_train: Training dataset shared by all users.
        dataset_test: Dataset used to evaluate the updated global model.
        dict_users: Mapping from user id to that user's sample indices.
        dict_clusters: Mapping from cluster id to the user ids in that cluster.

    Returns:
        ``(loss_avg, one_acc_test)``: mean local training loss over the
        selected users, and the test accuracy reported by ``test_img``.
    """
    w_locals, loss_locals = [], []

    if type_exp == 'cluster' or type_exp == 'lsh-cluster':
        # Pre-clustered case: draw one representative user from every cluster.
        idxs_users = []
        for idx_cluster in dict_clusters:
            idxs_users += list(np.random.choice(list(dict_clusters[idx_cluster]), 1, replace=False))
    else:
        # Derive the population size from dict_users rather than hard-coding
        # 100, so other user counts neither fail nor leave users unsampled.
        # NOTE(review): assumes user ids are 0..n-1 - confirm against sampler.
        num_users = len(dict_users)
        m = max(int(0.1 * num_users), 1)
        idxs_users = np.random.choice(range(num_users), m, replace=False)

    for idx in idxs_users:
        local = LocalUpdate(dataset=dataset_train, idxs=dict_users[idx])
        # Each user trains on its own copy so the global model stays untouched
        # until aggregation.
        w, loss = local.train(net=copy.deepcopy(net_glob))
        w_locals.append(copy.deepcopy(w))
        loss_locals.append(copy.deepcopy(loss))
    # update global weights
    w_glob = FedAvg(w_locals)

    # load the averaged weights into the global model
    net_glob.load_state_dict(w_glob)

    # print loss & acc
    loss_avg = sum(loss_locals) / len(loss_locals)
    one_acc_test, one_loss_test = test_img(net_glob, dataset_test)
    print('Round {:3d}, Average loss {:.3f}, Test accuracy {:.3f}'.format(iter, loss_avg, one_acc_test))

    return loss_avg, one_acc_test

In [8]:
class LocalUpdate(object):
    """Local SGD training of a model on one user's share of a dataset.

    Args:
        dataset: Full training dataset.
        idxs: Indices of ``dataset`` that belong to this user.
        local_bs: Mini-batch size of the local data loader (default 50).
        local_ep: Number of local epochs per call to ``train`` (default 5).
        lr: SGD learning rate (default 0.01).
        momentum: SGD momentum (default 0.5).
    """

    def __init__(self, dataset=None, idxs=None, local_bs=50, local_ep=5, lr=0.01, momentum=0.5):
        self.loss_func = nn.CrossEntropyLoss()
        self.selected_clients = []
        self.local_ep = local_ep
        self.lr = lr
        self.momentum = momentum
        self.ldr_train = DataLoader(DatasetSplit(dataset, idxs), batch_size=local_bs, shuffle=True)

    def train(self, net):
        """Train ``net`` in place on this user's data.

        Batches are moved to the device that ``net``'s parameters live on, so
        training works on both GPU and CPU models.

        Returns:
            ``(state_dict, mean_loss)``: the trained weights of ``net``, and
            the average over epochs of the per-epoch mean batch loss.

        Raises:
            ZeroDivisionError: If the user's data loader yields no batches.
        """
        net.train()
        # Follow the model's device instead of assuming 'cuda:0'.
        device = next(net.parameters()).device
        # train and update
        optimizer = torch.optim.SGD(net.parameters(), lr=self.lr, momentum=self.momentum)

        epoch_loss = []
        for _ in range(self.local_ep):
            batch_loss = []
            for images, labels in self.ldr_train:
                images, labels = images.to(device), labels.to(device)
                net.zero_grad()
                log_probs = net(images)
                loss = self.loss_func(log_probs, labels)
                loss.backward()
                optimizer.step()
                batch_loss.append(loss.item())
            epoch_loss.append(sum(batch_loss)/len(batch_loss))
        return net.state_dict(), sum(epoch_loss) / len(epoch_loss)

In [8]:
def plot(data, data_std, ylabel):
    """Plot one dotted curve per label and save the figure as a PDF.

    Args:
        data: Mapping from label to a sequence of y-values; the x-axis is the
            position in the sequence.
        data_std: Mapping from label to a sequence of standard deviations.
            Where a label has an entry of matching length, a shaded
            +/- 1 std band is drawn around its curve. Missing labels, an empty
            mapping or ``None`` simply produce no band.
        ylabel: Y-axis label, also embedded in the output file name.

    Side effects:
        Creates ``./test`` if needed and writes a timestamped PDF into it.
        The figure is closed afterwards so repeated calls do not accumulate
        open figures.
    """
    import os  # local import keeps this cell independent of the import cell

    os.makedirs('./test', exist_ok=True)

    fig = plt.figure()
    for label in data:
        values = data[label]
        xs = np.arange(len(values))
        line, = plt.plot(xs, values, label=label, linestyle=':')

        spread = data_std.get(label) if data_std else None
        if spread is not None and len(spread) == len(values):
            centre = np.asarray(values, dtype=float)
            spread = np.asarray(spread, dtype=float)
            # Reuse the curve's colour so the band is visibly tied to it.
            plt.fill_between(xs, centre - spread, centre + spread,
                             color=line.get_color(), alpha=0.2)
    plt.ylabel(ylabel)
    plt.legend()
    plt.savefig('./test/fed_{}_{}_{}_{}_{}_{}_{}_iid{}_{}.pdf'.format('all', ylabel, 'mnist', 'cnn', '1', '5', '5', 'noniid', datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")))
    plt.close(fig)

In [10]:
class DatasetSplit(Dataset):
    """Subset view of ``dataset`` restricted to the positions in ``idxs``.

    Item ``k`` of the split is item ``idxs[k]`` of the wrapped dataset.
    """

    def __init__(self, dataset, idxs):
        # Materialise the indices so any iterable can be indexed by position.
        self.idxs = [index for index in idxs]
        self.dataset = dataset

    def __len__(self):
        """Number of samples in this split."""
        return len(self.idxs)

    def __getitem__(self, item):
        """Return the ``(image, label)`` pair at split position ``item``."""
        source_index = self.idxs[item]
        image, label = self.dataset[source_index]
        return image, label

In [11]:
# Record when the experiment sweep started.
print('begin time: ', datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))

# Experiment variants to compare; each is passed to run_fed() as type_exp.
labels = ['base', 'cluster', 'lsh-cluster']

# Results keyed by experiment label: means first, then standard deviations.
dict_train_loss, dict_acc_test = {}, {}
dict_std_train_loss, dict_std_acc_test = {}, {}

for label in labels:
    (
        dict_train_loss[label],
        dict_acc_test[label],
        dict_std_train_loss[label],
        dict_std_acc_test[label],
    ) = run_fed(dataset_train, dataset_test, dict_users, type_exp=label)

print(dict_train_loss, dict_acc_test)


begin time:  2020-06-28-14-51-29
CNNMnist(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=800, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)
Iteration  0
Round   0, Average loss 0.210, Test accuracy 10.320
Round   1, Average loss 0.095, Test accuracy 16.230
Round   2, Average loss 0.101, Test accuracy 9.800
Round   3, Average loss 0.170, Test accuracy 16.470
Round   4, Average loss 0.147, Test accuracy 17.200
Iteration  1
Round   0, Average loss 0.211, Test accuracy 12.040
Round   1, Average loss 0.096, Test accuracy 10.100
Round   2, Average loss 0.084, Test accuracy 10.910
Round   3, Average loss 0.084, Test accuracy 17.920
Round   4, Average loss 0.090, Test accuracy 10.320
Iteration  2
Round   0, Average loss 0.222, Test accuracy 17.230
Round   1, Average loss 0.115, Test accuracy 9.580
Roun

In [12]:
# Save one figure per metric: the averaged training loss and the averaged test
# accuracy of every experiment label. The matching standard-deviation dicts are
# passed as the second argument.
plot(dict_train_loss, dict_std_train_loss, 'train_loss')
plot(dict_acc_test, dict_std_acc_test, 'test_acc')

In [13]:
# Append the averaged test accuracies to a plain-text log, one line per
# experiment label: "<label> <acc_0> <acc_1> ... " followed by a newline.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(r'./test/test.txt', 'a') as f:
    for label in dict_acc_test:
        f.write(label)
        f.write(' ')
        for item in dict_acc_test[label]:
            f.write(str(item))
            f.write(' ')
        f.write('\n')
print('save finished')

save finished


In [14]:
# Display the averaged test accuracies for each experiment label.
dict_acc_test

{'base': array([12.768   , 14.717999, 11.667999, 16.852001, 13.668001],
       dtype=float32),
 'cluster': array([10.162, 11.182, 19.142, 20.704, 32.456], dtype=float32),
 'lsh-cluster': array([16.534   , 18.044   , 26.457998, 25.787998, 23.438   ],
       dtype=float32)}

In [1]:
# NOTE(review): `dict_acc_test1` is only assigned in a later cell (rebuilt from
# a saved results file), so on a fresh kernel this cell raises NameError. Run
# that cell first, or move this cell below it.
#
# Appends one line per label to the text log: "<label> <acc_0> <acc_1> ... ".
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(r'./test/test.txt', 'a') as f:
    for label in dict_acc_test1:
        f.write(label)
        f.write(' ')
        for item in dict_acc_test1[label]:
            f.write(str(item))
            f.write(' ')
        f.write('\n')
print('save finished')

NameError: name 'dict_acc_test1' is not defined

In [4]:
# Parse a saved accuracy log. Each non-empty line is
# "<label> <value> <value> ...": the first field becomes a label, and the
# remaining fields become a float32 array at the same position in items2.
labels1 = []
items2 = []
with open(r'./test/all_test_acc_cifar_cnn_1_3_3000_iidFalse_2020-06-27-09-40-48.txt', 'r') as f:
    # Iterate the file lazily instead of loading every line with readlines().
    for item in f:
        fields = item.split()
        if not fields:
            # A blank line (e.g. a trailing newline) has no label to read.
            continue
        labels1.append(fields[0])
        items2.append(np.array(fields[1:]).astype('float32'))
# The `with` block has already closed the file here.

In [5]:
# Pair each label with the accuracy array parsed at the same position.
# labels1 and items2 are appended to together, so they have equal length.
dict_acc_test1 = dict(zip(labels1, items2))
print(dict_acc_test1)

{'base': array([ 9.776667, 10.66    , 10.      , ..., 44.98    , 47.52    ,
       41.88667 ], dtype=float32), 'cluster': array([10.      , 10.      , 11.276668, ..., 50.556667, 51.15333 ,
       51.55    ], dtype=float32), 'lsh-cluster': array([ 9.996667, 10.166667, 10.496667, ..., 51.186665, 50.87    ,
       52.18333 ], dtype=float32)}


In [6]:
# Show the experiment labels read from the saved results file.
print(labels1)

['base', 'cluster', 'lsh-cluster']


In [12]:
# NOTE(review): this rebinds dict_std_acc_test to an empty dict, discarding any
# standard deviations stored under that name by the training cell. The saved
# file read above carries accuracies only, so plot() gets an empty std mapping.
dict_std_acc_test = {}
plot(dict_acc_test1, dict_std_acc_test, 'test_acc')