# CS242: Final Project





> Harvard CS 242: Computing at Scale (Spring 2020)
> 
> Instructor: Professor HT Kung
>
> Students: Kavya Kopparapu, Eric Lin, Jazz Zhao


---

### **1. General Setup Code**

---
Define the CIFAR-10 dataset (the MNIST loading code is left commented out) as well as the standard network we will be using for training.

In [1]:
## Code Cell 1.1

import time
import copy
import sys
from collections import OrderedDict

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import numpy as np
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt

#Using MNIST
#dataset = datasets.MNIST(root='./data')
#idx = dataset.train_labels==1
#dataset.train_labels = dataset.train_labels[idx]
#dataset.train_data = dataset.train_data[idx]

# Using CIFAR-10
# Training-time pipeline: random 32x32 crop (after 4px padding), random
# horizontal flip, tensor conversion and per-channel normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True,
                                        transform=transform_train)
# The validation set wraps the same CIFAR-10 training images (train=True);
# the train/validation separation is done by index below.
# NOTE(review): validation also goes through the augmenting transform_train --
# confirm that evaluating on augmented images is intended.
validset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True,
                                        transform=transform_train)

# Hold out the first 20% of a random permutation of the indices for validation.
valid_size = 0.2
indices = list(range(len(trainset)))
split = int(np.floor(valid_size * len(trainset)))
np.random.shuffle(indices)

train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# The training loader must draw from the training split. (It previously used
# valid_sampler, so it iterated over the validation indices and train_sampler
# was never used.)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                          sampler=train_sampler, shuffle=False,
                                          num_workers=2)
validloader = torch.utils.data.DataLoader(validset, batch_size=128,
                                          sampler=valid_sampler, shuffle=False,
                                          num_workers=2)

# Load testing data (no augmentation, same normalization as training)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True,
                                       transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False,
                                         num_workers=2)


# Using same ConvNet as in Assignment 1
def conv_block(in_channels, out_channels, kernel_size=3, stride=1,
               padding=1):
    """Build one convolution stage: Conv2d (no bias) -> BatchNorm2d -> ReLU.

    in_channels / out_channels: channel counts of the convolution
    kernel_size, stride, padding: forwarded to ``nn.Conv2d``

    return: an ``nn.Sequential`` holding the three layers in that order
    """
    convolution = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
                            padding, bias=False)
    normalization = nn.BatchNorm2d(out_channels)
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(convolution, normalization, activation)

class ConvNet(nn.Module):
    """Nine conv_block stages, global average pooling and a 10-way linear head.

    The third and sixth stages use stride 2; channel widths grow
    3 -> 32 -> 64 -> 128 -> 256.
    """

    def __init__(self):
        super().__init__()
        stages = [
            conv_block(3, 32),
            conv_block(32, 32),
            conv_block(32, 64, stride=2),
            conv_block(64, 64),
            conv_block(64, 64),
            conv_block(64, 128, stride=2),
            conv_block(128, 128),
            conv_block(128, 256),
            conv_block(256, 256),
        ]
        # Pool every feature map down to 1x1 so the head sees one value per channel.
        self.model = nn.Sequential(*stages, nn.AdaptiveAvgPool2d(1))
        self.classifier = nn.Linear(256, 10)

    def forward(self, x):
        """Return the classifier outputs for the input batch ``x``."""
        features = self.model(x)
        batch, channels, _height, _width = features.shape
        # Drop the two trailing 1x1 spatial dimensions left by the pooling layer.
        flattened = features.view(batch, channels)
        return self.classifier(flattened)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


**Device Class and Train/Test Methods**

---

In [0]:
## Code Cell 1.2
import statistics 

class DatasetSplit(torch.utils.data.Dataset):
    """View onto a subset of ``dataset`` selected by a list of indexes."""

    def __init__(self, dataset, idxs):
        # Keep a reference to the wrapped dataset and normalise every index to int.
        self.dataset = dataset
        self.idxs = list(map(int, idxs))

    def __len__(self):
        """Number of samples in the subset."""
        return len(self.idxs)

    def __getitem__(self, item):
        """Return (image, label-as-tensor) for the ``item``-th subset entry."""
        source_index = self.idxs[item]
        image, label = self.dataset[source_index]
        return image, torch.tensor(label)

class Device():
    """One simulated device: its data splits/loaders and a list of local models.

    Attributes set in __init__:
      device_trainset / device_validset / device_testset: DatasetSplit views
      trainloader / validloader / testloader: shuffling DataLoaders over them
      nets: list of per-model dicts ('net', 'optimizer', 'scheduler' and the
            train/valid/test loss and accuracy trackers)
      idx: the device id
      ranking: one weight per model in ``nets``
      active: one 0/1 flag per model in ``nets`` (0 = model was removed)
      bias, archetype: stored as given by the caller
    """

    def __init__(self, net, device_id, trainset, validset, testset, train_idxs, valid_idxs, test_idxs, bias, archetype, lr=0.1,
                      milestones=None, batch_size=128):
        """Deep-copy ``net`` as this device's first model and build its loaders.

        milestones: epochs at which MultiStepLR multiplies the learning rate by
                    0.1; defaults to [25, 50, 75] when None.
        """
        if milestones is None:
            milestones = [25, 50, 75]

        # Every device trains its own private copy of the initial network.
        device_net = copy.deepcopy(net)
        optimizer = torch.optim.SGD(device_net.parameters(), lr=lr, momentum=0.9,
                                    weight_decay=5e-4)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        milestones=milestones,
                                                        gamma=0.1)
        self.device_trainset = DatasetSplit(trainset, train_idxs)
        self.trainloader = torch.utils.data.DataLoader(self.device_trainset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        num_workers=2)
        self.device_validset = DatasetSplit(validset, valid_idxs)
        self.validloader = torch.utils.data.DataLoader(self.device_validset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        num_workers=2)
        self.device_testset = DatasetSplit(testset, test_idxs)
        self.testloader = torch.utils.data.DataLoader(self.device_testset,
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        num_workers=2)
        self.nets = []
        self.idx = device_id
        self.ranking = [1.]
        self.active = [1] #either 1 or 0, depending on whether we got rid of it or not
        self.nets.append({
            'net': device_net,
            'optimizer': optimizer,
            'scheduler': scheduler,
            'train_loss_tracker': [],
            'train_acc_tracker': [],
            'valid_loss_tracker': [],
            'valid_acc_tracker': [],
            'test_loss_tracker': [],
            'test_acc_tracker': [],
        })

        # Bias and archetype parameters
        self.bias = bias              # Number between 0 and 1 to represent linear comb. of archetypes
        self.archetype = archetype    # An array of possible archetypes

    def update_ranking(self, removed=False, duplicate_model_id=-1, offset_rank=-1):
        """Recompute ``self.ranking`` (and possibly ``self.active``) in place.

        Does nothing while the device holds a single model. Otherwise:
          1. score each model by its latest validation accuracy (mean of the
             last three once available, 50 when it has none yet);
          2. mask inactive models and normalise by the sum of all scores;
          3. unless this is a duplication round (offset_rank != -1), deactivate
             active models more than one standard deviation below the active
             mean, or -- with more than three models -- below a tenth of the
             best ranking;
          4. add Gaussian noise (sigma 0.01) to one randomly chosen active
             model and take the same total away from the other active models;
          5. renormalise so the active rankings are non-negative and sum to 1,
             with inactive models pinned at 0.

        removed: when True, model ``duplicate_model_id`` is marked inactive
        duplicate_model_id: index of the model whose score is overridden by
                            ``offset_rank`` (and deactivated when ``removed``)
        offset_rank: replacement score for ``duplicate_model_id``; -1 disables
                     the override and enables step 3
        """
        # Imported locally: this cell does not import `random` itself and would
        # otherwise depend on a later cell having been run first.
        import random

        zero_threshold = 1 #number of standard deviations away for model deletion cutoff

        if len(self.nets) <= 1:
            return

        # --- 1. raw score per model ---
        metrics = []
        for i, entry in enumerate(self.nets):
            history = entry['valid_acc_tracker']
            if len(history) > 0:
                rank = history[-1]
                if len(history) >= 3:
                    rank = (history[-1] + history[-2] + history[-3]) / 3
                if duplicate_model_id == i and offset_rank != -1:
                    rank = offset_rank      # Heavily rank the devices that are underperforming for new models and vice versa
                if rank == 0:
                    rank += 0.001           # keep the score strictly non-zero
                metrics.append(rank)
            else:
                metrics.append(50)

        # If we added more models, add active trackers for them. (`<` rather
        # than `!=` so a longer-than-expected list cannot loop forever.)
        while len(self.active) < len(self.nets):
            self.active.append(1)
        if removed:       # Auto-set a model as inactive if it was already removed
            self.active[duplicate_model_id] = 0

        # --- 2. first normalisation (with self.active) ---
        metric_total = sum(metrics)
        self.ranking = [metrics[i] * self.active[i] / metric_total
                        for i in range(len(metrics))]

        active_ids = [i for i in range(len(self.ranking)) if self.active[i] != 0]

        # --- 3. remove models that are underperforming ---
        if offset_rank == -1 and len(active_ids) > 1:   # only remove if not duplicating round
            active_ranks = [self.ranking[i] for i in active_ids]
            max_rank = max(self.ranking)
            std = statistics.stdev(active_ranks)
            mean = sum(active_ranks) / len(active_ranks)
            for j in active_ids:
                too_far_below_mean = mean - self.ranking[j] > zero_threshold * std
                far_below_best = len(self.ranking) > 3 and self.ranking[j] * 10 < max_rank
                if too_far_below_mean or far_below_best:
                    self.ranking[j] = 0
                    self.active[j] = 0
            # Refresh the list so a model removed just above is not treated as
            # active by the noise step below.
            active_ids = [i for i in active_ids if self.active[i] != 0]

        if not active_ids:
            # Nothing left to rank; every entry is already masked to zero.
            return

        # --- 4. add noise, redistributed among the active models only ---
        noise = random.gauss(0, 0.01)
        if len(active_ids) == 1:
            chosen = active_ids[0]
        else:
            chosen = active_ids[random.randint(0, len(active_ids) - 1)]
            share = noise / (len(active_ids) - 1)
            for j in active_ids:
                if j != chosen:
                    self.ranking[j] -= share
        self.ranking[chosen] += noise

        # --- 5. normalise again ---
        # Shift the active rankings up if the noise pushed any of them below zero.
        lowest = min(self.ranking[i] for i in active_ids)
        if lowest < 0:
            for i in active_ids:
                self.ranking[i] -= lowest

        total = sum(self.ranking[i] for i in active_ids)
        if total > 0:
            self.ranking = [self.ranking[i] / total if self.active[i] != 0 else 0.0
                            for i in range(len(self.ranking))]
        else:
            # Degenerate case (all active rankings are zero): fall back to equal
            # weights instead of dividing by zero.
            uniform = 1.0 / len(active_ids)
            self.ranking = [uniform if self.active[i] != 0 else 0.0
                            for i in range(len(self.ranking))]
            

def create_devices(net, trainset, validset, testset, train_idxs, valid_idxs, test_idxs, bias, archetype, lr=0.1,
                  milestones=None, batch_size=128, num_devices=2):
    """Build ``num_devices`` Device objects with ids 0 .. num_devices-1.

    train_idxs, valid_idxs, test_idxs, bias and archetype are indexed by device
    id; every other argument is passed unchanged to each Device.

    return: the list of created devices, ordered by id
    """
    devices_lst = []
    for device_id in range(num_devices):
        devices_lst.append(
            Device(net, device_id, trainset, validset, testset,
                   train_idxs[device_id], valid_idxs[device_id],
                   test_idxs[device_id], bias[device_id],
                   archetype[device_id], lr, milestones, batch_size))
    return devices_lst
      
  
def train(epoch, device, model_id):
    """Run one pass over ``device.trainloader`` for model ``model_id``.

    Uses the module-level ``criterion`` as the loss. Every batch loss is
    appended to the model's 'train_loss_tracker'; the accuracy over the whole
    pass (in percent) is appended once to 'train_acc_tracker'. An empty loader
    records an accuracy of 0.0 instead of failing on an unbound variable.

    epoch: not used by the computation (kept for the callers' signature)
    device: Device whose model and training loader are used
    model_id: index into ``device.nets``
    """
    slot = device.nets[model_id]
    net = slot['net']
    optimizer = slot['optimizer']
    net.train()
    correct, total = 0, 0

    for inputs, targets in device.trainloader:
        inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        slot['train_loss_tracker'].append(loss.item())
        # The predicted class is the index of the largest output.
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    acc = 100. * correct / total if total else 0.0
    slot['train_acc_tracker'].append(acc)

def validate(epoch, device, model_id):
    """Evaluate model ``model_id`` on ``device.validloader`` without gradients.

    Uses the module-level ``criterion`` as the loss. Every batch loss is
    appended to 'valid_loss_tracker'; the accuracy over the whole pass (in
    percent) is appended once to 'valid_acc_tracker'. An empty loader records
    0.0 instead of dividing by zero. The model is put back into training mode
    before returning.

    epoch: not used by the computation (kept for the callers' signature)
    device: Device whose model and validation loader are used
    model_id: index into ``device.nets``
    """
    slot = device.nets[model_id]
    net = slot['net']
    net.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for inputs, targets in device.validloader:
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            slot['valid_loss_tracker'].append(loss.item())
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    acc = 100. * correct / total if total else 0.0
    slot['valid_acc_tracker'].append(acc)
    net.train()

def test(epoch, device, model_id, dataset, dataloader):
    """Evaluate model ``model_id`` of ``device`` on ``dataloader``.

    Uses a fresh ``nn.CrossEntropyLoss``. Every batch loss is appended to
    'test_loss_tracker' and the overall accuracy (in percent) to
    'test_acc_tracker'. The model is put back into training mode afterwards.
    An empty loader yields a loss and accuracy of 0.0 instead of dividing by
    zero.

    epoch, dataset: not used by the computation (kept for the callers' signature)

    return: (mean batch loss, accuracy), each formatted as a '%.3f' string
    """
    criterion = nn.CrossEntropyLoss()

    slot = device.nets[model_id]
    net = slot['net']
    net.eval()
    test_loss, correct, total, num_batches = 0, 0, 0, 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            batch_loss = criterion(outputs, targets).item()
            test_loss += batch_loss
            num_batches += 1
            slot['test_loss_tracker'].append(batch_loss)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    loss = test_loss / num_batches if num_batches else 0.0
    acc = 100. * correct / total if total else 0.0
    slot['test_acc_tracker'].append(acc)

    net.train()
    return ('%.3f' % loss, '%.3f' % acc)


In [0]:
## Code Cell 1.3
import random #to use the random.sample method

def iid_sampler(dataset, num_devices, data_pct):
    '''
    Draw an independent, uniformly random subset of sample indexes per device.

    dataset: PyTorch Dataset (e.g., CIFAR-10 training set); only its length is used
    num_devices: integer number of devices to create subsets for
    data_pct: fraction of the samples to give each device (0.1 means 10%)

    return: dictionary mapping device id -> list of sample indexes, e.g.
      {
        0: [3, 65, 2233, ..., 22] // device 0 sample indexes
        1: [0, 2, 4, ..., 583] // device 1 sample indexes
        ...
      }

    Indexes are unique within one device (sampling without replacement) but
    may repeat across devices, since every device is sampled independently.
    '''
    total_samples = len(dataset)
    population = list(range(total_samples))
    return {
        device_id: random.sample(population, k=round(data_pct * total_samples))
        for device_id in range(num_devices)
    }

**Implementing Components for Federated Learning**

---

In [0]:
## Code Cell 1.5

def model_average_weight(devices, model_id):
    '''
    Ranking-weighted average of model ``model_id`` over the devices on which
    that model is still active.

    devices: a list of devices generated by create_devices
    model_id: index of the model in each device's ``nets`` / ``ranking`` /
              ``active`` lists

    return: a state dict holding sum_i(ranking_i * weights_i) / sum_i(ranking_i),
            or None when no device has the model active or the rankings sum to
            zero (no meaningful average exists).

    Every active device contributes exactly once. (The first active device
    used to be added twice, which doubled its weight in the average.)
    '''
    participants = [d for d in devices if d.active[model_id] != 0]
    if not participants:
        return None

    ranking_sum = sum(d.ranking[model_id] for d in participants)
    if ranking_sum == 0:
        # Dividing by a zero total would fill the averaged weights with NaN/inf.
        return None

    first = participants[0]
    # Deep-copy so the accumulation never touches the first device's own tensors.
    global_tensors = copy.deepcopy(first.nets[model_id]['net'].state_dict())
    for key in global_tensors.keys():
        global_tensors[key] = global_tensors[key] * first.ranking[model_id]

    for d in participants[1:]:
        d_tensors = d.nets[model_id]['net'].state_dict()
        for key in global_tensors.keys(): #add the tensors together by the key they are indexed by
            global_tensors[key] += d_tensors[key] * d.ranking[model_id]

    for key in global_tensors.keys(): #normalise each tensor by the total ranking weight
        global_tensors[key] = global_tensors[key] / ranking_sum
    return global_tensors



def get_devices_for_round(devices, device_pct):
    '''
    Pick the devices that take part in one training round.

    devices: list of all devices
    device_pct: fraction of the devices to select (e.g. 0.5 for half)

    return: round(device_pct * len(devices)) devices, drawn at random
            without replacement
    '''
    num_selected = round(device_pct * len(devices))
    return random.sample(devices, k=num_selected)

---

### **2. Non-IID Testing and Archetype Definition Code**

---


**Non-iid Sampling**

---

In [0]:
## Code Cell 2.1

# creates noniid TRAINING and VALIDATION datasets for each group
def noniid_group_sampler(dataset, num_items_per_device, archetype, bias):
    '''
    Draw a label-skewed set of sample indexes for every device.

    dataset: PyTorch Dataset (e.g., CIFAR-10 training set); ``dataset[i][1]``
        must be an integer label in 0-9
    num_items_per_device: how many samples to assign to each device
    archetype: indexable by device index -> array of labels predominantly
        represented on that device; its length gives the number of devices
    bias: indexable by device index -> value from 0 to 1, the share of the
        device's samples drawn from its archetype labels

    For device i, int(num_items_per_device * bias[i]) indexes are sampled from
    the archetype labels. The rest come from the other labels of the device's
    meta-archetype: labels {0,1,2} when archetype[i][0] is in 0-2, labels
    {3,4,5} when it is in 3-5, always excluding archetype[i][0] itself. The
    combined list is shuffled.

    return: a dictionary of the following format:
      {
        0: [3, 65, 2233, ..., 22] // device 0 sample indexes
        1: [0, 2, 4, ..., 583] // device 1 sample indexes
        ...
      }
    '''
    # Index the dataset by label (assuming CIFAR, which has labels 0-9).
    label_dict = {label: [] for label in range(10)}
    for position in range(len(dataset)):
        label_dict[dataset[position][1]].append(position)

    final_dict = {}
    for device_id in range(len(archetype)):
        device_labels = archetype[device_id]
        primary = device_labels[0]

        # Two meta-archetypes: labels 0-2 and labels 3-5.
        if primary in (0, 1, 2):
            family = (0, 1, 2)
        elif primary in (3, 4, 5):
            family = (3, 4, 5)
        else:
            family = ()

        bias_group = [idx
                      for label in label_dict
                      if label in device_labels
                      for idx in label_dict[label]]
        not_bias_group = [idx
                          for label in family
                          if label != primary
                          for idx in label_dict[label]]

        num_biased = int(num_items_per_device * bias[device_id])
        exs = random.sample(bias_group, num_biased)
        exs.extend(random.sample(not_bias_group,
                                 num_items_per_device - num_biased))
        random.shuffle(exs)
        final_dict[device_id] = exs
    return final_dict

---
**Group-based Testing**



In [0]:
## Code Cell 2.3

# creates noniid TEST datasets for each group
def cifar_noniid_group_test(dataset):
    """Group the dataset's sample indexes by label.

    dataset: indexable dataset whose items are (image, label) with integer
             labels 0-9 (assuming CIFAR, change later)

    return: dictionary mapping every label 0-9 to the list of indexes carrying
            that label, in dataset order (empty list when a label is absent)
    """
    label_dict = {label: [] for label in range(10)}
    for position in range(len(dataset)):
        label_dict[dataset[position][1]].append(position)
    return label_dict

# gets per-group accuracy of global model
def test_group(epoch, device, model_id, label_dict, dataset = testset):
    """Evaluate model ``model_id`` of ``device`` separately on label groups 0-5.

    Uses the module-level ``criterion`` as the loss. The model is switched to
    evaluation mode for the measurement and back to training mode afterwards.

    epoch: not used by the computation (kept for the callers' signature)
    label_dict: mapping label -> list of indexes into ``dataset``
    dataset: dataset the indexes refer to (defaults to the module-level testset)

    return: dictionary mapping each group to (mean batch loss, accuracy in
            percent). The function previously computed these values and
            discarded them; callers that ignore the return value are
            unaffected. A group without samples reports (0.0, 0.0) instead of
            failing.
    """
    net = device.nets[model_id]['net']
    net.eval() #turn the net into evaluation mode
    results = {}
    with torch.no_grad():
        for group in [0,1,2,3,4,5]: #6 archetypes
            test_loss, correct, total, num_batches = 0, 0, 0, 0
            new_dataset = DatasetSplit(dataset, label_dict[group])
            dataloader = torch.utils.data.DataLoader(new_dataset, batch_size=128, shuffle=False,
                                            num_workers=2)
            for inputs, targets in dataloader:
                inputs, targets = inputs.cuda(), targets.cuda()
                outputs = net(inputs)
                test_loss += criterion(outputs, targets).item()
                num_batches += 1
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
            # Loss and accuracy at the end of the group
            loss = test_loss / num_batches if num_batches else 0.0
            acc = 100. * correct / total if total else 0.0
            results[group] = (loss, acc)
    net.train()
    sys.stdout.flush()
    return results


In [7]:
## Code Cell 3.1

def quantizer(input, nbit):
    '''
    Round every element to the nearest of the 2**nbit evenly spaced levels
    between 0 and 1.

    input: full precision tensor in the range [0, 1]
    nbit: number of bits of the quantized representation
    return: quantized tensor (a new tensor; ``input`` is left untouched)
    '''
    # distance between two neighbouring quantization levels
    step = 1 / (2**nbit - 1)

    # express the input in units of `step`, snap to the nearest integer level,
    # then scale back to the original range
    level_index = torch.round(input / step)
    return level_index * step

# Test Code: compare 4-bit quantization of eleven evenly spaced values in
# [0, 10/11] against the reference results.
test_data = torch.tensor([i/11 for i in range(11)])

# ground truth results of 4-bit quantization
ground_truth = torch.tensor([0.0000, 0.0667, 0.2000, 0.2667, 0.3333, 0.4667,
                             0.5333, 0.6667, 0.7333, 0.8000, 0.9333])

# output of your quantization function
quantizer_output = quantizer(test_data, 4)

# report whether the output agrees with the reference within 1e-4
print('Output of Quantization Matches!'
      if torch.allclose(quantizer_output, ground_truth, atol=1e-04)
      else 'Output of Quantization DOES NOT Match!')


## Code Cell 3.2

def quantize_model(model, nbit):
    '''
    Quantize, in place, the weights (and biases, when present) of every Conv2d
    and Linear module inside ``model`` with dorefa_g.

    Used in Code Cell 3.3 to quantize the ConvNet model. The scale returned
    for a module's weight is stored on the module as ``adaptive_scale`` and
    reused for that module's bias.
    '''
    for module in model.modules():
        if not isinstance(module, (nn.Conv2d, nn.Linear)):
            continue
        module.weight.data, module.adaptive_scale = dorefa_g(module.weight, nbit)
        if module.bias is None:
            continue
        module.bias.data, _ = dorefa_g(module.bias, nbit, module.adaptive_scale)

def dorefa_g(w, nbit, adaptive_scale=None):
    '''
    Quantize ``w`` to ``nbit`` bits with uniform random noise added before
    rounding.

    w: a floating-point weight tensor to quantize
    nbit: the number of bits in the quantized representation
    adaptive_scale: the maximum scale value. if None, it is set to be the
                    absolute maximum value in w.

    return: (quantized tensor, the adaptive_scale that was used)
    '''
    if adaptive_scale is None:
        adaptive_scale = w.abs().max()

    # uniform noise in [-0.5, 0.5) scaled to one quantization step,
    # cast to w's tensor type to avoid type errors
    levels = 2**nbit - 1
    noise = ((torch.rand(w.shape) - 0.5) / levels).type(w.type())

    # map w from [-scale, scale] into [0, 1], quantize, then map back
    span = 2*adaptive_scale
    normalized = w / span + 0.5 + noise
    w_q = span * (quantizer(normalized, nbit) - 0.5)

    return w_q, adaptive_scale


# Test Code: compare 4-bit dorefa_g quantization (with a fixed random seed)
# against the reference results.
test_data = torch.tensor([i/11 for i in range(11)])

# ground truth results of 4-bit quantization
ground_truth = torch.tensor([-0.0606, 0.0606, 0.1818, 0.3030, 0.3030, 0.4242,
                             0.5455, 0.5455, 0.7879, 0.7879, 0.9091])

# output of your quantization function; the seed fixes the random noise
torch.manual_seed(43)
quantizer_output, adaptive_scale = dorefa_g(test_data, 4)

# report whether the output agrees with the reference within 1e-4
print('Output of Quantization Matches!'
      if torch.allclose(quantizer_output, ground_truth, atol=1e-04)
      else 'Output of Quantization DOES NOT Match!')

Output of Quantization Matches!
Output of Quantization Matches!


---
**Federated Learning Results in Non-IID Setting**

In [0]:
# Train model on each device
# Get rankings on each device
# Update weights

# use these parameters
rounds = 45
local_epochs = 3
num_devices = 18
num_labels = 6
num_items_per_device = 5000
device_pct = 0.5
data_pct = 0.1
net = ConvNet().cuda()
criterion = nn.CrossEntropyLoss()
#duplicate_milestones = [2, 5, 15]
duplicate_milestones = [5, 15, 25, 30]

# Three consecutive devices share one archetype label (device i -> label i//3);
# every device draws its own bias b from uniform(0.6, 0.7).
devices_archetype = [[i//3] for i in range(num_devices)]
devices_bias = [random.uniform(0.6, 0.7) for i in range(num_devices)]

# Resulting data mix per device (b = that device's bias, between 60% and 70%):
#   device 0-2:   share b of label 0, remainder from labels 1,2
#   device 3-5:   share b of label 1, remainder from labels 0,2
#   device 6-8:   share b of label 2, remainder from labels 0,1
#   device 9-11:  share b of label 3, remainder from labels 4,5
#   device 12-14: share b of label 4, remainder from labels 3,5
#   device 15-17: share b of label 5, remainder from labels 3,4

#data_idxs = iid_sampler(trainset, num_devices, data_pct) #this is in the uniform case, without archetypes
# NOTE(review): trainset and validset wrap the same CIFAR-10 training images and
# are sampled independently here, so a device's validation indexes can overlap
# its training indexes -- confirm this is acceptable.
train_idxs = noniid_group_sampler(trainset, num_items_per_device, devices_archetype, devices_bias)
valid_idxs = noniid_group_sampler(validset, num_items_per_device // 3, devices_archetype, devices_bias)
test_idxs_device = noniid_group_sampler(testset, 500, devices_archetype, devices_bias)

# Global test set: every test sample whose label is one of the first
# num_labels labels, in random order.
label_dict_test = cifar_noniid_group_test(testset)
# test_idxs = label_dict_test[0] + label_dict_test[1]
test_idxs = []
for i in range(0, num_labels):
    test_idxs += label_dict_test[i]
random.shuffle(test_idxs)

arch_testset = DatasetSplit(testset, test_idxs)
test_dataloader = torch.utils.data.DataLoader(arch_testset, batch_size=128,
                                                shuffle=True, num_workers=2)

label_dict_valid = cifar_noniid_group_test(validset)

#print(devices_archetype)
#print(data_idxs)

In [11]:
# NON-IID federated learning with periodic model duplication.
#
# Every round: sample devices, train each active model locally, average the
# weights per model, validate/re-rank, and test each device's top-ranked
# model on its own test split. On rounds listed in duplicate_milestones every
# existing model is additionally cloned on every device, doubling the number
# of model ids.
model_id_lst = [0]

## Device creation
devices = create_devices(net, trainset, validset, testset, train_idxs,
                         valid_idxs, test_idxs_device, devices_bias,
                         devices_archetype, num_devices=num_devices)
print('Devices', len(devices))

start_time = time.time()
for round_num in range(rounds):

    # Part 1.3: Implement getting devices for each round here
    round_devices = get_devices_for_round(devices, device_pct)

    # Local training: each sampled device trains every model it still has
    # active for local_epochs epochs.
    for device in round_devices:
        for model_id in model_id_lst:
            if device.active[model_id] != 0:
                for local_epoch in range(local_epochs):
                    train(local_epoch, device, model_id)

    # Part 1.3: Implement weight averaging here
    for model_id in model_id_lst:
        w_avg = model_average_weight(round_devices, model_id)

        # Nothing to broadcast when no average was produced for this model.
        if w_avg is not None:
            for device in devices:
                if device.active[model_id] != 0:
                    device.nets[model_id]['net'].load_state_dict(w_avg)
                    # NOTE(review): an optimizer step with freshly zeroed
                    # gradients right after loading the averaged weights.
                    # Presumably this only exists so optimizer.step() precedes
                    # scheduler.step(); depending on the optimizer settings
                    # (momentum / weight decay) and the PyTorch version it may
                    # still nudge the weights -- confirm this is intended.
                    device.nets[model_id]['optimizer'].zero_grad()
                    device.nets[model_id]['optimizer'].step()
                    device.nets[model_id]['scheduler'].step()

        # test accuracy with highest ranking model (as ranked by device 0)
        if (devices[0].active[model_id] == 1
                and devices[0].ranking[model_id] == max(devices[0].ranking)):
            test(round_num, devices[0], model_id, arch_testset, test_dataloader)

    # Validation (milestone rounds validate every device further below instead)
    if round_num not in duplicate_milestones:
        for device in round_devices:
            for model_id in model_id_lst:
                if device.active[model_id] != 0:
                    validate(local_epochs - 1, device, model_id)    # <- there are print statements here
            # Figure out rankings here
            device.update_ranking()

    # Testing with IID from device: each device evaluates its top-ranked
    # model on its own test split.
    test_iid_results = []
    for device in devices:
        max_model = device.ranking.index(max(device.ranking))
        test_iid_results.append(float(test(round_num, device, max_model, device.device_testset, device.testloader)[1]))

    # Debugging aid (disabled):
    #   print(round_num, test_iid_results)
    #   print(round_num, [sum(d.active) for d in devices])
    #   for i, d in enumerate(devices): print(i, d.ranking)

    # duplicate all models
    if round_num in duplicate_milestones:
        # Run validation and update rankings for everyone
        for device in devices:
            for model_id in model_id_lst:
                if device.active[model_id] != 0:
                    validate(local_epochs - 1, device, model_id)    # <- there are print statements here
            # Figure out rankings here
            device.update_ranking()

        # Number of nets to duplicate; the clone of model k gets id
        # k + nets_to_create, which is also its index in device.nets.
        nets_to_create = len(model_id_lst)
        for model_id in range(0, nets_to_create):
            new_model_id = model_id + nets_to_create
            for device in devices:
                if device.active[model_id] != 0:    # If model wasn't already removed
                    # Fresh network carrying a copy of the source weights.
                    device_net = ConvNet().cuda()
                    device_net.load_state_dict(device.nets[model_id]['net'].state_dict())
                    valid_loss_tracker = [copy.deepcopy(device.nets[model_id]['valid_loss_tracker'][-1])]
                    # Seeded with (100 - last validation accuracy) of the
                    # source model; this value is also the ranking offset below.
                    valid_acc_tracker = [100 - copy.deepcopy(device.nets[model_id]['valid_acc_tracker'][-1])]
                    optimizer = torch.optim.SGD(device_net.parameters(), lr=0.1, momentum=0.9,
                                                weight_decay=5e-4)
                    # Shift the LR milestones by the length of the source
                    # model's train_acc_tracker so the clone continues the
                    # same schedule.
                    # NOTE(review): assumes one tracker entry per scheduler
                    # step -- confirm.
                    rounds_passed = len(device.nets[model_id]['train_acc_tracker'])
                    scheduler = torch.optim.lr_scheduler.MultiStepLR(
                        optimizer,
                        milestones=[25 - rounds_passed, 50 - rounds_passed, 75 - rounds_passed],
                        gamma=0.1)
                    device.nets.append({
                        'net': device_net,
                        'optimizer': optimizer,
                        'scheduler': scheduler,
                        'train_loss_tracker': [],
                        'train_acc_tracker': [],
                        'valid_loss_tracker': valid_loss_tracker,
                        'valid_acc_tracker': valid_acc_tracker,
                        'test_loss_tracker': [],
                        'test_acc_tracker': [],
                    })
                    device.active.append(1)
                    # Heavily rank the devices that are underperforming for new models and vice versa.
                    # valid_acc_tracker always holds exactly one entry here,
                    # so the offset is always available.
                    device.update_ranking(removed=False, duplicate_model_id=new_model_id,
                                          offset_rank=valid_acc_tracker[-1])

                else:                           # If model was already removed
                    # Placeholder entry keeps device.nets / device.active
                    # aligned with the model ids.
                    device.nets.append({
                        'valid_acc_tracker': [0.],
                    })
                    device.active.append(0)
                    device.update_ranking(removed=True, duplicate_model_id=new_model_id)

            model_id_lst.append(new_model_id)


total_time = time.time() - start_time
print('Total training time: {} seconds'.format(total_time))

Devices 18
0 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
0  hi  [1.0]
1  hi  [1.0]
2  hi  [1.0]
3  hi  [1.0]
4  hi  [1.0]
5  hi  [1.0]
6  hi  [1.0]
7  hi  [1.0]
8  hi  [1.0]
9  hi  [1.0]
10  hi  [1.0]
11  hi  [1.0]
12  hi  [1.0]
13  hi  [1.0]
14  hi  [1.0]
15  hi  [1.0]
16  hi  [1.0]
17  hi  [1.0]
1 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
0  hi  [1.0]
1  hi  [1.0]
2  hi  [1.0]
3  hi  [1.0]
4  hi  [1.0]
5  hi  [1.0]
6  hi  [1.0]
7  hi  [1.0]
8  hi  [1.0]
9  hi  [1.0]
10  hi  [1.0]
11  hi  [1.0]
12  hi  [1.0]
13  hi  [1.0]
14  hi  [1.0]
15  hi  [1.0]
16  hi  [1.0]
17  hi  [1.0]
2 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
0  hi  [1.0]
1  hi  [1.0]
2  hi  [1.0]
3  hi  [1.0]
4  hi  [1.0]
5  hi  [1.0]
6  hi  [1.0]
7  hi  [1.0]
8  hi  [1.0]
9  hi  [1.0]
10  hi  [1.0]
11  hi  [1.0]
12  hi  [1.0]
13  hi  [1.0]
14  hi  [1.0]
15  hi  [1.0]
16  hi  [1.0]
17  hi  [1.0]
3 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
0  hi  [1.0]
1  hi  [1.0]
2  hi  [1

In [0]:
# NON-IID federated learning, control run (no duplicating, no weighting).
#
# A single model (id 0) is trained on the sampled devices each round,
# averaged, broadcast to every device, and then tested.
model_id_lst = [0]

## Device creation
devices = create_devices(net, trainset, validset, testset, train_idxs,
                         valid_idxs, test_idxs_device, devices_bias,
                         devices_archetype, num_devices=num_devices)
print('Devices', len(devices))

start_time = time.time()
for round_num in range(rounds):

    # Part 1.3: Implement getting devices for each round here
    round_devices = get_devices_for_round(devices, device_pct)

    print('--------------Round: ' + str(round_num) + "-------------")

    # Local training; after every local epoch the model is validated and
    # evaluated per label group on the validation set.
    for device in round_devices:
        for model_id in model_id_lst:
            for local_epoch in range(local_epochs):
                train(local_epoch, device, model_id)
                validate(local_epoch, device, model_id)
                test_group(round_num, device, model_id, label_dict_valid, validset)

    # Part 1.3: Implement weight averaging here
    for model_id in model_id_lst:
        w_avg = model_average_weight(round_devices, model_id)

        # Skip the broadcast when no average was produced, instead of failing
        # inside load_state_dict. The duplicating run applies the same guard;
        # presumably model_average_weight returns None in that case -- confirm.
        if w_avg is not None:
            for device in devices:
                device.nets[model_id]['net'].load_state_dict(w_avg)
                # NOTE(review): optimizer step with freshly zeroed gradients;
                # presumably only so optimizer.step() precedes
                # scheduler.step() -- confirm it does not alter the weights.
                device.nets[model_id]['optimizer'].zero_grad()
                device.nets[model_id]['optimizer'].step()
                device.nets[model_id]['scheduler'].step()

        # test accuracy with highest ranking model (as ranked by device 0)
        if (devices[0].active[model_id] == 1
                and devices[0].ranking[model_id] == max(devices[0].ranking)):
            print()
            print("ALL-TEST ACCURACY")
            test(round_num, devices[0], model_id, arch_testset, test_dataloader)
            print("ALL-TEST TEST GROUPS ACCURACY")
            test_group(round_num, devices[0], model_id, label_dict_test)

    # Each device tests its top-ranked model on its own test split.
    # enumerate supplies the position directly, avoiding a linear
    # devices.index() search per device.
    for index, device in enumerate(devices):
        max_model = device.ranking.index(max(device.ranking))
        print("TESTING WITH IID FROM DEVICE ", index)
        test(round_num, device, max_model, device.device_testset, device.testloader)


total_time = time.time() - start_time
print('Total training time: {} seconds'.format(total_time))