In [1]:
import os
import pickle
from copy import deepcopy

from torchvision import datasets

from src import *

In [2]:
def standardTopology(K, standardTopo):
    """Copy a standard topology on K nodes into an editable CustomTopology.

    Args:
        K: number of nodes, forwarded to both `CustomTopology` and `standardTopo`.
        standardTopo: callable taking K and returning an object that exposes
            `to_networkx()` (e.g. a topology class such as BinaryTreeTopology).

    Returns:
        The `CustomTopology` whose neighbours were set from the dense
        adjacency matrix of `standardTopo(K)`.
    """
    editable = CustomTopology(K)

    # Dense adjacency matrix of the reference topology's networkx graph.
    referenceGraph = standardTopo(K).to_networkx()
    adjacency = nx.adjacency_matrix(referenceGraph).toarray()
    editable.set_neighbours(adjacency)

    # The returned graph is dropped on purpose; presumably the call refreshes
    # the topology's internal graph after the neighbour update -- TODO confirm.
    editable.to_networkx()
    return editable

### 1. Retrieve the end topology from the topology-search logs
We use the logs to retrieve the topologies found by the iterative method. We use that topology for the following experiment.

In [5]:
# Fetch MNIST (downloaded into the working directory if missing) and turn the
# arrays returned by the project Loader into torch tensors.
datasetMNIST = datasets.MNIST(root="./", download=True)
load = Loader()
mnist, mnistL = load.toArray(datasetMNIST)
mnist = torch.tensor(mnist).float()
mnistL = torch.tensor(mnistL)
print('shape of dataset from MNIST {}'.format(mnist.shape))

# Split along the first axis: everything but the last 20000 samples is used
# for training, the next 10000 for validation and the final 10000 for testing.
trainData, valData, testData = mnist[:-20000], mnist[-20000:-10000], mnist[-10000:]
trainL, valL, testL = mnistL[:-20000], mnistL[-20000:-10000], mnistL[-10000:]

shape of dataset from MNIST torch.Size([60000, 28, 28])


In [6]:
# `s` seeds numpy for the data partition below and is also the key used to
# pick the matching run out of the topology-search logs in the next cell.
s = 3
nbEpoch    = 100
batchSize  = 100
K = 8 # number of nodes

# Seed numpy right before partitioning -- presumably mnist_noniid draws from
# np.random, so this pins the non-IID split; TODO confirm.
np.random.seed(s)
nodesNoneIIDData = []
indexIID = mnist_noniid(trainData, trainL, K)

for i in range(K):
    # indexIID was computed from the training split, so index that same split.
    # (Indexing the full `mnist` tensor only gives the same samples while every
    # index stays inside the training prefix; going through trainData makes it
    # impossible to silently pull validation/test samples into a node.)
    tmpX = trainData[indexIID[i]].reshape(-1, batchSize, 1, 28,28)
    tmpy = trainL[indexIID[i]].reshape(-1, batchSize)
    # One (batched inputs, batched labels) pair per node.
    nodesNoneIIDData.append((tmpX,tmpy))

In [6]:
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load logs that were produced by this project.
with open('./logs/scoresValImprovedBinary.pkl','rb') as f:
    valImp   = pickle.load(f)

K = 8 # number of nodes
startingTopology = BinaryTreeTopology
topology = standardTopology(K, startingTopology)

# Set of labels held by each node, computed once instead of once per
# candidate per step (the node data does not change inside the loop).
nodeLabels = [set(nodeY.flatten().tolist()) for _, nodeY in nodesNoneIIDData]

# Replay the logged search: each logged validation frame triggers one edge
# addition, mirroring the decision rule below.
topologies = []
for resultdfVal in valImp[s]:
    # Snapshot of the topology *before* this step's edge is added.
    topologies.append(deepcopy(topology))

    # Rows of the last epoch. The index is reset so that label == position:
    # `idxmin` then yields a position that is safe to feed to `.iloc`
    # regardless of how the filtered frame was indexed.
    lastEpoch   = resultdfVal[resultdfVal['Epoch']==nbEpoch].reset_index(drop=True)
    nodeMin     = int(lastEpoch['acc'].idxmin())
    node2Change = lastEpoch.iloc[nodeMin]['node#']
    nodeId      = int(node2Change)

    # Let's find a non neighbour node to add into the neighbours
    # so as to perhaps have better performance later on
    cur_neighbours = topology.get_neighbors(nodeId)
    exist          = set()
    for cur in cur_neighbours:
        exist     |= nodeLabels[cur]

    # Sorted so that ties in the argmax below are always broken the same way
    # (lowest node id wins) instead of depending on set iteration order.
    non_neighbours = sorted(set(range(K)) - set(cur_neighbours) - {nodeId})
    if len(non_neighbours) == 0: break

    # Pick the candidate whose labels, merged with the labels already
    # reachable through current neighbours, cover the most distinct classes.
    idxBest        = np.argmax([len(exist | nodeLabels[noncur])
                       for noncur in non_neighbours])
    node2attach    = non_neighbours[idxBest]

    # Add the undirected edge. `adjency` (sic) is the attribute name used by
    # the original code; presumably it matches the topology class -- verify.
    topology.adjency[nodeId,node2attach] = 1
    topology.adjency[node2attach,nodeId] = 1
    # Return value unused; presumably refreshes the internal graph -- TODO confirm.
    topology.to_networkx()

### 2. Performance evaluation on 10 runs with a fixed distribution but 10 seeds for model weight initialization and the D-SGD process
Previously we computed 10 runs in which both the data distribution and the training run were seeded, so as not to overfit to a single distribution. After finding a decent topology, we once again do 10 runs, this time with a fixed distribution, to verify the better accuracy on both the validation and test datasets that the new topology yields.

In [7]:
# One entry per run; each entry is itself a one-element list (see loop below).
scoresTrain  = []
scoresVal    = []
scoresTest   = []
scoresDist   = []

# PARAM DEFINING
K = 8 # number of nodes
nbEpoch    = 100
batchSize  = 100
nbRuns     = 10 # number of training runs, one torch seed per run
# Validation and test splits are each wrapped as a single batch:
# inputs of shape (1, N, 1, 28, 28) and labels of shape (1, N).
nodesValidation = (valData.reshape(-1, valData.shape[0], 1, 28, 28), valL.reshape(-1,valL.shape[0]))
nodesTest       = (testData.reshape(-1, testData.shape[0], 1, 28, 28), testL.reshape(-1,testL.shape[0]))


# Generating NON-IID Dataset distribution
# The partition is seeded with `s` only, so it is identical for all runs.
nodesNoneIIDData = []
np.random.seed(s)
indexIID = mnist_noniid(trainData, trainL, K)

for i in range(K):
    # indexIID was computed from the training split, so index that same split
    # rather than the full `mnist` tensor (which also holds the validation and
    # test samples).
    tmpX = trainData[indexIID[i]].reshape(-1, batchSize, 1, 28,28)
    tmpy = trainL[indexIID[i]].reshape(-1, batchSize)
    nodesNoneIIDData.append((tmpX,tmpy))

# The loss carries no per-run state here, so build it once for all runs.
loss_fn = nn.CrossEntropyLoss()

# LOOP FOR STATIC TOPOLOGY
for t in tqdm(range(nbRuns)):
    # Fix seed for model training. Only torch is reseeded per run.
    # NOTE(review): numpy's global state is not reset here; if
    # computeDecentralize draws from np.random, a run depends on the ones
    # executed before it -- confirm.
    torch.manual_seed(t)
    resultsTrain = []
    resultsVal   = []
    resultsDist  = []
    resultsModel = []
    ret   = []
    # Single-iteration loop: a single configuration is evaluated, and the
    # results stay wrapped in one-element lists (scores*[run][0]).
    for k in [0]:

        # Fresh 3d torus topology for every run
        topology = TorusTopology(K, dimension='3d')

        resultdfTrain, resultdfVal, cdist, models = computeDecentralize(nodesNoneIIDData, topology,
                                                            K, max_epoch=nbEpoch,
                                                           validation=nodesValidation)
        resultsTrain.append(resultdfTrain)
        resultsVal.append(resultdfVal)
        resultsDist.append(cdist)
        resultsModel.append(models)

        # Test accuracy with obtained model: every node's model is scored on
        # the same held-out test split.
        graph   = topology.to_networkx()

        test_stats = {str(node):[] for node in graph.nodes()}
        for node_idx in graph.nodes():

            lossTest, accTest = subopt(nodesTest, models[node_idx], loss_fn)
            curTestDir        = {'loss': lossTest, 'acc': accTest}
            test_stats[str(node_idx)].append(curTestDir)

        resultdfTest = getLogs(test_stats)
        ret.append(resultdfTest)

    # LOGGING THE RESULTS
    scoresTrain.append(resultsTrain)
    scoresVal.append(resultsVal)
    scoresDist.append(resultsDist)
    scoresTest.append(ret)

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

In [8]:
# Create the output directory up front: without it the first open() raises
# FileNotFoundError and the results of the whole training run are lost.
os.makedirs('./trainingLogs', exist_ok=True)

# Persist each score collection of the torus runs to its own pickle file.
for logPath, logObj in (
    ('./trainingLogs/scoresTrainTorus.pkl', scoresTrain),
    ('./trainingLogs/scoresValTorus.pkl', scoresVal),
    ('./trainingLogs/scoresTestTorus.pkl', scoresTest),
    ('./trainingLogs/scoresDistTorus.pkl', scoresDist),
):
    with open(logPath, 'wb') as f:
        pickle.dump(logObj, f)