In [1]:
using Random
import Base:+,-,*,println, sum, broadcasted, size, adjoint, show, dropdims, tanh, reshape
using Base.Iterators:partition, flatten

mutable struct Tensor
    data
    autograd
    creators
    creation_op
    id
    children
    grad 
    index_select_indices
    softmax_output
    target_dist
    parent_shape
    
    function Tensor(data; autograd=false, creators=nothing, creation_op = nothing, id=nothing)
        if isnothing(id)
            id = rand(1:100000)
        end
        T = new(data, autograd, creators, creation_op, id)
        T.children = Dict()
        T.grad = nothing
        T.index_select_indices = nothing
        
        if !(isnothing(creators))
            for c in creators
                if haskey(c.children, T.id)
                    c.children[T.id] += 1
                else
                    c.children[T.id] = 1
                end
            end
        end
        return T
    end
end

function all_children_grads_accounted_for(t::Tensor)
    for (id, cnt) in t.children
        if (cnt != 0)
            return false
        end
    end
    return true
end

function backward(t::Tensor, grad=nothing, grad_origin=nothing)
    if t.autograd
        if isnothing(grad)
            grad = Tensor(ones(size(t.data)))
        end
    
        if !(isnothing(grad_origin))
            if t.children[grad_origin.id] == 0
                return
                throw("cannot backprop more than once")
            else
                t.children[grad_origin.id] -= 1
            end
        end
        
        if isnothing(t.grad)
            t.grad = grad
        else
            t.grad += grad
        end
        
        # grads must not have grads of their own
        @assert !grad.autograd
        
        # only continue backpropping if there's something to
        # backprop into and if all gradients (from children)
        # are accounted for override waiting for children if
        # "backprop" was called on this variable directly
        
        if (!isnothing(t.creators) && (all_children_grads_accounted_for(t) || isnothing(grad_origin)))
            if t.creation_op == "add"
                backward(t.creators[1], t.grad, t)
                backward(t.creators[2], t.grad, t)
            end
            
            if t.creation_op == "sub"
                backward(t.creators[1], t.grad, t)
                backward(t.creators[2], -t.grad, t)
            end
            
            if t.creation_op == "mul"
                new_ = t.grad .* t.creators[2]
                backward(t.creators[1], new_, t)
                new_ = t.grad .* t.creators[1]
                backward(t.creators[2], new_, t)
            end
            
            if t.creation_op == "mm"
                c1 = t.creators[1]
                c2 = t.creators[2]
                new_ =  t.grad * c2' ################
                backward(c1, new_)
                new_ = c1' * t.grad
                backward(c2, new_)
            end
                  
            if t.creation_op == "transpose"
                backward(t.creators[1], t.grad')
            end
            
            if t.creation_op == "reshape"
                backward(t.creators[1], reshape(t.grad, t.parent_shape))
            end
            
            if occursin("sum", t.creation_op)
                dim = parse(Int, split(t.creation_op, "_")[2])
                backward(t.creators[1], expand(t.grad, dim, size(t.creators[1].data)[dim]))
            end
            
            if occursin("expand", t.creation_op)
                dim = parse(Int, split(t.creation_op, "_")[2])
                ndims_cr = ndims(t.creators[1].data)
                backward(t.creators[1], dropdims(sum(t.grad;dims=dim);dims=dim, ndims_cr=ndims_cr))
            end
            
            if t.creation_op == "neg"
                backward(t.creators[1], -t.grad)
            end
            
            if t.creation_op == "sigmoid"
                ones_ = Tensor(ones(size(t.grad.data)))
                backward(t.creators[1], t.grad .* t .* (ones_ - t) )
            end
            
            if t.creation_op == "tanh"
                ones_ = Tensor(ones(size(t.grad.data)))
                backward(t.creators[1], t.grad .* (ones_ - (t .* t)))
            end
            
            if t.creation_op == "index_select"
                new_grad = zeros(size(t.creators[1]))
                indices = t.index_select_indices.data
                major_chunks = partition(1:size(t.grad,2),length(indices))
                grad_chunks = [t.grad.data[:,inds][:,j]  for(i,inds) in enumerate(major_chunks) for j=1:size(inds)[1]]
    
                for (i,ind) in enumerate(flatten(indices))
                    new_grad[:,ind] +=  grad_chunks[i]
                end
                backward(t.creators[1], Tensor(new_grad))
            end
            if t.creation_op == "cross_entropy"
                dx = t.softmax_output .- t.target_dist
                backward(t.creators[1], Tensor(dx))
            end
        end
    end
end
                        
size(a::Tensor) = size(a.data)
size(a::Tensor, ind::Int) = size(a.data, ind)

function +(a::Tensor, b::Tensor)
    if (a.autograd && b.autograd)
        return Tensor(a.data + b.data; autograd=true, creators=[a,b], creation_op = "add")
    end
    return Tensor(a.data+b.data)
end

function -(a::Tensor)
    if (a.autograd)
        return Tensor(a.data .* -1; autograd=true, creators=[a], creation_op = "neg")
    end
    return Tensor(a.data .* -1)
end

function -(a::Tensor, b::Tensor)
    if (a.autograd && b.autograd)
        return Tensor(a.data - b.data; autograd=true, creators=[a,b], creation_op = "sub")
    end
    return Tensor(a.data-b.data)
end

#element-wise multiplication
function broadcasted(f::typeof(*), a::Tensor, b::Tensor)
    new_data = zeros(size(a.data))
    for i=1:length(new_data)
        new_data[i] = f(a.data[i] ,b.data[i])
    end
    if (a.autograd && b.autograd)
        return Tensor(new_data; autograd=true, creators=[a,b], creation_op ="mul")
    end
    return Tensor(new_data)
end

function broadcasted(f::typeof(-), a::Tensor, b::Tensor)
    new_data = zeros(size(a.data))
    for i=1:length(new_data)
        new_data[i] = -(a.data[i] ,b.data[i])
    end
    if (a.autograd && b.autograd)
        return Tensor(new_data; autograd=true, creators=[a,b], creation_op ="sub")
    end
    return Tensor(new_data)
end

function sum(a::Tensor; dims=dims)
    new_ = dropdims(sum(a.data ;dims=dims), dims = tuple(findall(size(a) .== 1)...))
    if (a.autograd)
        return Tensor(new_; autograd=true, creators=[a], creation_op = "sum_"*string(dims))
    end
    return Tensor(new_)
end

function dropdims(a::Tensor;dims=dims,ndims_cr=ndims_cr)
    if ndims(a.data) == ndims_cr
        return a
    end
    if (a.autograd)
        return Tensor(dropdims(a.data ;dims=dims); autograd=true, creators=[a], creation_op = "dropdims")
    end
    return Tensor(dropdims(a.data ;dims=dims))
end

function expand(a::Tensor, dim, copies)
    sz = size(a)
    rep = ntuple(d->d==dim ? copies : 1, length(sz)+1)
    new_size = ntuple(d->d<dim ? sz[d] : d == dim ? 1 : sz[d-1], length(sz)+1)
    new_data =  repeat(reshape(a.data, new_size), outer=rep)
    if (a.autograd)
        return Tensor(new_data; autograd=true, creators=[a], creation_op = "expand_"*string(dim))
    end
    return Tensor(new_data)
end

#transpose
function adjoint(a::Tensor)
    if (a.autograd)
        return Tensor(a.data';autograd=true, creators=[a], creation_op = "transpose")
    end
    return Tensor(a.data')
end

#matrix multiply 
function *(a::Tensor, b::Tensor)
    if (a.autograd && b.autograd)
        return Tensor(a.data * b.data; autograd=true, creators=[a,b], creation_op = "mm")
    end
    return Tensor(a.data * b.data)
end


function index_select_helper(a::Array, indices)
    return reduce(hcat,map(ind -> a[:,ind], indices))
end

function index_select(a::Tensor, indices::Tensor)
    new_ = index_select_helper(a.data, indices.data)
    if (a.autograd)
        T = Tensor(new_, autograd=true, creators=[a], creation_op = "index_select")
        T.index_select_indices = indices
        return T
    end
    return Tensor(new_)
end

function reshape(a::Tensor, shape)
    if (a.autograd)
        T= Tensor(reshape(a.data, shape); autograd=true, creators=[a], creation_op = "reshape")
        T.parent_shape = size(a.data)
        return T
    end
    return Tensor(reshape(a.data, shape))
end


println(t::Tensor) = println(t.data)
show(io::IO,m::MIME"text/plain",a::Tensor) = show(io,m,a.data)
                        
abstract type Layer end

function get_parameters(l::Layer)
    return l.parameters
end

mutable struct Linear <: Layer
    W
    b
    use_bias
    parameters
                            
    function Linear(n_inputs, n_outputs;bias=true)
        linear = new()
        linear.use_bias = bias
        linear.W = Tensor(randn(n_outputs, n_inputs) .* sqrt(2.0/n_inputs), autograd=true)
        if bias
            linear.b = Tensor(zeros(n_outputs), autograd=true) 
            linear.parameters = [linear.W,linear.b]
        else
            linear.parameters = [linear.W]
        end
        return linear
    end
end

function forward(l::Linear, input)
    if l.use_bias
        return (l.W * input)  + expand(l.b,2,size(input.data, 2))
    end
    return l.W * input
end                        

                        
mutable struct Sequential <: Layer
    layers
    function Sequential(layers)
        return new(layers)
    end
end

function add(s::Sequential, layer)
    push!(s.layers, layer)
end

function forward(s::Sequential, input)
    for layer in s.layers
        input = forward(layer, input)
    end
    return input
end

function get_parameters(s::Sequential)
    parameters = [get_parameters(layer) for layer in s.layers]
    return collect(Iterators.flatten(parameters))
end

mutable struct SGD
    parameters
    alpha
    SGD(parameters, alpha) = new(parameters, alpha)
end

function zero!(opt::SGD)
    for p in opt.parameters
        p.grad.data .*= 0.0
    end
end

function step(opt::SGD, zero=true)
    for p in opt.parameters
        p.data -= (p.grad.data .* opt.alpha)
        if zero
            p.grad.data .*= 0.0
        end
    end
end
                        
σ(x) = 1/(1+exp(-x))                        

struct Tanh <: Layer
    Tanh() = new()
end

struct Sigmoid <: Layer
    Sigmoid() = new()
end

function get_parameters(l::Tanh)
    return []
end

function get_parameters(l::Sigmoid)
    return []
end

function forward(l::Sigmoid, a::Tensor)
    if a.autograd
        return Tensor(σ.(a.data); autograd=true, creators=[a], creation_op = "sigmoid")
    end
    return Tensor(σ.(a.data))
end
        
function forward(l::Tanh, a::Tensor)
    if a.autograd
        return Tensor(tanh.(a.data); autograd=true, creators=[a], creation_op = "tanh")
    end
    return Tensor(tanh.(a.data))
end    
                        
                        
mutable struct Embedding <: Layer
    vocab_size
    dim
    weight
    parameters
    # this random initialiation style is just a convention from word2vec
    function Embedding(dim, vocab_size) 
        E = new(vocab_size, dim, Tensor((randn(dim, vocab_size) .- 0.5) ./ dim; autograd=true))
        E.parameters = [E.weight]
        return E
    end
end

function forward(E::Embedding, indices)
    return index_select(E.weight, indices)
end
                        
using Statistics: mean
using LinearAlgebra: I
function softmax(x)
    temp = exp.(x)
    return temp ./ sum(temp;dims=1)
end

struct MSELoss <: Layer
    MSELoss() = new()
end

function forward(l::MSELoss, pred, target)
    return sum((pred - target) .* (pred - target);dims=2)
end

struct CrossEntropyLoss 
    CrossEntropyLoss() = new()
end

function forward(l::CrossEntropyLoss, a::Tensor, target::Tensor)
    softmax_output = softmax(a.data)
    log_out = log.(softmax_output)
    sz = size(a.data, 1)
    identity = 1.0 .* Matrix(I, (sz, sz))
    target_dist = reshape(identity[:,target.data],(size(a.data)))
    loss = -mean(sum(log_out .* target_dist;dims=1))
    if a.autograd
        loss = Tensor(loss; autograd=true, creators=[a], creation_op = "cross_entropy")
        loss.softmax_output = softmax_output
        loss.target_dist = target_dist
        return loss
    end
    return Tensor(loss)
end


mutable struct RNNCell <: Layer
    n_hidden
    
    activation
    
    w_ih
    w_hh
    w_ho
    
    parameters
    
    function RNNCell(n_inputs, n_hidden, n_output, activation="sigmoid")
        if activation == "sigmoid"
            act = Sigmoid()
        elseif activation == "tanh"
            act = Tanh()
        else
            throw("Non-linearity not found")
        end
        
        parameters = []

        w_ih = Linear(n_inputs, n_hidden)
        w_hh = Linear(n_hidden, n_hidden)
        w_ho = Linear(n_hidden, n_output)
        
        push!(parameters, get_parameters(w_ih))
        push!(parameters, get_parameters(w_hh))
        push!(parameters, get_parameters(w_ho))
        parameters = collect(Iterators.flatten(parameters))
        return new(n_hidden, act, w_ih, w_hh, w_ho, parameters)
    end
end

function forward(rnn::RNNCell, input::Tensor, hidden::Tensor)
    from_prev_hidden = forward(rnn.w_hh, hidden)
    combined = forward(rnn.w_ih, input) + from_prev_hidden
    new_hidden = forward(rnn.activation, combined)
    output = forward(rnn.w_ho, new_hidden)
    return output, new_hidden
end

function init_hidden(rnn::RNNCell; batch_size=1)
    return Tensor(zeros(rnn.n_hidden, batch_size), autograd=true)
end

mutable struct LSTMCell <: Layer
    
    n_hidden
    
    xf
    xi
    xo
    xc
    
    hf
    hi
    ho
    hc
    
    w_ho
    parameters
    sigmoid
    tanh
    
    function LSTMCell(n_inputs, n_hidden, n_output)

        xf = Linear(n_inputs, n_hidden)
        xi = Linear(n_inputs, n_hidden)
        xo = Linear(n_inputs, n_hidden)        
        xc = Linear(n_inputs, n_hidden) 
        
        hf = Linear(n_hidden, n_hidden; bias=false)
        hi = Linear(n_hidden, n_hidden; bias=false)
        ho = Linear(n_hidden, n_hidden; bias=false)
        hc = Linear(n_hidden, n_hidden; bias=false) 
        
        w_ho = Linear(n_hidden, n_output; bias=false)
        
        parameters = [get_parameters(i) for i in [xf, xi, xo, xc, hf, hi, hc, w_ho]]
        parameters = collect(Iterators.flatten(parameters))
        
        return new(n_hidden, xf, xi, xo, xc, hf, hi, ho, hc, w_ho, parameters, Sigmoid(), Tanh())
    end
end

function forward(lstm::LSTMCell, input::Tensor, hidden)
    
    prev_hidden = hidden[1]        
    prev_cell = hidden[2]
    
    f = forward(lstm.xf, input) + forward(lstm.sigmoid, forward(lstm.hf, prev_hidden))
    i = forward(lstm.xi, input) + forward(lstm.sigmoid, forward(lstm.hi, prev_hidden))
    o = forward(lstm.xo, input) + forward(lstm.sigmoid, forward(lstm.ho, prev_hidden))
    g = forward(lstm.xc, input) + forward(lstm.tanh, forward(lstm.hc, prev_hidden))
    
    c = (f .* prev_cell) + (i .* g)

    h = o .* forward(lstm.tanh, c)
    
    output = forward(lstm.w_ho, h)
    
    return output, (h, c)
end

function init_hidden(lstm; batch_size=1)
    init_hidden = Tensor(zeros(lstm.n_hidden, batch_size), autograd=true)
    init_cell   = Tensor(zeros(lstm.n_hidden, batch_size), autograd=true)
    
    init_hidden.data[1,:] .+= 1
    init_cell.data[1,:] .+= 1
    return (init_hidden, init_cell)
end

init_hidden (generic function with 2 methods)

# Step 1: Plain Ole Fashioned Deep Learning (Email Spam Detection)

In [2]:
raw = readlines("spam.txt")
raw = filter.(isascii,raw)
vocab = Set()

spam = []

for row in raw
    push!(spam, Set(split(row[1:end-1])))
    for word in spam[end]
        push!(vocab, word)
    end
end

raw = readlines("ham.txt")
raw = filter.(isascii,raw)

ham = []
for row in raw
    push!(ham, Set(split(row[1:end-1])))
    for word in ham[end]
        push!(vocab, word)
    end
end

push!(vocab, "<unk>")

vocab = collect(vocab)

w2i = Dict()
for (i,w) in enumerate(vocab)
    w2i[w] = i
end

function to_indices(input; l=500)
    indices = []
    for line in input
        if length(line) < l
            line = cat(collect(line), repeat(["<unk>"], l-length(line));dims=1)
            idxs = []
            
            for word in line
                
                push!(idxs, w2i[word])
            end
            push!(indices, idxs)
        end
    end
    return indices
end

spam_idx = to_indices(spam)
ham_idx = to_indices(ham)

train_spam_idx = spam_idx[1:end-1001]
train_ham_idx = ham_idx[1:end-1001]

test_spam_idx = spam_idx[end-1000:end]
test_ham_idx = ham_idx[end-1000:end]

train_data = []
train_target = []

test_data = []
test_target = []

for i=1:maximum([length(train_spam_idx), length(train_ham_idx)])
    push!(train_data, train_spam_idx[((i-1)%length(train_spam_idx))+1])
    push!(train_target,[1])
    
    push!(train_data, train_ham_idx[((i-1)%length(train_ham_idx))+1])
    push!(train_target,[0])
end

for i=1:maximum([length(test_spam_idx), length(test_ham_idx)])
    push!(test_data, test_spam_idx[((i-1)%length(test_spam_idx))+1])
    push!(test_target,[1])
    
    push!(test_data, test_ham_idx[((i-1)%length(test_ham_idx))+1])
    push!(test_target,[0])
end
train_target = reduce(hcat,train_target)
test_target = reduce(hcat, test_target);

In [9]:
model = Embedding(1,length(vocab))
sigmoid = Sigmoid()
model.weight.data .*= 0
criterion = MSELoss()
optim = SGD(get_parameters(model), 0.01);

In [3]:
using Base.Iterators: partition
function train(model,input_data, target_data; batch_size=500, iterations=5)
    criterion = MSELoss()
    optim = SGD(get_parameters(model), 0.01)
    
    n_batches = trunc(Int,length(input_data) / batch_size)
    for iter=1:iterations
        iter_loss = 0
        for (j,i) in enumerate(partition(1:length(input_data), batch_size))
            # padding token should stay at 0
            model.weight.data[:,w2i["<unk>"]] *= 0 
            
            input = Tensor(input_data[i], autograd=true)
            target = Tensor(target_data[:,i], autograd=true)
            embed_out = reshape(forward(model, input), (1,length(input_data[1]),length(i)))
            pred = forward(sigmoid, sum(embed_out;dims=2))
            
            loss = forward(criterion, pred,target)
            backward(loss)
            step(optim)

            iter_loss += loss.data[1] / batch_size
            print("\tLoss: $(iter_loss/(j+1)) \r")
        end
        println()
    end
   return model 
end

train (generic function with 1 method)

In [4]:
function test(model, test_data, test_target)
    model.weight.data[:,w2i["<unk>"]] *= 0 
    input = Tensor(test_data, autograd=true)
    # target = Tensor(test_target, autograd=true)
    embed_out = reshape(forward(model, input), (1,500,:))
    pred = forward(sigmoid, sum(embed_out;dims=2))
    pred_labels = Int.(pred.data .>= 0.5) .== test_target
    return round(100*sum(pred_labels)/length(test_target);digits=2)
end

test (generic function with 1 method)

In [23]:
for i=1:3
    model = train(model, train_data, train_target;batch_size=500,iterations=1)
    println("Correct on Test Set: $(test(model, test_data, test_target))")
end

Loss: 0.036542026216407765 
Correct on Test Set: 98.5
Loss: 0.011293550777292613 
Correct on Test Set: 99.15
Loss: 0.008138803819084366 
Correct on Test Set: 99.4


# Basic Federated Learning

In [5]:
bob = (train_data[1:10000], train_target[:,1:10000])
alice = (train_data[10001:20000], train_target[:,10001:20000])
sue = (train_data[20001:end], train_target[:,20001:end]);

In [31]:
model = Embedding(1,length(vocab))
model.weight.data .*= 0;

In [32]:
for i=1:3
    println("Starting Training Round...")
    
    println("\tStep 1: send the model to Bob")
    bob_model = train(deepcopy(model), bob[1], bob[2]; iterations=1)
    
    println("\n\tStep 2: send the model to Alice")
    alice_model = train(deepcopy(model), alice[1], alice[2]; iterations=1)
    
    println("\n\tStep 3: Send the model to Sue")
    sue_model = train(deepcopy(model), sue[1], sue[2]; iterations=1)
    
    println("\n\tAverage Everyone's New Models")
    model.weight.data = (bob_model.weight.data + 
                         alice_model.weight.data + 
                         sue_model.weight.data)./3
    
    println("Correct on Test Set: $(test(model, test_data, test_target))")
    println("\nRepeat!!\n")
end

Starting Training Round...
	Step 1: send the model to Bob
	Loss: 0.08677672659327845 

	Step 2: send the model to Alice
	Loss: 0.08217674407715851 

	Step 3: Send the model to Sue
	Loss: 0.05120595136342707  

	Average Everyone's New Models
Correct on Test Set: 97.65

Repeat!!

Starting Training Round...
	Step 1: send the model to Bob
	Loss: 0.02125951596629757  

	Step 2: send the model to Alice
	Loss: 0.02054173204982453  

	Step 3: Send the model to Sue
	Loss: 0.01952674364865583  

	Average Everyone's New Models
Correct on Test Set: 98.3

Repeat!!

Starting Training Round...
	Step 1: send the model to Bob
	Loss: 0.015777530894521444 

	Step 2: send the model to Alice
	Loss: 0.015155423992275244 

	Step 3: Send the model to Sue
	Loss: 0.014909027517198395 

	Average Everyone's New Models
Correct on Test Set: 98.65

Repeat!!



# Hacking Federated Learning

In [66]:
bobs_email = ["my", "computer", "password", "is", "pizza"]

bob_input = [[w2i[x] for x in bobs_email], [w2i[x] for x in bobs_email]]
bob_target = reshape([0,0],(1,2))

model = Embedding(1,length(vocab))
model.weight.data .*= 0;

bobs_model = train(deepcopy(model), bob_input, bob_target; iterations=1, batch_size=2);

	Loss: 0.125 


In [67]:
for (i,v) in enumerate(bobs_model.weight.data - model.weight.data)
    if (v != 0)
        println(vocab[i])
    end
end

password
pizza
is
my
computer


# Homomorphic Encryption

In [None]:
using Pkg
Pkg.add("Paillier")

In [6]:
using Paillier

In [77]:
public_key, private_key = Paillier.generate_paillier_keypair(1024)

# encrypt the number "5"
x = Paillier.encrypt(public_key, 5)

# encrypt the number "3"
y = Paillier.encrypt(public_key, 3)

# add the two encrypted values
z = x+y

# decrypt the result
z_ = Paillier.decrypt(private_key, z)
println("The Answer: ", z_)

The Answer: 8


# Secure Aggregation

In [48]:
model = Embedding(1,length(vocab))
model.weight.data .*= 0

# note that in production the n_length should be at least 1024
publickey, privatekey = Paillier.generate_paillier_keypair(128)
encoding = Paillier.Encoding(Float64, publickey)


function train_and_encrypt(model, input, target, encoding; iterations=1)
    new_model = train(model, input, target; iterations=iterations)
    encrypted_weights = Paillier.encode_and_encrypt(new_model.weight.data, encoding)
    return encrypted_weights
end

train_and_encrypt (generic function with 1 method)

In [50]:
for i=1:3
    println("Starting Training Round...")
    
    println("\tStep 1: send the model to Bob")
    bob_model = train_and_encrypt(deepcopy(model), bob[1], bob[2], encoding; iterations=1)
    
    
    println("\n\tStep 2: send the model to Alice")
    alice_model = train_and_encrypt(deepcopy(model), alice[1], alice[2], encoding; iterations=1)
    
    println("\n\tStep 3: Send the model to Sue")
    sue_model = train_and_encrypt(deepcopy(model), sue[1], sue[2], encoding; iterations=1)
    
    
    print("\n\tStep 4: Bob, Alice, and Sue send their")
    println("\tencrypted models to each other.")
    
    aggregated_model = zeros(size(bob_model))
    for i=1:length(bob_model)
        aggregated_model[i] = Paillier.decrypt_and_decode(privatekey, bob_model[i] + alice_model[i] + sue_model[i])/3
    end
    model.weight.data = aggregated_model
    
    println("Correct on Test Set: $(test(model, test_data, test_target))")
    println("\nRepeat!!\n")
end

Starting Training Round...
	Step 1: send the model to Bob
	Loss: 0.08677672659327845 

	Step 2: send the model to Alice
	Loss: 0.08217674407715851 

	Step 3: Send the model to Sue
	Loss: 0.05120595136342707  

	Step 4: Bob, Alice, and Sue send their	encrypted models to each other.
Correct on Test Set: 97.65

Repeat!!

Starting Training Round...
	Step 1: send the model to Bob
	Loss: 0.02125951596629757  

	Step 2: send the model to Alice
	Loss: 0.02054173204982453  

	Step 3: Send the model to Sue
	Loss: 0.01952674364865583  

	Step 4: Bob, Alice, and Sue send their	encrypted models to each other.
Correct on Test Set: 98.3

Repeat!!

Starting Training Round...
	Step 1: send the model to Bob
	Loss: 0.015777530894521448 

	Step 2: send the model to Alice
	Loss: 0.015155423992275244 

	Step 3: Send the model to Sue
	Loss: 0.014909027517198394 

	Step 4: Bob, Alice, and Sue send their	encrypted models to each other.
Correct on Test Set: 98.65

Repeat!!

