<a href="https://colab.research.google.com/github/sadrasafa/Comp541-Project/blob/master/FlowGMM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Julia on Colaboratory ##


In [0]:
# Installation cell
%%shell
if ! command -v julia 2>&1 > /dev/null
then
    wget 'https://julialang-s3.julialang.org/bin/linux/x64/1.3/julia-1.3.1-linux-x86_64.tar.gz' \
        -O /tmp/julia.tar.gz
    tar -x -f /tmp/julia.tar.gz -C /usr/local --strip-components 1
    rm /tmp/julia.tar.gz
fi
julia -e 'using Pkg; pkg"add Plots; add PyPlot; add IJulia; add Knet; precompile"'
julia -e 'using Pkg; pkg"build Knet;"'

In [1]:
using Knet
# Test if Knet is using gpu
Knet.gpu()

┌ Info: Precompiling Knet [1902f260-5fb4-5aff-8c31-6271790ab950]
└ @ Base loading.jl:1273
└ @ CuArrays /root/.julia/packages/CuArrays/HE8G6/src/CuArrays.jl:127


0

# Install Libraries

In [0]:
using Pkg;

# Install missing packages
for p in ["Knet", "MLJ", "MLJModels", "Distributions", "Plots", "PyPlot"]
    if !haskey(Pkg.installed(),p)
        Pkg.add(p);
    end
end

using Knet, Plots, Random, PyPlot, MLJ, Distributions, LinearAlgebra

# FlowGMM

In [3]:
atype = (Knet.gpu()>=0 ? Knet.KnetArray{Float32} : Array{Float32})

KnetArray{Float32,N} where N

In [4]:
struct Mask; d; reverse; end

#one argument: (mask) will return x_id and x_change
#two argument: (unmask) will_return concat(y_id, y_change)
function (mask::Mask)(x) 
    len = size(x, 1)
    b = convert(atype,zeros(len,1))
    d = mask.d
    if mask.reverse 
        b[d+1:end,1] .= 1
    else
        b[1:d,1] .= 1
    end
    x_id = x .* b
    x_change = x .* (1 .- b)
    return x_id, x_change
end
function (mask::Mask)(y_id, y_change)
    len = size(y_id, 1)
    b = convert(atype,zeros(len,1))
    d = mask.d
    if mask.reverse 
        b[d+1:end,1] .= 1
    else
        b[1:d,1] .= 1
    end
    return y_id .* b + y_change .* (1 .- b)
end


struct Sequential
    layers
    Sequential(layers...) = new(layers)
end
(s::Sequential)(x) = (for l in s.layers; x = l(x); end; x)

struct DenseLayer; w; b; f; end

DenseLayer(i::Int,o::Int, f=relu) = DenseLayer(param(o,i), param0(o), f)

(d::DenseLayer)(x) = d.f.(d.w * x .+ d.b)



#Coupling Layer
mutable struct CouplingLayer; st_net::Sequential; mask::Mask; logdet; end

function CouplingLayer(;in_dim::Int, hidden_dim::Int, num_layers::Int, mask::Mask)
    layers = []
    push!(layers, DenseLayer(in_dim, hidden_dim, relu))
    for layer in 1:num_layers
        push!(layers, DenseLayer(hidden_dim, hidden_dim, relu))
    end
    push!(layers, DenseLayer(hidden_dim, 2*in_dim, identity))
    st_net = Sequential(layers...)
    CouplingLayer(st_net, mask, 0.0)
end

function (cpl::CouplingLayer)(x)
    x_id, x_change, s, t = get_s_and_t(cpl, x)
    y_change = x_change .* exp.(s) .+ t #in original code, first addition is performed, then exponentiation
    y_id = x_id
    cpl.logdet = sum(s; dims=1)
    return cpl.mask(y_id, y_change)
end
#st is a neural network, the first part of the output is used as s, second part as t
function get_s_and_t(cpl::CouplingLayer, x)
    x_id, x_change = cpl.mask(x)
    st = cpl.st_net(x_id)
    middle = (size(st)[1]+1)÷2
    s, t = st[1:middle,:], st[middle+1:end,:]
    return (x_id, x_change, s, t)
end


struct RealNVP; seq::Sequential; end

function RealNVP(;in_dim::Int, hidden_dim::Int, num_coupling_layers::Int, num_hidden_layers::Int)
    coupling_layers = []
    for i in 1:num_coupling_layers
        push!(coupling_layers, CouplingLayer(;in_dim=in_dim, hidden_dim=hidden_dim, num_layers=num_hidden_layers, mask=Mask(div(in_dim,2), Bool(i%2))))
    end
    seq = Sequential(coupling_layers...)
    RealNVP(seq)
end

(realnvp::RealNVP)(x) = realnvp.seq(x)

function logdet(realNVP::RealNVP)
    total_logdet = 0.0
    for cpl in realNVP.seq.layers
        total_logdet = total_logdet .+ cpl.logdet
    end
    return total_logdet
end

logdet (generic function with 1 method)

In [5]:
function make_moons_ssl()
    Knet.seed!(2020)
    Random.seed!(2020)
    n_samples = 1000
    data = MLJ.make_moons(n_samples;noise=.05,)
    data = convert(atype, permutedims(hcat(data[1][1], data[1][2])))
    labels = convert(atype, ones(1,n_samples)) * (-1)
    idx1 = [1 2 4 5 6]
    labels[idx1] = 1
    idx0 = [3 7 8 11 18]
    labels[idx0] = 0
    return data, labels
end

make_moons_ssl (generic function with 1 method)

In [6]:
struct Prior; means; n_components; d; gaussians; weights; end
#n_components: number of classes
#d: feature dimenstion of data points
#means: d x n_components
#gaussians: we have n_components multivariate-gaussians, each with size d
function Prior(means)
    d, n_components = size(means)
    weights = convert(atype, ones(1, n_components))
    gaussians = []
    for i in 1:n_components
        mu = means[:,i]
        sig = Matrix{Float64}(I, d, d)
        push!(gaussians, MvNormal(mu, sig))
    end
    Prior(means, n_components, d, gaussians, weights)
end

function log_prob(prior::Prior, z, labels=nothing; label_weight=1.0)
    all_log_probs = []
    for g in prior.gaussians
        push!(all_log_probs, logpdf(g, convert(Array{Float32}, z)))
    end
    all_log_probs = convert(atype, hcat(all_log_probs...)) #n_instances x n_components
    mixture_log_probs = logsumexp(all_log_probs .+ log.(softmax(prior.weights)); dims=2)
    if labels == nothing
        return mixture_log_probs
    else
        log_probs = convert(atype, zeros(size(mixture_log_probs)))
        int_labels = convert(Array{Int32}, labels)
        mask = (int_labels .== -1)
        log_probs[mask] += mixture_log_probs[mask]
        for i in 1:prior.n_components
            mask = (int_labels .== (i-1))
            log_probs[mask] += all_log_probs[:, i][mask] * label_weight
        end
        return log_probs
    end
end

log_prob (generic function with 2 methods)

In [71]:
function flow_loss(z, logdet, labels, prior; k=256)
    prior_ll = log_prob(prior, z, labels)

    #I dont know why we are doing this correction
    batch_size = size(z,2)
    kk = length(z) / batch_size 
    
    corrected_prior_ll = prior_ll .- log(k) * kk
    
    ll = corrected_prior_ll + permutedims(logdet)
    nll = -mean(ll)
    return nll
end

flow_loss (generic function with 1 method)

In [58]:
data, labels = make_moons_ssl()
prior = Prior([-3.5 3.5; -3.5 3.5])
realnvp = RealNVP(in_dim=2, hidden_dim=512, num_coupling_layers=5, num_hidden_layers=1)
z = realnvp(data)
sldj = logdet(realnvp)
loss = flow_loss(z, sldj, labels, prior)

23.688066f0

In [0]:
lr = 1e-4
epochs = 2001

num_unlabeled = Int(sum(labels .== -1))
num_labeled = size(labels)[2] - num_unlabeled
batch_size = num_labeled
print_freq = 500

int_labels = convert(Array{Int32}, labels)

mask_labeled = [index[2] for index in findall(label->label!=-1, int_labels)]
labeled_data = data[:,mask_labeled]
labeled_labels = labels[mask_labeled]

mask_unlabeled = [index[2] for index in findall(label->label==-1, int_labels)]
unlabeled_data = data[:, mask_unlabeled]
unlabeled_labels = labels[mask_unlabeled]

for p in Knet.params(realnvp)
    p.opt = Adam(;lr=lr)
end

for epoch in 1:epochs
    batch_idx = Distributions.sample(1:num_unlabeled, batch_size, replace=false)
    batch_x, batch_y = unlabeled_data[:, batch_idx], unlabeled_labels[batch_idx]
    batch_x = hcat(batch_x, labeled_data)
    batch_y = vcat(batch_y, labeled_labels)

    z = realnvp(batch_x)
    sldj = logdet(realnvp)
    loss = @diff flow_loss(z, sldj, batch_y, prior)
    for p in Knet.params(realnvp)
        g = Knet.grad(loss, p)
        update!(Knet.value(p), g, p.opt)
    end
    if epoch % print_freq == 0
        print("iter ")
        print(epoch)
        print(" loss: ")
        print(loss)
        println(" ")
    end
    if epoch == Int(floor(epochs * 0.5)) || epoch == Int(floor(epochs * 0.8))
        lr /= 10
        for p in Knet.params(realnvp)
            p.opt = Adam(;lr=lr)
        end
    end
end