# Naive Bayes  
[Reference 1](https://en.wikipedia.org/wiki/Naive_Bayes_classifier)  
[Reference 2](https://scikit-learn.org/stable/modules/naive_bayes.html)

Bayes' theorem:
$$P(y|x_1,...,x_n)=\frac{P(y)P(x_1,...,x_n|y)}{P(x_1,...,x_n)}$$

Classification:
$$\hat{y}=\text{arg}\max_yP(y)\prod^n_{i=1}P(x_i|y)$$

Gaussian Likelihood:  
$$P(x_i|y)=\frac{1}{\sqrt{2\pi\sigma^2_{x_i,y}}}\exp\Bigg(-\frac{(x_i-\mu_{x_i,y})^2}{2\sigma^2_{x_i,y}}\Bigg)$$  
where $\mu_{x_i,y}$ and $\sigma^2_{x_i,y}$ are the mean and variance of feature $x_i$ within class $y$, both estimated by maximum likelihood

In [1]:
include("../tools.jl")
import .JuTools

In [2]:
# Build the labelled toy dataset with the JuTools helper.
# NOTE(review): the helper is defined in ../tools.jl, which is not visible here;
# presumably it draws two 2-D clusters around pos1 and pos2 — confirm there.
X_data, Y_data = JuTools.data_generate_cluster_2d(
    pos1=(30.0, 80.0),
    pos2=(80.0, 30.0),
    radius1=5.0,
    radius2=10.0,
    random_scale=8.0,
    data_size=1000,
)
# Print the dimensions of the feature array first, then of the label array.
for arr in (X_data, Y_data)
    println(size(arr))
end

(1000, 2)
(1000,)


In [3]:
# Parameters of a fitted Gaussian naive Bayes classifier, as populated by `train`.
# Every dictionary is keyed by class label.
mutable struct NaiveBayes
    # Number of feature columns the model was fitted on; `predict` checks inputs against it.
    n_features::Integer
    # Class prior P(y): the fraction of training samples carrying each label.
    pY::Dict{Number,AbstractFloat}
    # Per-class vector of feature means (one entry per feature).
    mean::Dict{Number,Array{AbstractFloat}}
    # Per-class vector of feature variances (one entry per feature), normalised by the
    # class sample count n rather than n - 1.
    var::Dict{Number,Array{AbstractFloat}}
end

In [4]:
"""
    train(X_data, Y_data) -> NaiveBayes

Fit a Gaussian naive Bayes model.

`X_data` holds one sample per row and one feature per column; `Y_data` holds the
label of each row. For every distinct label the function stores its relative
frequency (the prior) and, per feature, the mean and the variance of the rows
carrying that label. The variance is normalised by the class sample count `n`
(not `n - 1`).

Throws an `AssertionError` when `X_data` does not have exactly one more dimension
than `Y_data`, or when their first dimensions differ.
"""
function train(X_data::Array{T} where T<:Number, Y_data::Array{K} where K<:Number)::NaiveBayes
    @assert ndims(X_data) == ndims(Y_data) + 1
    @assert size(X_data)[1] == size(Y_data)[1]
    n_samples = size(Y_data, 1)

    # Class priors: tally each label, then turn the tallies into frequencies.
    pY = Dict{Number,AbstractFloat}()
    for label in Y_data
        pY[label] = get(pY, label, 0.0) + 1.0
    end
    for label in keys(pY)
        pY[label] /= n_samples
    end

    # Per-class statistics of every feature column.
    mean = Dict{Number,Array{AbstractFloat}}()
    var = Dict{Number,Array{AbstractFloat}}()
    for label in keys(pY)
        rows = X_data[Y_data .== label, :]
        n_rows = size(rows, 1)
        mean[label] = vec(sum(rows, dims=1) ./ n_rows)
        # Reshape the stored mean into a 1×n row so it broadcasts across all samples.
        deviations = rows .- reshape(mean[label], 1, :)
        var[label] = vec(sum(deviations.^2, dims=1) ./ n_rows)
    end

    return NaiveBayes(size(X_data, 2), pY, mean, var)
end

train (generic function with 1 method)

In [5]:
"""
    guassian(X_vec, mean, var)

Return the joint density of `X_vec` under independent univariate normal
distributions, i.e. the product over features of the normal density with the
matching entry of `mean` and `var`.

All three arguments must be one-dimensional and of equal length; otherwise an
`AssertionError` is thrown. (The function name keeps its historical spelling.)
"""
function guassian(X_vec::Array, mean::Array, var::Array)::AbstractFloat
    @assert ndims(X_vec) == ndims(mean) == ndims(var) == 1
    @assert length(X_vec) == length(mean) == length(var)
    # 1 / sqrt(2πσ²) for every feature.
    normalizer = @. 1.0 / sqrt((2.0 * pi) * var)
    # exp(-(x - μ)² / (2σ²)) for every feature.
    kernel = @. exp(-(X_vec - mean)^2 / (2.0 * var))
    return prod(normalizer .* kernel)
end

guassian (generic function with 1 method)

In [6]:
# Sum over features of the log of the univariate normal density. Working with
# logs keeps the score finite where the plain product of densities would
# underflow to 0.0.
function _gaussian_loglik(X_vec, mean, var)
    @assert length(X_vec) == length(mean) == length(var)
    total = 0.0
    for (x, m, v) in zip(X_vec, mean, var)
        total += -0.5 * log((2.0 * pi) * v) - (x - m)^2 / (2.0 * v)
    end
    return total
end

"""
    predict(X_data, data::NaiveBayes)

Return, for every row of `X_data`, the class label with the highest posterior
score under the fitted model `data`. A one-dimensional `X_data` is treated as a
single sample. The result is an `Array{Number}` with one entry per row.

Scores are computed in log space, `log P(y) + Σᵢ log P(xᵢ|y)`, which selects the
same class as `P(y) Πᵢ P(xᵢ|y)` but does not underflow to zero when there are
many features or a sample lies far from every class mean. A class whose stored
variance is zero for some feature still yields a NaN score.

Throws an `AssertionError` when `X_data` is not one- or two-dimensional or when
its number of columns differs from `data.n_features`.
"""
function predict(X_data::Array{T} where T<:Number, data::NaiveBayes)::Array
    if ndims(X_data) == 1
        X_data = reshape(X_data, 1, :)
    end
    @assert ndims(X_data) == 2
    @assert size(X_data)[2] == data.n_features
    # Fix the class order once and hoist the row-independent log-priors.
    labels = collect(keys(data.pY))
    log_prior = [log(data.pY[label]) for label in labels]
    prediction = Array{Number}(undef, size(X_data, 1))
    for i in axes(X_data, 1)
        X_vec = view(X_data, i, :)
        scores = [
            log_prior[k] + _gaussian_loglik(X_vec, data.mean[labels[k]], data.var[labels[k]])
            for k in eachindex(labels)
        ]
        # Only the best class is needed, so take the maximum instead of sorting.
        prediction[i] = labels[argmax(scores)]
    end
    return prediction
end

predict (generic function with 1 method)

In [7]:
# Fit the Gaussian naive Bayes model on the generated samples and labels.
trained = train(X_data, Y_data)

NaiveBayes(2, Dict{Number,AbstractFloat}(0.0 => 0.5,1.0 => 0.5), Dict{Number,Array{AbstractFloat,N} where N}(0.0 => [79.84792331838256, 29.869030821103415],1.0 => [30.14427317034883, 80.01324929089203]), Dict{Number,Array{AbstractFloat,N} where N}(0.0 => [96.14132273795053, 95.86748826325143],1.0 => [72.82706756596316, 72.45081287824333]))

In [8]:
# Classify the same samples that the model was fitted on.
prediction = predict(X_data, trained)

1000-element Array{Number,1}:
 1.0
 0.0
 0.0
 1.0
 1.0
 1.0
 0.0
 1.0
 1.0
 0.0
 1.0
 1.0
 0.0
 ⋮
 0.0
 0.0
 1.0
 1.0
 0.0
 1.0
 1.0
 1.0
 1.0
 0.0
 1.0
 0.0

In [9]:
# Score the predictions against the true labels. The predictions were made on the
# training samples themselves, so this is a training-set figure, not a held-out one.
# NOTE(review): compute_accuracy lives in ../tools.jl; presumably it returns the
# fraction of matching entries — confirm there.
JuTools.compute_accuracy(prediction, Y_data)

1.0