In [28]:
import Statistics

In [20]:
# Generate data
features = rand(100,4)
target = randn(100)
target .= target .>= 0 
data = [features  target]

100×5 Matrix{Float64}:
 0.939838   0.339041   0.572614  0.993944  1.0
 0.155375   0.62961    0.910406  0.84807   1.0
 0.331673   0.824359   0.260982  0.726228  0.0
 0.313382   0.844747   0.315542  0.406222  1.0
 0.0950407  0.863619   0.6495    0.343214  0.0
 0.923825   0.342852   0.317916  0.337804  0.0
 0.244011   0.359283   0.373809  0.726707  1.0
 0.62611    0.364935   0.66541   0.794139  1.0
 0.87288    0.589729   0.710286  0.415384  1.0
 0.727073   0.762336   0.436547  0.642825  0.0
 ⋮                                         
 0.178725   0.951463   0.341189  0.534031  0.0
 0.17472    0.919849   0.499719  0.21921   0.0
 0.927395   0.123      0.239165  0.738991  0.0
 0.221498   0.752771   0.274489  0.566189  1.0
 0.45283    0.242427   0.343484  0.584939  0.0
 0.0173767  0.147683   0.163432  0.596849  1.0
 0.0344194  0.350416   0.155455  0.746123  1.0
 0.615042   0.0162762  0.281998  0.340092  1.0
 0.516135   0.0212464  0.314057  0.778457  0.0

In [63]:
size(features)

(100, 4)

In [78]:
function seprate_classes(x,y) # Separate the dataset into a subset of data for each class
    seprated_classes = Dict()
    for i in 1:size(x,1)
        feature = x[i,:]
        class_name = y[i]
        if class_name ∉ keys(seprated_classes) 
            seprated_classes[class_name] = []
        end
        append!(seprated_classes[class_name],feature)
    end
    return seprated_classes
end

seprate_classes (generic function with 1 method)

In [79]:
classes = seprate_classes(features,target)

Dict{Any, Any} with 2 entries:
  0.0 => Any[0.331673, 0.824359, 0.260982, 0.726228, 0.0950407, 0.863619, 0.649…
  1.0 => Any[0.939838, 0.339041, 0.572614, 0.993944, 0.155375, 0.62961, 0.91040…

In [93]:
function std_mean(x) # Calculates standard deviation and mean of features.
    stat = Dict("std"=>[],"mean"=>[])
    for feature in eachrow(x)
        append!(stat["std"],Statistics.std(feature))
        append!(stat["mean"],Statistics.mean(feature))
    end
    return stat
end

std_mean (generic function with 1 method)

In [94]:
stdmean = std_mean(features)

Dict{String, Vector{Any}} with 2 entries:
  "mean" => [0.711359, 0.635865, 0.535811, 0.469973, 0.487843, 0.480599, 0.4259…
  "std"  => [0.310874, 0.342205, 0.280904, 0.253567, 0.337911, 0.29568, 0.20874…

In [95]:
length(stdmean["mean"])

100

In [106]:
function distribution(x,std,mean) # Gaussian Distribution Function
    exponent = exp.(-((x .- mean).^2 ./ (2 .* std .^ 2)))
    return exponent ./ (sqrt(2*π)*std)
end

distribution (generic function with 1 method)

In [108]:
mean_ , std_ = stdmean["mean"] , stdmean["std"]
distribution(features,std_,mean_)

100×4 Matrix{Float64}:
 0.97956   0.626408  1.16164   0.848987
 0.435029  1.1656    0.845006  0.961883
 1.09062   0.837965  0.880029  1.12868
 1.30018   0.527788  1.30699   1.52437
 0.600733  0.636164  1.05295   1.07728
 0.438687  1.21049   1.15972   1.20072
 1.30717   1.81614   1.85247   0.676904
 2.20942   0.860038  2.1225    1.33316
 1.04291   1.97615   1.95749   1.00602
 2.30786   1.94778   1.01315   2.73285
 ⋮                             
 0.749214  0.480978  1.06626   1.19096
 0.836467  0.460906  1.15368   0.921818
 0.571695  0.629867  0.810913  0.861239
 1.03629   0.78091   1.23306   1.4403
 2.57828   1.46226   2.47904   1.29309
 1.10363   1.49658   1.5249    0.553693
 0.837297  1.27518   1.11086   0.506018
 0.763892  0.781586  1.61113   1.61471
 1.17662   0.601829  1.19445   0.636709

In [116]:
function fit(x,y)
    classes = seprate_classes(x,y)
    class_summary = Dict()
    for item in classes
        class_name , feature_val = item.first , item.second
        class_summary[class_name] = Dict("prior_proba"=>length(feature_val)/size(x,1),
                                        "summary"=>[i for i in std_mean(feature_val)])
    end
    return class_summary        
end

fit (generic function with 1 method)

In [117]:
fit(features,target)

Dict{Any, Any} with 2 entries:
  0.0 => Dict{String, Any}("summary"=>Pair{String, Vector{Any}}["mean"=>[0.3316…
  1.0 => Dict{String, Any}("summary"=>Pair{String, Vector{Any}}["mean"=>[0.9398…

In [132]:
function predict(x,class_summary)
    MAPs = []
    for row in eachrow(x)
        joint_proba = Dict()
        for item in class_summary
            class_name , features = item.first,item.second
            total_features = length(features["summary"])
            likelihood =1

            for idx in 1:total_features
                feature = row[idx]
                mean_ = features["summary"][idx]["mean"]
                std_ = features["summary"][idx]["std"]
                normal_proba = distribution(feature,std_,mean_)
                likelihood *= normal_proba
            end
            prior_proba = features["prior_proba"]
            joint_proba[class_name] = prior_proba * likelihood
        end
        MAP = max((x,y)->joint_proba[x]>joint_proba[y] ? x : y,keys(joint_proba)) # ?
        append!(MAPs,MAP)
    end
    return MAPs
end

predict (generic function with 1 method)