In [2]:
import Statistics

In [3]:
# Generate data
features = rand(100,4)
target = randn(100)
target .= target .>= 0 
data = [features  target]

100×5 Matrix{Float64}:
 0.684477   0.108546   0.0236157  0.415899   1.0
 0.213789   0.691746   0.409566   0.141439   1.0
 0.556799   0.0159538  0.686521   0.0218984  1.0
 0.625068   0.771359   0.734991   0.18648    1.0
 0.617335   0.121375   0.540097   0.36108    0.0
 0.327347   0.470746   0.800916   0.727161   0.0
 0.884767   0.388244   0.8836     0.298815   0.0
 0.531147   0.134145   0.616349   0.363749   0.0
 0.905983   0.306906   0.286375   0.0801698  1.0
 0.635022   0.264015   0.0682579  0.742054   1.0
 ⋮                                           
 0.0421674  0.190519   0.157301   0.750437   0.0
 0.854066   0.04333    0.0118001  0.674143   0.0
 0.595349   0.204488   0.60241    0.809816   0.0
 0.335507   0.561306   0.550985   0.327218   0.0
 0.955865   0.898329   0.519915   0.386675   0.0
 0.510536   0.0259978  0.637459   0.671834   0.0
 0.898151   0.83497    0.363083   0.112629   1.0
 0.463135   0.3657     0.273836   0.49121    0.0
 0.619466   0.240415   0.974157   0.968484   0.0

In [4]:
size(features)

(100, 4)

In [5]:
function seprate_classes(x,y) # Separate the dataset into a subset of data for each class
    seprated_classes = Dict()
    for i in 1:size(x,1)
        feature = x[i,:]
        class_name = y[i]
        if class_name ∉ keys(seprated_classes) 
            seprated_classes[class_name] = []
        end
        append!(seprated_classes[class_name],feature)
    end
    return seprated_classes
end

seprate_classes (generic function with 1 method)

In [6]:
classes = seprate_classes(features,target)

Dict{Any, Any} with 2 entries:
  0.0 => Any[0.617335, 0.121375, 0.540097, 0.36108, 0.327347, 0.470746, 0.80091…
  1.0 => Any[0.684477, 0.108546, 0.0236157, 0.415899, 0.213789, 0.691746, 0.409…

In [131]:
function std_mean(x) # Calculates standard deviation and mean of features.
    stat = Dict("std"=>[],"mean"=>[])
    for feature in eachcol(x)
        append!(stat["std"],Statistics.std(feature))
        append!(stat["mean"],Statistics.mean(feature))
    end
    stdmean = []
    for key in keys(stat)
        for val in stat[key]
            append!(stdmean,key=>val)
        end
    end
    return stdmean
end

std_mean (generic function with 1 method)

In [132]:
stdmean = std_mean(features)

16-element Vector{Any}:
  "mean"
 0.5062984421560289
  "mean"
 0.51117294277009
  "mean"
 0.4897895220153616
  "mean"
 0.5226172595964266
  "std"
 0.2932996732138587
  "std"
 0.30095669921659496
  "std"
 0.27897111638551175
  "std"
 0.2889981035259152

In [133]:
stdmean |> typeof

Vector{Any}[90m (alias for [39m[90mArray{Any, 1}[39m[90m)[39m

In [135]:
length(stdmean)

16

In [136]:
function distribution(x,std,mean) # Gaussian Distribution Function
    exponent = exp.(-((x .- mean).^2 ./ (2 .* std .^ 2)))
    return exponent ./ (sqrt(2*π)*std)
end

distribution (generic function with 1 method)

In [138]:
function fit(x,y)
    classes = seprate_classes(x,y)
    class_summary = Dict()
    for item in classes
        class_name , feature_val = item.first , item.second
        class_summary[class_name] = Dict("prior_proba"=>length(feature_val)/size(x,1),
                                        "summary"=>[i for i in std_mean(feature_val)])
    end
    return class_summary        
end

fit (generic function with 1 method)

In [139]:
class_summary = fit(features,target)

Dict{Any, Any} with 2 entries:
  0.0 => Dict{String, Any}("summary"=>Any["mean", 0.496085, "std", 0.29588], "p…
  1.0 => Dict{String, Any}("summary"=>Any["mean", 0.518854, "std", 0.283898], "…

In [140]:
function predict(x,class_summary)
    MAPs = []
    for row in eachrow(x)
        joint_proba = Dict()
        for item in class_summary
            class_name , features = item.first,item.second
            total_features = length(features["summary"])
            likelihood =1

            for idx in 1:total_features
                feature = row[idx]
                mean_ = features["summary"][idx]["mean"]
                std_ = features["summary"][idx]["std"]
                normal_proba = distribution(feature,std_,mean_)
                likelihood *= normal_proba
            end
            prior_proba = features["prior_proba"]
            joint_proba[class_name] = prior_proba * likelihood
        end
        MAP = max((x,y)->joint_proba[x]>joint_proba[y] ? x : y,keys(joint_proba)) # ?
        append!(MAPs,MAP)
    end
    return MAPs
end

predict (generic function with 1 method)

In [141]:
predict(features,class_summary)

MethodError: MethodError: no method matching getindex(::String, ::String)
Closest candidates are:
  getindex(::AbstractString, !Matched::Colon) at strings/basic.jl:189
  getindex(::String, !Matched::Int64) at strings/string.jl:226
  getindex(::AbstractString, !Matched::Integer) at strings/basic.jl:184
  ...

In [142]:
class_summary[0.0]["summary"]

4-element Vector{Any}:
  "mean"
 0.49608464989372103
  "std"
 0.2958795713470843

In [109]:
a = Dict("qqqq"=>[1,2,3,4,],"wwww"=>[6,7,8,9])

Dict{String, Vector{Int64}} with 2 entries:
  "wwww" => [6, 7, 8, 9]
  "qqqq" => [1, 2, 3, 4]

In [105]:
[item.first=>item.second[i] for (i,item) in enumerate(a)]

2-element Vector{Pair{String, Int64}}:
 "wwww" => 6
 "qqqq" => 2

In [119]:
for key in keys(a)
    for (i,item) in enumerate(a[key])
        println(key=>item)
    end
end

"wwww" => 6
"wwww" => 7
"wwww" => 8
"wwww" => 9
"qqqq" => 1
"qqqq" => 2
"qqqq" => 3
"qqqq" => 4


In [124]:
[i=>j for i in 1:5,j in 1:5]

5×5 Matrix{Pair{Int64, Int64}}:
 1=>1  1=>2  1=>3  1=>4  1=>5
 2=>1  2=>2  2=>3  2=>4  2=>5
 3=>1  3=>2  3=>3  3=>4  3=>5
 4=>1  4=>2  4=>3  4=>4  4=>5
 5=>1  5=>2  5=>3  5=>4  5=>5