In [5]:
import Statistics

In [6]:
# Generate data
features = rand(100,4)
target = randn(100)
target .= target .>= 0 
data = [features  target]

100×5 Matrix{Float64}:
 0.504244   0.121357  0.335059   0.833726   0.0
 0.857522   0.195658  0.290936   0.667798   1.0
 0.572218   0.600397  0.742903   0.0253299  1.0
 0.79046    0.505987  0.135975   0.411041   1.0
 0.88033    0.402325  0.498024   0.954556   0.0
 0.941442   0.982863  0.0417341  0.535912   1.0
 0.691419   0.154565  0.0844429  0.622999   1.0
 0.119686   0.137514  0.386212   0.202894   0.0
 0.456357   0.531945  0.257931   0.0123152  0.0
 0.0994617  0.300315  0.722777   0.175727   0.0
 ⋮                                          
 0.117298   0.354608  0.429392   0.796158   0.0
 0.425023   0.586345  0.0959007  0.692643   1.0
 0.907219   0.93532   0.629518   0.406089   0.0
 0.643712   0.628473  0.686888   0.116309   1.0
 0.164039   0.569774  0.266927   0.571539   0.0
 0.844377   0.990152  0.474299   0.900324   1.0
 0.747085   0.935444  0.822042   0.750314   1.0
 0.849962   0.246876  0.216915   0.520319   1.0
 0.473827   0.422905  0.923124   0.194479   0.0

In [7]:
size(features)

(100, 4)

In [8]:
features[1,:,:]

4×1 Matrix{Float64}:
 0.504244278014571
 0.12135702687195071
 0.3350591675376179
 0.8337256260423394

In [9]:
function seprate_classes(x,y) # Separate the dataset into a subset of data for each class
    seprated_classes = Dict()
    for i in 1:size(x,1)
        feature = x[i,:,:]' # size(x[i,:])->(4,)  -  size(x[i,:,:])->(4,1)  - size(x[i,:,:])->(1,4)
        class_name = y[i]
        if class_name ∉ keys(seprated_classes) 
            seprated_classes[class_name] = Array{Float64}(undef,0,4) # empty array : size -> 0,4
        end
        seprated_classes[class_name] = [seprated_classes[class_name];feature];
    end
    
    return seprated_classes
end

seprate_classes (generic function with 1 method)

In [10]:
classes = seprate_classes(features,target)
classes[1]

54×4 Matrix{Float64}:
 0.857522    0.195658  0.290936   0.667798
 0.572218    0.600397  0.742903   0.0253299
 0.79046     0.505987  0.135975   0.411041
 0.941442    0.982863  0.0417341  0.535912
 0.691419    0.154565  0.0844429  0.622999
 0.945969    0.984145  0.292694   0.991409
 0.208384    0.446753  0.164714   0.692378
 0.266104    0.46692   0.822737   0.693993
 0.660704    0.173468  0.884122   0.158046
 0.147885    0.802437  0.316474   0.213371
 ⋮                                
 0.913371    0.86115   0.677597   0.631333
 0.583608    0.825903  0.936387   0.341628
 0.776422    0.121244  0.698616   0.349257
 0.00742869  0.834739  0.882961   0.83086
 0.425023    0.586345  0.0959007  0.692643
 0.643712    0.628473  0.686888   0.116309
 0.844377    0.990152  0.474299   0.900324
 0.747085    0.935444  0.822042   0.750314
 0.849962    0.246876  0.216915   0.520319

In [66]:
function std_mean(x) # Calculates standard deviation and mean of features.
    std = []
    mean = []
    for feature in eachcol(x)
        append!(std,Statistics.std(feature))
        append!(mean,Statistics.mean(feature))
    end
    return std , mean
end

std_mean (generic function with 1 method)

In [67]:
std,mean = std_mean(features)

(Any[0.27704334068848846, 0.28379069393422274, 0.2819322858441184, 0.290403747225323], Any[0.5246479215265838, 0.5332334537589887, 0.4998228941142742, 0.5085780369794282])

In [69]:
mean |> size

(4,)

In [15]:
function distribution(x,std,mean) # Gaussian Distribution Function
    exponent = exp.(-((x .- mean).^2 ./ (2 .* std .^ 2)))
    return exponent ./ (sqrt(2*π)*std)
end

distribution (generic function with 1 method)

In [73]:
function fit(x,y)
    classes = seprate_classes(x,y)
    class_summary = Dict()
    for item in classes
        class_name , feature_val = item.first , item.second
        std,mean = std_mean(feature_val);
        summary = Dict("mean"=>mean,"std"=>std);
        class_summary[class_name] = Dict("prior_proba"=>length(feature_val)/size(x,1),
                                        "summary"=>summary)
    end
    return class_summary        
end

fit (generic function with 1 method)

In [74]:
class_summary = fit(features,target)

(46, 4)
(54, 4)


Dict{Any, Any} with 2 entries:
  0.0 => Dict{String, Any}("summary"=>Dict{String, Vector{Any}}("mean"=>[0.4741…
  1.0 => Dict{String, Any}("summary"=>Dict{String, Vector{Any}}("mean"=>[0.5676…

In [18]:
class_summary[1]["summary"]

Dict{String, Vector{Any}} with 2 entries:
  "mean" => [0.567659, 0.568018, 0.529242, 0.491545]
  "std"  => [0.269737, 0.295958, 0.309129, 0.28641]

In [84]:
function predict(x,class_summary)
    MAPs = []
    for row in eachrow(x)
        joint_proba = Dict()
        for item in class_summary
            class_name , features = item.first,item.second
            total_features = length(features["summary"])
            likelihood =1

            for idx in 1:total_features
                feature = row[idx]
                mean_ = features["summary"]["mean"][idx]
                std_ = features["summary"]["std"][idx]
                normal_proba = distribution(feature,std_,mean_)
                likelihood *= normal_proba
            end
            prior_proba = features["prior_proba"]
            joint_proba[class_name] = prior_proba * likelihood
        end
        MAP = max((x,y)->joint_proba[x]>joint_proba[y] ? x : y,keys(joint_proba)) 
        append!(MAPs,MAP)
    end
    return MAPs
end

predict (generic function with 1 method)

In [85]:
maps = predict(features,class_summary)

doroste!


MethodError: MethodError: no method matching isless(::Base.KeySet{Any, Dict{Any, Any}}, ::var"#23#24"{Dict{Any, Any}})
Closest candidates are:
  isless(::Any, !Matched::Missing) at missing.jl:88
  isless(!Matched::Missing, ::Any) at missing.jl:87

In [107]:
global maps = []
global joint_proba = Dict()
for row in eachrow(features)
    joint_proba = Dict()
    for item in class_summary
        class_name , features_val = item.first,item.second
        # println(class_name)
        total_features = length(features_val["summary"])
        likelihood =1
        
        for idx in 1:total_features
            feature = row[idx]
            mean_ = features_val["summary"]["mean"][idx]
            std_ = features_val["summary"]["std"][idx]
            normal_proba = distribution(feature,std_,mean_)
            likelihood *= normal_proba
        end
        prior_proba = features_val["prior_proba"]
        joint_proba[class_name] = prior_proba * likelihood
    end
    map = reduce((x,y)->joint_proba[x]>joint_proba[y] ? x : y,keys(joint_proba))
    append!(maps,map)
end
    

In [112]:
sum(abs.(maps - target))

43.0

In [97]:
a= Dict("q"=>5,"s"=>12)
b = [1 2 3 12]

1×4 Matrix{Int64}:
 1  2  3  12

In [25]:
for key in keys(a)
    for (i,item) in enumerate(a[key])
        println(key=>item)
    end
end

UndefVarError: UndefVarError: a not defined

In [26]:
[i=>j for i in 1:5,j in 1:5]

5×5 Matrix{Pair{Int64, Int64}}:
 1=>1  1=>2  1=>3  1=>4  1=>5
 2=>1  2=>2  2=>3  2=>4  2=>5
 3=>1  3=>2  3=>3  3=>4  3=>5
 4=>1  4=>2  4=>3  4=>4  4=>5
 5=>1  5=>2  5=>3  5=>4  5=>5