In [30]:
import Statistics

In [31]:
# Generate data
features = rand(100,4)
target = randn(100)
target .= target .>= 0 
data = [features  target]

100×5 Matrix{Float64}:
 0.520323   0.783415  0.765647   0.366589   1.0
 0.137864   0.852474  0.907693   0.790964   1.0
 0.304566   0.417679  0.660915   0.253835   1.0
 0.81057    0.14587   0.769866   0.563403   1.0
 0.857055   0.677896  0.74095    0.255435   0.0
 0.339275   0.600864  0.0254507  0.554227   1.0
 0.448123   0.66026   0.144217   0.925327   0.0
 0.442736   0.312229  0.945634   0.910304   1.0
 0.271838   0.69699   0.968364   0.0680698  1.0
 0.483975   0.481135  0.220821   0.0126298  1.0
 ⋮                                          
 0.21946    0.963494  0.813374   0.0479463  0.0
 0.269036   0.553666  0.0737573  0.238573   1.0
 0.742099   0.674566  0.201889   0.339827   1.0
 0.0341961  0.648409  0.0428404  0.623081   1.0
 0.576743   0.819684  0.0988861  0.89939    1.0
 0.828739   0.307097  0.836367   0.134796   1.0
 0.708958   0.838908  0.312048   0.984244   1.0
 0.549265   0.437865  0.884353   0.0994664  1.0
 0.86317    0.409103  0.0963636  0.412207   1.0

In [32]:
size(features)

(100, 4)

In [51]:
features[1,:,:]

4×1 Matrix{Float64}:
 0.5203225808882386
 0.7834150105647646
 0.7656472446859207
 0.36658895966866933

In [58]:
function seprate_classes(x,y) # Separate the dataset into a subset of data for each class
    seprated_classes = Dict()
    for i in 1:size(x,1)
        feature = x[i,:,:]' # size(x[i,:])->(4,)  -  size(x[i,:,:])->(4,1)  - size(x[i,:,:])->(1,4)
        class_name = y[i]
        if class_name ∉ keys(seprated_classes) 
            seprated_classes[class_name] = Array{Float64}(undef,0,4) # empty array : size -> 0,4
        end
        seprated_classes[class_name] = [seprated_classes[class_name];feature];
    end
    
    return seprated_classes
end

seprate_classes (generic function with 1 method)

In [59]:
classes = seprate_classes(features,target)
classes[1]

55×4 Matrix{Float64}:
 0.520323   0.783415  0.765647   0.366589
 0.137864   0.852474  0.907693   0.790964
 0.304566   0.417679  0.660915   0.253835
 0.81057    0.14587   0.769866   0.563403
 0.339275   0.600864  0.0254507  0.554227
 0.442736   0.312229  0.945634   0.910304
 0.271838   0.69699   0.968364   0.0680698
 0.483975   0.481135  0.220821   0.0126298
 0.667458   0.822965  0.919274   0.0410578
 0.261841   0.827234  0.709329   0.82555
 ⋮                               
 0.159294   0.377098  0.333341   0.471165
 0.269036   0.553666  0.0737573  0.238573
 0.742099   0.674566  0.201889   0.339827
 0.0341961  0.648409  0.0428404  0.623081
 0.576743   0.819684  0.0988861  0.89939
 0.828739   0.307097  0.836367   0.134796
 0.708958   0.838908  0.312048   0.984244
 0.549265   0.437865  0.884353   0.0994664
 0.86317    0.409103  0.0963636  0.412207

In [60]:
function std_mean(x) # Calculates standard deviation and mean of features.
    std = []
    mean = []
    for feature in eachcol(x)
        append!(std,Statistics.std(feature))
        append!(mean,Statistics.mean(feature))
    end
    return std , mean
end

std_mean (generic function with 1 method)

In [61]:
std,mean = std_mean(features)

(Any[0.2719639857228294, 0.2863302820857365, 0.3102293680446004, 0.2913150791739614], Any[0.511576843531682, 0.523640073547075, 0.47407562294629785, 0.46182071559619087])

In [62]:
features |> size

(100, 4)

In [63]:
[println(i) for i in std_mean(features)]

Any[0.2719639857228294, 0.2863302820857365, 0.3102293680446004, 0.2913150791739614]
Any[0.511576843531682, 0.523640073547075, 0.47407562294629785, 0.46182071559619087]


2-element Vector{Nothing}:
 nothing
 nothing

In [64]:
function distribution(x,std,mean) # Gaussian Distribution Function
    exponent = exp.(-((x .- mean).^2 ./ (2 .* std .^ 2)))
    return exponent ./ (sqrt(2*π)*std)
end

distribution (generic function with 1 method)

In [65]:
function fit(x,y)
    classes = seprate_classes(x,y)
    class_summary = Dict()
    for item in classes
        @show class_name , feature_val = item.first , item.second
        std,mean = std_mean(feature_val);
        summary = Dict("mean"=>mean,"std"=>std);
        class_summary[class_name] = Dict("prior_proba"=>length(feature_val)/size(x,1),
                                        "summary"=>summary)
    end
    return class_summary        
end

fit (generic function with 1 method)

In [66]:
class_summary = fit(features,target)

(class_name, feature_val) = (item.first, item.second) = (0.0, [0.8570551287801391 0.6778964677480335 0.7409498039601308 0.2554346593782236; 0.44812286702384563 0.6602602055385257 0.14421656397505445 0.9253267438591699; 0.1519267236441143 0.3850753402855487 0.5254347530965658 0.2532498956409872; 0.17418839479460224 0.4681996056304949 0.2774808115779741 0.8664656036357967; 0.9314981127588142 0.32617658295140384 0.6925370510473629 0.30001870295381095; 0.35297995183585507 0.39162793228740655 0.5123108530480528 0.23694705066110233; 0.9769381879762438 0.7790955165556751 0.8748158007760631 0.1418877840352608; 0.402296516080468 0.0603292314428977 0.08902710008791925 0.6020785102311731; 0.6463768321969603 0.5746099910123464 0.8783518518230125 0.11187624703301724; 0.7627648242422618 0.5727180108135167 0.7286957421659376 0.4341943840267919; 0.7995829248750348 0.9247999124853122 0.23301383198161385 0.13726048797083756; 0.30384002340326155 0.18866429073061453 0.3345878187401131 0.46619809450678074;

Dict{Any, Any} with 2 entries:
  0.0 => Dict{String, Any}("summary"=>Dict{String, Vector{Any}}("mean"=>[0.5148…
  1.0 => Dict{String, Any}("summary"=>Dict{String, Vector{Any}}("mean"=>[0.5089…

In [67]:
class_summary[1]["summary"]

Dict{String, Vector{Any}} with 2 entries:
  "mean" => [0.508937, 0.560569, 0.486491, 0.481739]
  "std"  => [0.28134, 0.276731, 0.338989, 0.285671]

In [68]:
classes = seprate_classes(features,target)
std_mean(classes[0])
# [i for i in std_mean(class)]

(Any[0.26317113295103395, 0.29443790134713343, 0.27401937849520475, 0.29947892402177906], Any[0.5148032215504668, 0.4785051350370994, 0.4589009226092219, 0.43747675646975726])

In [69]:
function predict(x,class_summary)
    MAPs = []
    for row in eachrow(x)
        joint_proba = Dict()
        for item in class_summary
            class_name , features = item.first,item.second
            total_features = length(features["summary"])
            likelihood =1

            for idx in 1:total_features
                feature = row[idx]
                mean_ = features["summary"][idx]["mean"]
                std_ = features["summary"][idx]["std"]
                normal_proba = distribution(feature,std_,mean_)
                likelihood *= normal_proba
            end
            prior_proba = features["prior_proba"]
            joint_proba[class_name] = prior_proba * likelihood
        end
        MAP = max((x,y)->joint_proba[x]>joint_proba[y] ? x : y,keys(joint_proba)) # ?
        append!(MAPs,MAP)
    end
    return MAPs
end

predict (generic function with 1 method)

In [70]:
maps = predict(features,class_summary)

KeyError: KeyError: key 1 not found

In [71]:
maps = []
for row in eachrow(features)
    for item in class_summary
        class_name , features_val = item.first,item.second
        # println(class_name)
        println(features_val)
        total_features = length(features_val["summary"])
        likelihood =1
        
        println("doroste")
        for idx in 1:total_features
            @show feature = row[idx]
            mean_ = features_val["summary"][idx]["mean"]
            std_ = features_val["summary"][idx]["std"]
            normal_proba = distribution(feature,std_,mean_)
            likelihood *= normal_proba
        end
        prior_proba = features_val["prior_proba"]
        joint_proba[class_name] = prior_proba * likelihood

        println("==============")
    end
end
    

Dict{String, Any}("summary" => Dict{String, Vector{Any}}("mean" => [0.5148032215504668, 0.4785051350370994, 0.4589009226092219, 0.43747675646975726], "std" => [0.26317113295103395, 0.29443790134713343, 0.27401937849520475, 0.29947892402177906]), "prior_proba" => 1.8)
doroste
feature = row[idx] = 0.5203225808882386


KeyError: KeyError: key 1 not found

In [72]:
class_summary[1]["summary"]["mean"]

4-element Vector{Any}:
 0.5089370796981307
 0.5605686596006915
 0.48649128685845094
 0.48173850033600013

In [21]:
[item.first=>item.second[i] for (i,item) in enumerate(a)]

UndefVarError: UndefVarError: a not defined

In [22]:
for key in keys(a)
    for (i,item) in enumerate(a[key])
        println(key=>item)
    end
end

UndefVarError: UndefVarError: a not defined

In [23]:
[i=>j for i in 1:5,j in 1:5]

5×5 Matrix{Pair{Int64, Int64}}:
 1=>1  1=>2  1=>3  1=>4  1=>5
 2=>1  2=>2  2=>3  2=>4  2=>5
 3=>1  3=>2  3=>3  3=>4  3=>5
 4=>1  4=>2  4=>3  4=>4  4=>5
 5=>1  5=>2  5=>3  5=>4  5=>5