# Load packages

In [3]:
using JudiLing
using CSV # read csv files into dataframes
using DataFrames # parse data into dataframes
using Plots, StatsPlots # to look at some of the measures

using JudiLingMeasures

┌ Info: Precompiling JudiLingMeasures [c131b360-78aa-49ef-85bf-52f23aef648f]
└ @ Base loading.jl:1278


# Setup
We use the standard Latin example from JudiLing; explanations of the individual steps can be found [here](https://github.com/MegamindHenry/JudiLing.jl).

In [4]:
# Locations of the Latin example datasets, next to this notebook.
train_path = joinpath(@__DIR__, "latin_train.csv")
val_path = joinpath(@__DIR__, "latin_val.csv")

# Fetch the datasets only when they are not on disk yet, so the cell runs
# unmodified on a fresh checkout and never re-downloads existing files.
isfile(train_path) || download("https://osf.io/2ejfu/download", train_path)
isfile(val_path) || download("https://osf.io/bm7y6/download", val_path)

# Read the training and validation data into DataFrames.
latin_train =
    DataFrame(CSV.File(train_path))
latin_val =
    DataFrame(CSV.File(val_path))

# Cue objects for training and validation data, built from the :Word column
# with grams = 3. Their fields C, A, i2f, f2i and gold_ind are used below.
cue_obj_train, cue_obj_val = JudiLing.make_cue_matrix(
    latin_train,
    latin_val,
    grams = 3,
    target_col = :Word,
    tokenized = false,
    keep_sep = false
)

# The S matrices get as many columns as the training C matrix.
n_features = size(cue_obj_train.C, 2)
S_train, S_val = JudiLing.make_S_matrix(
    latin_train,
    latin_val,
    ["Lexeme"],
    ["Person", "Number", "Tense", "Voice", "Mood"],
    ncol = n_features
)

# Transformation matrices estimated on the training data only:
# G_train is applied to S (giving Chat), F_train is applied to C (giving Shat).
G_train = JudiLing.make_transform_matrix(S_train, cue_obj_train.C)
F_train = JudiLing.make_transform_matrix(cue_obj_train.C, S_train)

# Predicted matrices for training and validation data.
Chat_train = S_train * G_train
Chat_val = S_val * G_train
Shat_train = cue_obj_train.C * F_train
Shat_val = cue_obj_val.C * F_train

# Inputs shared by both learn_paths_rpi calls below.
A = cue_obj_train.A
max_t = JudiLing.cal_max_timestep(latin_train, latin_val, :Word)

# Run learn_paths_rpi on the training data itself. The three return values are
# later passed to the measures as res_learn, gpi_learn and rpi_learn.
res_learn_train, gpi_learn_train, rpi_learn_train = JudiLingMeasures.learn_paths_rpi(
    latin_train,
    latin_train,
    cue_obj_train.C,
    S_train,
    F_train,
    Chat_train,
    A,
    cue_obj_train.i2f,
    cue_obj_train.f2i, # api changed in 0.3.1
    gold_ind = cue_obj_train.gold_ind,
    Shat_val = Shat_train,
    check_gold_path = true,
    max_t = max_t,
    max_can = 10,
    grams = 3,
    threshold = 0.05,
    tokenized = false,
    sep_token = "_",
    keep_sep = false,
    target_col = :Word,
    issparse = :dense,
    verbose = true,
)

# Same for the validation data, additionally enabling the tolerance options
# (is_tolerant, tolerance, max_tolerance).
# NOTE(review): sep_token is "-" here but "_" in the training call above;
# presumably irrelevant because tokenized = false - confirm.
res_learn_val, gpi_learn_val, rpi_learn_val = JudiLingMeasures.learn_paths_rpi(
    latin_train,
    latin_val,
    cue_obj_train.C,
    S_val,
    F_train,
    Chat_val,
    A,
    cue_obj_train.i2f,
    cue_obj_train.f2i, # api changed in 0.3.1
    gold_ind = cue_obj_val.gold_ind,
    Shat_val = Shat_val,
    check_gold_path = true,
    max_t = max_t,
    max_can = 10,
    grams = 3,
    threshold = 0.05,
    is_tolerant = true,
    tolerance = -0.1,
    max_tolerance = 2,
    tokenized = false,
    sep_token = "-",
    keep_sep = false,
    target_col = :Word,
    issparse = :dense,
    verbose = true,
)


patched function usedMaking fac C
Timestep 1
Calculating Yt...
Calculating Mt...
Returning a dense matrix format
Calculating Ythat...
Sparsity: 0.03125
Finding paths...
Timestep 2
average 1.0 of paths currently
Calculating Yt...
Calculating Mt...
Returning a dense matrix format
Calculating Ythat...
Sparsity: 0.03515625
Finding paths...
Timestep 3
average 1.1205357142857142 of paths currently
Calculating Yt...
Calculating Mt...
Returning a dense matrix format
Calculating Ythat...
Sparsity: 0.04296875
Finding paths...
Timestep 4
average 1.3422619047619047 of paths currently
Calculating Yt...
Calculating Mt...
Returning a dense matrix format
Calculating Ythat...
Sparsity: 0.16015625
Finding paths...
Timestep 5
average 2.8154761904761907 of paths currently
Calculating Yt...
Calculating Mt...
Returning a dense matrix format
Calculating Ythat...
Sparsity: 0.3671875
Finding paths...
Timestep 6
average 4.153273809523809 of paths currently
Calculating Yt...
Calculating Mt...
Returning a dense m

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


patched function usedMaking fac C
Timestep 1
Calculating Yt...
Calculating Mt...
Returning a dense matrix format
Calculating Ythat...
Sparsity: 0.03125
Finding paths...
Timestep 2
average 8.0 of paths currently
Calculating Yt...
Calculating Mt...
Returning a dense matrix format
Calculating Ythat...
Sparsity: 0.03515625
Finding paths...
Timestep 3
average 8.552238805970148 of paths currently
Calculating Yt...
Calculating Mt...
Returning a dense matrix format
Calculating Ythat...
Sparsity: 0.04296875
Finding paths...
Timestep 4
average 4.365671641791045 of paths currently
Calculating Yt...
Calculating Mt...
Returning a dense matrix format
Calculating Ythat...
Sparsity: 0.16015625
Finding paths...
Timestep 5
average 10.664179104477611 of paths currently
Calculating Yt...
Calculating Mt...
Returning a dense matrix format
Calculating Ythat...
Sparsity: 0.3671875
Finding paths...
Timestep 6
average 20.611940298507463 of paths currently
Calculating Yt...
Calculating Mt...
Returning a dense ma

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


(Array{JudiLing.Result_Path_Info_Struct,1}[[JudiLing.Result_Path_Info_Struct([56, 57, 58, 60, 63, 195, 190, 180, 120, 121], 2, 0.9950902722131681), JudiLing.Result_Path_Info_Struct([56, 57, 58, 60, 52, 175, 120, 121], 2, 0.9366201815440792), JudiLing.Result_Path_Info_Struct([56, 57, 58, 60, 140, 174, 108, 73], 1, 0.9337275106226407), JudiLing.Result_Path_Info_Struct([56, 57, 58, 60, 63, 195, 190, 73], 1, 0.8867351498316011), JudiLing.Result_Path_Info_Struct([56, 57, 58, 60, 63, 194, 208, 171], 2, 0.8553741311940346), JudiLing.Result_Path_Info_Struct([56, 57, 58, 60, 63, 196, 193, 80, 128, 120, 121], 0, 0.8455882804138655), JudiLing.Result_Path_Info_Struct([56, 57, 149, 151, 247, 249, 238, 16, 119, 120, 121], 2, 0.844260111111015), JudiLing.Result_Path_Info_Struct([56, 57, 58, 60, 140, 174, 108, 180, 120, 121], 2, 0.836929970675498), JudiLing.Result_Path_Info_Struct([56, 57, 58, 60, 140, 174, 109, 244], 2, 0.7979769046622188), JudiLing.Result_Path_Info_Struct([56, 57, 58, 61, 49], 2, 0.

# General help with measures

Each measure comes with documentation, which you can display via:

In [4]:
?JudiLingMeasures.L1Norm

```
L1Norm(Shat::Union{JudiLing.SparseMatrixCSC, Matrix})
```

Compute the L1 Norm of Shat.

# Examples

```jldoctest
julia> Shat = [[1 2 3]; [-1 -2 -3]; [1 2 3]]
julia> L1Norm(Shat)
3-element Vector{Int64}:
 6
 6
 6
```


# All available measures
In the following, we implement a function that computes all available measures for the validation data of the Latin example above.

In [51]:
"""
    compute_all_measures(data_val, Chat_val, S_val, Shat_val,
                         res_learn, cue_obj_train, cue_obj_val, rpi_learn, gpi_learn)

Compute all available measures and return them as additional columns of the
`results` table produced by `JudiLingMeasures.make_measure_preparations`.

# Arguments
- `data_val`: the data of interest
- `Chat_val`, `S_val`, `Shat_val`: the Chat, S and Shat matrices of that data
- `res_learn`, `rpi_learn`, `gpi_learn`: the three outputs of `learn_paths_rpi`
- `cue_obj_train`, `cue_obj_val`: cue objects of the training data and of the
  data of interest

# Returns
The `results` table with one column per measure, added in the order in which
the measures are computed below.
"""
function compute_all_measures(
    data_val, Chat_val, S_val, Shat_val,
    res_learn, cue_obj_train, cue_obj_val, rpi_learn, gpi_learn,
)
    # Short local handle for the measures module.
    jlm = JudiLingMeasures

    # --- Preparations -------------------------------------------------------
    # results: copy of data_val in which the measures are stored
    # cor_s:   correlation matrix between Shat and S
    # df:      DataFrame of res_learn, the output of learn_paths
    # pred_df: DataFrame with path supports for the forms predicted by learn_paths
    results, cor_s, df, pred_df = jlm.make_measure_preparations(
        data_val, S_val, Shat_val, res_learn, cue_obj_train, cue_obj_val, rpi_learn,
    )

    # --- Vector length / activation / uncertainty ---------------------------
    results[!, "L1Norm"] = jlm.L1Norm(Shat_val)
    results[!, "L2Norm"] = jlm.L2Norm(Shat_val)

    # --- Semantic neighbourhood ---------------------------------------------
    results[!, "Density"] = jlm.density(cor_s)
    results[!, "ALC"] = jlm.ALC(cor_s)
    results[!, "EDNN"] = jlm.EDNN(Shat_val, S_val)
    results[!, "NNC"] = jlm.NNC(cor_s)

    # --- Comprehension accuracy ---------------------------------------------
    results[!, "TargetCorrelation"] = jlm.target_correlation(cor_s)
    results[!, "rank"] = jlm.rank(cor_s)
    results[!, "recognition"] = jlm.recognition(data_val)

    # --- Production accuracy / support / uncertainty for the predicted form --
    results[!, "SCPP"] = jlm.SCPP(df, results)
    results[!, "PathSum"] = jlm.path_sum(pred_df)
    results[!, "TargetPathSum"] = jlm.target_path_sum(gpi_learn)
    results[!, "PathSumChat"] = jlm.path_sum_chat(res_learn, Chat_val)
    results[!, "C-Precision"] = jlm.c_precision(Chat_val, cue_obj_val.C)
    results[!, "L1Chat"] = jlm.L1Norm(Chat_val)
    results[!, "SemanticSupportForForm"] =
        jlm.semantic_support_for_form(cue_obj_val, Chat_val)

    # --- Support for the predicted path (transitions and path components) ----
    results[!, "WithinPathEntropies"] = jlm.within_path_entropies(pred_df)
    results[!, "Support"] = jlm.last_support(cue_obj_val, Chat_val)
    results[!, "MeanWordSupport"] = jlm.mean_word_support(res_learn, pred_df)
    results[!, "MeanWordSupportChat"] = jlm.mean_word_support_chat(res_learn, Chat_val)
    results[!, "lwlr"] = jlm.lwlr(res_learn, pred_df)
    results[!, "lwlrChat"] = jlm.lwlr_chat(res_learn, Chat_val)

    # --- Support for competing forms ----------------------------------------
    results[!, "PathCounts"] = jlm.path_counts(df)
    results[!, "ALDC"] = jlm.ALDC(df)
    results[!, "PathEntropiesSemanticSupport"] = jlm.path_entropies_semantic_support(df)
    results[!, "PathEntropiesChat"] = jlm.path_entropies_chat(res_learn, Chat_val)

    return results
end

compute_all_measures (generic function with 1 method)

Now we can call this function.

In [52]:
# Compute every measure for the validation data; the trailing `;` suppresses
# the display of the resulting table.
dat_all_measures = compute_all_measures(
    latin_val,      # the data of interest
    Chat_val,       # the Chat of the data of interest
    S_val,          # the S matrix of the data of interest
    Shat_val,       # the Shat matrix of the data of interest
    res_learn_val,  # the output of learn_paths for the data of interest
    cue_obj_train,  # the cue_obj of the training data
    cue_obj_val,    # the cue_obj of the data of interest
    rpi_learn_val,  # the rpi_learn object of the data of interest
    gpi_learn_val,  # the gpi_learn object of the data of interest
);

Recognition not implemented


The resulting dataframe includes all the measures:

In [53]:
# Show the first ten rows, from column 19 through the last column.
dat_all_measures[1:10, 19:ncol(dat_all_measures)]

Unnamed: 0_level_0,PathSum,TargetPathSum,PathSumChat,C-Precision,L1Chat,SemanticSupportForForm
Unnamed: 0_level_1,Float64,Float64,Any,Float64,Float64,Any
1,3.7036,3.7036,4.65773,0.565497,23.7994,4.65773
2,4.59556,4.94948,4.66293,0.559771,24.4365,4.98572
3,3.96154,3.96154,4.94808,0.587485,22.4387,4.94808
4,4.18955,4.18955,4.85837,0.510669,25.6224,4.85837
5,4.19961,4.19961,5.32665,0.633173,23.6295,5.32665
6,4.77745,4.77745,5.13405,0.558818,25.8623,5.13405
7,3.81647,3.81647,4.85789,0.558479,24.835,4.85789
8,4.58405,4.46162,4.61507,0.626944,25.8132,5.4896
9,4.24587,3.62604,4.79271,0.547825,23.0664,3.93507
10,3.79675,3.79675,4.17812,0.531636,24.9963,4.17812


Some visualisation of the resulting measures:

In [None]:
# Scatter plot of MeanWordSupport against PathEntropiesChat.
scatter(
    dat_all_measures.MeanWordSupport,
    dat_all_measures.PathEntropiesChat;
    label=false,
    xlab="MeanWordSupport",
    ylab="PathEntropiesChat",
)

In [None]:
# Scatter plot of MeanWordSupport against MeanWordSupportChat.
scatter(
    dat_all_measures.MeanWordSupport,
    dat_all_measures.MeanWordSupportChat;
    label=false,
    xlab="MeanWordSupport",
    ylab="MeanWordSupportChat",
)

In [None]:
# Scatter plot of the L1 norm of Shat against the semantic density.
# The columns are addressed by the names compute_all_measures assigns
# ("L1Norm" and "Density"); the table has no "SemanticVectorLength" or
# "SemanticDensity" column, so those names would raise an error.
plot(dat_all_measures.L1Norm, 
     dat_all_measures.Density, seriestype=:scatter, label=false,
     xlab="L1Norm", ylab="Density")