In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import matplotlib.ticker as ticker
import matplotlib.cm

import wcs

In [2]:
# Language identifiers to evaluate; each one is handed to the wcs helpers.
LANGS_TO_CHECK = [
    16,
    17,
    20,
    32,
    36,
]

# Fractions (0.0-1.0, not percentages) used as the hold-out amount per experiment.
HOLD_OUT_FRACTIONS = [
    0.0,
    0.2,
    0.4,
    0.6,
    0.8,
]

In [3]:
# Color-adjacency structure shared by all languages: pairwise distances are
# computed once and turned into an adjacency dict using `threshold`
# (presumably a distance cutoff -- TODO confirm against wcs.build_adjacency_dict).
threshold = 5
color_dists = wcs.all_pairwise_color_distances()
adjacency_dict = wcs.build_adjacency_dict(color_dists, threshold)

# Per-language lookups, keyed by the entries of LANGS_TO_CHECK.
full_word_counts = dict()
simple_models = dict()
for lang in LANGS_TO_CHECK:
    # Build the word count once, then derive the simple model from that same object.
    lang_counts = wcs.build_word_count(lang)
    full_word_counts[lang] = lang_counts
    simple_models[lang] = wcs.build_simple_model(lang_counts)

In [7]:
# Experiment settings: number of repeated trials per hold-out fraction, plus
# the arguments forwarded to wcs.mrf_sampler.
num_trials = 10
neighbor_weight = 3
burn_in_iterations = 15
num_restarts = 10
num_to_generate = 3

# Run the evaluation experiment: for every language and hold-out fraction,
# repeat `num_trials` times -> pick a random speaker set, build the reduced
# word count, sample from the MRF, and record the mean KL divergence between
# the sampled MRF model and that language's simple model.
for lang in LANGS_TO_CHECK:
    print(f"Running Evaluation for language {lang}")
    print(f"{20*'-'}")

    for fraction in HOLD_OUT_FRACTIONS:
        # `fraction` is a 0-1 fraction, so format it with the "%" presentation
        # type (0.2 -> "20%"); printing the raw value followed by "%" would
        # report 20% held out as "0.2%".
        print(f"Building MRF model with {fraction:.0%} held out")
        ave_divergences = np.zeros(num_trials)
        for trial in range(num_trials):
            held_out_speakers = wcs.build_rand_set_of_speakers(lang, fraction)
            # NOTE(review): `ho` (second return value) is not used anywhere in
            # this cell; the divergence is measured against simple_models[lang].
            # Confirm whether the held-out data was meant to be the reference.
            wc, ho = wcs.build_held_out_word_count(lang, held_out_speakers)
            samples = wcs.mrf_sampler(wc, adjacency_dict, neighbor_weight, num_restarts,
                                      burn_in_iterations, num_to_generate)
            mrf_model = wcs.build_mrf_model_from_samples(samples)

            divs = wcs.compute_KL_divs(mrf_model, simple_models[lang])
            ave_divergences[trial] = np.mean(divs)

        print(f"\nFor lang {lang} and hold out frac {fraction}, average divergences = {ave_divergences}")
        print(f"\tThe mean of these is {np.mean(ave_divergences)} and std dev {np.std(ave_divergences)}")
        print(f"{20*'-'}")

Running Evaluation for language 16
--------------------
Building MRF model with 0.0% held out

For lang 16 and hold out frac 0.0, average divergences = [0.3893012  0.38680586 0.38763276 0.39231714 0.38519376 0.38839466
 0.38674512 0.38392609 0.39023779 0.39049819]
	The mean of these is 0.38810525757645886 and std dev 0.002433756937987399
--------------------
Building MRF model with 0.2% held out

For lang 16 and hold out frac 0.2, average divergences = [0.38878678 0.40055945 0.37957256 0.38765108 0.39747663 0.38636222
 0.3873325  0.3855244  0.38019146 0.3881079 ]
	The mean of these is 0.38815649824536885 and std dev 0.006239433345521656
--------------------
Building MRF model with 0.4% held out

For lang 16 and hold out frac 0.4, average divergences = [0.3848537  0.39255305 0.38127523 0.39189618 0.35137301 0.37793546
 0.38637255 0.39589407 0.39878397 0.39443453]
	The mean of these is 0.385537176270092 and std dev 0.01302256125970056
--------------------
Building MRF model with 0.6% hel