# NLP HW 4: Hidden Markov Model

## Setup


Downloading data

In [None]:
# from https://medium.com/@Keshav31/colab-features-download-and-upload-e1ec537a83df
from urllib.request import urlretrieve
import os
from zipfile import ZipFile

# Course data archive and the local filename it is saved under.
url = 'https://ttic.uchicago.edu/~kgimpel/teaching/31210-s19/data/31210-s19-hw3.zip'
file = '31210-s19-hw3.zip'

# Skip the download when the archive is already present locally.
if not os.path.isfile(file):
    urlretrieve(url,file)

# Unpack the archive into the current working directory.
with ZipFile(file) as zipf:
    zipf.extractall()

# Notebook shell escapes (not plain Python): rebuild data/ from scratch, move
# the contents of the unpacked 31210-s19-hw3/ directory into it, then delete
# the archive and the extraction directory.
!rm -rf data/
!mkdir data/
!mv 31210-s19-hw3/* data/
!rm -rf 31210-s19-hw3.zip 31210-s19-hw3/

Loading training data and doing bigram counts

In [None]:
from collections import Counter
    
# Count tables filled in a single pass over the training file.
word_tag_counts = Counter()      # (word, tag) -> number of occurrences
tag_bigram_counts = Counter()    # (previous tag, tag) -> number of occurrences
vocab = set()                    # every real word seen in training
pos_tags = set()                 # every real tag seen in training

# Sentinel (word, tag) pairs marking the sentence boundaries.
start_token = ('<s>', '<s>')
end_token = ('</s>', '</s>')

with open('data/en_ewt.train', 'r') as trainfile:
    prev = start_token
    for row in map(str.split, trainfile.readlines()):
        if not row:
            # A blank line closes the sentence: count it as the end token.
            row = end_token
        elif prev[0] == end_token[0]:
            # First word after a sentence end: its predecessor is the start token.
            prev = start_token
        if row[0] not in (start_token[0], end_token[0]):
            vocab.add(row[0])
            pos_tags.add(row[1])
        word_tag_counts[row[0], row[1]] += 1
        tag_bigram_counts[prev[1], row[1]] += 1
        prev = row

print('data loaded')
print('vocab size:', len(vocab))
print('num pos tags:', len(pos_tags))

data loaded
vocab size: 19380
num pos tags: 50


Compute probabilities

In [None]:
import numpy as np

# Transition table, add-lambda smoothed:
#     log_p_tau[(tag, previous_tag)] = log P(tag | previous_tag)
log_p_tau = {}
lambda_tau = 0.1

tag_list = list(pos_tags)
successor_tags = tag_list + ['</s>']

# Transitions out of a real tag may go to any real tag or to the end marker.
for prev_tag in tag_list:
    row_total = sum(tag_bigram_counts[(prev_tag, nxt)] + lambda_tau
                    for nxt in successor_tags)
    for nxt in successor_tags:
        smoothed = tag_bigram_counts[(prev_tag, nxt)] + lambda_tau
        log_p_tau[(nxt, prev_tag)] = np.log(smoothed/row_total)

# Transitions out of the start marker only go to real tags; the normaliser
# is the same for every tag, so it is computed once.
start_total = sum(tag_bigram_counts[('<s>', nxt)] + lambda_tau
                  for nxt in tag_list)
for nxt in tag_list:
    smoothed = tag_bigram_counts[('<s>', nxt)] + lambda_tau
    log_p_tau[(nxt, '<s>')] = np.log(smoothed/start_total)

# Emission table, add-lambda smoothed over the training vocabulary:
#     log_p_eta[(word, tag)] = log P(word | tag)
log_p_eta = {}
lambda_eta = 0.001
word_list = list(vocab)
for tag in tag_list:
    tag_total = sum(word_tag_counts[(word, tag)] + lambda_eta
                    for word in word_list)
    for word in word_list:
        smoothed = word_tag_counts[(word, tag)] + lambda_eta
        log_p_eta[(word, tag)] = np.log(smoothed/tag_total)

# make sure that all the zero probs are right: nothing transitions into
# <s>, nothing transitions out of </s>, and <s> -> </s> is not allowed
log_p_tau[('</s>', '<s>')] = -float('inf')
for tag in tag_list:
    log_p_tau[('<s>', tag)] = -float('inf')
    log_p_tau[(tag, '</s>')] = -float('inf')

Functions to calculate log-probability and accuracy of tagged sentence

In [None]:
# takes a list of (word, tag) tuples
# should start with <s>
def log_sentence_probability(s):
    """Return the HMM joint log-probability of the tagged sentence ``s``.

    ``s[0]`` only supplies the tag that precedes the first word. Every later
    position contributes its transition and emission log-probability, and
    the transition from the last tag to ``'</s>'`` is included as well.
    """
    log_prob = log_p_tau[('</s>', s[-1][1])]
    for previous, current in zip(s, s[1:]):
        log_prob += log_p_tau[(current[1], previous[1])]
        log_prob += log_p_eta[(current[0], current[1])]
    return log_prob

def calc_accuracy_rate(sentences, gold_standards, ignore_tags=()):
    """Return the fraction of scored tokens whose predicted tag is correct.

    Args:
        sentences: predicted sentences, each a sequence of (word, tag) pairs.
        gold_standards: gold sentences aligned one-to-one with ``sentences``;
            position ``i`` of a prediction is compared with position ``i``
            of the matching gold sentence.
        ignore_tags: gold tags to leave out of the score. The default scores
            every position, so a leading ``('<s>', '<s>')`` sentinel present
            in both lists counts as a correct token. Pass
            ``('<s>', '</s>')`` to score real words only.

    Returns:
        ``correct / scored`` as a float, or ``0.0`` when nothing was scored.

    Raises:
        ValueError: if the two lists hold a different number of sentences.
    """
    if len(sentences) != len(gold_standards):
        raise ValueError(
            'got %d predicted sentences but %d gold sentences'
            % (len(sentences), len(gold_standards)))

    total = 0
    total_correct = 0
    for predicted, gold in zip(sentences, gold_standards):
        for position, token in enumerate(predicted):
            gold_tag = gold[position][1]
            if gold_tag in ignore_tags:
                continue
            total += 1
            if token[1] == gold_tag:
                total_correct += 1

    # Avoid ZeroDivisionError on an empty evaluation set.
    if total == 0:
        return 0.0
    return total_correct / total

Reusable function to score prediction functions

In [None]:
import time, tqdm

# start_work_func prepares all memoization
#     takes no args, returns nothing
# pred_func does predictions
#     takes a sentence of [(word, gs_tag)]
#     returns predictions [(word, pred_tag)]
def score_prediction_function(start_work_func, pred_func, do_setup=True):
    """Tag every sentence of the DEV file with ``pred_func`` and print a report.

    Args:
        start_work_func: zero-argument callable, run once before predicting
            when ``do_setup`` is true.
        pred_func: callable that receives one sentence (``start_token``
            followed by the ``[word, gold_tag]`` rows of the file) and
            returns the predicted ``(word, tag)`` sequence.
        do_setup: whether to call ``start_work_func`` first.

    Prints the tagging accuracy, the elapsed wall-clock time (setup
    included) and the summed log-probability of all predictions.
    """
    start_time = time.time()
    if do_setup:
        print('doing setup, building lookup tables...', flush=True)
        start_work_func()
    print('making predictions...', flush=True)
    with open('data/en_ewt.dev', 'r') as devfile:
        lines = devfile.readlines()

    # A blank line closes each sentence. If the file does not end with one,
    # add it so the final sentence is scored instead of silently dropped.
    if lines and lines[-1].split():
        lines.append('\n')

    current_sent = [start_token]
    predictions = []
    gold_standards = []
    total_log_prob = 0
    for row in tqdm.tqdm(lines):
        row = row.split()
        if row:
            current_sent.append(row)
            continue
        # Blank line: the sentence is complete, so predict and score it.
        gold_standards.append(current_sent)
        pred = pred_func(current_sent)
        predictions.append(pred)
        total_log_prob += log_sentence_probability(pred)
        current_sent = [start_token]

    end_time = time.time()
    elapsed_time = end_time - start_time

    print()
    tagging_accuracy = calc_accuracy_rate(predictions, gold_standards)
    print('tagging accuracy on DEV:', tagging_accuracy)
    print('time required for predictions (s):', elapsed_time)
    print('log-probability of predictions:', total_log_prob, flush=True)

## 1. Gibbs Sampling for HMMs

### a) Gibbs Sampling Derivation

$P(Y_t=y|Y_{-t}=y_{-t}, \textbf{X}=\textbf{x})$

by HMM independence rules and $A\bot B \rightarrow P(A|B)=P(A)$:

$=P(Y_t=y|Y_{t-1}=y_{t-1}, Y_{t+1}=y_{t+1}, X_t=x_t)$

by Bayes' theorem:

$=\frac{P(Y_{t-1}=y_{t-1}, Y_{t+1}=y_{t+1}, X_t=x_t|Y_t=y) P(Y_t=y)}{P(Y_{t-1}=y_{t-1}, Y_{t+1}=y_{t+1}, X_t=x_t)}$

by HMM independence rules, cancelling constant W.R.T. $Y_t$, and rearranging:

$\propto (P(Y_t=y)P(Y_{t-1}=y_{t-1}|Y_t=y))P(X_t=x_t|Y_t=y)P(Y_{t+1}=y_{t+1}|Y_t=y)$

one more Bayes' theorem:

$=(P(Y_{t-1}=y_{t-1})P(Y_t=y|Y_{t-1}=y_{t-1}))P(X_t=x_t|Y_t=y)P(Y_{t+1}=y_{t+1}|Y_t=y)$

cancel one more constant, and done!

$\propto P(Y_t=y|Y_{t-1}=y_{t-1})P(X_t=x_t|Y_t=y)P(Y_{t+1}=y_{t+1}|Y_t=y)$

To normalize with log-probabilities, plug in every possible value of $y$ to the formula above, then `np.logaddexp` them all together. Add the three terms above for a given $y$ then subtract the normalization factor to get the normalized log-probability.

### b) Gibbs Sampling Special Cases

I'm assuming you just want us to get these into a form where we can use the lookup tables for $p_\tau$ and $p_\eta$, so that's all the simplification that I bothered with. Normalization is done the same way as in the last part.

---

$P(Y_1=y | Y_{-1}=y_{-1}, \textbf{X}=\textbf{x})$

Let `<s>` stand in for $y_0$. $P(Y_0=\texttt{<s>})=1$ regardless of any other $\textbf{X}, \textbf{Y}$, so $Y_0 \bot Y_1$, so we can just add the condition of $Y_0$'s value. Plug in to the previous formula.

$\propto P(Y_1=y|Y_0=\texttt{<s>})P(X_1=x_1|Y_1=y)P(Y_2=y_2|Y_1=y)$

$\square$

---

$P(Y_T=y | Y_{-T}=y_{-T}, \textbf{X}=\textbf{x})$

Let `</s>` stand in for $y_{T+1}$. $P(Y_{T+1}=\texttt{</s>})=1$ regardless of any other $\textbf{X}, \textbf{Y}$, so $Y_T \bot Y_{T+1}$, so we can just add the condition of $Y_{T+1}$'s value. Plug in to the previous formula.

$\propto P(Y_T=y|Y_{T-1}=y_{T-1})P(X_T=x_T|Y_T=y)P(Y_{T+1}=\texttt{</s>}|Y_T=y)$

$\square$

### c) Implement Gibbs sampling

In [None]:
import functools, random

# Freeze one ordering of the tags so that every probability vector built
# below lines up with the same tag at the same index.
l_pos_tags = list(pos_tags)


def log_gibbs_prob(yt, ytm, ytp, xt):
    """Unnormalised log Gibbs score of tag ``yt`` at one position.

    ``ytm`` is the tag before it, ``ytp`` the tag after it and ``xt`` the
    word at that position.
    """
    transition_in = log_p_tau[(yt, ytm)]
    emission = log_p_eta[(xt, yt)]
    transition_out = log_p_tau[(ytp, yt)]
    return transition_in + emission + transition_out


def log_gibbs_probs(*args):
    """Unnormalised log Gibbs scores of every tag, in ``l_pos_tags`` order.

    ``args`` are forwarded to ``log_gibbs_prob`` after the candidate tag.
    """
    return [log_gibbs_prob(yt, *args) for yt in l_pos_tags]


def log_gibbs_probs_normed(probs):
    """Subtract the log normalisation factor (log-sum-exp) from ``probs``."""
    log_norm = functools.reduce(np.logaddexp, probs)
    return probs - log_norm


# Cache of score vectors, filled and read by gibbs_iteration.
probs_memo = {}


def gibbs_setup():
    """Gibbs sampling needs no setup; this is a placeholder for the scorer."""
    return None

def gibbs_iteration(Y_T, sentence):
    """Run one left-to-right Gibbs sweep and return the new tag sequence.

    Args:
        Y_T: current tags, aligned with ``sentence`` (``'<s>'`` first,
            ``'</s>'`` last).
        sentence: (word, tag) pairs beginning with the start token and
            ending with the end token; only the words are read.

    Returns:
        A new list ``['<s>', tag_1, ..., tag_n, '</s>']`` with one freshly
        sampled tag per word.

    The global ``beta_update()`` is called once at the start of the sweep,
    and each conditional distribution is raised to the global power
    ``beta`` before sampling.
    """
    beta_update()
    last = len(sentence) - 1
    Y_T1 = ['<s>']
    for ix in range(1, last):
        # Gibbs sampling conditions on the *current* value of every other
        # variable: the left neighbour is the tag sampled a moment ago in
        # this sweep (Y_T1), the right neighbour still comes from the
        # previous sweep (Y_T). The last word is always followed by '</s>'.
        next_tag = '</s>' if ix == last - 1 else Y_T[ix+1]
        dist_args = (Y_T1[ix-1], next_tag, sentence[ix][0])

        probs = probs_memo.get((dist_args, beta))
        if probs is None:
            # Only the beta-independent log scores are memoised; tempering
            # and normalisation are redone for the current beta.
            log_scores = probs_memo.get(dist_args)
            if log_scores is None:
                log_scores = log_gibbs_probs(*dist_args)
                probs_memo[dist_args] = log_scores
            probs = np.exp(
                log_gibbs_probs_normed([p*beta for p in log_scores]))

        Y_T1.append(random.choices(l_pos_tags, weights=probs)[0])
    Y_T1.append('</s>')
    return Y_T1

def gibbs_sampling(sentence):
    """Tag ``sentence`` with the state reached after ``K`` Gibbs sweeps.

    Args:
        sentence: ``start_token`` followed by one (word, tag) entry per
            word; only the words are read.

    Returns:
        A list of (word, tag) pairs aligned with ``sentence``, starting with
        ``('<s>', '<s>')``.
    """
    # because of how this is called in the test function
    # K can't be an argument, has to be set externally
    # beta_update is a function set globally that has a parameter
    # reset=True means a new sentence, =False means a new iteration
    # beta is set globally
    beta_update(reset=True)
    # `sentence` already contains the start token, so it holds
    # len(sentence) - 1 real words. Drawing len(sentence) tags made Y_T one
    # entry too long and put a random tag where '</s>' belongs.
    num_words = len(sentence) - 1
    Y_T = np.random.choice(list(pos_tags), num_words, replace=True).tolist()
    Y_T = [start_token[0]] + Y_T + [end_token[0]]
    sentence = sentence + [end_token]
    for _ in range(K):
        Y_T = gibbs_iteration(Y_T, sentence)
    # Drop the trailing end-token pair before returning.
    return list(zip([s[0] for s in sentence], Y_T))[:-1]

### d) Test Gibbs Sampling

In [None]:
def beta_update(reset=False):
    """Constant-beta schedule: leave the global ``beta`` untouched."""
    return None


# Inverse temperature used by the sampler; 1.0 samples the plain conditional.
beta = 1.0

def test_all_k():
    """Score ``gibbs_sampling`` on DEV once for each sweep count ``K``.

    ``K`` is written to the module global that ``gibbs_sampling`` reads.
    """
    global K
    for K in (2, 5, 10, 50, 100, 500, 1000):
        print(f'----- K = {K} -----')
        score_prediction_function(gibbs_setup, gibbs_sampling)
        # Blank separator line after every run except the last.
        if K < 1000:
            print()


test_all_k()

----- K = 2 -----
doing setup, building lookup tables...
making predictions...


 43%|████▎     | 11741/27152 [00:04<00:06, 2338.22it/s]


KeyboardInterrupt: ignored

summary of results:

| K    | Runtime (s) | Tag Accuracy | Log-Prob of Preds |
|------|-------------|--------------|-------------------|
| 2    | 9.59        | 83.70%       | -177110           |
| 5    | 11.44       | 86.89%       | -169121           |
| 10   | 13.56       | 87.30%       | -168110           |
| 50   | 43.80       | 87.53%       | -167661           |
| 100  | 79.96       | 87.54%       | -167485           |
| 500  | 374.08      | 87.77%       | -167251           |
| 1000 | 740.01      | 87.55%       | -167289           |

### e) Setting $\beta$

In [None]:
# Repeat the K sweep for several constant values of the global beta.
for beta in [0.5, 2., 5.]:
    # The original line was missing the `+` before ' -----' (a SyntaxError).
    print('----- beta = ' + str(beta) + ' -----')
    test_all_k()

| K    | $\beta$ | Runtime (s) | Tag Accuracy | Log-Prob of Preds |
|------|------|-------------|--------------|-------------------|
| 2    | 0.5     |   9.83   |    70.16%    |   -219167          |
|      |   2.0   |    9.54         |      86.23%        |      -171951             |
|      | 5.0     |     9.51        |      86.86%        |      -170494             |
| 5    |   0.5   |  14.05      |    77.18%    |      -195628     |
|      |    2.0  |       10.72      |        88.19%      |       -166613            |
|      |    5.0  |    10.68         |     88.48%         |       -166334            |
| 10    |   0.5   |   18.21     |    77.75%    |      -193608     |
|      |    2.0  |      13.29       |       88.52%       |     -166022              |
|      |    5.0  |     13.18        |       88.74%       |      -166093             |
| 50   |   0.5   |   57.85     |   78.20%     |        -193383   |
|      |    2.0  |      43.09       |      89.02%        |      -165271             |
|      |    5.0  |      42.30       |      89.22%        |      -165342             |
| 100    |   0.5   |   98.01     |   78.06%     |      -193377     |
|      |    2.0  |       79.18      |        89.01%      |        -164980           |
|      |    5.0  |  78.82           |      89.29%        |       -165162            |
| 500    |   0.5   |    427.28    |    78.32%    |    -192331       |
|      |    2.0  |    369.96         |   89.28%           |      -164813             |
|      |    5.0  |      369.26       |        89.26%      |   -164846                |
| 1000    |   0.5   |   798.24     |   77.90%     |      -193385     |
|      |    2.0  |      733.53       |       89.26%       |     -164669              |
|      |    5.0  |      732.23       |      89.30%        |       -164732            |

I see a trend where the tag accuracy increases sharply when moving from $\beta=0.5$ to $\beta=2.0$, and a small increase from $\beta=2.0$ to $\beta=5.0$. The trend with $K$ is the same as last time, where higher $K$ means better performance, but with diminishing returns as $K$ grows.

Interestingly enough, when $\beta=0.5$, everything runs noticeably slower than when $\beta > 1$, but I'm exponentiating in log space, so either way the same amount of multiplication is happening. I think what's going on is that a lower $\beta$ makes the sampler more likely to pick unlikely tags, so a greater variety of combinations of $(Y_{t-1}, Y_{t+1}, X_t)$ show up as it runs, and there are a lot more cache misses.

### f) Annealing $\beta$

In [None]:
# linear annealing schedule from the assignment
def beta_update(reset=False):
    """Set the global ``beta`` to 0.1 on reset, otherwise add 0.1 to it."""
    global beta
    beta = 0.1 if reset else beta + 0.1
            
# Re-run the K sweep with the beta_update defined above: gibbs_sampling
# resets beta for each sentence and gibbs_iteration advances it every sweep.
test_all_k()

#### $\beta_0 = 0.1$, $\beta_t = 0.1+\beta_{t-1}$ 

This is a little better than constant $\beta$ for high $K$. The tag accuracies are pretty terrible for low $K$ as expected (because $\beta$ doesn't reach 1 until $K=10$). However, above $K=10$, it gets a whole fraction of a percent advantage. 

Unfortunately, especially for large $K$, this takes a very significant amount longer to compute because there are so many more possibilities to memoize, and so many fewer iterations per beta. Basically the values that *are* memoized are spread more sparsely across the whole space, because there are $K$ options for beta as well. I tried to memoize the intermediate step of the distribution before exponentiating to beta, but this didn't help much, because exponentiation and normalization are still pretty expensive :\(.

Things sped up a bit as it ran, but as it continued, the remaining cache misses were all on really rare probabilities, so the speed sort of leveled out. I want more cores but I'm too lazy to set up a Jupyter server on Midway2, SSH tunnel it into my laptop, port forward it, then connect Colaboratory to it... such is life. I am going to go read Zen and the Art of Motorcycle Maintenance, hopefully it will teach me the patience to cope with staring at this glacially slow-moving TQDM bar.

It has now finished running. I just spent 1700 seconds of my life staring at a progress bar for 0.01% improvement. :''(

| K    | Runtime (s) | Tag Accuracy | Log-Prob of Preds |
|------|-------------|--------------|-------------------|
| 2    | 11.24       | 40.50%       | -339231           |
| 5    | 23.56       | 79.19%       | -189529           |
| 10   |   38.56     | 87.54%    | -167990           |
| 50   |   152.74     |  89.44%     |      -164495     |
| 100  |  219.67     |    89.63%    |    -164340       |
| 500  |  473.38*   |   89.77%     |   -164454       |
| 1000 |    1706.84**  |   89.78%     |     -164365      |

\* artificially low, I started this run and let it get about halfway through, then killed it to add a progress bar because I thought it might be hanging, then restarted it- so there were a lot of things that got memoized the first time around that probably drastically sped up the second run

\*\* this column is honestly meaningless at this point, I got so fed up with $K=1000$ that I went absolutely crazy with the caching and micro-optimizations but was too tired to re-run the rest.... sorry :/

In [None]:
def test_all_k():
    """Score ``gibbs_sampling`` on DEV for ``K = 1000`` sweeps only.

    ``K`` is written to the module global that ``gibbs_sampling`` reads.
    """
    global K
    for K in (1000,):
        print(f'----- K = {K} -----')
        score_prediction_function(gibbs_setup, gibbs_sampling)
        if K < 1000:
            print()

# my own idea- exponential annealing!
def beta_update(reset=False):
    """Set the global ``beta`` to 0.01 on reset, otherwise grow it by 1%."""
    global beta
    beta = 0.01 if reset else beta * 1.01
        
# Run the (K = 1000 only) sweep defined above with the exponential schedule.
test_all_k()

#### $\beta_0 = 0.01$, $\beta_t = 1.01 * \beta_{t-1}$ 

I picked these constants because they were nice factors of 10 where $\beta$ didn't end up larger than the number of atoms in the universe after 1000 iterations :P. $\beta$ ends up around 200, which is only about double the max $\beta$ from the last one, but it has much more spread because it starts so much lower and initially grows so much slower.

The way that we're running this simulation basically is doing stochastic gradient descent in a space of tag states, with a "metric" of transition probabilities. This choice of $\beta$ means the simulation has a longer period at first to get out of local "islands" of high-probability states, then gets more and more "confined" to the highest-probability states it finds as $\beta$ grows quickly.

Unsurprisingly at lower $K$ this does very poorly, because $\beta$ stays very small the whole time, so decisions are made pretty much at random. I could probably tune constants to make it better for certain $K$.

At high $K$, specifically at 1000, this has the 3rd-best performance I've seen in any implementation including MBR, well outperforming constant $\beta$ or linear annealing. This is because high $\beta$ means that the tags are unlikely to change between iterations because the distribution is so imbalanced, so not many iterations are needed at that high $\beta$- the simulation will stay within whatever small island of high-probability states it settled on in the earlier rounds. At lower $\beta$ there's likely to be a lot of movement, which is good, because it means the tag state will be able to jump across islands of local high-probability states, then eventually settle on the global area of high-probability states, and then let a high $\beta$ do the rest of the work to make sure the final stable state is the most probable one in the local area.

| K    | Runtime (s) | Tag Accuracy | Log-Prob of Preds |
|------|-------------|--------------|-------------------|
| 2    |  7.93   |     9.48%  |          -506580|
| 5    |    19.32    |   9.38%     |  -506477       |
| 10   |  32.89      | 9.51%  |      -506464   |
| 50   |   162.79     |     9.58%  |     -505395     |
| 100  |   347.86    |   9.93%     |     -501930      |
| 500  |  1606.49*   |    88.65%    |    -165397      |
| 1000 |    2899.95*  |   90.05%     |    -163752       |

\* ran out of RAM, had to turn off part of the caching, so this is slower than it would have been with more memory :/

## 2. Gibbs Sampling for Minimum Bayes Risk Inference

### a) MBR and MAP

$\hat{\textbf{y}}= \arg\min_\textbf{y} \sum_{\textbf{y'}} P(\textbf{Y}=\textbf{y'} | \textbf{X} = \textbf{x}) \;\text{cost}(\textbf{y}, \textbf{y}')$

$= \arg\min_\textbf{y} \left(P(\textbf{Y}=\textbf{y} | \textbf{X} = \textbf{x}) \;\text{cost}(\textbf{y}, \textbf{y})+ \sum_{\textbf{y'}\neq y} P(\textbf{Y}=\textbf{y'} | \textbf{X} = \textbf{x}) \;\text{cost}(\textbf{y}, \textbf{y}')\right)$

$= \arg\min_\textbf{y} \left(P(\textbf{Y}=\textbf{y} | \textbf{X} = \textbf{x}) \cdot 0+ \sum_{\textbf{y'}\neq y} P(\textbf{Y}=\textbf{y'} | \textbf{X} = \textbf{x}) \cdot 1\right)$

$= \arg\min_\textbf{y} \sum_{\textbf{y'}\neq y} P(\textbf{Y}=\textbf{y'} | \textbf{X} = \textbf{x})$

$= \arg\min_\textbf{y} P(\textbf{Y}\neq \textbf{y}|\textbf{X}=\textbf{x})$

$= \arg\min_\textbf{y} 1- P(\textbf{Y}=\textbf{y}|\textbf{X}=\textbf{x})$

$= \arg\max_\textbf{y}  P(\textbf{Y}=\textbf{y}|\textbf{X}=\textbf{x})$

### b) Approximation of Hamming cost MBR

Same idea as 0-1 cost, instead of looking at the probability over all possible samples that $Y_t=y | \textbf{X}=\textbf{x}$, just average its indicator over a bunch of samples for that value of $\textbf{x}$.

$P(Y_t=y|\textbf{X}=\textbf{x}) = \sum_{y_{-t}} P(Y_t=y, Y_{-t}=y_{-t}|\bf{X}=\bf{x})$

$\simeq \frac{1}{K} \sum_{i=1}^K \mathbb{I}[\tilde{y_t}^{(i)} = y]$

### c) Implementing MBR inference

My plan here is to keep a count of how many times a tag is predicted at a certain index, then just take the maximum tag at each index.

In [None]:
from collections import Counter

def MBR_setup():
    """MBR inference needs no setup; this is a placeholder for the scorer."""
    return None

def table_score_prediction_function(start_work_func, pred_func, do_setup=True):
    """Tag the DEV file with ``pred_func`` and return one table-row fragment.

    Args:
        start_work_func: zero-argument callable, run once before predicting
            when ``do_setup`` is true.
        pred_func: callable that receives one sentence (``start_token``
            followed by the ``[word, gold_tag]`` rows of the file) and
            returns the predicted ``(word, tag)`` sequence.
        do_setup: whether to call ``start_work_func`` first.

    Returns:
        The string ``' <elapsed seconds> | <accuracy in percent>% |'``.
    """
    start_time = time.time()
    if do_setup:
        start_work_func()
    with open('data/en_ewt.dev', 'r') as devfile:
        lines = devfile.readlines()

    # A blank line closes each sentence. If the file does not end with one,
    # add it so the final sentence is scored instead of silently dropped.
    if lines and lines[-1].split():
        lines.append('\n')

    current_sent = [start_token]
    predictions = []
    gold_standards = []
    for row in lines:
        row = row.split()
        if row:
            current_sent.append(row)
            continue
        # Blank line: the sentence is complete. The log-probability that
        # used to be accumulated here was never reported, so it is no
        # longer computed inside the timed region.
        gold_standards.append(current_sent)
        predictions.append(pred_func(current_sent))
        current_sent = [start_token]

    end_time = time.time()
    elapsed_time = end_time - start_time

    tagging_accuracy = calc_accuracy_rate(predictions, gold_standards)
    return f' {elapsed_time} | {tagging_accuracy * 100}% |'
def MBR_inference(sentence):
    """Tag ``sentence`` with the per-position most frequent Gibbs sample.

    Runs ``K`` Gibbs sweeps, counts how often each tag is sampled at each
    position, and returns the most common tag per position.

    Args:
        sentence: ``start_token`` followed by one (word, tag) entry per
            word; only the words are read.

    Returns:
        A list of (word, tag) pairs aligned with ``sentence``, starting with
        ``('<s>', '<s>')``.
    """
    # because of how this is called in the test function
    # K can't be an argument, has to be set externally
    # beta_update is a function set globally that has a parameter
    # reset=True means a new sentence, =False means a new iteration
    # beta is set globally
    beta_update(reset=True)
    # `sentence` already contains the start token, so it holds
    # len(sentence) - 1 real words. Drawing len(sentence) tags made Y_T one
    # entry too long and put a random tag where '</s>' belongs.
    num_words = len(sentence) - 1
    Y_T = np.random.choice(list(pos_tags), num_words, replace=True).tolist()
    Y_T = [start_token[0]] + Y_T + [end_token[0]]
    sentence = sentence + [end_token]
    counters = [Counter() for _ in sentence]
    for _ in range(K):
        Y_T = gibbs_iteration(Y_T, sentence)
        for ix, y_t in enumerate(Y_T):
            counters[ix][y_t] += 1
    # With K == 0 nothing was counted; fall back to the initial tags rather
    # than indexing into an empty most_common() result.
    best_tags = [counts.most_common(1)[0][0] if counts else y_t
                 for counts, y_t in zip(counters, Y_T)]
    # Drop the trailing end-token pair before returning.
    return list(zip([s[0] for s in sentence], best_tags))[:-1]

def test_all_k_MBR():
    """Print one markdown table row per sweep count ``K`` for MBR inference.

    ``K`` is written to the module global read by ``MBR_inference``. The
    current global ``beta`` is shown only in the first row (K == 2).
    """
    global K, beta
    for K in (2, 5, 10, 50, 100, 500, 1000):
        # Build the row prefix before scoring, as the row text is assembled
        # left to right.
        beta_cell = ' ' + str(beta) + ' ' if K == 2 else ''
        row_prefix = '|' + beta_cell + '| ' + str(K) + ' | '
        timing_and_accuracy = table_score_prediction_function(MBR_setup,
                                                              MBR_inference)
        print(row_prefix + timing_and_accuracy)

In [None]:
def beta_update(reset=False):
    """Constant-beta schedule: leave the global ``beta`` untouched."""
    return None

# this just outputs the rows of the table, i was being lazy
# Each pass sets the global beta and then prints one row per K via
# test_all_k_MBR.
for beta in [0.5, 1., 2., 5.]:
    test_all_k_MBR()

| 0.5 | 2 |  9.330647945404053 | 55.60179728933412% |


KeyboardInterrupt: ignored

For low $K$, accuracy increases with $\beta$. This is probably because there are so few iterations that quickly pulling the tags into a local minimum (with very high-probability tags from whatever state it starts in) ends up being the best on average.

With high $K$, $\beta=1$ actually has the best performance, then $0.5$, then $2$, then $5$. I think this is because more iterations give the tag state more time to wander around, and (assuming the model is right) regression to the mean implies that if the states move around enough, they end up doing a random-ish walk centered around the "correct" tagging. 

Lower betas will move around enough to escape any local minima and explore around the "correct" answer, so that their average ends up being closest to the truth. 

Higher $\beta$ probably has a better chance of being stuck in a local minimum during all the iterations, because it has a very high probability of picking between only a few highly-locally-likely tags each time, so its average ends up around that local minimum rather than around the correct answer.

| $\beta$ | $K$ | Runtime (s) | Tag Accuracy |
|------|------|-------------|--------------|
| 0.5 | 2 |  3.44 | 55.59% |
|| 5 |  4.53 | 82.44% |
|| 10 |  6.23 | 86.79% |
|| 50 |  18.68 | 89.31% |
|| 100 |  34.08 | 89.56% |
|| 500 |  157.98 | 89.92% |
|| 1000 |  303.68 | 89.91% |
| 1.0 | 2 |  4.38 | 69.62% |
|| 5 |  5.31 | 87.35% |
|| 10 |  6.68 | 88.75% |
|| 50 |  19.76 | 89.65% |
|| 100 |  31.71 | 89.97% |
|| 500 |  139.05 | 90.15% |
|| 1000 |  272.64 | 90.18% |
| 2.0 | 2 |  3.78 | 72.55% |
|| 5 |  4.63 | 88.33% |
|| 10 |  6.00 | 89.09% |
|| 50 |  16.77 | 89.70% |
|| 100 |  30.23 | 89.59% |
|| 500 |  138.66 | 89.89% |
|| 1000 |  272.35 | 89.85% |
| 5.0 | 2 |  5.98 | 73.27% |
|| 5 |  6.21 | 88.49% |
|| 10 |  6.69 | 88.87% |
|| 50 |  17.31 | 89.23% |
|| 100 |  30.42 | 89.25% |
|| 500 |  136.71 | 89.49% |
|| 1000 |  268.00 | 89.53% |