# Data analysis workbook for the vigenere stuff

In [39]:
import cbor2

In [40]:
import scipy.stats

I've copied a couple of files out of `../vigenere/target/criterion/data/`:

- `32early.cbor`, which is the timing run of `vigenere/benches/key_abc32.rs` using the key `012345a`
- `32late.cbor`,  which is the timing run of `vigenere/benches/key_abc32.rs` using the key `tuvwxyz`

Now I need to figure out how to handle them.

Having run `python -m cbor2.tool -p 32early.cbor` on the command line, I see that what I need in order to run a t-test is the list of times (in picoseconds?) in the `avg_values` list.

Ok. Time to read the cbor2 docs

I've never tried to read files from a notebook. I hope the file paths make sense.

In [41]:
# Criterion timing data copied next to this notebook (paths are relative to
# the notebook's working directory).
# Timing run of vigenere/benches/key_abc32.rs using the key `012345a`.
EARLY_PATH = './32early.cbor'
# Timing run of vigenere/benches/key_abc32.rs using the key `tuvwxyz`.
LATE_PATH = './32late.cbor'



In [61]:
class Comparison:
    """Compare the timing samples of two CBOR benchmark files with a t-test.

    Both files are decoded with ``cbor2`` and are expected to be mappings
    containing the keys ``"iterations"``, ``"values"``, ``"avg_values"`` and
    ``"estimates"`` (the latter with a ``"mean"`` entry).

    If the two files' ``"iterations"`` entries compare equal, the samples are
    treated as related and the ``"values"`` lists are compared with
    ``scipy.stats.ttest_rel``; otherwise the ``"avg_values"`` lists are
    compared with ``scipy.stats.ttest_ind``.

    Attributes:
        alt: The ``alternative`` string exactly as passed by the caller.
        related: Whether the related-samples test will be used.
        f1_mean, f2_mean: ``estimates["mean"]`` of each file.
        f2_main: Alias of ``f2_mean``, kept for backward compatibility with
            the attribute's original (misspelled) name.
        f1_values, f2_values: The sample lists handed to the t-test.
    """

    # scipy names the two-tailed alternative "two-sided" and rejects
    # "two-tailed", so this class's spelling is translated before the call.
    _SCIPY_ALTERNATIVES = {
        "two-tailed": "two-sided",
        "two-sided": "two-sided",
        "less": "less",
        "greater": "greater",
    }

    def __init__(self, file1: str, file2: str, alternative="two-tailed"):
        """Load both files and select the samples to compare.

        Args:
            file1: Path of the first CBOR file.
            file2: Path of the second CBOR file.
            alternative: One of ``"two-tailed"`` (or its scipy spelling
                ``"two-sided"``), ``"less"`` or ``"greater"``.

        Raises:
            ValueError: If ``alternative`` is not one of the accepted values.
        """
        if alternative not in self._SCIPY_ALTERNATIVES:
            valid_alternatives = list(self._SCIPY_ALTERNATIVES)
            raise ValueError(f'alternative must be one of {valid_alternatives}')

        # Stored as given; ttest() maps it to scipy's vocabulary.
        self.alt = alternative

        f1_data = self._load(file1)
        f2_data = self._load(file2)

        # Check whether these can be treated as related samples
        self.related: bool = f1_data["iterations"] == f2_data["iterations"]

        self.f1_mean = f1_data["estimates"]["mean"]
        self.f2_mean = f2_data["estimates"]["mean"]
        self.f2_main = self.f2_mean  # original attribute name, kept as alias

        # Related runs are compared on "values", unrelated on "avg_values".
        values_key = "values" if self.related else "avg_values"
        self.f1_values: list[float] = f1_data[values_key]
        self.f2_values: list[float] = f2_data[values_key]

    @staticmethod
    def _load(path: str):
        """Decode and return the single CBOR object stored in ``path``."""
        with open(path, "rb") as f:
            return cbor2.load(f)

    def ttest(self) -> tuple[float, float]:
        """Run the t-test selected by ``self.related``.

        Returns:
            The ``(t statistic, p-value)`` pair reported by scipy.
        """
        alternative = self._SCIPY_ALTERNATIVES.get(self.alt, self.alt)
        test = scipy.stats.ttest_rel if self.related else scipy.stats.ttest_ind
        t, p = test(self.f1_values, self.f2_values, alternative=alternative)
        return t, p



In [62]:
# Compare the early-key run against the late-key run. The "less" alternative
# is stored on the object and passed on to scipy's t-test.
comp32 = Comparison(EARLY_PATH, LATE_PATH, alternative="less")

In [63]:
# Displays the (t statistic, p-value) tuple returned by the t-test.
comp32.ttest()

(-0.444293599052737, 0.3289000124091078)