In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import levene
from scipy.stats import f_oneway

In [2]:
def get_df(name, outputs_dir='outputs'):
    """Load one BLEU score table and add a per-row best-score column.

    Reads ``{outputs_dir}/{name}-bleus.csv`` and appends a ``top_bleu``
    column holding the row-wise maximum of the ``bleu1``, ``bleu2`` and
    ``bleu3`` columns.

    Parameters
    ----------
    name : str
        Prefix of the CSV file name; the file read is ``{name}-bleus.csv``.
    outputs_dir : str or path-like, default 'outputs'
        Directory that contains the CSV file. The default reproduces the
        previously hard-coded location, so existing calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with the extra ``top_bleu`` column.

    Raises
    ------
    FileNotFoundError
        If the CSV file does not exist (raised by ``pd.read_csv``).
    KeyError
        If any of ``bleu1``, ``bleu2`` or ``bleu3`` is missing from the file.
    """
    df = pd.read_csv(f'{outputs_dir}/{name}-bleus.csv')
    # axis=1 -> take the maximum across the three columns within each row.
    df['top_bleu'] = df[['bleu1', 'bleu2', 'bleu3']].max(axis=1)
    return df

In [3]:
# Load the score table of each model, in the order the names are listed.
ada_df, babbage_df, curie_df, turbo_df = (
    get_df(model_name)
    for model_name in ('ada', 'babbage', 'curie', 'turbo')
)

## Levene test
### H0: The variances of the groups are equal
### We fail to reject H0 (p ≈ 0.167 > 0.05) -> There is no evidence that the variances differ, so we treat them as equal.

In [4]:
# One sample of `bleu1` values per model; Levene's test compares their variances.
# scipy's default is center='median'.
levene_samples = [
    scores['bleu1']
    for scores in (ada_df, babbage_df, curie_df, turbo_df)
]
statistic, p = levene(*levene_samples)
print(f"Statistic: {statistic}, p-value: {p}")

Statistic: 1.6896394309391163, p-value: 0.16710640360052412


## One-way ANOVA
### Assumptions
- The data are not normally distributed; however, the sample size is large enough to disregard this.
- The homogeneity of variances assumption is fulfilled (Levene's test above did not reject equal variances).
- The data are independent.

### H0: There is no difference in the population means
### We reject H0 (p ≈ 0.022 < 0.05) -> At least one population mean differs from the others.

In [5]:
# One sample of `bleu1` values per model; one-way ANOVA compares their means.
anova_samples = [
    scores['bleu1']
    for scores in (ada_df, babbage_df, curie_df, turbo_df)
]
stat, p = f_oneway(*anova_samples)
print(f"Statistic: {stat}, p-value: {p}")


Statistic: 3.2312735459432202, p-value: 0.02151709867198638
