In [1]:
import numpy as np
from scipy import stats

https://rtmath.net/assets/docs/finmath/html/9291020f-884a-4590-b988-3eda38560bb8.htm
https://rtmath.net/assets/docs/finmath/html/bc1f37b5-460e-13ab-9ddb-0097a6609642.htm

<p>Z-score is a value used to describe the normal distribution. It is defined as the distance between the mean score and the experimental data point, expressed in terms of SD (standard deviation). In statistical data analysis, it is also called standard score, z value, standardized score, and normal score.</p>

The formula for calculating a z-score is 
z = (x-μ)/σ, </br>
where x is the raw score, μ is the population mean, and σ is the population standard deviation.</br>
As the formula shows, the z-score is simply the raw score minus the population mean, divided by the population standard deviation.

In [2]:
class ZTest:
  

    def zscore(self):
        sample_mean = np.mean(self.x1)
        sample_std = np.std(self.x1)
        zscores = (self.x1 - sample_mean) / sample_std
        return zscores

    def __init__(self, x1, x2=None, value=0):
        self.x1 = x1
        self.x2 = x2
        self.value = value

    def run(self):
        if self.x2 is None:
            # One sample z-test
            x1 = np.array(self.x1)
            n = len(x1)
            mean = x1.mean()
            std = x1.std(ddof=1)
            z = (mean - self.value) / (std / np.sqrt(n))
            p = stats.norm.sf(abs(z)) * 2
        else:
            # Two sample z-test
            x1 = np.array(self.x1)
            x2 = np.array(self.x2)
            n1 = len(x1)
            n2 = len(x2)
            mean1 = x1.mean()
            mean2 = x2.mean()
            std1 = x1.std(ddof=1)
            std2 = x2.std(ddof=1)
            z = (mean1 - mean2 - self.value) / np.sqrt(std1**2/n1 + std2**2/n2)
            p = stats.norm.sf(abs(z)) * 2
        return z, p
    

In [3]:
#You can use this class to perform one sample and two sample z-tests in Python. Here is an example of how to use it:

data = [88, 92, 94, 94, 96, 97, 97, 97, 99, 99, 105, 109, 109, 109, 110, 112, 112, 113, 114, 115]
ztest = ZTest(data, value=100)
ztest.run()


#This returns (1.5976240527147705,0.1101266701438426), where the first value is the test statistic for the one sample z-test and the second value is the corresponding p-value. Is there anything else you would like to know?

(1.5976240527147705, 0.11012667014384257)

In [4]:
ztest1=ZTest(data)
ztest1.zscore()

array([-1.80856637, -1.32788428, -1.08754323, -1.08754323, -0.84720219,
       -0.72703166, -0.72703166, -0.72703166, -0.48669062, -0.48669062,
        0.23433252,  0.71501461,  0.71501461,  0.71501461,  0.83518513,
        1.07552618,  1.07552618,  1.1956967 ,  1.31586722,  1.43603775])

In [5]:
import numpy as np
from scipy import stats

class TTest:
    def __init__(self, data1, data2=None,value=0):
        self.data1 = data1
        self.data2 = data2
        self.value = value

    def one_sample_test(self,alpha=0.05):
      # One sample t-test
            data1_mean = np.mean(self.data1)
            data1_std = np.std(self.data1)
            #n = len(x1)
            n_data1 = len(self.data1)
            #mean = x1.mean()
            #std = x1.std(ddof=1)
            t_statistic = (data1_mean - self.value) / (data1_std / np.sqrt(n_data1))
            p_value = stats.t.sf(abs(t_statistic), n_data1-1) * 2
            if p_value < alpha:
              print("The null hypothesis can be rejected")
            else:
              print("The null hypothesis cannot be rejected")


    def two_sample_test(self, alpha=0.05):
        data1_mean = np.mean(self.data1)
        data2_mean = np.mean(self.data2)
        data1_std = np.std(self.data1)
        data2_std = np.std(self.data2)
        n_data1 = len(self.data1)
        n_data2 = len(self.data2)

        pooled_std = np.sqrt(((n_data1 - 1) * data1_std ** 2 + (n_data2 - 1) * data2_std ** 2) / (n_data1 + n_data2 - 2))
        t_statistic = (data2_mean - data1_mean) / (pooled_std * np.sqrt(1 / n_data1 + 1 / n_data2))
        p_value = stats.t.sf(np.abs(t_statistic), n_data1 + n_data2 - 2) * 2

        if p_value < alpha:
            print("The null hypothesis can be rejected")
        else:
            print("The null hypothesis cannot be rejected")
    
    def paired_test(self,alpha=0.05):
      n_data1 = len(self.data1)
      d = self.data1 - self.data2
      d_mean = d.mean()
      d_std = d.std(ddof=1)
      t_statistic = d_mean / (d_std / np.sqrt(n_data1))
      p_value = stats.t.sf(abs(t_statistic), n_data1-1) * 2

      if p_value < alpha:
            print("The null hypothesis can be rejected")
      else:
            print("The null hypothesis cannot be rejected")

In [6]:
data = [88, 92, 94, 94, 96, 97, 97, 97, 99, 99, 105, 109, 109, 109, 110, 112, 112, 113, 114, 115]
ttest = TTest(data , value=100)
ttest.one_sample_test()

The null hypothesis cannot be rejected


In [7]:
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols

class ANOVA:
    def __init__(self, *args):
        self.samples = args

    def oneway(self):
        f, p = stats.f_oneway(*self.samples)
        return f, p

    def twoway(self):
        #f, p = stats.f_oneway(*[sample[:, value] for sample in self.samples])
        #return f, p
        model = ols('DV ~ IV1 + IV2', data=self.samples).fit()
        aov_table = sm.stats.anova_lm(model, typ=2)

In [8]:
sample1 = [1, 2, 3]
sample2 = [4, 5, 6]
sample3 = [7, 8, 9]
anova = ANOVA(sample1 , sample2 , sample3)
anova.oneway()

(27.0, 0.0010000000000000002)

In [9]:
class ChiSquareTest:
    def __init__(self, observed, expected):
        self.observed = observed
        self.expected = expected
        self.df = len(observed) - 1
    
    def test_statistic(self):
        return np.sum((self.observed - self.expected) ** 2 / self.expected)

    def p_value(self):
        test_statistic = self.test_statistic()
        return 1 - stats.chi2.cdf(test_statistic, self.df)


In [10]:
class ABTest:
    def __init__(self, control, treatment):
        self.control = control
        self.treatment = treatment

    def test(self, alpha=0.05):
        control_mean = np.mean(self.control)
        treatment_mean = np.mean(self.treatment)
        control_std = np.std(self.control)
        treatment_std = np.std(self.treatment)
        n_control = len(self.control)
        n_treatment = len(self.treatment)

        pooled_std = np.sqrt(((n_control - 1) * control_std ** 2 + (n_treatment - 1) * treatment_std ** 2) / (n_control + n_treatment - 2))
        t_statistic = (treatment_mean - control_mean) / (pooled_std * np.sqrt(1 / n_control + 1 / n_treatment))
        p_value = stats.t.sf(np.abs(t_statistic), n_control + n_treatment - 2) * 2

        if p_value < alpha:
            print("The null hypothesis can be rejected")
        else:
            print("The null hypothesis cannot be rejected")

In [11]:

class ABTest(TTest):
    def __init__(self, control, treatment):
        super().__init__(control, treatment)

    def test(self, alpha=0.05):
        self.two_sample_test(alpha)


Use these classes to solve the following problems:
1. Z-test:
Suppose we are testing a new model of cell-phone and want to compare its bandwidth to the
previous model.
Sample = [4.186, 4.439, 4.781, 4.388, 4.947, 4.853, 4.889, 4.682, 4.428, 4.533,4.557, 4.761, 4.491, 4.334, 4.83 , 4.268, 4.68 , 4.437, 5.382, 5.111, 5.096, 5.232,5.033, 5.57 , 4.474, 4.789, 4.725, 4.84 , 4.817, 4.438, 4.754, 4.966, 4.285, 4.482,4.396, 4.418, 4.514, 5.383, 5.264, 4.309, 5.058, 4.392, 4.788, 4.934, 4.967, 4.554,4.42 , 5. , 5.126, 5.082, 4.944, 4.658]
● State the null and alternative hypotheses.
● Use the Z-test to determine whether the new model has a significantly different
bandwidth 4.5 GHz from the previous model with a standard deviation of 0.6
GHz, using a significance level of 0.05.
● Interpret the results


In [12]:
CellPhone_Sample = [4.186, 4.439, 4.781, 4.388, 4.947, 4.853, 4.889, 4.682, 4.428, 4.533,4.557, 4.761, 4.491, 4.334, 4.83 , 4.268, 4.68 , 4.437, 5.382, 5.111, 5.096, 
                    5.232,5.033, 5.57 , 4.474, 4.789, 4.725, 4.84 , 4.817, 4.438, 4.754, 4.966, 4.285, 4.482,4.396, 4.418, 4.514, 5.383, 5.264, 4.309, 5.058, 4.392, 
                    4.788, 4.934, 4.967, 4.554,4.42 , 5. , 5.126, 5.082, 4.944, 4.658]

Null Hypothesis :** H0** : The new model doesnt have a significantly different bandwidth from the pervious model

In [13]:
ztest = ZTest(CellPhone_Sample, value=4.5)
zscore,pvalue=ztest.run()
print(zscore)
print(pvalue)

5.35229534880839
8.68454972708201e-08


A z-test is a statistical test used to determine if there is a significant difference between the means of two groups or if a sample mean is significantly different from a population mean. The result of a z-test is a z-score, which represents the number of standard deviations that the observed difference is from the expected difference under the null hypothesis.

To interpret the result of a z-test, you need to compare the calculated z-score to a critical value from a standard normal distribution table. If the absolute value of the z-score is greater than the critical value, then you can reject the null hypothesis and conclude that there is a significant difference between the means.

The p-value of a z-test represents the probability of observing a z-score as extreme or more extreme than the calculated z-score if the null hypothesis is true. A small p-value (typically less than 0.05) indicates strong evidence against the null hypothesis and suggests that you can reject it.

In [14]:
alpha=0.05
if pvalue < alpha:
  print("The null hypothesis can be rejected")
else:
  print("The null hypothesis cannot be rejected")

The null hypothesis can be rejected


The new cell phone model have a significant different bandwidth than the previous model

2. T-test
A. One sample T-test: Suppose we have a new manufacturing process for
producing aluminum cans, and we want to test whether the mean weight of the
cans produced using this new process is significantly different from the target
weight of 15 grams. We randomly sample 30 cans produced using the new
process and measure their weights in grams. We obtain the following data:
Sample = [14.8, 15.2, 15.1, 15.3, 15.0, 14.9, 15.2, 14.8, 15.1, 15.0, 14.9,14.8, 15.2, 14.9, 15.0, 14.9, 15.1, 15.3, 15.0, 15.1, 14.8, 15.0, 15.2, 15.1,15.3, 15.1, 15.0, 14.8, 15.2, 15.0]
● State the null and alternative hypotheses.
● Use the appropriate T-test to determine whether the mean weight of the
produced cans is equal to the target weight of 15 grams, using a
significance level of 0.05.
● Interpret the results


<b>Null Hypothesis : H0 : The mean weight of the cans produced by the new manufacturing process is 15gms
Alternate hypothesis : H1 : The mean weight of the cans produced is either greater than or less than 15gms</b>

In a one-sample t-test, the null hypothesis is that the population mean is equal to a specified value. This value is often denoted as μ0 and represents the hypothesized value of the population mean.

The alternative hypothesis in a one-sample t-test can be either one-sided or two-sided. In a one-sided test, the alternative hypothesis is that the population mean is either greater than or less than μ0. In a two-sided test, the alternative hypothesis is that the population mean is not equal to μ0.

The purpose of a one-sample t-test is to determine if there is enough evidence to reject the null hypothesis and conclude that the population mean is significantly different from μ0.

In [15]:
Can_weights = [14.8, 15.2, 15.1, 15.3, 15.0, 14.9, 15.2, 14.8, 15.1, 15.0, 14.9,14.8, 15.2, 14.9, 15.0, 14.9, 15.1, 15.3, 15.0, 15.1, 14.8, 15.0, 15.2,
               15.1,15.3, 15.1, 15.0, 14.8, 15.2, 15.0] 

In [16]:
ttest = TTest(Can_weights,15)
ttest.one_sample_test()

The null hypothesis can be rejected


The mean weight of the cans produced in the new manufacturing process is not 15gms, which was the expected target wieight. The process has to be redifined

B. Two Sample T-test: Suppose a food company has developed a new flavor of
potato chips and wants to compare it to the current best-selling flavor. The
company randomly selects two groups of 20 customers each. The first group is
given the new flavor of potato chips, while the second group is given the
best-selling flavor. After trying the potato chips, each customer rates the flavor on
a scale of 1 to 10. The following are the flavor ratings for the two groups:
New flavor = [8, 7, 9, 6, 7, 8, 9, 7, 8, 7, 6, 8, 7, 9, 8, 7, 6, 9, 8, 7]
Best selling flavor = [6, 7, 8, 6, 7, 6, 7, 6, 8, 7, 6, 7, 6, 8, 7, 6, 7, 8, 6, 7]
● State the null and alternative hypotheses.
● Use the appropriate T-test to determine whether there is a significant
difference between the two groups or not, using a significance level of
0.05.
● Interpret the results

<b>Null Hypothesis: H0: The population mean of two groups are equal. (There is no significant difference between old flavor and new flavor)
Alternate Hypothesis : H1 : The population mean of one group is either greater than or less than the population mean of the other group.(one-sided test)</br>
Alternate Hypothesis : H1: The population mean of the two groups are not equal (two-sided test)</b>

In a two-sample t-test, the null hypothesis is that the population means of two groups are equal. This means that any observed difference between the sample means of the two groups is due to random chance and not due to a significant difference between the population means.

The alternative hypothesis in a two-sample t-test can be either one-sided or two-sided. In a one-sided test, the alternative hypothesis is that the population mean of one group is either greater than or less than the population mean of the other group. In a two-sided test, the alternative hypothesis is that the population means of the two groups are not equal.

The purpose of a two-sample t-test is to determine if there is enough evidence to reject the null hypothesis and conclude that there is a significant difference between the population means of the two groups.

In [17]:
Potato_New_flavor = [8, 7, 9, 6, 7, 8, 9, 7, 8, 7, 6, 8, 7, 9, 8, 7, 6, 9, 8, 7] 
Potato_Best_selling_flavor = [6, 7, 8, 6, 7, 6, 7, 6, 8, 7, 6, 7, 6, 8, 7, 6, 7, 8, 6, 7]


In [18]:
ttest = TTest(Potato_New_flavor,Potato_Best_selling_flavor)
ttest.two_sample_test()

The null hypothesis can be rejected


There is significant difference in the means of the two flavors

C. Paired T-test: Suppose a company wants to evaluate a new training program for
its employees. The company selects 20 employees and measures their
productivity before and after the training program. The following are the
productivity scores (number of tasks completed per hour) for each employee
before and after the training:
Before = [15, 18, 12, 10, 17, 16, 12, 14, 19, 18, 11, 13, 16, 17, 19, 14, 16,13, 15, 12]
After = [18, 20, 15, 13, 19, 18, 14, 16, 21, 20, 14, 16, 19, 20, 22, 16, 18,15, 17, 14]
● State the null and alternative hypotheses.
● Use the appropriate T-test to determine whether the new training program
has had an effect on employee productivity, with a significance level of
0.05.

<b> Null Hypothesis : H0 : The population mean of the differences between paired observations is equal to zero. (Training has no effect) <br />
Alternate Hypothesis : H1 : The population mean of the differences between paired observations is either greater than or less than zero (one side test) <br />
Alternate Hypothesis : H1: The population mean of the differences between paired observations is not equal to zero.(two sided)



In a paired t-test, the null hypothesis is that the population mean of the differences between paired observations is equal to zero. This means that any observed difference between the sample means of the paired observations is due to random chance and not due to a significant difference between the population means.

The alternative hypothesis in a paired t-test can be either one-sided or two-sided. In a one-sided test, the alternative hypothesis is that the population mean of the differences between paired observations is either greater than or less than zero. In a two-sided test, the alternative hypothesis is that the population mean of the differences between paired observations is not equal to zero.

The purpose of a paired t-test is to determine if there is enough evidence to reject the null hypothesis and conclude that there is a significant difference between the population means of the paired observations.

In [19]:
Before_training_prod = [15, 18, 12, 10, 17, 16, 12, 14, 19, 18, 11, 13, 16, 17, 19, 14, 16,13, 15, 12] 
After_training_prod = [18, 20, 15, 13, 19, 18, 14, 16, 21, 20, 14, 16, 19, 20, 22, 16, 18,15, 17, 14]


In [20]:
ttest = TTest(np.array(Before_training_prod),np.array(After_training_prod))
ttest.paired_test()

The null hypothesis can be rejected


The new training program had an impact on the productivity of the employees

3. ANOVA Test
A. One-Way: Suppose a company has three departments (A, B, and C) and wants
to test whether there is a significant difference in salaries between the
departments. The company selects 10 employees randomly from each
department and records their salaries.
Department A = [55, 60, 50, 58, 63, 62, 57, 56, 61, 59]
Department B = [50, 52, 48, 49, 55, 53, 51, 54, 47, 50]
Department C = [45, 43, 48, 50, 42, 47, 49, 46, 44, 48]
● State the null and alternative hypotheses.
● Use One-Way ANOVA-test to determine whether there is a significant
difference in salaries between the three departments.
● Interpret the results

<b> Null Hypothesis : H0: The population means of all departments are equal <br />
Alternate Hpothesis : H1: At least one of the population means is different from the others. This means that there is a significant difference between the population means of at least two of the groups.

In a one-way ANOVA test, the null hypothesis is that the population means of all groups are equal. This means that any observed differences between the sample means of the groups are due to random chance and not due to a significant difference between the population means.

The alternative hypothesis in a one-way ANOVA test is that at least one of the population means is different from the others. This means that there is a significant difference between the population means of at least two of the groups.

The purpose of a one-way ANOVA test is to determine if there is enough evidence to reject the null hypothesis and conclude that there is a significant difference between the population means of at least two of the groups.

In [21]:
DepartmentA = [55, 60, 50, 58, 63, 62, 57, 56, 61, 59]
DepartmentB = [50, 52, 48, 49, 55, 53, 51, 54, 47, 50] 
DepartmentC = [45, 43, 48, 50, 42, 47, 49, 46, 44, 48]


In [22]:
anova = ANOVA(DepartmentA  , DepartmentB , DepartmentC)
fscore,pvalue=anova.oneway()
print(fscore)
print(pvalue)

37.68181818181821
1.5361859680465563e-08


In [23]:
alpha=0.05
if pvalue < alpha:
  print("The null hypothesis can be rejected")
else:
  print("The null hypothesis cannot be rejected")

The null hypothesis can be rejected


<b>There is a significant difference between at least two of the population means. (Salary difference is there between atleast two departments)</b>

B. Two-Way: Suppose a company has three departments (A, B, and C) and wants
to test whether there is a significant difference in salaries between the
departments, while also considering the effect of gender. The company selects
10 employees randomly from each department and records their salaries and
gender.
Department A:
Male: [$55k, $60k, $50k, $58k, $63k]
Female: [$62k, $57k, $56k, $61k, $59k]
Department B:
Male: [$50k, $52k, $48k, $49k, $55k]
Female: [$53k, $51k, $54k, $47k, $50k]
Department C:
Male: [$45k, $43k, $48k, $50k, $42k]
Female: $47k, $49k, $46k, $44k, $48k]
● State the null and alternative hypotheses.
● Use Two-Way ANOVA-test to determine whether there is a significant
difference in salaries between the three departments.
● Interpret the results


<b>Null Hypothesis : H0: There is no significant interaction between the two independent variables (department, gender) on the dependent variable(salary). This means that the effect of one independent variable on the dependent variable is the same at all levels of the other independent variable.<br />
Alternate Hypothesis : H1 : There is a significant interaction between the two independent variables or that there is a significant main effect of at least one of the independent variables.

In a two-way ANOVA test, the null hypothesis is that there is no significant interaction between the two independent variables on the dependent variable. This means that the effect of one independent variable on the dependent variable is the same at all levels of the other independent variable.

In addition to testing for an interaction effect, a two-way ANOVA test also tests for main effects of each independent variable. The null hypothesis for each main effect is that the population means of the dependent variable are equal across all levels of the independent variable.

The alternative hypothesis in a two-way ANOVA test is that there is a significant interaction between the two independent variables or that there is a significant main effect of at least one of the independent variables.

The purpose of a two-way ANOVA test is to determine if there is enough evidence to reject the null hypotheses and conclude that there is a significant interaction between the independent variables or a significant main effect of at least one of the independent variables on the dependent variable.

In [24]:
import pandas as pd
data = {'Department': ['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'C'] * 2,
        'Gender': ['Male'] * 15 + ['Female'] * 15,
        'Salary': [55,60,50,58,63,62,57,56,61,59,50,52,48,49,55,53,51,54,47,50,45,43,48,50,42,47,49,46,44,48]}

df = pd.DataFrame(data)

In [25]:
anova = ANOVA(df)

In [26]:
df

Unnamed: 0,Department,Gender,Salary
0,A,Male,55
1,A,Male,60
2,A,Male,50
3,A,Male,58
4,A,Male,63
5,B,Male,62
6,B,Male,57
7,B,Male,56
8,B,Male,61
9,B,Male,59


In [27]:
anova.twoway()




Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "c:\Python310\lib\site-packages\patsy\compat.py", line 36, in call_and_wrap_exc
    return f(*args, **kwargs)
  File "c:\Python310\lib\site-packages\patsy\eval.py", line 169, in eval
    return eval(code, {}, VarLookupDict([inner_namespace]
  File "<string>", line 1, in <module>
  File "c:\Python310\lib\site-packages\patsy\eval.py", line 52, in __getitem__
    return d[key]
  File "c:\Python310\lib\site-packages\patsy\eval.py", line 52, in __getitem__
    return d[key]
TypeError: tuple indices must be integers or slices, not str

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Manjusha\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3378, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Manjusha\AppData\Local\Temp\ipykernel_25756\1751689729.py", line 1, in <module>
    anova.twoway()
  File "C:\Users

4. Chi-Square: Suppose we have a hypothesis that a six-sided die is fair, with each side
having an equal probability of landing face up. We roll the die 100 times and record the
number of times each side appears:
Side Observed Frequency
1 18
2 20
3 16
4 22
5 14
6 10
Given that the expected probabilities are equal to 1/6, Use the class CHi-Square to
estimate the goodness-of-fit through chi-square test to determine whether the observed
frequencies are consistent with the expected frequencies under the null hypothesis of a
fair die.

In [28]:
ObservedFrequency=[18,20,16,22,14,10]

# assuming observed is a 2D array of observed frequencies
observed = np.array(ObservedFrequency)
print(observed)
# calculate row and column totals


# calculate expected frequencies
expected= np.array([1/6]*6)



[18 20 16 22 14 10]


In [29]:
chi_square_test = ChiSquareTest(observed, expected)

In [30]:
test_stat=chi_square_test.test_statistic()
print(f"Test statistic: {test_stat:.3f}")
print(f"P-value: {chi_square_test.p_value():.3f}")

Test statistic: 10361.000
P-value: 0.000


The null hypothesis in a chi-square test is that there is no significant difference between the observed and expected frequencies. In other words, the null hypothesis assumes that any differences between the observed and expected frequencies are due to random chance.

A/B testing: Suppose a company has launched a new flavor of soda and wants to test if
it has a higher preference than the old flavor. The company conducts a survey with a
sample of 30 customers, randomly split into two groups of 15. One group is given the old
flavor of soda and the other group is given the new flavor of soda. Each participant rates
their preference on a scale of 1 to 10.
Old Flavor = [6, 7, 8, 5, 6, 7, 5, 8, 6, 7, 5, 6, 7, 6, 5]
New Flavor = [8, 9, 7, 8, 9, 6, 7, 8, 7, 8, 7, 8, 9, 6, 8]

State the null and alternative hypotheses <br />
In A/B testing, the null hypothesis is the default and it always says that there is no difference between the two groups being tested. You can think of this as a negative result to a test.<br />
Alternate Hypothesis: there is a difference between the two groups being tested. You can think of this as a positive result to a test

In [31]:
Old_Flavor = [6, 7, 8, 5, 6, 7, 5, 8, 6, 7, 5, 6, 7, 6, 5]
New_Flavor = [8, 9, 7, 8, 9, 6, 7, 8, 7, 8, 7, 8, 9, 6, 8]


● Use two sample t-test to determine whether there is a significant
difference in preference between the old and new flavors of soda, with a
significance level of 0.05

In [32]:
abtest = ABTest(Old_Flavor,New_Flavor)
abtest.test()

The null hypothesis can be rejected
