### Homework 5

* Create a class named Z-test; this class runs a one-sample Z-test
* Create a class named T-test; this class includes one-sample, two-sample, and paired t-tests
* Create a class named ANOVA; it includes one-way and two-way ANOVA
* Create a class named Chi-Square
* Create a class named AB testing


In [29]:
from scipy import stats
import numpy as np

class Ztest:
    '''
    One-sample Z-test for a mean when the population standard deviation is known.

    The test runs as soon as an instance is created. Afterwards the instance exposes:
      z           : the Z statistic
      p_value     : the p-value under the chosen alternative hypothesis
      reject_null : True if p_value < alpha
    '''

    # the alternative hypotheses understood by run_test
    VALID_H1 = ('unequal', 'smaller', 'larger')

    def __init__(self, x, sigma, test_value = 0, alpha = 0.05, H1 = 'unequal', print_result = True):
        '''
        Runs a one-sample Z-test based on the specified input

        Input:
          x            : the data sample (a vector)
          sigma        : the standard deviation of the population from which x was drawn
          test_value   : the hypothesized population mean
          alpha        : significance level for hypothesis testing
          H1           : alternative hypothesis: 'unequal' (default), 'smaller', or 'larger'
          print_result : if True, then results are printed to the console

        Raises:
          ValueError if H1 is not recognised, x is empty, or sigma is not positive
        '''
        # set attributes
        self.x = np.array(x)
        self.sigma = sigma
        self.test_value = test_value
        self.alpha = alpha
        self.H1 = H1
        # run the test
        self.run_test()
        # show results
        if print_result:
            self.print_results()

    def run_test(self):
        '''Run the Z-test and store z, p_value and reject_null on the instance'''
        # validate up front so a bad argument fails before any arithmetic is done
        if self.H1 not in self.VALID_H1:
            raise ValueError(f'Invalid alternative hypothesis: {self.H1}')
        if len(self.x) == 0:
            raise ValueError('x must contain at least one observation')
        if self.sigma <= 0:
            raise ValueError('sigma must be positive')

        standard_error = self.sigma / np.sqrt(len(self.x))
        self.z = (self.x.mean() - self.test_value) / standard_error

        if self.H1 == 'unequal':
            self.p_value = 2 * stats.norm.sf(abs(self.z))
        elif self.H1 == 'smaller':
            self.p_value = stats.norm.cdf(self.z)
        else:
            # sf keeps precision in the far upper tail, where 1 - cdf rounds to exactly 0
            self.p_value = stats.norm.sf(self.z)

        # decision at the requested significance level
        self.reject_null = bool(self.p_value < self.alpha)

    def print_results(self):
        '''Print the result of the statistical test'''
        n = len(self.x)
        # the value after ± is np.std(x) (numpy default, ddof=0) divided by sqrt(n)
        spread = np.std(self.x) / np.sqrt(n)
        print(f'\n---------------------------------')
        print(f'Z-test result')
        print(f'---------------------------------')
        print(f'sample mean = {self.x.mean():.2f} ± {spread:.2f} (n={n})')
        print(f'sigma       = {self.sigma:.2f}')
        print(f'z           = {self.z:.2f}')
        print(f'p           = {self.p_value:.4f}')
       


In [30]:
class Student_ttest:
    '''
    One-sample, independent two-sample (pooled variance), or paired Student t-test.

    The test runs as soon as an instance is created. Afterwards the instance exposes:
      t_value     : the t statistic
      p_value     : the p-value under the chosen alternative hypothesis
      reject_null : True if p_value < alpha
    '''

    def __init__(self, x, alpha = 0.05, H1 = 'unequal', paired = False, test_value = 0, print_result = True):
        '''
        Runs a one- or two-sample Student t-test based on the specified input

        Input:
          x            : the data sample (a vector, or a list with two vectors in case of a two-sample test)
          alpha        : significance level for hypothesis testing
          H1           : alternative hypothesis: 'unequal' (default), 'smaller', or 'larger'
          paired       : specifies whether to treat the samples as paired or as independent
                         (only relevant if it is a two-sample test)
          test_value   : the hypothesized population mean (used by the one-sample test, which
                         includes the paired case; ignored by the independent two-sample test)
          print_result : if True, then results are printed to the console

        Direction of the one-sided alternatives:
          independent two-sample : t is based on mean(x[0]) - mean(x[1])
          paired                 : t is based on mean(x[1] - x[0]) - test_value
          one-sample             : t is based on mean(x) - test_value
        'smaller' tests for a negative t, 'larger' for a positive t.

        Raises:
          ValueError if H1 is not recognised, a sample has fewer than two observations,
          or paired samples differ in length
        '''
        # set attributes
        self.x = x
        self.alpha = alpha
        self.H1 = H1
        self.is_paired = paired
        self.test_value = test_value
        # two-sample only if x holds exactly two vectors; two plain numbers are one sample of size 2
        self.is_two_sample_test = len(x) == 2 and all(np.ndim(sample) > 0 for sample in x)
        # run a one-sample t-test on the difference scores if this is a paired-samples t-test
        if self.is_two_sample_test and self.is_paired:
            first, second = np.asarray(self.x[0]), np.asarray(self.x[1])
            if first.shape != second.shape:
                raise ValueError('Paired samples must have the same length')
            self.x = second - first
            # from here on the data is handled as a single sample of differences
            self.is_paired = False
            self.is_two_sample_test = False
        # run the test
        self.run_test()
        # show results
        if print_result:
            self.print_results()

    def _p_value(self, df):
        '''Return the p-value of self.t_value for df degrees of freedom under self.H1'''
        if self.H1 == 'unequal':
            return 2 * stats.t.sf(np.abs(self.t_value), df)
        if self.H1 == 'smaller':
            # lower tail: the sign of t must be kept, so no abs() here
            return stats.t.cdf(self.t_value, df)
        if self.H1 == 'larger':
            # upper tail: the sign of t must be kept, so no abs() here
            return stats.t.sf(self.t_value, df)
        raise ValueError(f'Invalid alternative hypothesis: {self.H1}')

    def t_test_one_sample(self):
        '''Compute t-value and p-value for a one-sample t-test'''
        n = len(self.x)
        if n < 2:
            raise ValueError('A t-test needs at least two observations')
        mean = np.mean(self.x)
        std = np.std(self.x, ddof=1)
        self.t_value = (mean - self.test_value) / (std / np.sqrt(n))
        self.p_value = self._p_value(n - 1)

    def t_test_two_sample(self):
        '''Compute t-value and p-value for a two-sample t-test (pooled variance)'''
        n1, n2 = len(self.x[0]), len(self.x[1])
        if n1 < 2 or n2 < 2:
            raise ValueError('Each sample needs at least two observations')
        mean1, mean2 = np.mean(self.x[0]), np.mean(self.x[1])
        std1, std2 = np.std(self.x[0], ddof=1), np.std(self.x[1], ddof=1)
        df = n1 + n2 - 2
        pooled_std = np.sqrt(((n1 - 1) * std1 ** 2 + (n2 - 1) * std2 ** 2) / df)
        self.t_value = (mean1 - mean2) / (pooled_std * np.sqrt(1 / n1 + 1 / n2))
        self.p_value = self._p_value(df)

    def run_test(self):
        '''Run the t-test and store t_value, p_value and reject_null on the instance'''
        if not self.is_two_sample_test:
            self.t_test_one_sample()
        else:
            self.t_test_two_sample()
        # decision at the requested significance level
        self.reject_null = bool(self.p_value < self.alpha)

    def print_results(self):
        '''Print the result of the statistical test'''
        print(f'\n---------------------------------')
        print(f'Student t-test result')
        print(f'---------------------------------')
        # the value after ± is np.std (numpy default, ddof=0) divided by sqrt(n)
        if self.is_two_sample_test:
            print(f'sample mean1 = {np.mean(self.x[0]):.2f} ± {np.std(self.x[0])/np.sqrt(len(self.x[0])):.2f} (n={len(self.x[0])})')
            print(f'sample mean2 = {np.mean(self.x[1]):.2f} ± {np.std(self.x[1])/np.sqrt(len(self.x[1])):.2f} (n={len(self.x[1])})')
        else:
            print(f'sample mean  = {np.mean(self.x):.2f} ± {np.std(self.x)/np.sqrt(len(self.x)):.2f} (n={len(self.x)})')
        print(f't            = {self.t_value:.2f}')
        print(f'p            = {self.p_value:.4f}')


In [31]:
# Exercise the classes on the assignment data (code from here on is not final)

# one-sample Z-test: 52 observations against a hypothesized mean of 4.5, known sigma of 0.6
x = [
    4.186, 4.439, 4.781, 4.388, 4.947, 4.853, 4.889, 4.682,
    4.428, 4.533, 4.557, 4.761, 4.491, 4.334, 4.83, 4.268,
    4.68, 4.437, 5.382, 5.111, 5.096, 5.232, 5.033, 5.57,
    4.474, 4.789, 4.725, 4.84, 4.817, 4.438, 4.754, 4.966,
    4.285, 4.482, 4.396, 4.418, 4.514, 5.383, 5.264, 4.309,
    5.058, 4.392, 4.788, 4.934, 4.967, 4.554, 4.42, 5.0,
    5.126, 5.082, 4.944, 4.658,
]
ztest_result = Ztest(x=x, sigma=0.6, test_value=4.5, alpha=0.05, H1='unequal')

# independent two-sample t-test: new flavor against the best-selling flavor
new_flavor = [
    8, 7, 9, 6, 7, 8, 9, 7, 8, 7,
    6, 8, 7, 9, 8, 7, 6, 9, 8, 7,
]
best_selling_flavor = [
    6, 7, 8, 6, 7, 6, 7, 6, 8, 7,
    6, 7, 6, 8, 7, 6, 7, 8, 6, 7,
]
ttest = Student_ttest(
    x=[new_flavor, best_selling_flavor],
    alpha=0.05,
    H1='unequal',
    paired=False,
)

# paired t-test: the same 20 subjects measured before and after
before = [
    15, 18, 12, 10, 17, 16, 12, 14, 19, 18,
    11, 13, 16, 17, 19, 14, 16, 13, 15, 12,
]
after = [
    18, 20, 15, 13, 19, 18, 14, 16, 21, 20,
    14, 16, 19, 20, 22, 16, 18, 15, 17, 14,
]
# note: ttest is rebound here, so the independent-test result above is no longer reachable
ttest = Student_ttest(
    x=[before, after],
    alpha=0.05,
    H1='unequal',
    paired=True,
)



---------------------------------
Z-test result
---------------------------------
sample mean = 4.74 ± 0.05 (n=52)
sigma       = 0.60
z           = 2.93
p           = 0.0034

---------------------------------
Student t-test result
---------------------------------
sample mean1 = 7.55 ± 0.22 (n=20)
sample mean2 = 6.80 ± 0.17 (n=20)
t            = 2.66
p            = 0.0113

---------------------------------
Student t-test result
---------------------------------
sample mean  = 2.40 ± 0.11 (n=20)
t            = 21.35
p            = 0.0000
