In [3]:
from faker import Faker
import random as random
import pandas as pd
from scipy import stats
import numpy as np
import scipy.linalg as la
from scipy.stats import f

Create Fake names using Faker

In [4]:
fake = Faker()

In [5]:
fake.name()

'Angelica Williams'

In [6]:
names = []
for i in range(0, 50):
    aName = fake.name()
    names.append(aName)
    

Generate random data

In [38]:
a = random.choices(range(30), k = 50)
b = random.choices(range(60), k = 50)
Group = random.choices(range(1,3), k = 50)

In [39]:
data = {'Names':names, 'Group':Group, 'A':a, 'B': b}

In [40]:
df = pd.DataFrame(data)
df

Unnamed: 0,Names,Group,A,B
0,Jose Sutton,2,11,35
1,Eric Chambers,2,5,55
2,Eric Lee,1,20,54
3,Brian Brooks,1,28,56
4,Tiffany Mejia,1,12,21
5,Kimberly Bennett,2,24,35
6,Derrick Castillo,1,14,54
7,Mrs. Danielle Rogers MD,2,28,12
8,Diana Bender,1,10,2
9,Timothy Davis,2,29,30


In [32]:
df.describe()

Unnamed: 0,Group,A,B
count,50.0,50.0,50.0
mean,1.44,15.96,30.92
std,0.501427,8.587984,16.833931
min,1.0,0.0,4.0
25%,1.0,9.25,18.0
50%,1.0,17.0,31.0
75%,2.0,23.0,46.25
max,2.0,29.0,59.0


## Hotelling T2 test for Multivariate Hypothesis Testing

$$ T^2 = n_1n_2(\bar{x}_1-\bar{x}_2)^T*C^{-1}*\frac{(\bar{x}_1-\bar{x}_2)}{n_1+n_2}$$

Where:


$$ C = \frac{(n_1-1)C_1 + (n_2-1)C_2}{(n_1+n_2-2)}$$

Split into treatment groups 1 and 2

In [41]:
Group1 = df[df['Group'] == 1]
Group2 = df[df['Group'] == 2]

In [42]:
Group1 = Group1[["A","B"]]
Group2 = Group2[["A","B"]]

In [46]:
def HotellingsT2(Group1,Group2, alpha):
    """Takes two treatment groups and user defined alpha
    
    Returns the F-Statistic, p-value and a boolean
    for whether test result is significant"""
    n1 = len(Group1)
    n2 = len(Group2)
    Group1averages = []
    Group2averages = []
    xi = list(Group1.columns)
    yi = list(Group2.columns)
    
    #column averages Group1
    for i in xi:
        totals = sum(Group1[i])
        ave = totals/n1
        Group1averages.append(ave)
    
    for i in yi:
        totals = sum(Group2[i])
        ave = totals/n2
        Group2averages.append(ave)
        
    #covarince matrix
    cov1 = np.cov(Group1, rowvar = False)
    cov2 = np.cov(Group2, rowvar = False)
    C1 = (n1-1)*cov1
    C2 = (n2-1)*cov2
    pooledC = (C1 + C2) / (n1+n2-2)
        
    ##transpose
    xbar1 = np.array(Group1averages)
    xbar2 = np.array(Group2averages)
    Transpose = np.transpose((xbar1-xbar2))
    ##sample mean
    
    diff = (xbar1-xbar2)/(n1+n2)
    ##inverse
    inverse = np.linalg.inv(pooledC)
    a = (n1*n2)*Transpose
    b = np.matmul(a,inverse)
    T = np.matmul(b, diff)
    
    ##degrees of freedom, and F test
    df1 = len(xi)
    df2 = (n1+n2-len(xi)-1)
    F = df2 * T / ((n1 + n2-2)*len(xi))
    p_value = 1 - (f.cdf(F, df1, df2))
    
    return F, p_value, (p_value<alpha)
    
    

In [47]:
HotellingsT2(Group1, Group2, 0.05)

(0.4119650474303101, 0.6647157723624206, False)