# Python Implementation of Hotelling's $T^2$ Test

$$ T^2 = \frac{n_1 n_2}{n_1+n_2}\,(\bar{x}_1-\bar{x}_2)^T\, C^{-1}\, (\bar{x}_1-\bar{x}_2) $$

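where $\bar{x}_1$, $\bar{x}_2$ are the sample mean vectors, $n_1$, $n_2$ the sample sizes, $C_1$, $C_2$ the sample covariance matrices of the two groups, and $C$ is the pooled covariance matrix: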

$$ C = \frac{(n_1-1)C_1 + (n_2-1)C_2}{(n_1+n_2-2)}$$

In [2]:
##load packages
import pandas as pd
import numpy as np
from scipy.stats import f

In [4]:
def HotellingsT2(Group1, Group2, alpha):
    """Run a two-sample Hotelling's T-squared test on two groups.

    The statistic is

        T^2 = n1*n2 / (n1 + n2) * d^T C^{-1} d,

    where ``d`` is the difference of the column means and ``C`` is the
    pooled covariance matrix ``((n1-1)*C1 + (n2-1)*C2) / (n1 + n2 - 2)``.
    T^2 is converted to an F statistic with ``p`` and ``n1 + n2 - p - 1``
    degrees of freedom, ``p`` being the number of columns.

    Parameters
    ----------
    Group1, Group2 : pandas.DataFrame or 2-D array-like
        One row per observation, one column per variable.  Columns are
        matched by position.  A 1-D input is treated as a single variable.
    alpha : float
        Significance level the p-value is compared against.

    Returns
    -------
    tuple
        ``(df1, df2, F, p_value, reject)`` where ``df1`` and ``df2`` are
        the F-distribution degrees of freedom, ``F`` the F statistic,
        ``p_value`` its upper-tail probability and ``reject`` is
        ``p_value < alpha``.

    Raises
    ------
    ValueError
        If an input is not 1-D or 2-D, if the groups have a different
        number of columns, or if ``n1 + n2 <= p + 1`` (the F test is then
        undefined).
    numpy.linalg.LinAlgError
        If the pooled covariance matrix is singular.
    """
    sample1 = np.asarray(Group1, dtype=float)
    sample2 = np.asarray(Group2, dtype=float)

    # A 1-D input is a single variable: turn it into an (n, 1) column.
    if sample1.ndim == 1:
        sample1 = sample1[:, np.newaxis]
    if sample2.ndim == 1:
        sample2 = sample2[:, np.newaxis]
    if sample1.ndim != 2 or sample2.ndim != 2:
        raise ValueError("Group1 and Group2 must be 1-D or 2-D")

    n1, p = sample1.shape
    n2 = sample2.shape[0]
    if sample2.shape[1] != p:
        raise ValueError(
            "Group1 and Group2 must have the same number of columns "
            "(got %d and %d)" % (p, sample2.shape[1])
        )

    # Degrees of freedom of the F test.
    df1 = p
    df2 = n1 + n2 - p - 1
    if df2 <= 0:
        raise ValueError(
            "not enough observations: need n1 + n2 > p + 1 "
            "(got n1=%d, n2=%d, p=%d)" % (n1, n2, p)
        )

    # Difference of the column means.
    mean_diff = sample1.mean(axis=0) - sample2.mean(axis=0)

    # Pooled covariance matrix.  np.cov returns a 0-d array when there is
    # a single variable, so force a (p, p) matrix.
    cov1 = np.atleast_2d(np.cov(sample1, rowvar=False))
    cov2 = np.atleast_2d(np.cov(sample2, rowvar=False))
    pooled_cov = ((n1 - 1) * cov1 + (n2 - 1) * cov2) / (n1 + n2 - 2)

    # d^T C^{-1} d via a linear solve rather than an explicit inverse.
    quad_form = mean_diff @ np.linalg.solve(pooled_cov, mean_diff)
    t_squared = (n1 * n2) / (n1 + n2) * quad_form

    F = df2 * t_squared / ((n1 + n2 - 2) * p)
    # Survival function instead of 1 - cdf: keeps precision for tiny
    # p-values that would otherwise round to exactly 0.
    p_value = f.sf(F, df1, df2)

    return df1, df2, F, p_value, (p_value < alpha)
    