In [1]:
import pandas as pd

In [3]:
def dframe_to_generic_2way_anova_table(dframe, factor1, factor2, value):
    name1, name2 = factor1, factor2
    vals1 = dframe[name1].unique()
    vals2 = dframe[name2].unique()
    nrows = len(vals1)
    ncols = len(vals2)
    table = []
    for n1 in vals1:
        table.append([])
        for n2 in vals2:
            condition = (dframe[name1] == n1) & (dframe[name2] == n2)
            table[-1].append(list(dframe[condition][value]))
    return table, nrows, ncols


def calculate_2way_anova(generic_2way_anova_table):
    
    table = generic_2way_anova_table

    def get_means_table(table):
        n = len(table)
        k = len(table[0])
        result = [[0 for i in range(k+1)] for i in range(n+1)]
        sum_total = 0
        len_total = 0
        for i in range(n):
            sum_row = 0
            len_row = 0
            for j in range(k):
                array = table[i][j]
                sum_row += sum(array)
                len_row += len(array)
                result[i][j] = sum(array)/len(array)
            result[i][k] = sum_row/len_row
            sum_total += sum_row
            len_total += len_row
        for j in range(k):
            sum_row = 0
            len_row = 0
            for i in range(n):
                array = table[i][j]
                sum_row += sum(array)
                len_row += len(array)
            result[n][j] = sum_row/len_row
        result[n][k] = sum_total/len_total
        return result


    means_table = get_means_table(table)
    a = len(table)
    b = len(table[0])
    N = sum(len(table[i][j]) for i in range(a) for j in range(b))


    ssa, ssb, ssab, sse, sst  = 0, 0, 0, 0, 0
    for i in range(a):
        for j in range(b):
            ssa  += len(table[i][j])*(means_table[i][b] - means_table[a][b])**2
            ssb  += len(table[i][j])*(means_table[a][j] - means_table[a][b])**2
            ssab += len(table[i][j])*(means_table[i][j] - means_table[i][b] - means_table[a][j] + means_table[a][b])**2
            for y in table[i][j]:
                sse += (y - means_table[i][j])**2
                sst += (y - means_table[a][b])**2
     
    mean_sse = sse/(N - a*b);
    result = {
        'factor1': {
            'Df': a-1,
            'Sum sq': ssa,
            'Mean sq': ssa/(a-1),
            'F value': ssa/(a-1)/mean_sse
        },
        'factor2': {
            'Df': b-1,
            'Sum sq': ssb,
            'Mean sq': ssb/(b-1),
            'F value': ssb/(b-1)/mean_sse
        },
        'interaction': {
            'Df': (a-1)*(b-1),
            'Sum sq': ssab,
            'Mean sq': ssab/((a-1)*(b-1)),
            'F value': ssab/((a-1)*(b-1))/mean_sse
        },
        'error': {
            'Df': N - a*b,
            'Sum sq': sst,
            'Mean sq': mean_sse,
            'F value': ''
        },
        'total': {
            'Df': N - 1,
            'Sum sq': sse,
            'Mean sq': sst/(N - 1),
            'F value': ''
        }
    }

    return result


def two_way_anova(dframe, factor1, factor2, value):
    
    table, a, b = dframe_to_generic_2way_anova_table(dframe, factor1, factor2, value)
    res = calculate_2way_anova(table)
    
    results = pd.DataFrame()
    
    for r in res.keys():
        for k,v in res[r].items(): 
            r = factor1 if r == 'factor1' else r
            r = factor2 if r == 'factor2' else r
            results.loc[r, k] = v
    
    return results

In [4]:
dframe = pd.read_csv('atherosclerosis.csv')
two_way_anova(dframe,'age','dose','expr')

Unnamed: 0,Df,Sum sq,Mean sq,F value
age,1.0,197.452754,197.452754,7.44984
dose,1.0,16.912241,16.912241,0.638094
interaction,1.0,0.927077,0.927077,0.0349784
error,60.0,1805.549496,26.50429,
total,63.0,1590.257424,28.659516,


In [5]:
dframe = pd.read_csv('birds.csv')
two_way_anova(dframe,'hormone','sex','var4')

Unnamed: 0,Df,Sum sq,Mean sq,F value
hormone,1.0,0.847472,0.847472,0.0865281
sex,1.0,0.119762,0.119762,0.0122279
interaction,1.0,89.483384,89.483384,9.13639
error,60.0,678.101013,9.794173,
total,63.0,587.650394,10.763508,
