In [2]:
from pyDOE2 import *
import numpy as np
import pandas as pd

import pandas
import researchpy as rp
import seaborn as sns

import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.stats.multicomp
import itertools

In [3]:
# example
# factors = ['a','b','c','d']
# data = ff2n(4)

def generate_factors(data, factors):
    """Build the treatment-combination label for every run of a two-level design.

    For each row of ``data`` (taken in row order), the names of the factors
    whose coded level equals 1 are concatenated in the order given by
    ``factors``. A run with no factor at level 1 gets the empty string.

    Parameters
    ----------
    data : pandas.DataFrame
        Design matrix holding one column per entry of ``factors``.
    factors : sequence of str
        Column names to inspect, in the order they should appear in a label.

    Returns
    -------
    list of str
        One label per row of ``data``.
    """
    # Materialise every column as a plain list so rows are addressed by
    # position. ``data[name][i]`` is a label lookup: it picks the wrong row
    # (or fails) as soon as the frame no longer carries the default 0..N-1
    # index, e.g. after the index has been replaced by the run labels.
    levels = [(name, list(data[name])) for name in factors]
    return [
        "".join(name for name, column in levels if column[row] == 1)
        for row in range(len(data))
    ]

def generate_factor_effect(data, factor_combination):
    """Return the table of sign columns for the given terms.

    Each entry of ``factor_combination`` is a string of factor letters. Its
    column is the element-wise product of the ``data`` columns named by those
    letters; the empty string yields a column of ones.

    Parameters
    ----------
    data : pandas.DataFrame
        Design matrix with one column per factor letter.
    factor_combination : iterable of str
        Terms to build, e.g. ``['', 'a', 'b', 'ab']``.

    Returns
    -------
    pandas.DataFrame
        One column per term, in the order given, with a fresh 0..N-1 index.
    """
    n_runs = len(data)

    def _sign_column(term):
        # Start from all ones and multiply in each factor's coded levels.
        signs = [1] * n_runs
        for factor in term:
            signs = signs * data[factor]
        return list(signs)

    effects = pd.DataFrame()
    for term in factor_combination:
        effects[term] = _sign_column(term)
    return effects

In [4]:
# Experiment layout: k two-level factors, n replicates of every run.
factor_names = ['a','b','c','d']
k = 4
n = 2

# Observed responses, one list per replicate, listed in the row order of ff2n(k).
replicate1 = [90,74,81,83,77,81,88,73,98,72,87,85,99,79,87,80]
replicate2 = [93,78,85,80,78,80,82,70,95,76,83,86,90,75,84,80]

# Full-factorial design matrix with the replicate responses attached as r1 / r2.
data = pd.DataFrame(ff2n(k), columns=factor_names).assign(
    r1=replicate1,
    r2=replicate2,
)

In [5]:
# Per-run total of the two replicate responses, and its mean over the n replicates.
data['sum'] = data[['r1', 'r2']].sum(axis=1)
data['avg'] = data['sum'].div(n)

In [6]:
# Label every run by the factors that sit at level 1 ('' when none do).
factor_combination = generate_factors(data, factor_names)

In [7]:
# Display the generated run labels.
factor_combination

['',
 'a',
 'b',
 'ab',
 'c',
 'ac',
 'bc',
 'abc',
 'd',
 'ad',
 'bd',
 'abd',
 'cd',
 'acd',
 'bcd',
 'abcd']

In [68]:
# Index the runs by their treatment-combination label. From here on the index
# holds strings, so an integer key such as data['sum'][0] is no longer a label.
data.index = factor_combination

In [69]:
# Display the design matrix with responses, now indexed by run label.
data

Unnamed: 0,a,b,c,d,r1,r2,sum,avg
,-1.0,-1.0,-1.0,-1.0,90,93,183,91.5
a,1.0,-1.0,-1.0,-1.0,74,78,152,76.0
b,-1.0,1.0,-1.0,-1.0,81,85,166,83.0
ab,1.0,1.0,-1.0,-1.0,83,80,163,81.5
c,-1.0,-1.0,1.0,-1.0,77,78,155,77.5
ac,1.0,-1.0,1.0,-1.0,81,80,161,80.5
bc,-1.0,1.0,1.0,-1.0,88,82,170,85.0
abc,1.0,1.0,1.0,-1.0,73,70,143,71.5
d,-1.0,-1.0,-1.0,1.0,98,95,193,96.5
ad,1.0,-1.0,-1.0,1.0,72,76,148,74.0


In [88]:
# Sign table: one column per term, each the product of its factors' coded levels.
factor_effect = generate_factor_effect(data, factor_combination)

In [90]:
# Display the sign table.
factor_effect

Unnamed: 0,Unnamed: 1,a,b,ab,c,ac,bc,abc,d,ad,bd,abd,cd,acd,bcd,abcd
0,1,-1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0
1,1,1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,-1.0
2,1,-1.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0,1.0,-1.0
3,1,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,1.0
4,1,-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0
5,1,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,1.0,1.0
6,1,-1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0
7,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
8,1,-1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0
9,1,1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0


In [95]:
# Contrast of a term = sum over runs of (sign of the term in that run) * (run total).
#
# The computation is done on positional NumPy arrays: `data` is indexed by run
# label while `factor_effect` keeps a 0..N-1 index, so looking values up with an
# integer key (data['sum'][j]) relies on pandas' deprecated positional fallback
# for non-integer indexes.
run_totals = data['sum'].to_numpy()
sign_matrix = factor_effect[factor_combination].to_numpy()  # shape: (runs, terms)
contrasts = (sign_matrix.T @ run_totals).tolist()
# Row r of `data` receives the contrast of term factor_combination[r].
data['contrast'] = contrasts

In [109]:
# Effect estimate = contrast / (n * 2^(k-1)).
# NOTE(review): the row labelled '' has an all-ones sign column, so its value
# here is derived from the grand total and is not a factor effect.
data['effect'] = data['contrast'].div(n * 2 ** (k - 1))

In [110]:
# Sum of squares of each term = contrast^2 / (n * 2^k).
data['SS'] = data['contrast'].pow(2).div(n * 2 ** k)

In [113]:
# Every term of a two-level (2^k) factorial is assigned one degree of freedom.
data['df'] = 1

In [114]:
# Mean square of each term (SS / df), stored under the column name 'MSE'.
data['MSE'] = data['SS'].div(data['df'])

In [115]:
# Display the table with the contrast, effect, SS, df and MSE columns added.
data

Unnamed: 0,a,b,c,d,r1,r2,sum,avg,contrast,effect,SS,df,MSE
,-1.0,-1.0,-1.0,-1.0,90,93,183,91.5,2649.0,165.5625,219287.53125,1,219287.53125
a,1.0,-1.0,-1.0,-1.0,74,78,152,76.0,-145.0,-9.0625,657.03125,1,657.03125
b,-1.0,1.0,-1.0,-1.0,81,85,166,83.0,-21.0,-1.3125,13.78125,1,13.78125
ab,1.0,1.0,-1.0,-1.0,83,80,163,81.5,65.0,4.0625,132.03125,1,132.03125
c,-1.0,-1.0,1.0,-1.0,77,78,155,77.5,-43.0,-2.6875,57.78125,1,57.78125
ac,1.0,-1.0,1.0,-1.0,81,80,161,80.5,11.0,0.6875,3.78125,1,3.78125
bc,-1.0,1.0,1.0,-1.0,88,82,170,85.0,-9.0,-0.5625,2.53125,1,2.53125
abc,1.0,1.0,1.0,-1.0,73,70,143,71.5,-83.0,-5.1875,215.28125,1,215.28125
d,-1.0,-1.0,-1.0,1.0,98,95,193,96.5,63.0,3.9375,124.03125,1,124.03125
ad,1.0,-1.0,-1.0,1.0,72,76,148,74.0,-35.0,-2.1875,38.28125,1,38.28125
