In [1]:
import numpy as np 
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols

# $2^k$ Factorial Design and 2-level Fractional Factorial Designs

A $2^3$ factorial design was used to develop a nitride etch process on a single-wafer plasma etching tool. The design
factors are the gap between the electrodes, the gas flow ($C_2F_6$ is used as the reactant gas), and the RF power
applied to the cathode. Each factor is run at two levels, and the design is replicated twice. The response variable is the etch rate for silicon nitride (Å/min). Do the following:

- Calculate the factor effects, sum of squares, mean squares, F-statistics and p-values. 
- Calculate sum of squares, mean squares, F-statistics and p-values for main effects, two-way interactions and three-way interactions.
- Calculate the F-statistic for the model and evaluate the model using $R^2$.
- Decide which factors are significant, and define a regression model with the significant factors.

Factors are the following:
- A: the gap between the electrodes
- B: the gas flow
- C: the RF power

In [2]:
# Observed etch rates for the two replicates, one entry per run, listed in
# standard order: (1), a, b, ab, c, ac, bc, abc.
replicate_1 = [550, 669, 633, 642, 1037, 749, 1075, 729]
replicate_2 = [604, 650, 601, 635, 1052, 868, 1063, 860]

# Design table: coded levels (-1 / +1), the observations, the per-run total
# over both replicates, and the natural (uncoded) level of each factor.
data = {
    "Run": list(range(1, 9)),
    "A": [-1, 1] * 4,
    "B": [-1, -1, 1, 1] * 2,
    "C": [-1] * 4 + [1] * 4,
    "Replicate 1": replicate_1,
    "Replicate 2": replicate_2,
    "Total": [first + second for first, second in zip(replicate_1, replicate_2)],
    "Factor_level_A": [0.80, 1.20] * 4,
    "Factor_level_B": [125, 125, 200, 200] * 2,
    "Factor_level_C": [275] * 4 + [325] * 4,
}

df = pd.DataFrame(data)
df


Unnamed: 0,Run,A,B,C,Replicate 1,Replicate 2,Total,Factor_level_A,Factor_level_B,Factor_level_C
0,1,-1,-1,-1,550,604,1154,0.8,125,275
1,2,1,-1,-1,669,650,1319,1.2,125,275
2,3,-1,1,-1,633,601,1234,0.8,200,275
3,4,1,1,-1,642,635,1277,1.2,200,275
4,5,-1,-1,1,1037,1052,2089,0.8,125,325
5,6,1,-1,1,749,868,1617,1.2,125,325
6,7,-1,1,1,1075,1063,2138,0.8,200,325
7,8,1,1,1,729,860,1589,1.2,200,325


In [3]:
# number of replicates
n = 2
# number of factors
k = 3
# number of observations: 2**k treatment combinations, each run n times
# (derived from k so it stays consistent with the factor count above)
N = (2**k) * n
N

16

### Calculate Factor Effects

In [4]:
# Totals over both replicates for each treatment combination, read from rows
# 0..7 of `df` in standard order: (1), a, b, ab, c, ac, bc, abc.
# `treatment_comb_1` is the run usually written "(1)" (all factors low).
(
    treatment_comb_1,
    a,
    b,
    ab,
    c,
    ac,
    bc,
    abc,
) = df.loc[0:7, "Total"].to_numpy()

In [5]:
# Main effect of A (gap): totals of the runs with A high minus totals of the
# runs with A low, averaged over the 4 * n observations on each side.
A = (1 / (4 * n)) * ((a + ab + ac + abc) - (treatment_comb_1 + b + c + bc))
A

-101.625

In [6]:
# Main effect of B (gas flow): totals of the runs with B high minus totals of
# the runs with B low, averaged over the 4 * n observations on each side.
B = (1 / (4 * n)) * ((b + ab + bc + abc) - (treatment_comb_1 + a + c + ac))
B

7.375

In [7]:
# Main effect of C (RF power): totals of the runs with C high minus totals of
# the runs with C low, averaged over the 4 * n observations on each side.
C = (1 / (4 * n)) * ((c + ac + bc + abc) - (treatment_comb_1 + a + b + ab))
C

306.125

In [8]:
# AB interaction: runs where the coded product A*B is +1 minus runs where it
# is -1, scaled by 1 / (4 * n).
AB = (1 / (4 * n)) * ((treatment_comb_1 + ab + c + abc) - (a + b + ac + bc))
AB

-24.875

In [9]:
# AC interaction: runs where the coded product A*C is +1 minus runs where it
# is -1, scaled by 1 / (4 * n).
AC = (1 / (4 * n)) * ((treatment_comb_1 + b + ac + abc) - (a + ab + c + bc))
AC

-153.625

In [10]:
# BC interaction: runs where the coded product B*C is +1 minus runs where it
# is -1, scaled by 1 / (4 * n).
BC = (1 / (4 * n)) * ((treatment_comb_1 + a + bc + abc) - (b + ab + c + ac))
BC

-2.125

In [11]:
# ABC interaction: runs where the coded product A*B*C is +1 minus runs where
# it is -1, scaled by 1 / (4 * n).
ABC = (1 / (4 * n)) * ((a + b + c + abc) - (treatment_comb_1 + ab + ac + bc))
ABC

5.625

The largest effects are for power (C = 306.125), gap (A = -101.625), and the power–gap interaction (AC = -153.625).

### Calculate Sum of Squares

In [12]:
# Sum of squares of an effect in a replicated 2^k design: contrast**2 / N,
# where contrast = effect * n * 2**(k - 1) and N = n * 2**k.
# The multiplier is n * 2**(k - 1) (= 8 here), which is not 2**k in general.
SS_A = ((n * 2 ** (k - 1)) * A) ** 2 / N
SS_A

41310.5625

In [13]:
# SS_B = contrast_B**2 / N, with contrast_B = B * n * 2**(k - 1)
# (the multiplier is n * 2**(k - 1), not 2**k in general).
SS_B = ((n * 2 ** (k - 1)) * B) ** 2 / N
SS_B

217.5625

In [14]:
# SS_C = contrast_C**2 / N, with contrast_C = C * n * 2**(k - 1)
# (the multiplier is n * 2**(k - 1), not 2**k in general).
SS_C = ((n * 2 ** (k - 1)) * C) ** 2 / N
SS_C

374850.0625

In [15]:
# SS_AB = contrast_AB**2 / N, with contrast_AB = AB * n * 2**(k - 1)
# (the multiplier is n * 2**(k - 1), not 2**k in general).
SS_AB = ((n * 2 ** (k - 1)) * AB) ** 2 / N
SS_AB

2475.0625

In [16]:
# SS_AC = contrast_AC**2 / N, with contrast_AC = AC * n * 2**(k - 1)
# (the multiplier is n * 2**(k - 1), not 2**k in general).
SS_AC = ((n * 2 ** (k - 1)) * AC) ** 2 / N
SS_AC

94402.5625

In [17]:
# SS_BC = contrast_BC**2 / N, with contrast_BC = BC * n * 2**(k - 1)
# (the multiplier is n * 2**(k - 1), not 2**k in general).
SS_BC = ((n * 2 ** (k - 1)) * BC) ** 2 / N
SS_BC

18.0625

In [18]:
# SS_ABC = contrast_ABC**2 / N, with contrast_ABC = ABC * n * 2**(k - 1)
# (the multiplier is n * 2**(k - 1), not 2**k in general).
SS_ABC = ((n * 2 ** (k - 1)) * ABC) ** 2 / N
SS_ABC

126.5625

In [19]:
# Model sum of squares: the seven effect sums of squares added together
# (three main effects, three two-way interactions, one three-way interaction).
SS_M = sum((SS_A, SS_B, SS_C, SS_AB, SS_AC, SS_BC, SS_ABC))
SS_M

513400.4375

In [20]:
# Uncorrected (raw) sum of squares: every individual observation from both
# replicate columns squared and added up.
overall_sum_squares_observations = (
    df[["Replicate 1", "Replicate 2"]].pow(2).to_numpy().sum()
)
overall_sum_squares_observations

10167789

In [21]:
# Correction term for the mean: (grand total of all observations)**2 / N.
overall_sum_squares_total = df["Total"].sum() ** 2 / N
overall_sum_squares_total

9636368.0625

In [22]:
# Total (corrected) sum of squares: the raw sum of squared observations minus
# the correction term (grand total)**2 / N.
SST = overall_sum_squares_observations - overall_sum_squares_total
SST

531420.9375

In [23]:
# Error (residual) sum of squares: the part of the total sum of squares that
# is not accounted for by the seven effects summed in SS_M.
SSE = SST - SS_M
SSE

18020.5

In [24]:
# Degrees of freedom.
# Every effect of a two-level factor (or product of such factors) has 1 df.
df_A = df_B = df_C = df_AB = df_AC = df_BC = df_ABC = 1

# Total df is one less than the number of observations; whatever the seven
# effects do not use is left for the error term.
df_T = N - 1
df_E = df_T - sum((df_A, df_B, df_C, df_AB, df_AC, df_BC, df_ABC))
df_E

8

In [25]:
# Mean square = sum of squares / degrees of freedom, for each effect and for
# the error term.
MS_A, MS_B, MS_C = SS_A / df_A, SS_B / df_B, SS_C / df_C
MS_AB, MS_AC, MS_BC = SS_AB / df_AB, SS_AC / df_AC, SS_BC / df_BC
MS_ABC = SS_ABC / df_ABC
MSE = SSE / df_E

# One value per line, in the order A, B, C, AB, AC, BC, ABC, error.
print(MS_A, MS_B, MS_C, MS_AB, MS_AC, MS_BC, MS_ABC, MSE, sep="\n")

41310.5625
217.5625
374850.0625
2475.0625
94402.5625
18.0625
126.5625
2252.5625


In [26]:
# F statistic of each effect: its mean square relative to the error mean square.
F_A, F_B, F_C = MS_A / MSE, MS_B / MSE, MS_C / MSE
F_AB, F_AC, F_BC = MS_AB / MSE, MS_AC / MSE, MS_BC / MSE
F_ABC = MS_ABC / MSE

# One value per line, in the order A, B, C, AB, AC, BC, ABC.
print(F_A, F_B, F_C, F_AB, F_AC, F_BC, F_ABC, sep="\n")

18.339363502677507
0.09658444549263338
166.41050470297716
1.0987763935517882
41.908964790100164
0.008018645431591798
0.05618601037707056


In [27]:

# p-value of each effect: upper-tail probability of its F statistic under an
# F distribution with (effect df, error df) degrees of freedom.
# The survival function sf() is used instead of 1 - cdf() because it keeps
# precision when the p-value is very small.
p_A = st.f.sf(F_A, df_A, df_E)
p_B = st.f.sf(F_B, df_B, df_E)
p_C = st.f.sf(F_C, df_C, df_E)
p_AB = st.f.sf(F_AB, df_AB, df_E)
p_AC = st.f.sf(F_AC, df_AC, df_E)
p_BC = st.f.sf(F_BC, df_BC, df_E)
p_ABC = st.f.sf(F_ABC, df_ABC, df_E)
print(p_A)
print(p_B)
print(p_C)
print(p_AB)
print(p_AC)
print(p_BC)
print(p_ABC)

0.0026786104709705105
0.7639107082242295
1.2332619993005167e-06
0.3251678561221075
0.00019339576675891923
0.9308485642080176
0.8185860578539552


In [28]:
# Main Effects
# Pooled test of A, B and C together: their sums of squares are added and
# tested on 3 degrees of freedom against the error mean square.
SS_Main_Effects = SS_A + SS_B + SS_C
df_Main_Effects = 3
MS_Main_Effects = SS_Main_Effects / df_Main_Effects
F_Main_Effects = MS_Main_Effects / MSE
# sf() (upper-tail probability) is more accurate than 1 - cdf() for small p-values.
p_Main_Effects = st.f.sf(F_Main_Effects, df_Main_Effects, df_E)
print("SS Main Effects: ", SS_Main_Effects)
print("MS Main Effects: ", MS_Main_Effects)
print("F- Main Effects: ", F_Main_Effects)
print("p-value Main Effects: ", p_Main_Effects)


SS Main Effects:  416378.1875
MS Main Effects:  138792.72916666666
F- Main Effects:  61.615484217049094
p-value Main Effects:  7.166093880295321e-06


In [29]:
# Two-Way Interaction Effects
# Pooled test of AB, AC and BC together: their sums of squares are added and
# tested on 3 degrees of freedom against the error mean square.
SS_Two_Way_Interactions = SS_AB + SS_AC + SS_BC
df_Two_Way_Interactions = 3
MS_Two_Way_Interactions = SS_Two_Way_Interactions / df_Two_Way_Interactions
F_Two_Way_Interactions = MS_Two_Way_Interactions / MSE
# sf() (upper-tail probability) is more accurate than 1 - cdf() for small p-values.
p_Two_Way_Interactions = st.f.sf(F_Two_Way_Interactions, df_Two_Way_Interactions, df_E)
print("SS Two-Way Interaction Effects: ", SS_Two_Way_Interactions)
print("MS Two-Way Interaction Effects: ", MS_Two_Way_Interactions)
print("F- Two-Way Interaction Effects: ", F_Two_Way_Interactions)
print("p-value Two-Way Interaction Effects: ", p_Two_Way_Interactions)

SS Two-Way Interaction Effects:  96895.6875
MS Two-Way Interaction Effects:  32298.5625
F- Two-Way Interaction Effects:  14.338586609694515
p-value Two-Way Interaction Effects:  0.0013915191459764609


In [30]:
# Three-Way Interaction Effects
# Only one three-way interaction (ABC) exists in a 2^3 design, so this group
# has a single degree of freedom.
SS_Three_Way_Interactions = SS_ABC
df_Three_Way_Interactions = 1
MS_Three_Way_Interactions = SS_Three_Way_Interactions / df_Three_Way_Interactions
F_Three_Way_Interactions = MS_Three_Way_Interactions / MSE
# sf() (upper-tail probability) is more accurate than 1 - cdf() for small p-values.
p_Three_Way_Interactions = st.f.sf(F_Three_Way_Interactions, df_Three_Way_Interactions, df_E)
# Labels name the three-way interaction so the output is not mistaken for the
# two-way interaction results.
print("SS Three-Way Interaction Effects: ", SS_Three_Way_Interactions)
print("MS Three-Way Interaction Effects: ", MS_Three_Way_Interactions)
print("F- Three-Way Interaction Effects: ", F_Three_Way_Interactions)
print("p-value Three-Way Interaction Effects: ", p_Three_Way_Interactions)

SS Two-Way Interaction Effects:  126.5625
MS Two-Way Interaction Effects:  126.5625
F- Two-Way Interaction Effects:  0.05618601037707056
p-value Two-Way Interaction Effects:  0.8185860578539552


In [31]:
# Sum of squares for the model --> SS_M (sum of the seven effect sums of squares).
# Calculate the test statistic for the overall model: model mean square
# divided by the error mean square.
df_model = 7 # degrees of freedom of the model: one per effect (A, B, C, AB, AC, BC, ABC)
MS_M = SS_M / df_model
F_M = MS_M / MSE
F_M

32.55977121294399

In [32]:
# Evaluate the model using R^2 and R^2_adj (adjusted R^2).
# R^2 is the fraction of the total sum of squares captured by the model;
# the adjusted version compares mean squares instead, so it accounts for the
# degrees of freedom used by the model terms.
mean_square_error = SSE / df_E
mean_square_total = SST / df_T
R_square = SS_M / SST
R_square_adj = 1 - (mean_square_error / mean_square_total)
print("R^2 =", R_square)
print("R^2_adj =", R_square_adj)


R^2 = 0.9660899698743992
R^2_adj = 0.9364186935144986


### Regression Model

We already have $p$-values for each main effect and each interaction effect. The effects with $p<0.05$ are significant for the model. These are A, C and AC.

Therefore, the model is
$$ y = \beta_0 + \beta_1 x_1 + \beta_3 x_3 + \beta_{13} x_1 x_3 $$

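Substituting the estimates into the model above, we get the fitted model below. Each regression coefficient is one half of the corresponding effect estimate, because a coded variable changes by two units when it moves from $-1$ to $+1$; the intercept $\beta_0$ is estimated by the grand average of all 16 observations, $12417/16 = 776.0625$.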

$$ y = \beta_0 + (\frac{-101.625}{2}) x_1 + (\frac{306.125}{2}) x_3 + (\frac{-153.625}{2}) x_1 x_3 $$