In [1]:
import numpy as np 
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols

# $2^k$ Factorial Design and 2-level Fractional Factorial Designs

A $2^3$ factorial design was used to develop a nitride etch process on a single-wafer plasma etching tool. The design
factors are the gap between the electrodes, the gas flow ($C_2F_6$ is used as the reactant gas), and the RF power
applied to the cathode. Each factor is run at two levels, and the design is replicated twice. The response variable is the etch rate for silicon nitride (Å/m). Do the following:

- Calculate the factor effects, sum of squares, mean squares, F-statistics and p-values. 
- Calculate sum of squares, mean squares, F-statistics and p-values for main effects, two-way interactions and three-way interactions.
- Calculate the F-statistic for the model and evaluate the model using $R^2$.
- Decide which factors are significant, and define a regression model with the significant factors.

Factors are the following:
- A: the gap between the electrodes
- B: the gas flow
- C: the RF power

In [2]:
# 2^3 design in standard order: coded levels of A, B and C, the two replicate
# responses, their per-run total, and the natural (uncoded) factor settings.
data = pd.DataFrame(
    {
        "Run": list(range(1, 9)),
        # A alternates every run, B every two runs, C every four runs.
        "A": [-1, 1] * 4,
        "B": [-1, -1, 1, 1] * 2,
        "C": [-1] * 4 + [1] * 4,
        "Replicate 1": [550, 669, 633, 642, 1037, 749, 1075, 729],
        "Replicate 2": [604, 650, 601, 635, 1052, 868, 1063, 860],
        "Total": [1154, 1319, 1234, 1277, 2089, 1617, 2138, 1589],
        "Factor_level_A": [0.80, 1.20] * 4,
        "Factor_level_B": [125, 125, 200, 200] * 2,
        "Factor_level_C": [275] * 4 + [325] * 4,
    }
)
data


Unnamed: 0,Run,A,B,C,Replicate 1,Replicate 2,Total,Factor_level_A,Factor_level_B,Factor_level_C
0,1,-1,-1,-1,550,604,1154,0.8,125,275
1,2,1,-1,-1,669,650,1319,1.2,125,275
2,3,-1,1,-1,633,601,1234,0.8,200,275
3,4,1,1,-1,642,635,1277,1.2,200,275
4,5,-1,-1,1,1037,1052,2089,0.8,125,325
5,6,1,-1,1,749,868,1617,1.2,125,325
6,7,-1,1,1,1075,1063,2138,0.8,200,325
7,8,1,1,1,729,860,1589,1.2,200,325


In [None]:
# Replicates per treatment combination.
n = 2
# Total observations: 2^3 treatment combinations, each run n times.
N = n * 2**3
N

16

### Calculate Factor Effects

In [3]:
# Treatment Combinations
treatment_comb_1 = data.loc[0, "Total"]
a = data.loc[1, "Total"]
b = data.loc[2, "Total"]
ab = data.loc[3, "Total"]
c = data.loc[4, "Total"]
ac = data.loc[5, "Total"]
bc = data.loc[6, "Total"]
abc = data.loc[7, "Total"]

In [None]:
# Main effect of A: contrast of the treatment totals (A high minus A low)
# divided by n * 2^(k-1) = 4n.
A = (a + ab + ac + abc - treatment_comb_1 - b - c - bc) / (4 * n)
A

In [None]:
# Main effect of B: contrast of the treatment totals (B high minus B low)
# divided by n * 2^(k-1) = 4n.
B = (b + ab + bc + abc - treatment_comb_1 - a - c - ac) / (4 * n)
B

In [None]:
# Main effect of C: contrast of the treatment totals (C high minus C low)
# divided by n * 2^(k-1) = 4n.
C = (c + ac + bc + abc - treatment_comb_1 - a - b - ab) / (4 * n)
C

In [None]:
# AB interaction: each total is signed by the product of its coded A and B
# levels (+ when A and B are at the same level), then divided by 4n.
AB = (treatment_comb_1 + ab + c + abc - a - b - ac - bc) / (4 * n)
AB

In [None]:
# AC interaction: each total is signed by the product of its coded A and C
# levels (+ when A and C are at the same level), then divided by 4n.
AC = (treatment_comb_1 + b + ac + abc - a - ab - c - bc) / (4 * n)
AC

In [None]:
# BC interaction: each total is signed by the product of its coded B and C
# levels (+ when B and C are at the same level), then divided by 4n.
BC = (treatment_comb_1 + a + bc + abc - b - ab - c - ac) / (4 * n)
BC

In [None]:
# ABC interaction: each total is signed by the product of its coded A, B and C
# levels (+ when an odd number of factors is high), then divided by 4n.
ABC = (a + b + c + abc - treatment_comb_1 - ab - ac - bc) / (4 * n)
ABC

### Calculate Sum of Squares Using Factor Effects

$$ SS_i = \frac{(Contrast_i)^2}{\text{Number of Observations}} = \frac{\text{Number of Observations}}{4}\,(Effect_i)^2 $$

In [None]:
# Sum of squares of a single-df effect: Contrast^2 / N, which equals
# N * Effect^2 / 4 because Contrast = Effect * N / 2.
SS_A = N * A**2 / 4
SS_A

In [None]:
# Sum of squares of a single-df effect: Contrast^2 / N, which equals
# N * Effect^2 / 4 because Contrast = Effect * N / 2.
SS_B = N * B**2 / 4
SS_B

In [None]:
# Sum of squares of a single-df effect: Contrast^2 / N, which equals
# N * Effect^2 / 4 because Contrast = Effect * N / 2.
SS_C = N * C**2 / 4
SS_C

In [None]:
# Sum of squares of a single-df effect: Contrast^2 / N, which equals
# N * Effect^2 / 4 because Contrast = Effect * N / 2.
SS_AB = N * AB**2 / 4
SS_AB

In [None]:
# Sum of squares of a single-df effect: Contrast^2 / N, which equals
# N * Effect^2 / 4 because Contrast = Effect * N / 2.
SS_AC = N * AC**2 / 4
SS_AC

In [None]:
# Sum of squares of a single-df effect: Contrast^2 / N, which equals
# N * Effect^2 / 4 because Contrast = Effect * N / 2.
SS_BC = N * BC**2 / 4
SS_BC

In [None]:
# Sum of squares of a single-df effect: Contrast^2 / N, which equals
# N * Effect^2 / 4 because Contrast = Effect * N / 2.
SS_ABC = N * ABC**2 / 4
SS_ABC

In [None]:
# Sum of squares of the model: the seven effects of a 2^3 design are mutually
# orthogonal, so the model SS is the sum of the individual effect SS.
SS_M = SS_A + SS_B + SS_C + SS_AB + SS_AC + SS_BC + SS_ABC
SS_M

In [None]:
# Total sum of squares: SST = sum(y^2) - (grand total)^2 / N.
# First term: the squares of all individual observations in both replicates.
overall_sum_squares_observations = (
    (data["Replicate 1"] ** 2).sum() + (data["Replicate 2"] ** 2).sum()
)
# Second term (correction for the mean): squared grand total over N.
overall_sum_squares_total = data["Total"].sum() ** 2 / N
SST = overall_sum_squares_observations - overall_sum_squares_total
SST

In [None]:
# Error sum of squares: what remains of the total sum of squares (SST) after
# subtracting the sum of squares of the model (SS_M).
SSE = SST - SS_M
SSE

### Calculate Mean of Squares

In [None]:
# Degrees of freedom: every main effect and interaction gets exactly 1.
df_A = df_B = df_C = 1
df_AB = df_AC = df_BC = 1
df_ABC = 1

# Total df is N - 1; what the seven effects do not use is left for error.
df_T = N - 1
df_E = df_T - sum((df_A, df_B, df_C, df_AB, df_AC, df_BC, df_ABC))

In [None]:
# Mean square = sum of squares / its degrees of freedom.
MS_A = SS_A / df_A
MS_B = SS_B / df_B
MS_C = SS_C / df_C
MS_AB = SS_AB / df_AB
MS_AC = SS_AC / df_AC
MS_BC = SS_BC / df_BC
MS_ABC = SS_ABC / df_ABC
# Mean square error: the denominator of every F-ratio below.
MSE = SSE / df_E
print(MS_A)
print(MS_B)
print(MS_C)
print(MS_AB)
print(MS_AC)
print(MS_BC)
print(MS_ABC)
print(MSE)

### Calculate F-statistics

In [None]:
# F-statistic of each effect: its mean square relative to the mean square error.
F_A = MS_A / MSE
F_B = MS_B / MSE
F_C = MS_C / MSE
F_AB = MS_AB / MSE
F_AC = MS_AC / MSE
F_BC = MS_BC / MSE
F_ABC = MS_ABC / MSE
print(F_A)
print(F_B)
print(F_C)
print(F_AB)
print(F_AC)
print(F_BC)
print(F_ABC)

### Calculate $p$-values

In [None]:

# p-value = upper-tail probability P(F > F_observed) of the F distribution with
# (effect df, error df) degrees of freedom; sf is the survival function 1 - cdf.
p_A = st.f.sf(F_A, df_A, df_E)
p_B = st.f.sf(F_B, df_B, df_E)
p_C = st.f.sf(F_C, df_C, df_E)
p_AB = st.f.sf(F_AB, df_AB, df_E)
p_AC = st.f.sf(F_AC, df_AC, df_E)
p_BC = st.f.sf(F_BC, df_BC, df_E)
p_ABC = st.f.sf(F_ABC, df_ABC, df_E)
print(p_A)
print(p_B)
print(p_C)
print(p_AB)
print(p_AC)
print(p_BC)
print(p_ABC)

### Main Effect Results

In [None]:
# Pool the three main effects (A, B, C) into a single source of variation.
SS_Main_Effects = SS_A + SS_B + SS_C
df_Main_Effects = df_A + df_B + df_C
MS_Main_Effects = SS_Main_Effects / df_Main_Effects
F_Main_Effects = MS_Main_Effects / MSE
# Upper-tail probability of F with (pooled df, error df) degrees of freedom.
p_Main_Effects = st.f.sf(F_Main_Effects, df_Main_Effects, df_E)
print("SS Main Effects: ", SS_Main_Effects)
print("MS Main Effects: ", MS_Main_Effects)
print("F- Main Effects: ", F_Main_Effects)
print("p-value Main Effects: ", p_Main_Effects)


### Two-Way Interaction Results

In [None]:
# Pool the three two-factor interactions (AB, AC, BC) into a single source.
SS_Two_Way_Interactions = SS_AB + SS_AC + SS_BC
df_Two_Way_Interactions = df_AB + df_AC + df_BC
MS_Two_Way_Interactions = SS_Two_Way_Interactions / df_Two_Way_Interactions
F_Two_Way_Interactions = MS_Two_Way_Interactions / MSE
# Upper-tail probability of F with (pooled df, error df) degrees of freedom.
p_Two_Way_Interactions = st.f.sf(F_Two_Way_Interactions, df_Two_Way_Interactions, df_E)
print("SS Two-Way Interaction Effects: ", SS_Two_Way_Interactions)
print("MS Two-Way Interaction Effects: ", MS_Two_Way_Interactions)
print("F- Two-Way Interaction Effects: ", F_Two_Way_Interactions)
print("p-value Two-Way Interaction Effects: ", p_Two_Way_Interactions)

### Three-Way Interaction Results

In [None]:
# A 2^3 design has a single three-factor interaction, ABC.
SS_Three_Way_Interactions = SS_ABC
df_Three_Way_Interactions = df_ABC
MS_Three_Way_Interactions = SS_Three_Way_Interactions / df_Three_Way_Interactions
F_Three_Way_Interactions = MS_Three_Way_Interactions / MSE
# Upper-tail probability of F with (effect df, error df) degrees of freedom.
p_Three_Way_Interactions = st.f.sf(F_Three_Way_Interactions, df_Three_Way_Interactions, df_E)
# Labels say "Three-Way": the original ones were copied from the two-way cell.
print("SS Three-Way Interaction Effects: ", SS_Three_Way_Interactions)
print("MS Three-Way Interaction Effects: ", MS_Three_Way_Interactions)
print("F- Three-Way Interaction Effects: ", F_Three_Way_Interactions)
print("p-value Three-Way Interaction Effects: ", p_Three_Way_Interactions)

### Model Results

In [None]:
# Sum of squares for the model --> SS_M
# Calculate the test statistic for the full model (all seven effects together).
df_model = df_A + df_B + df_C + df_AB + df_AC + df_BC + df_ABC  # degrees of freedom of the model
MS_M = SS_M / df_model
F_M = MS_M / MSE
F_M

In [None]:
# Evaluate the model using R^2 and R^2_adj (adjusted R^2).
# R^2: share of the total variation accounted for by the model.
R_square = SS_M / SST
# Adjusted R^2 compares mean squares instead of sums of squares, so it is
# penalised for the number of terms in the model.
R_square_adj = 1 - (SSE / df_E) / (SST / df_T)
print("R^2 =", R_square)
print("R^2_adj =", R_square_adj)


### Write the Regression Model

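At $\alpha = 0.05$ the significant terms are A (gap), C (RF power) and the AC interaction; B (gas flow) and the remaining interactions are not significant. Each regression coefficient is half of the corresponding factor effect, and the intercept is the grand average of the 16 observations:

$$ \hat{y} = 776.0625 - 50.8125\,x_1 + 153.0625\,x_3 - 76.8125\,x_1 x_3 $$

where $x_1$ and $x_3$ are the coded ($\pm 1$) levels of A and C.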
