In [None]:
# For confidentiality reasons, the data presented here have been altered and do not represent actual laboratory measurements.
# As a result, the results displayed in this project may exhibit statistical anomalies.
# Nevertheless, the code and procedures employed remain valid.

In [1]:
import pandas as pd
import statsmodels.api as sm
from itertools import combinations
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

# Import Data

In [10]:
# Load both mixture-design sheets from the workbook in a single pass.
# Passing a list to sheet_name makes read_excel return a dict keyed by sheet name,
# so the workbook is opened and parsed once instead of once per sheet and the
# file path is written in only one place.
design_sheets = pd.read_excel(
    "Oil and catalyst Mixture designs.xlsx",
    sheet_name=["Oil design", "Catalyst design"],
)
data_oil = design_sheets["Oil design"]
data_cat = design_sheets["Catalyst design"]

# Oil Mixture

# OLS Regression Density (kg/m3)

In [17]:
# Fit an OLS model of Density (kg/m3) on a degree-3 polynomial expansion of the
# oil mixture components, then print the overall F-test p-value and the summary.

# Columns 1-3 of the oil design sheet are the predictors; columns 4-6 are the responses
input_oil = data_oil[data_oil.columns[1:4]]
response_oil = data_oil[data_oil.columns[4:7]]

# Generate every polynomial term up to the 3rd order (powers and cross terms);
# include_bias=False because the intercept column is added separately below
poly = PolynomialFeatures(degree=3, include_bias=False)
input_poly_oil = poly.fit_transform(input_oil)

# Create column names for the polynomial terms.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2, so use
# its replacement and fall back to the old method only on installs that predate it.
try:
    poly_cols = poly.get_feature_names_out(input_oil.columns)
except AttributeError:
    poly_cols = poly.get_feature_names(input_oil.columns)

# Convert the polynomial array to a DataFrame, carrying over the original row index
# so the predictors stay aligned with the response series taken from data_oil
input_oil = pd.DataFrame(input_poly_oil, columns=poly_cols, index=input_oil.index)

# Add a constant column for the intercept term in the input DataFrame
input_oil = sm.add_constant(input_oil)

# NOTE(review): with three predictor columns the expansion has 19 terms plus the
# intercept, while the recorded output shows 16 observations and Df Model = 9, i.e.
# the design matrix is rank deficient. Individual coefficients are presumably not
# uniquely identified - confirm whether a reduced mixture model was intended.

# Fit the regression model
model = sm.OLS(response_oil['Density (kg/m3)'], input_oil)
results = model.fit()

# Access the overall model p-value (p-value of the F-statistic)
model_pvalue = results.f_pvalue

# Print the model p-value
print("Overall model p-value:", model_pvalue)

# Print the statistical regression analysis summary
print(results.summary())


Overall model p-value: 0.05298282339725555
                            OLS Regression Results                            
Dep. Variable:        Density (kg/m3)   R-squared:                       0.857
Model:                            OLS   Adj. R-squared:                  0.642
Method:                 Least Squares   F-statistic:                     3.994
Date:                Thu, 09 Nov 2023   Prob (F-statistic):             0.0530
Time:                        21:21:32   Log-Likelihood:                -47.858
No. Observations:                  16   AIC:                             115.7
Df Residuals:                       6   BIC:                             123.4
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const    



# OLS Regression Viscosity (mPa.s)

In [18]:
# Fit an OLS model of Viscosity (mPa.s) on a degree-3 polynomial expansion of the
# oil mixture components, then print the overall F-test p-value and the summary.

# Columns 1-3 of the oil design sheet are the predictors; columns 4-6 are the responses
input_oil = data_oil[data_oil.columns[1:4]]
response_oil = data_oil[data_oil.columns[4:7]]

# Generate every polynomial term up to the 3rd order (powers and cross terms);
# include_bias=False because the intercept column is added separately below
poly = PolynomialFeatures(degree=3, include_bias=False)
input_poly_oil = poly.fit_transform(input_oil)

# Create column names for the polynomial terms.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2, so use
# its replacement and fall back to the old method only on installs that predate it.
try:
    poly_cols = poly.get_feature_names_out(input_oil.columns)
except AttributeError:
    poly_cols = poly.get_feature_names(input_oil.columns)

# Convert the polynomial array to a DataFrame, carrying over the original row index
# so the predictors stay aligned with the response series taken from data_oil
input_oil = pd.DataFrame(input_poly_oil, columns=poly_cols, index=input_oil.index)

# Add a constant column for the intercept term in the input DataFrame
input_oil = sm.add_constant(input_oil)

# NOTE(review): with three predictor columns the expansion has 19 terms plus the
# intercept, while the recorded output shows 16 observations and Df Model = 9, i.e.
# the design matrix is rank deficient. Individual coefficients are presumably not
# uniquely identified - confirm whether a reduced mixture model was intended.

# Fit the regression model
model = sm.OLS(response_oil['Viscosity (mPa.s)'], input_oil)
results = model.fit()

# Access the overall model p-value (p-value of the F-statistic)
model_pvalue = results.f_pvalue

# Print the model p-value
print("Overall model p-value:", model_pvalue)

# Print the statistical regression analysis summary
print(results.summary())


Overall model p-value: 0.2444253818846114
                            OLS Regression Results                            
Dep. Variable:      Viscosity (mPa.s)   R-squared:                       0.730
Model:                            OLS   Adj. R-squared:                  0.324
Method:                 Least Squares   F-statistic:                     1.799
Date:                Thu, 09 Nov 2023   Prob (F-statistic):              0.244
Time:                        21:21:47   Log-Likelihood:                -32.761
No. Observations:                  16   AIC:                             85.52
Df Residuals:                       6   BIC:                             93.25
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const     



# OLS Regression FFA

In [19]:
# Fit an OLS model of FFA on a degree-3 polynomial expansion of the oil mixture
# components, then print the overall F-test p-value and the summary.

# Columns 1-3 of the oil design sheet are the predictors; columns 4-6 are the responses
input_oil = data_oil[data_oil.columns[1:4]]
response_oil = data_oil[data_oil.columns[4:7]]

# Generate every polynomial term up to the 3rd order (powers and cross terms);
# include_bias=False because the intercept column is added separately below
poly = PolynomialFeatures(degree=3, include_bias=False)
input_poly_oil = poly.fit_transform(input_oil)

# Create column names for the polynomial terms.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2, so use
# its replacement and fall back to the old method only on installs that predate it.
try:
    poly_cols = poly.get_feature_names_out(input_oil.columns)
except AttributeError:
    poly_cols = poly.get_feature_names(input_oil.columns)

# Convert the polynomial array to a DataFrame, carrying over the original row index
# so the predictors stay aligned with the response series taken from data_oil
input_oil = pd.DataFrame(input_poly_oil, columns=poly_cols, index=input_oil.index)

# Add a constant column for the intercept term in the input DataFrame
input_oil = sm.add_constant(input_oil)

# NOTE(review): with three predictor columns the expansion has 19 terms plus the
# intercept, while the recorded output shows 16 observations and Df Model = 9, i.e.
# the design matrix is rank deficient. Individual coefficients are presumably not
# uniquely identified - confirm whether a reduced mixture model was intended.

# Fit the regression model
model = sm.OLS(response_oil['FFA'], input_oil)
results = model.fit()

# Access the overall model p-value (p-value of the F-statistic)
model_pvalue = results.f_pvalue

# Print the model p-value
print("Overall model p-value:", model_pvalue)

# Print the statistical regression analysis summary
print(results.summary())


Overall model p-value: 0.2215532811346332
                            OLS Regression Results                            
Dep. Variable:                    FFA   R-squared:                       0.742
Model:                            OLS   Adj. R-squared:                  0.354
Method:                 Least Squares   F-statistic:                     1.913
Date:                Thu, 09 Nov 2023   Prob (F-statistic):              0.222
Time:                        21:21:59   Log-Likelihood:                -20.670
No. Observations:                  16   AIC:                             61.34
Df Residuals:                       6   BIC:                             69.07
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const     



# Catalyst Mixture

# OLS Regression Surface area (m2/g)

In [20]:
# Regress Surface area (m2/g) on the catalyst components A, B, C, their squares and
# their pairwise differences, then print the overall F-test p-value and the summary.

# Predictors are columns 1-3 of the catalyst design sheet; responses are columns 4-5
input_cat = data_cat.iloc[:, 1:4]
response_cat = data_cat.iloc[:, 4:6]

# Short handles for the three component columns used to build the derived terms
comp_a, comp_b, comp_c = input_cat['A'], input_cat['B'], input_cat['C']

# Squared term for each component
sqTable = pd.DataFrame({'A_sq': comp_a ** 2,
                        'B_sq': comp_b ** 2,
                        'C_sq': comp_c ** 2})

# Components followed by their squares
meTable = pd.concat([input_cat, sqTable], axis=1)

# Pairwise differences between the components.
# NOTE(review): each difference is a linear combination of the A, B and C columns
# already in the design matrix, so these columns cannot add rank; the recorded
# summary shows 16 observations with 10 residual df for 9 predictor columns.
# If product terms (e.g. A*B) were intended instead, confirm with the author.
subTable = pd.DataFrame({'A-B': comp_a - comp_b,
                         'A-C': comp_a - comp_c,
                         'B-C': comp_b - comp_c})

# Final design matrix: components, squares, differences (no intercept column is added)
input_cat = pd.concat([meTable, subTable], axis=1)

# Second reference to the design matrix; not used again within this cell
gg = input_cat

# Set up and fit the ordinary least squares model
model = sm.OLS(endog=response_cat['Surface area (m2/g)'], exog=input_cat)
results = model.fit()

# Overall model significance (p-value of the F-statistic)
model_pvalue = results.f_pvalue
print("Overall model p-value:", model_pvalue)

# Full regression report
print(results.summary())

Overall model p-value: 0.0016802235005440903
                             OLS Regression Results                            
Dep. Variable:     Surface area (m2/g)   R-squared:                       0.821
Model:                             OLS   Adj. R-squared:                  0.732
Method:                  Least Squares   F-statistic:                     9.190
Date:                 Thu, 09 Nov 2023   Prob (F-statistic):            0.00168
Time:                         21:22:58   Log-Likelihood:                -90.167
No. Observations:                   16   AIC:                             192.3
Df Residuals:                       10   BIC:                             197.0
Df Model:                            5                                         
Covariance Type:             nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------



# OLS Regression Pore volume (cc/g)

In [21]:
# Regress Pore volume (cc/g) on the catalyst components A, B, C, their squares and
# their pairwise differences, then print the overall F-test p-value and the summary.

# Predictors are columns 1-3 of the catalyst design sheet; responses are columns 4-5
input_cat = data_cat.iloc[:, 1:4]
response_cat = data_cat.iloc[:, 4:6]

# Short handles for the three component columns used to build the derived terms
comp_a, comp_b, comp_c = input_cat['A'], input_cat['B'], input_cat['C']

# Squared term for each component
sqTable = pd.DataFrame({'A_sq': comp_a ** 2,
                        'B_sq': comp_b ** 2,
                        'C_sq': comp_c ** 2})

# Components followed by their squares
meTable = pd.concat([input_cat, sqTable], axis=1)

# Pairwise differences between the components.
# NOTE(review): each difference is a linear combination of the A, B and C columns
# already in the design matrix, so these columns cannot add rank; the recorded
# summary shows 16 observations with 10 residual df for 9 predictor columns.
# If product terms (e.g. A*B) were intended instead, confirm with the author.
subTable = pd.DataFrame({'A-B': comp_a - comp_b,
                         'A-C': comp_a - comp_c,
                         'B-C': comp_b - comp_c})

# Final design matrix: components, squares, differences (no intercept column is added)
input_cat = pd.concat([meTable, subTable], axis=1)

# Set up and fit the ordinary least squares model
model = sm.OLS(endog=response_cat['Pore volume (cc/g)'], exog=input_cat)
results = model.fit()

# Overall model significance (p-value of the F-statistic)
model_pvalue = results.f_pvalue
print("Overall model p-value:", model_pvalue)

# Full regression report
print(results.summary())

Overall model p-value: 0.07322868952088613
                            OLS Regression Results                            
Dep. Variable:     Pore volume (cc/g)   R-squared:                       0.589
Model:                            OLS   Adj. R-squared:                  0.384
Method:                 Least Squares   F-statistic:                     2.871
Date:                Thu, 09 Nov 2023   Prob (F-statistic):             0.0732
Time:                        21:23:10   Log-Likelihood:                 26.001
No. Observations:                  16   AIC:                            -40.00
Df Residuals:                      10   BIC:                            -35.37
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
A        

