In [None]:
import mqr
from mqr.plot import Figure

In [None]:
# Data and calculation libraries
import numpy as np
import pandas as pd
import scipy.stats as st
import statsmodels.formula.api as smf
import statsmodels.api as sm

---
# DoE

The `mqr` library provides a wrapper around pyDOE3 that makes designs easy to combine and to use as a DataFrame, so that they can be saved and later loaded together with observations/experimental results.

The features are:
* creation from pyDOE3 functions fullfact, fracfact and ccdesign,
* labelling with point types, for easy management in analysis,
* transforming from labels to physical values, for easier experimental technique,
* easy concatenation and blocking.

The main type is `mqr.doe.Design`.  
To concatenate designs use a plus symbol: `design1 + design2`.  
To block designs, either pass the block number when creating the design (`Design.from_fullfact(..., block=1)`),
or change the block for a design using `design.as_block(...)`.  
To randomise a design, call `design.randomise_runs()`, which preserves blocks by default.  
To scale labels to physical values, define a `mqr.doe.Transform` and apply it to a design like matrix multiplication `Design @ Transform`.

---
## Experimental workflow
1. Design the experiment using the tools in mqr.doe (or pyDOE3 directly).
1. Randomise the runs.
1. Save the design to a _design file_, and append the experimental observations as a new column to make an _experiment file_.
1. (optional) Instead of creating a new file, get the dataframe version of a design and enter data directly into the notebook as an extra column on the dataframe: `design['Observation'] = np.array([...])`
1. Load the _experiment file_ (if you created one) ready for analysis with ANOVA and regression tools.

---

In [None]:
from mqr.doe import Design

### 1 Fractional Factorial Design

In [None]:
# Six factors built from four base columns (a, b, c, d); the generator
# string derives the remaining two columns as the products abcd and abc.
names = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6']
gen = 'a b c d abcd abc'

Design.from_fracfact(names, gen)

### 2 Fractional Factorial with Centre Points

In [None]:
# Same fractional factorial as before, extended with `nc` centre points.
names = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6']
gen = 'a b c d abcd abc'
nc = 3

factorial_pts = Design.from_fracfact(names, gen)
centre_pts = Design.from_centrepoints(names, nc)
factorial_pts + centre_pts

### 3 Central Composite Design — Full Factorial
With blocking

In [None]:
# Four factors, two levels each, with `nc` centre points added to each block.
names = ['x1', 'x2', 'x3', 'x4']
levels = [2] * len(names)
nc = 3

# First block: full-factorial points plus centre points.
blk1 = (
    Design.from_fullfact(names, levels)
    + Design.from_centrepoints(names, nc)
)
# Second block: axial points plus centre points (labelled block 2 when combined).
blk2 = (
    Design.from_axial(names)
    + Design.from_centrepoints(names, nc)
)
design = blk1 + blk2.as_block(2)
design

### 4 Central Composite Design — Fractional Factorial
With blocking

In [None]:
# Four factors from three base columns; the fourth column is generated as abc.
names = ['x1', 'x2', 'x3', 'x4']
gen = 'a b c abc'
nc = 3

# First block: fractional-factorial points plus centre points.
blk1 = (
    Design.from_fracfact(names, gen)
    + Design.from_centrepoints(names, nc)
)
# Second block: axial points plus centre points (labelled block 2 when combined).
blk2 = (
    Design.from_axial(names)
    + Design.from_centrepoints(names, nc)
)
design = blk1 + blk2.as_block(2)
design

---
# Practicalities

### 5 Replicating the runs

In [None]:
# Display block 1 with its runs replicated. The argument is presumably the
# replicate count — TODO confirm against mqr.doe.Design.replicate.
blk1.replicate(2)

### 6 Randomising the runs
Randomise the run order of a design by calling `design.randomise_runs()`. A plain dataframe can be shuffled in a similar way by calling `df.sample(frac=1)`, where the `frac` argument is the proportion of the rows to return.

In [None]:
# Warning: seeding the random number generator will produce the same ordering every run
np.random.seed(1234)

# Four blocks in total: blk1 and blk2 are reused, relabelled as blocks 3 and 4.
design = (
    blk1
    + blk2.as_block(2)
    + blk1.as_block(3)
    + blk2.as_block(4)
)
design.randomise_runs()

### 7 Transforming the level labels to physical values
While linear transformations don't affect the regression (the result will just be in different units),
writing down exactly which values correspond to each level might be convenient for careful, disciplined experimental technique.

The distinction between linear ($y = Ax$) and affine ($y = Ax + b$) transformations matters here.
If after translating the experiment (by $b$), the response surface doesn't go through the origin,
a constant/intercept term should be included in the regression.
In the examples below, the regression is performed in label units/label-space.

First, define a transform that maps the levels that correspond to each label
(when `mqr.doe` constructs a transform from labels like below, it assumes the transform is affine).
Then, apply the transform to the design using the matrix multiplication operator.

In [None]:
from mqr.doe import Transform

# One {label: physical value} map per factor, listed in the same order as
# design.names. Each map pins two points, from which an affine transformation
# is constructed.
level_maps = [
    {-1: 100, 1: 110},
    {0: 5, 1: 25},
    {-1.5: 1.02, 3.2: 1.48},
    {-2: 43.7, 0: 48.2},
]
tr = Transform.from_map(level_maps)
display(tr)
design @ tr

### 8 Transforming the values to categories

In [None]:
# Fractional factorial whose numeric levels are relabelled as categories.
cat_design = Design.from_fracfact(names=['x1', 'x2', 'x3', 'x4'], gen='a b c abc')

# One {level: category name} map per factor
# (presumably in the order of the design's names — confirm against mqr.doe.Transform).
category_maps = [
    {-1: '-one', 1: '+one'},
    {-1: '-two', 1: '+two'},
    {-1: '-thr', 1: '+thr'},
    {-1: '-fou', 1: '+fou'},
]
tr = Transform.from_categories(category_maps)
display(tr)
cat_design @ tr

### 9 Save the experimental design to file for printing etc.
The `index_label` argument in `DataFrame.to_csv(...)` tells Pandas to include the index column with the given name.

In [None]:
# A fixed, randomly chosen seed makes the saved run order reproducible
# (assuming randomise_runs draws from NumPy's global RNG — confirm).
# The same value is embedded in the file name, so it is defined once here
# to keep the seed and the file name from drifting apart.
seed = 1294194915
np.random.seed(seed)

frozen_design = design.randomise_runs().to_df()
frozen_design.to_csv(
    f'doe-section6-{seed}.csv',
    index_label='run')

---
# DoE and Regression - centre point design
This is the centre point example from above.

Having saved the experimental design,
performed the experiment,
and added a column to the csv with the observations,
the data is now ready to analyse.

In [None]:
# Rebuild the centre-point design using the factor names of the experiment.
names = ['Ht', 'Theta0', 'Ra', 'Rc']
gen = 'a b c abc'
nc = 3

design = (
    Design.from_fracfact(names, gen)
    + Design.from_centrepoints(names, nc)
)

In [None]:
# Load the experiment file, using the first CSV column as the index, and
# sort by that index. Chaining sort_index() keeps the cell idempotent and
# avoids mutating `obs` in place after it has been created.
obs = pd.read_csv(
    mqr.sample_data('doe-centrepoint.csv'),
    index_col=0,
).sort_index()

In [None]:
# Split the observations by point type: 0 is taken as centre points and
# 1 as corner (factorial) points.
pt_type = obs['PtType']
centres = obs[pt_type == 0]
corners = obs[pt_type == 1]

In [None]:
# Curvature check: compare the mean response at the corner points with the
# mean at the centre points, using the spread of the centre-point replicates
# as the error estimate.
toss_corner = corners['Toss']
toss_centre = centres['Toss']
delta = np.mean(toss_corner) - np.mean(toss_centre)

Ncorner = corners.shape[0]
Ncentre = centres.shape[0]

# Sum of squares for the corner-vs-centre contrast.
SScentre = Ncorner * Ncentre * np.power(delta, 2) / (Ncorner + Ncentre)
# ddof=0 is spelled out on purpose: population variance times N is the sum
# of squared deviations, which is then divided by its N - 1 degrees of freedom.
SSerror = np.var(toss_centre, ddof=0) * Ncentre
MSerror = SSerror / (Ncentre - 1)

Fcentre = SScentre / MSerror
# Use the survival function rather than 1 - cdf: it is the same quantity but
# does not lose precision to cancellation when the p-value is very small.
Pcentre = st.f.sf(Fcentre, 1, Ncentre - 1)

display(f'{SScentre}, {SSerror}, {MSerror}')
display(f'Curvature F-stat={Fcentre} and p={Pcentre}')

In [None]:
# Main-effects model fitted to the corner points only. The trailing "-1" in
# the formula removes the intercept term.
# NOTE(review): the notes on transforms say an intercept should be included
# when the response surface does not pass through the origin — confirm that
# omitting it here is intended.
result = smf.ols(formula='Toss ~ Ht + Theta0 + Ra + Rc -1', data=corners).fit()

In [None]:
# Adequacy table on top; ANOVA and coefficient tables side by side below it.
adequacy_table = mqr.anova.adequacy(result)
anova_table = sm.stats.anova_lm(result, typ=2)
coeff_table = mqr.anova.coeffs(result)

mqr.nbtools.vstack(
    adequacy_table,
    mqr.nbtools.hstack(anova_table, coeff_table))

In [None]:
# Draw the residual plots and capture the figure for side-by-side layout.
with Figure(6, 4, 2, 2) as (fig, axs):
    mqr.plot.regression.residuals(result, axs=axs)
    plot = mqr.nbtools.grab_figure(fig)

# One-sample tests on the residuals: distribution first, then mean.
resid_dist_test = mqr.inference.dist.test_1sample(result.resid)
resid_mean_test = mqr.inference.mean.test_1sample(result.resid)

mqr.nbtools.hstack(
    plot,
    mqr.nbtools.vstack(resid_dist_test, resid_mean_test))

In [None]:
effect = 'Toss'

# Re-derive the point subsets so this cell depends only on `obs` and `design`.
corners = obs.query('PtType==1')
centres = obs.query('PtType==0')

# One panel per factor: mean response at each corner level (C0) and at the
# centre points (C1).
with Figure(8, 2, 1, 4, sharey=True) as (fig, axs):
    for i, name in enumerate(design.names):
        # Select the response column before averaging: this avoids computing
        # means of unrelated columns and does not fail if the frame contains
        # non-numeric columns.
        axs[i].plot(corners.groupby(name)[effect].mean(), color='C0', marker='.')
        axs[i].plot(centres.groupby(name)[effect].mean(), color='C1', marker='o')
        axs[i].set_xlabel(name)
    # The y axis is shared, so a single label on the first panel is enough.
    axs[0].set_ylabel(f'Mean {effect}')

---
# DoE and Regression - central composite design
This is the central composite (fractional factorial) example from above.

Having saved the experimental design,
performed the experiment,
and added a column to the csv with the observations,
the data is ready to analyse.

In [None]:
# Rebuild the two-block central composite design using the experiment's factor names.
names = ['Ht', 'Theta0', 'Ra', 'Rc']
gen = 'a b c abc'
nc = 3

# Block 1: fractional-factorial points plus centre points.
blk1 = (
    Design.from_fracfact(names, gen)
    + Design.from_centrepoints(names, nc)
)
# Block 2: axial points plus centre points (labelled block 2 when combined).
blk2 = (
    Design.from_axial(names)
    + Design.from_centrepoints(names, nc)
)
design = blk1 + blk2.as_block(2)

In [None]:
# Load experimental data, and sort for easier reading
# Read the experiment file (first column as the index), then order by index.
composite_path = mqr.sample_data('doe-composite.csv')
obs = pd.read_csv(composite_path, index_col=0).sort_index()
obs

### Blocking
First, check whether the blocks show any difference by comparing the means of their centre points.

In [None]:
# Centre-point responses from each block.
centre_blk1 = obs.query('Block == 1 and PtType == 0')['Toss']
centre_blk2 = obs.query('Block == 2 and PtType == 0')['Toss']

# Two-sided, two-sample test on the means (pooled=False).
mqr.inference.mean.test_2sample(
    centre_blk1,
    centre_blk2,
    pooled=False,
    alternative='two-sided')

There is no evidence to reject the hypothesis that the means are equal. So, treat the experiment as one block (i.e. don't include a block factor in the regression).

### Model

In [None]:
# Full model: main effects, four two-factor interactions, and a squared term
# for every factor.
expr = '''Toss ~
    Ht + Theta0 + Ra + Rc +
    I(Ht * Theta0) + I(Ht * Ra) + I(Ht * Rc) + I(Theta0 * Ra) +
    I(Ht**2) + I(Theta0**2) + I(Ra**2) + I(Rc**2)'''
model = smf.ols(formula=expr, data=obs)
result = model.fit()

In [None]:
# Type-2 ANOVA table for the current `result` fit, used to judge which
# terms are significant.
display(sm.stats.anova_lm(result, typ=2))

Drop all insignificant higher-order terms: `Ht*Theta0`, `Ht*Ra`, `Ht**2`, `Ra**2`, `Rc**2`

In [None]:
# Reduced model: all main effects, plus the two interactions and the one
# squared term that were kept.
expr = '''Toss ~
    Ht + Theta0 + Ra + Rc +
    I(Ht * Rc) + I(Theta0 * Ra) +
    I(Theta0**2)'''
model = smf.ols(formula=expr, data=obs)
result = model.fit()

In [None]:
# Adequacy table on top; ANOVA summary and coefficient tables side by side below it.
adequacy_table = mqr.anova.adequacy(result)
anova_table = mqr.anova.summary(result, typ=2)
coeff_table = mqr.anova.coeffs(result)

mqr.nbtools.vstack(
    adequacy_table,
    mqr.nbtools.hstack(anova_table, coeff_table))

In [None]:
# Draw the residual plots and capture the figure for side-by-side layout.
with Figure(6, 4, 2, 2) as (fig, axs):
    mqr.plot.regression.residuals(result, axs=axs)
    plot = mqr.nbtools.grab_figure(fig)

# One-sample tests on the residuals: distribution first, then mean.
resid_dist_test = mqr.inference.dist.test_1sample(result.resid)
resid_mean_test = mqr.inference.mean.test_1sample(result.resid)

mqr.nbtools.hstack(
    plot,
    mqr.nbtools.vstack(resid_dist_test, resid_mean_test))

In [None]:
# One panel per factor: model prediction plotted against that factor's values,
# using the per-factor frame supplied by the design.
with Figure(8, 2, 1, 4, sharey=True) as (fig, axs):
    for idx, factor in enumerate(design.names):
        ax = axs[idx]
        factor_df = design.get_factor_df(factor)
        predicted = result.predict(factor_df)
        ax.plot(factor_df[factor], predicted, marker='.')
        ax.set_xlabel(factor)