In [None]:
import mqr
from mqr.plot import Figure
from mqr.nbtools import hstack, vstack

In [None]:
from IPython.display import display

# Datasets

In [None]:
import pandas as pd

# Read the sample study CSV (its location is resolved by mqr.sample_data) into a
# DataFrame. Later cells pull individual columns out of `data` by name.
data = pd.read_csv(mqr.sample_data('study-random-5x5.csv'))

# Process

In [None]:
from mqr.summary import Sample, Study
from mqr.process import Process, Specification

In [None]:
# Just a sample: build a Sample from the single KPO1 column of the study data.
sample = Sample(data['KPO1'])
# Bare expression on the last line so the notebook renders the object.
sample

In [None]:
# Build a Study over the whole DataFrame, naming the five columns that are to be
# treated as measurements.
study = Study(data, measurements=['KPI1', 'KPI2', 'KPI3', 'KPO1', 'KPO2'])
# Bare expression on the last line so the notebook renders the object.
study

In [None]:
import numpy as np
import pickle

# The pickle holds the (mean, covariance) pair used by the random generator.
# Both are reused below to construct some good and some bad processes.
# NOTE(review): pickle.load can execute arbitrary code from the file it reads.
# That is acceptable only for trusted files; the path here is resolved through
# mqr.sample_data -- confirm it always points at the packaged sample.
with open(mqr.sample_data('study-random-5x5-mean-cov.pkl'), mode='rb') as f:
    mean, cov = pickle.load(f)

# Twice the square root of the covariance diagonal entry at index 2.
offset2 = np.sqrt(cov[2, 2]) * 2

---
The `Process` type is a model of a process with various input and output quantities. It calculates metrics like capability and expected defect rate.

In [None]:
import numpy as np

def _centred_spec(idx, n_sigma, shift=None):
    """Return a Specification built around ``mean[idx]``.

    The three positional arguments handed to ``Specification`` are, in order:
    the centre value, the centre minus ``n_sigma`` standard deviations, and the
    centre plus ``n_sigma`` standard deviations (presumably target, lower limit
    and upper limit -- confirm against ``mqr.process.Specification``). The
    standard deviation is ``sqrt(cov[idx, idx])``.

    Parameters
    ----------
    idx : int
        Position of the measurement in ``mean`` and on the diagonal of ``cov``.
    n_sigma : int or float
        Half-width of the limits, in standard deviations.
    shift : optional
        When given, added to all three values after the limits are formed.
    """
    spread = np.sqrt(cov[idx, idx]) * n_sigma
    centre, lower, upper = mean[idx], mean[idx] - spread, mean[idx] + spread
    if shift is not None:
        centre, lower, upper = centre + shift, lower + shift, upper + shift
    return Specification(centre, lower, upper)


specs = {
    'KPI1': _centred_spec(0, 6),
    'KPI2': _centred_spec(1, 6),
    # KPI3 is the only specification moved away from the mean (by offset2).
    'KPI3': _centred_spec(2, 6, shift=offset2),
    'KPO1': _centred_spec(3, 6),
    # KPO2 gets the narrowest limits: two standard deviations either side.
    'KPO2': _centred_spec(4, 2),
}
p = Process(study, specs)
# Bare expression on the last line so the notebook renders the process.
p

---
The main process overlays:
* short- and long-term fitted Gaussians, and 
* specification limits.

In [None]:
# Two axes are used below (ax[0] and ax[1]); the Figure arguments are presumably
# width, height, rows, columns -- confirm against mqr.plot.Figure.
with Figure(6, 4, 2, 1) as (fig, ax):
    # First axes: KPI1 data with its specification and capability, long-term fit included.
    mqr.plot.process.pdf(
        study['KPI1'],
        specs['KPI1'],
        p.capabilities['KPI1'],
        show_long_term=True,
        ax=ax[0],
    )
    # Second axes: the KPI1 specification on its own.
    mqr.plot.process.tolerance(
        specs['KPI1'],
        ax=ax[1],
    )

---
The same overlays, drawn on top of histograms.

In [None]:
with Figure(7, 6, 3, 1) as (fig, ax):
    # One capability plot per axes, in order: (measurement, show the long-term fit?).
    for panel_idx, (kpi_name, with_long_term) in enumerate(
        (('KPI1', True), ('KPI3', True), ('KPO2', False))
    ):
        mqr.plot.process.capability(
            p,
            kpi_name,
            show_long_term=with_long_term,
            ax=ax[panel_idx],
        )

---
Correlations between all KPIs and KPOs

In [None]:
# The whole set of axes is handed to the matrix plot (presumably a 5x5 grid,
# one row/column per measurement -- confirm against mqr.plot.Figure).
with Figure(7, 7, 5, 5) as (fig, ax):
    # show_conf/conf: draw confidence information at the 0.95 level.
    mqr.plot.correlation.matrix(study.get_data(), show_conf=True, conf=0.95, ax=ax)

---
# Scatter plot

In [None]:
# Scatter of one input (KPI2) against one output (KPO2) from the study data.
with Figure(3, 3) as (fig, ax):
    ax.scatter(data['KPI2'], data['KPO2'])
    # Label both axes so the figure can be read on its own, matching the
    # labelled probability plots further down the notebook.
    ax.set_xlabel('KPI2')
    ax.set_ylabel('KPO2')

---
# Probability plots

In [None]:
import scipy.stats as st
from statsmodels.api import ProbPlot

with Figure(3, 3) as (fig, ax):
    # Normal probability plot of KPI1; fit=True asks statsmodels to fit the
    # distribution parameters from the data rather than take them as given.
    gen = ProbPlot(data['KPI1'], dist=st.norm, fit=True)
    # line='s' adds the standardized reference line; draw into the prepared axes.
    gen.probplot(line='s', ax=ax)

The library `reliability` (for reliability modelling) is excellent. Unfortunately, its plotting interface doesn't allow plotting into axes that have already been created (for example with `matplotlib.pyplot.subplots` or `mqr.plot.Figure`). Instead, `reliability` returns the figure, which can then be manipulated.

In [None]:
from reliability.Probability_plotting import Normal_probability_plot
# reliability hands back the figure it drew on, so the size and labels are
# adjusted afterwards instead of being prepared up front.
fig = Normal_probability_plot(data['KPI1'].values)
fig.set_size_inches(5, 4)
ax = fig.get_axes()
# Only the first axes of the returned figure is relabelled.
ax[0].set(xlabel='KPI1', ylabel='Fraction below')
plot = mqr.nbtools.grab_figure(fig)

# Show the grabbed figure side by side with the first rows of the data.
hstack(plot, data.head())

---
## Grouped probability plots
The grouped probability plots provided by `mqr` show subsets of a dataframe,
but each subset is shown on a plot whose statistics were calculated from the whole dataset.

In the examples below, the cdf lines and the quantile points are calculated from the whole dataset.

In [None]:
# One boolean mask over the rows of `data` per operator, in the order Op A, Op B.
grp = [data['operator'] == operator_name for operator_name in ('Op A', 'Op B')]

CDF lines

In [None]:
with Figure(6, 3, 1, 2) as (fig, ax):
    # Points for each operator group go onto their own axes ...
    mqr.plot.probplot.pp_grp(data['KPO1'], grp=grp, grp_ax=ax)
    # ... and the CDF line is added to each of those axes.
    mqr.plot.probplot.pp_grp_cdfline(data['KPO1'], grp_ax=ax)

    # Label each panel with the operator it shows.
    for panel_idx, panel_label in enumerate(('KPO1 (Op A)', 'KPO1 (Op B)')):
        ax[panel_idx].set_xlabel(panel_label)

Lines through quantiles

In [None]:
# Per-group plotting keywords, in the same order as the masks in `grp`.
grp_kwargs = [
    dict(color='C1', marker='o'),
    dict(color='C3', marker='s'),
]

with Figure(6, 3, 1, 2) as (fig, ax):
    # Points for each operator group, styled by the matching grp_kwargs entry.
    mqr.plot.probplot.pp_grp(
        data['KPO2'],
        grp=grp,
        grp_kwargs=grp_kwargs,
        grp_ax=ax,
    )
    # Add the line through the quantiles to each of those axes.
    mqr.plot.probplot.pp_grp_qline(data['KPO2'], grp_ax=ax)

    # Label each panel with the operator it shows.
    for panel_idx, panel_label in enumerate(('KPO2 (Op A)', 'KPO2 (Op B)')):
        ax[panel_idx].set_xlabel(panel_label)