# Describe, diagnose, predict and prescribe overview
## Purpose
Demonstrate different analytic types on a simple system:
- Descriptive analytics
- Diagnostic analytics
- Predictive analytics
- Prescriptive analytics
## System
- d is a stochastic, autocorrelated disturbance
- u is a manipulated input
- y is a dependent KPI


In [None]:
# Import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.inspection import permutation_importance
from sklearn.metrics import r2_score, mean_absolute_error
from scipy.optimize import minimize

In [None]:
# Load the dataset: a CSV file served from a Google Drive download link
data_url = "https://drive.google.com/uc?id=1fUxXrOrEX2m2UpHra2_xMhDOwOCG-8hw&export=download"
df = pd.read_csv(filepath_or_buffer=data_url)
# Bare expression so the notebook renders the loaded table
df

## Descriptive analytics
- Descriptive statistics
- Statistical visualization

In [None]:
# Descriptive statistics
# Transposed so that each variable is a row and each statistic a column
df.describe().transpose()

In [None]:
# Statistical visualization
# Time series: one subplot per column, drawn as unconnected semi-transparent markers
axx = df.plot(subplots=True, marker='.', linestyle='none', alpha=0.5)
# Overlay the quality target as a horizontal line on the third subplot
axx[2].axhline(y=1.5, color='red', label='Product quality target')
axx[2].legend()

In [None]:
# Statistical visualization
# Scatter plot of y against u, with the quality target drawn as a reference line
ax = df.plot.scatter(x='u', y='y', alpha=0.5)
ax.axhline(y=1.5, color='red', label='Product quality target')
ax.legend()
# Share of samples with y strictly below 1.5 (mean of the boolean mask)
fraction_spec = (df['y'] < 1.5).mean()
ax.set_title(f'Fraction of samples meeting product quality target: {fraction_spec:0.2f}')

In [None]:
# Statistical visualization
# Box plot showing the distribution of y
df.boxplot('y')

## Diagnostic analytics: Model-based

In [None]:
# Modelling: Train models
# Regressors are u and d (in that column order); the target is y
X = df.loc[:, ['u', 'd']].copy()
Y = df.loc[:, 'y'].copy()
# Linear regression fitted on the plain arrays, then predicted back on the same samples
lm = LinearRegression().fit(X.to_numpy(), Y.to_numpy())
Ylm_hat = lm.predict(X.to_numpy())
# k-nearest-neighbours regression (20 neighbours) fitted and evaluated on the same samples
knn = KNeighborsRegressor(n_neighbors=20).fit(X.to_numpy(), Y.to_numpy())
Yknn_hat = knn.predict(X.to_numpy())

In [None]:
# Modelling: Performance (on training data)
# Both scores compare predictions against the same samples the models were fitted on

# Linear regression
r2_lm = r2_score(y_true=Y.to_numpy(), y_pred=Ylm_hat)
mae_lm = mean_absolute_error(y_true=Y.to_numpy(), y_pred=Ylm_hat)
print(f'Linear regression: R2={r2_lm:0.2f}, MAE={mae_lm:0.2f}')

# KNN regression
r2_knn = r2_score(y_true=Y.to_numpy(), y_pred=Yknn_hat)
mae_knn = mean_absolute_error(y_true=Y.to_numpy(), y_pred=Yknn_hat)
print(f'KNN regression: R2={r2_knn:0.2f}, MAE={mae_knn:0.2f}')


## Diagnostic analytics: Linear models
- Regression coefficients
- Linear effects plot

In [None]:
# Regression coefficients
# Print the fitted intercept, then chart the coefficient of each regressor
print(f'Linear model: Intercept {lm.intercept_:0.2f}')
lm_coef = pd.Series(lm.coef_, index=['u', 'd'])
lm_coef.plot(kind='bar')
plt.title('Linear model: Coefficients')

In [None]:
# Linear effects plot
# Predicted y versus u, one curve per value of d, on a 10 x 10 grid spanning the observed ranges
ugrid = np.linspace(df['u'].min(), df['u'].max(), 10)
dgrid = np.linspace(df['d'].min(), df['d'].max(), 10)
Nu = len(ugrid)
Nd = len(dgrid)

fig, ax = plt.subplots()
# One colour per d value, sampled evenly along the jet colormap
ax.set_prop_cycle('color', list(map(plt.cm.jet, np.linspace(0, 1, Nd))))
for d_current in dgrid:
    # Pair every grid value of u with the current, constant d
    X_current = np.column_stack((ugrid, np.full(Nu, d_current)))
    ax.plot(ugrid, lm.predict(X_current), label='d = ' + str(round(d_current, 2)))

ax.legend()
ax.set_xlabel('u')
ax.set_ylabel(r'$\hat{y}$')
ax.set_title('Linear regression: Effects plot')

## Diagnostic analytics: Nonlinear models
- Variable importance
- Nonlinear effects plot

In [None]:
# Variable importance
# Permutation importance shuffles each feature column at random and measures the
# resulting change in model score. A fixed seed keeps the importances (and the
# bar chart) identical from one run of the notebook to the next.
vi = permutation_importance(knn, X.values, Y.values, random_state=0)
# Mean importance over the repeated shuffles, labelled in the column order of X
knn_vi = pd.Series(data=vi.importances_mean, index=['u', 'd'])
knn_vi.plot.bar()
plt.title('Nonlinear model: Variable importance')

In [None]:
# Nonlinear effects plot
# KNN-predicted y versus u, one curve per value of d, on a 10 x 10 grid spanning the observed ranges
ugrid = np.linspace(df['u'].min(), df['u'].max(), 10)
dgrid = np.linspace(df['d'].min(), df['d'].max(), 10)
Nu = len(ugrid)
Nd = len(dgrid)

fig, ax = plt.subplots()
# One colour per d value, sampled evenly along the jet colormap
ax.set_prop_cycle('color', list(map(plt.cm.jet, np.linspace(0, 1, Nd))))
for d_current in dgrid:
    # Pair every grid value of u with the current, constant d
    X_current = np.column_stack((ugrid, np.full(Nu, d_current)))
    ax.plot(ugrid, knn.predict(X_current), label='d = ' + str(round(d_current, 2)))

ax.legend()
ax.set_xlabel('u')
ax.set_ylabel(r'$\hat{y}$')
ax.set_title('Nonlinear regression: Effects plot')

## Predictive analytics
- Future predictions for defined inputs (u and d)

In [None]:
# Assumed future inputs: u sweeps linearly from -10 to 10 over 100 samples, d is held at zero
unext = np.linspace(-10, 10, 100)
dnext = np.zeros(unext.shape)
X_next = np.column_stack((unext, dnext))

# Predictions for those inputs from both fitted models (linear and KNN)
ypred_lm = lm.predict(X_next)
ypred_knn = knn.predict(X_next)

# Three stacked panels plotted against sample index: u, d, and the two prediction series
fig, axx = plt.subplots(nrows=3, ncols=1)

axx[0].plot(unext, 'k.', label='Future u')
axx[0].legend()

axx[1].plot(dnext, 'k.', label='Future d')
axx[1].legend()

axx[2].plot(ypred_lm, 'b.', label='Linear model predictions')
axx[2].plot(ypred_knn, 'g.', label='Nonlinear model predictions')
axx[2].legend()


## Prescriptive analytics
- Best u for fixed d

In [None]:
# Disturbance value held constant while searching for the best u
dfixed = -3

In [None]:
# Objective for the optimizer: the model's prediction of y
def objective(u_val, d_val, model):
    """Return ``model``'s prediction for the given u and d values.

    ``u_val`` and ``d_val`` are stacked as two rows and transposed, so the
    array passed to ``model.predict`` has u in the first column and d in
    the second.

    Parameters
    ----------
    u_val : scalar or array-like
        Candidate value(s) of u.
    d_val : scalar or array-like
        Value(s) of d; must stack row-wise with ``u_val``.
    model : object
        Any object exposing ``predict(X)``.

    Returns
    -------
    Whatever ``model.predict`` returns for the assembled array.
    """
    stacked_rows = np.vstack([u_val, d_val])
    features = np.transpose(stacked_rows)
    return model.predict(features)

In [None]:
# Optimization: search for the u within the bounds that minimizes the KNN
# model's prediction of y while d is held at dfixed
ubounds = [(-10,10)]
# Starting point for the search
u0 = 5
result = minimize(objective,u0,args=(dfixed,knn),method='Nelder-Mead',bounds=ubounds)
# Report the solver's termination status rather than silently presenting a
# point from a run that did not terminate successfully
if not result.success:
    print(f'Optimization did not converge: {result.message}')
# result.x is an array of decision variables; there is only one (u)
ubest = result.x[0]
print(f'Best value for u, given d: {ubest:0.2f}')