# Signs of Bias & Variance

| Signs of Bias | Signs of Variance |
| --- | --- |
| Poor intuition with new data | Noise in data set |
| Poor intuition with training data | Overfitting |
| Poor intuition compared to similar models | Complexity |
| Underfitting | High MSE |
| Simplicity | |

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from utils_common import compute_model_output

In [None]:
# Generate the two random 2-D point clouds drawn by the plotting cells:
#   m - 300 points, tightly clustered and strongly correlated (rho = 0.9)
#   n - 10 points, more widely spread and almost uncorrelated (rho = 0.01)
SEED = 42  # fixed so that Restart & Run All reproduces the same figures
rng = np.random.default_rng(SEED)


def make_gaussian_cloud(x_range, y_range, corr, n_points, rng):
    """Draw correlated 2-D Gaussian points scaled to the given axis ranges.

    Each axis is centred on the mean of its range values and uses a standard
    deviation equal to one third of the (population) standard deviation of
    those range values.

    Args:
        x_range: Sequence of numbers describing the extent of the x axis.
        y_range: Sequence of numbers describing the extent of the y axis.
        corr: Correlation coefficient between x and y.
        n_points: Number of points to draw.
        rng: ``numpy.random.Generator`` used for sampling.

    Returns:
        Array of shape ``(2, n_points)``; row 0 holds x values, row 1 y values.
    """
    x_range = np.asarray(x_range, dtype=float)
    y_range = np.asarray(y_range, dtype=float)
    centre = [x_range.mean(), y_range.mean()]
    sigma_x = x_range.std() / 3
    sigma_y = y_range.std() / 3
    off_diag = corr * sigma_x * sigma_y
    cov = [[sigma_x**2, off_diag],
           [off_diag, sigma_y**2]]
    # Transpose so callers can index the result as cloud[0] (x) and cloud[1] (y).
    return rng.multivariate_normal(centre, cov, n_points).T


m = make_gaussian_cloud([25, 50], [25, 50], corr=0.9, n_points=300, rng=rng)
n = make_gaussian_cloud([0, 50], [0, 50], corr=0.01, n_points=10, rng=rng)

In [None]:
# Variance: scatter both point sets (`m` and `n`) in the same colour.
# NOTE(review): presumably the shared colour is deliberate, so the scattered
# `n` points read as noise inside a single data set - confirm.
fig, ax = plt.subplots()
ax.scatter(m[0], m[1], color='blue')
ax.scatter(n[0], n[1], color='blue')
ax.set_xlabel("Feature")
ax.set_ylabel("Target")
ax.set_title("Variance")
# No legend here: neither scatter carries a label, so legend() would only emit
# a "no artists with labels" warning and draw nothing.
plt.show()

In [None]:
# Bias: the same two point sets, each drawn in its own colour with a legend entry.
bias_axes = plt.gca()
labelled_groups = (
    (m, 'pink', 'Women'),
    (n, 'blue', 'Men'),
)
for points, colour, group_name in labelled_groups:
    bias_axes.scatter(points[0], points[1], color=colour, label=group_name)
bias_axes.set_xlabel("Feature")
bias_axes.set_ylabel("Target")
bias_axes.legend()
plt.show()

## Relationship between Bias, Variance, and Fit/Intuition

In [None]:
# Three-panel illustration of how bias and variance relate to fit:
#   left   - high variance / overfitting: a line drawn through every point
#   middle - high bias / underfitting: a straight line over curved data
#   right  - low bias and low variance: the curve the data was generated from
# All names below are specific to this cell, so the notebook-level `m` used by
# the earlier plotting cells is not overwritten.
fit_rng = np.random.default_rng(0)  # local, seeded: the figure is reproducible

# Left panel data: 8 weakly correlated (rho = 0.2) Gaussian points.
span_x = np.array([-10, 10])
span_y = np.array([-10, 10])
centre = [span_x.mean(), span_y.mean()]
sigma_x = span_x.std() / 3
sigma_y = span_y.std() / 3
off_diag = 0.2 * sigma_x * sigma_y
cov = [[sigma_x**2, off_diag],
       [off_diag, sigma_y**2]]
overfit_pts = fit_rng.multivariate_normal(centre, cov, 8).T
# Order the points by x while keeping every (x, y) pair intact. A plain
# np.sort on the (2, N) array would sort each row on its own and scramble the
# pairs, turning the "overfit" line into a monotone curve.
overfit_pts = overfit_pts[:, np.argsort(overfit_pts[0])]

# Middle and right panel data: a parabola plus uniform jitter on both coordinates.
curve_x = np.linspace(10, -10, 100)
curve_y = -curve_x**2 + 4*curve_x + 2
noisy_x = curve_x + fit_rng.uniform(-5, 5, size=curve_x.shape)
noisy_y = curve_y + fit_rng.uniform(-10, 10, size=curve_y.shape)

# Straight-line model for the underfitting panel.
# NOTE(review): presumably compute_model_output(x, w, b) returns w*x + b -
# confirm against utils_common.
line_x = np.array([-10, 10])
line_y = compute_model_output(line_x, 10, -10)

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

axes[0].scatter(overfit_pts[0], overfit_pts[1], color='blue', s=100)
axes[0].plot(overfit_pts[0], overfit_pts[1], color='red')
axes[0].set_title("High Variance/Overfitting")

axes[1].scatter(noisy_x, noisy_y, c='b')
axes[1].plot(line_x, line_y, c='r')
axes[1].set_title("High Bias/Underfitting")

axes[2].plot(curve_x, curve_y, color='red')
axes[2].scatter(noisy_x, noisy_y, c='b')
axes[2].set_title("Low Bias and Low Variance/Good Fit")

# The panels are qualitative sketches, so hide ticks (and with them the tick
# labels). A separate loop variable keeps `axes` bound to the full array.
for panel in axes.flat:
    panel.set_xticks([])
    panel.set_yticks([])

plt.show()