In [None]:
from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

In [None]:
def standard_units(arr):
    """Convert an array of numbers to standard units.

    Each value is re-expressed as its deviation from the array's average,
    divided by the array's standard deviation (as computed by np.std).
    """
    center = np.average(arr)
    spread = np.std(arr)
    deviations = arr - center
    return deviations / spread

def correlation(t, x, y):
    """Return the correlation coefficient between columns x and y of table t.

    Both columns are converted to standard units; the result is the average
    of the element-wise products of the standardized values.
    """
    standardized_products = standard_units(t.column(x)) * standard_units(t.column(y))
    return np.average(standardized_products)

def slope(t, x, y):
    """Return the slope of the regression line for predicting y from x.

    The slope is r * (SD of y) / (SD of x), where r is the correlation
    between columns x and y of table t.
    """
    r = correlation(t, x, y)
    sd_of_y = np.std(t.column(y))
    sd_of_x = np.std(t.column(x))
    # Multiply before dividing, matching the evaluation order of r * sd_y / sd_x.
    scaled_by_y = r * sd_of_y
    return scaled_by_y / sd_of_x

def intercept(t, x, y):
    """Return the intercept of the regression line for predicting y from x.

    Computed as mean(y) - slope * mean(x), using columns x and y of table t.
    """
    mean_of_x = np.mean(t.column(x))
    mean_of_y = np.mean(t.column(y))
    slope_times_mean_x = slope(t, x, y) * mean_of_x
    return mean_of_y - slope_times_mean_x

def fitted_values(t, x, y):
    """Return an array of the regression estimates at all the x values.

    Each estimate is slope * x + intercept, where the line is the regression
    line for predicting column y from column x of table t.
    """
    line_slope = slope(t, x, y)
    line_intercept = intercept(t, x, y)
    x_values = t.column(x)
    return line_slope * x_values + line_intercept

def residuals(t, x, y):
    """Return an array of residuals: observed y minus the regression estimate.

    The estimates come from fitted_values(t, x, y), so there is one residual
    per row of table t.
    """
    estimates = fitted_values(t, x, y)
    observed = t.column(y)
    return observed - estimates

# Residuals

**Please run all cells before this cell, including the import cell at the top of the notebook.**

In [None]:
# Read galton.csv and keep only the two height columns used below,
# relabeled as 'MidParent' and 'Child'.
galton = Table.read_table('galton.csv')
heights = Table().with_columns(
    'MidParent', galton.column('midparentHeight'),
    'Child', galton.column('childHeight'),
)
heights

In [None]:
# Add the regression estimate of 'Child' from 'MidParent' and the matching
# residual for every row. Both helpers are passed only the 'MidParent' and
# 'Child' labels, so the columns added here do not feed back into them.
heights = heights.with_columns(
    'Fitted Value', fitted_values(heights, 'MidParent', 'Child'),
    'Residual', residuals(heights, 'MidParent', 'Child'),
)
heights

In [None]:
# Correlation coefficient between the 'MidParent' and 'Child' columns.
correlation(heights, 'MidParent', 'Child')

In [None]:
# Scatter plot with 'MidParent' on the horizontal axis. No columns are
# selected explicitly, so (assuming the cells above ran in order) the other
# columns of heights -- 'Child', 'Fitted Value' and 'Residual' -- are
# expected to be overlaid on the same axes.
heights.scatter('MidParent')

# Residual plot

**Please run all cells before this cell, including the import cell at the top of the notebook.**

In [None]:
def plot_residuals(t, x, y):
    """Draw a fit plot and a residual plot for the regression of y on x.

    The first scatter plot uses column x as the horizontal axis and shows
    column y together with the fitted values. The second plots the residuals
    against column x.
    """
    fitted = fitted_values(t, x, y)
    resid = residuals(t, x, y)
    augmented = t.with_columns('Fitted', fitted, 'Residual', resid)

    # Column 0 of the selected table is x, so it becomes the horizontal axis.
    fit_view = augmented.select(x, y, 'Fitted')
    fit_view.scatter(0)

    augmented.scatter(x, 'Residual')

In [None]:
# Draw the fit plot and the residual plot for predicting 'Child' from 'MidParent'.
plot_residuals(heights, 'MidParent', 'Child')