In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Topic 7: Functions ##

### Defining a Function ###

In [None]:
def triple(x):
    """Return 3 * x.

    Works for any operand that supports multiplication by an int, such as
    numbers, strings (repetition) and NumPy arrays (element-wise).
    """
    tripled = 3 * x
    return tripled

In [None]:
triple(3)

In [None]:
num = 4

In [None]:
triple(num)

In [None]:
triple(num * 5)

### Note About Scopes

In [None]:
# Should give an error.  Why?
x

In [None]:
x = 5

In [None]:
triple(2 * x)

In [None]:
# x should still be 5
x

### Parameters are Type Agnostic ###

In [None]:
triple('ha')

In [None]:
triple(np.arange(4))

### Discussion Question

In [None]:
def percent_of_total(s):
    """Convert each value in s to its percentage of the total of s.

    Parameters
    ----------
    s : array-like of numbers
        A NumPy array, or any sequence that np.asarray can convert
        (for example a list or tuple).

    Returns
    -------
    numpy.ndarray
        Each value divided by the sum of all values, multiplied by 100,
        and rounded to two decimal places.
    """
    # np.asarray lets plain lists/tuples work too; dividing a list by a
    # number would otherwise raise a TypeError.
    values = np.asarray(s)
    # Use the array's own sum instead of the builtin sum, which walks the
    # array one element at a time in Python.
    total = values.sum()
    return np.round(values / total * 100, 2)

In [None]:
percent_of_total(make_array(1,2,3,4))

In [None]:
percent_of_total(make_array(1, 213, 38))

### Docstrings ###

In [None]:
def percents(counts):
    """Express each entry of an array as a percentage of the array's sum.

    Every value is divided by the sum of all values, scaled by 100, and
    rounded to two decimal places.

    >>> percents(make_array(10, 15, 25))
    array([20., 30., 50.])
    """
    shares = counts / counts.sum()
    return np.round(shares * 100, 2)


In [None]:
percents?

In [None]:
help(percents)

In [None]:
percents(make_array(10, 15, 25))

### Multiple Arguments

$ h^2 = x^2 + y^2 \hspace{20pt} \Rightarrow \hspace{20pt} h = \sqrt{x^2 + y^2} $

In [None]:
def hypotenuse(x, y):
    """Return the hypotenuse of a right triangle whose legs are x and y.

    Computed as the square root of x squared plus y squared.
    """
    sum_of_squares = x ** 2 + y ** 2
    return sum_of_squares ** 0.5

In [None]:
hypotenuse(9, 12)

In [None]:
hypotenuse(2, 2)

### Default Arguments ###

In [None]:
def hypotenuse_round(x, y, decimal_places):
    """Return the hypotenuse for legs x and y, rounded to decimal_places digits.

    All three arguments are required; decimal_places has no default value.
    """
    length = (x ** 2 + y ** 2) ** 0.5
    return np.round(length, decimal_places)

In [None]:
hypotenuse_round(2, 2, 3)

In [None]:
# This will give an error.  The number of arguments doesn't match the function definition.
hypotenuse_round(2, 2)

In [None]:
def hypotenuse_round_default(x, y, decimal_places=2):
    """Return the hypotenuse for legs x and y, rounded to decimal_places digits.

    decimal_places may be omitted, in which case the result is rounded to
    two decimal places.
    """
    length = (x ** 2 + y ** 2) ** 0.5
    return np.round(length, decimal_places)

In [None]:
hypotenuse_round_default(2, 2, 3)

In [None]:
hypotenuse_round_default(2, 2)

In [None]:
hypotenuse_round_default(2, 2, decimal_places = 4)

In [None]:
# This will give an error.  Keyword argument names must match the parameter names in the function definition.
hypotenuse_round_default(2, 2, places = 4)

## Apply ##

In [None]:
# Small example table with two columns: each person's name ('Person')
# and their birth year ('Birth Year').
ages = Table().with_columns(
    'Person', make_array('Jim', 'Pam', 'Michael', 'Creed'),
    'Birth Year', make_array(1985, 1988, 1967, 1904)
)
# Bare last expression so the notebook displays the table.
ages

In [None]:
def cap_at_1980(x):
    """Return x, unless x is greater than 1980, in which case return 1980."""
    if 1980 < x:
        return 1980
    return x

In [None]:
cap_at_1980(1975)

In [None]:
cap_at_1980(1991)

In [None]:
ages.apply(cap_at_1980, 'Birth Year')

In [None]:
def name_and_age(name, year, current_year=2020):
    """Build a short sentence stating a person's age.

    Parameters
    ----------
    name : str
        The person's name.
    year : int
        The person's birth year.
    current_year : int, optional
        The year in which the age is measured. Defaults to 2020, the value
        that was previously hard-coded, so existing two-argument calls
        behave exactly as before.

    Returns
    -------
    str
        Text of the form '<name> is <age>', where age is
        current_year - year (no adjustment for birthdays within the year).
    """
    age = current_year - year
    return name + ' is ' + str(age)

In [None]:
ages.apply(name_and_age, 'Person', 'Birth Year')

## Overlaid Histograms

In [None]:
# Read galton.csv, keep the columns at positions 1, 2 and 7, and relabel the
# third kept column (position 2 after the select) as 'child'.
# NOTE(review): later cells use 'father' and 'mother', so positions 1 and 2
# presumably hold those columns — confirm against the CSV header.
height = Table.read_table('galton.csv').select(1, 2, 7).relabeled(2, 'child')
# Preview the table, limited to 6 rows.
height.show(6)

In [None]:
height.hist('father', unit='inch')

In [None]:
height.hist('child', unit='inch')

In [None]:
height.hist(unit='inch', bins=np.arange(55, 80, 2))

## Prediction ##

In [None]:
height

In [None]:
height.scatter("child")

In [None]:
# Add a 'parent average' column: the mean of each row's 'mother' and
# 'father' values. This rebinds `height`, and the cells below rely on the
# new column being present.
height = height.with_column(
    'parent average', (height.column('mother') + height.column('father')) / 2
)
height

In [None]:
height.scatter('parent average', 'child')

In [None]:
# Scatter 'child' against 'parent average', then draw two vertical red
# lines at x = 67.5 and x = 68.5 (each running from y = 50 to y = 85) to
# mark the band of parent averages examined in the following cells.
height.scatter('parent average', 'child')
# The return value of plots.plot is assigned to _ because it is not needed.
_ = plots.plot([67.5, 67.5], [50, 85], color='red', lw=2)
_ = plots.plot([68.5, 68.5], [50, 85], color='red', lw=2)

In [None]:
# Keep only the rows whose 'parent average' lies between 67.5 and 68.5
# (see the datascience docs for how are.between treats the endpoints).
close_to_68 = height.where('parent average', are.between(67.5, 68.5))
close_to_68

In [None]:
close_to_68.column('child').mean()

In [None]:
def predict_child(pa):
    """Predict a child's height from a parent-average height.

    Selects the rows of the notebook-level `height` table whose
    'parent average' is between pa - 0.5 and pa + 0.5 (as decided by
    are.between) and returns the mean of their 'child' values.
    """
    lower, upper = pa - 0.5, pa + 0.5
    in_band = are.between(lower, upper)
    nearby = height.where('parent average', in_band)
    return nearby.column('child').mean()

In [None]:
predict_child(68)

In [None]:
predict_child(62)

In [None]:
# Apply predict_child to every row's 'parent average' to get one prediction
# per row, attach the results as a 'prediction' column, keep the columns at
# positions 2, 3 and 4, and scatter them against 'parent average'.
# NOTE(review): positions 2, 3 and 4 are presumably 'child',
# 'parent average' and 'prediction' — confirm the column order.

height.with_column(
    'prediction', height.apply(predict_child, 'parent average')
).select(2, 3, 4).scatter('parent average')