# Lecture 6: Functions

## 4.1: Module 4 Notebook 1

In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Functions ##

In [None]:
def double(x):
    """Return twice the value of x."""
    doubled = 2 * x
    return doubled

In [None]:
double(6)

In [None]:
double(10/4)

In [None]:
z = 10
double(z/4)

In [None]:
x  # expected to raise NameError when cells run in order: so far x exists only as double's parameter

In [None]:
x = 17
double(x)

In [None]:
x

In [None]:
double(300)

## Why use functions?

In [None]:
# Express each count as a percentage of the total, rounded to 2 decimal places.
counts = make_array(1, 2, 3)
total = counts.sum()
np.round((counts/total)*100, 2)

In [None]:
def percents(counts):
    """Express each count as a percentage of the sum of all counts.

    `counts` must offer a `.sum()` method and elementwise division
    (a NumPy array does). The result is rounded to 2 decimal places.
    """
    shares = counts / counts.sum()
    return np.round(shares * 100, 2)

In [None]:
percents(make_array(2, 4, 8, 6))

In [None]:
def percents(counts, decimal_places=2, extra=0):
    """Express each count as a percentage of (sum of counts + extra).

    counts         -- array-like with a `.sum()` method and elementwise division
    decimal_places -- number of decimals kept by the rounding (default 2)
    extra          -- amount added to the total before dividing (default 0)
    """
    denominator = counts.sum() + extra
    scaled = (counts / denominator) * 100
    return np.round(scaled, decimal_places)

In [None]:
parts = make_array(2, 1, 4)

percents(parts, decimal_places=4, extra=6)

In [None]:
percents(parts, extra=6)

## Function with optional arguments ##

In [None]:
def percents(s, places=2):
    """Express each element of s as a percentage of the sum of s.

    s      -- array supporting elementwise division by a number
    places -- optional number of decimals kept by the rounding (default 2)
    """
    total = sum(s)
    fractions = s / total
    return np.round(fractions * 100, places)

In [None]:
x = make_array(2, 5, 16)
percents(x)

In [None]:
percents(x, 4)

## Apply ##

In [None]:
def cut_off_at_100(y):
    """Cap a value at 100: return y unchanged unless it exceeds 100."""
    if 100 < y:
        return 100
    return y

In [None]:
ages = Table().with_columns(
    'Person', make_array('A', 'B', 'C', 'D'),
    'Age', make_array(63, 110, 99, 102)
)

In [None]:
ages

In [None]:
ages.apply(cut_off_at_100, 'Age')

In [None]:
ages.with_column('Cut Off Age', ages.apply(cut_off_at_100, 'Age'))

In [None]:
type(cut_off_at_100)

In [None]:
cut_off_at_100

In [None]:
# Label the capped column 'Cut Off Age' (letter "O", not the digit zero).
ages.with_column('Cut Off Age', ages.apply(cut_off_at_100, 'Age'))

## Apply with multiple columns ##

### Prediction dataset ###

In [None]:
heights = Table.read_table("galton.csv")
heights

In [None]:
# Column labels must match the ones used on this table elsewhere:
# 'midparentHeight' (x-axis) and 'childHeight' (y-axis).
heights.scatter('midparentHeight', 'childHeight')

In [None]:
# Scatter child height against midparent height, then draw red vertical
# lines at x = 67.5 and x = 68.5 to mark the band around 68.
heights.scatter('midparentHeight', 'childHeight')
plots.plot([67.5, 67.5], [50, 85], color='red', lw=2)
plots.plot([68.5, 68.5], [50, 85], color='red', lw=2);

In [None]:
heights.where('midparentHeight', are.between(67.5, 68.5)).column('childHeight').mean()

In [None]:
heights.scatter('midparentHeight', 'childHeight')
plots.plot([67.5, 67.5], [50, 85], color='red', lw=2)
plots.plot([68.5, 68.5], [50, 85], color='red', lw=2)
plots.scatter(68, 66.24, color='gold', s=50);

In [None]:
def predict_childHeight(h):
    """Predict a child's height from a midparent height h.

    Averages the 'childHeight' column over the rows of the global
    `heights` table whose 'midparentHeight' satisfies
    are.between(h - 0.5, h + 0.5).
    """
    lower, upper = h-0.5, h+0.5
    nearby = heights.where('midparentHeight', are.between(lower, upper))
    child_heights = nearby.column('childHeight')
    return child_heights.mean()

In [None]:
predict_childHeight(68)

In [None]:
predict_childHeight(65)

In [None]:
predictions = heights.apply(predict_childHeight, 'midparentHeight')

In [None]:
heights = heights.with_column('childHeight Prediction', predictions)

In [None]:
heights

In [None]:
# Keep only the x-axis column plus the two columns to compare, so the
# table works well with Table.scatter
heights_chart = heights.select('midparentHeight', 'childHeight', 'childHeight Prediction')
# Make the scatter plot with 'midparentHeight' on the horizontal axis
heights_chart.scatter('midparentHeight')

### Apply on multiple columns of the prediction dataset ###

In [None]:
heights

In [None]:
def difference(x, y):
    """Return x minus y."""
    result = x - y
    return result

In [None]:
difference(4,5)

In [None]:
# difference between each woman's height and her mother's

def difference(x, y):
    """Subtract the second argument from the first and return the result."""
    gap = x - y
    return gap

heights.where('gender', 'female').apply(difference, 'childHeight', 'mother')
#How could we add this as a column to the table?

## Grouping by category ##

In [None]:
all_cones = Table.read_table('cones.csv')
all_cones

In [None]:
cones = all_cones.drop('Color')
cones

In [None]:
cones.group('Flavor')

In [None]:
cones.group('Flavor', list)

In [None]:
cones.group('Flavor', len)

In [None]:
cones.group('Flavor', min)

In [None]:
min(cones.where('Flavor', 'chocolate').column('Price'))

In [None]:
cones.group('Flavor', np.average)

In [None]:
def spread(x):
    """Return the range of x: its largest value minus its smallest."""
    largest = max(x)
    smallest = min(x)
    return largest - smallest

In [None]:
cones.group('Flavor', spread)

In [None]:
all_cones

In [None]:
all_cones.group(['Flavor', 'Color'])

In [None]:
all_cones.group(['Flavor', 'Color'], np.average)

## Pivot Tables ##

In [None]:
all_cones

In [None]:
all_cones.group(['Flavor', 'Color'])

In [None]:
all_cones.pivot('Flavor', 'Color')

In [None]:
all_cones.pivot('Color', 'Flavor')

In [None]:
all_cones.pivot('Color', 'Flavor', values = 'Price', collect = list)

## Examples ##

In [None]:
all_cones.pivot('Color', 'Flavor', values = 'Price', collect = np.average)