In [17]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
warnings.simplefilter(action='ignore', category=np.VisibleDeprecationWarning)

## Comparisons with arrays

In [18]:
# Let's assign an array of pets
pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')

In [19]:
# We can do comparison expressions for arrays.  This returns an array of booleans!
pets == 'dog'

array([False, False,  True, False,  True, False], dtype=bool)

In [20]:
# 'True' is equal to 1, while 'False' is equal to 0
True == 1

True

In [22]:
False == 0

True

In [24]:
True + True

2

In [25]:
# This counts the number of entries of pets which are 'dog'
sum(pets == 'dog')

2

In [22]:
# When we apply np.count_nonzero to an array full of boolean values, we are really counting the number of 'True' entries

<function numpy.count_nonzero(a, axis=None, *, keepdims=False)>

In [None]:
x = np.arange(20, 31)

In [None]:
# Can you imagine what the output will look like?
x > 28

In [None]:
# How about for this?
sum(x > 28)

## Rows & Apply

In [2]:
# Let's load the family heights data
heights = Table.read_table('family_heights.csv')
heights.show(3)

family,father,mother,child,children,order,sex
1,78.5,67,73.2,4,1,male
1,78.5,67,69.2,4,2,female
1,78.5,67,69.0,4,3,female


In [1]:
# Let's grab the first row:


In [2]:
# The data type of a row


In [3]:
# You can use indices to grab entries of a row using .item:


In [4]:
# Or you can use column labels:


In [7]:
# Let's define only_heights to be a table with only the heights of father, mother, and child
only_heights = heights.select('father', 'mother', 'child')

father,mother,child
78.5,67.0,73.2
78.5,67.0,69.2
78.5,67.0,69.0
78.5,67.0,69.0
75.5,66.5,73.5
75.5,66.5,72.5
75.5,66.5,65.5
75.5,66.5,65.5
75.0,64.0,71.0
75.0,64.0,68.0


In [5]:
# What if we want to see the average height of each family?
# Let's use apply with one argument to apply a function to each row.

In [6]:
# This is an array that we can use to add a column to our table!

## Conditional Statements

In [None]:
x = 20

In [None]:
# Here's a conditional statement.  Notice that after if, we have a comparison expression.  
# It's either True or False.
# Because x >= 18 is True, code following the if line will run.

if x >= 18:    
    print('You can legally vote.')

In [None]:
# Because x >= 21 is False, code following the if line will not run!

if x >= 21:
    print('You can legally drink.')

In [None]:
# Conditional statements are useful in functions when you want the output behavior to be different
# depending on the input.


In [10]:
# Once functions return a value, no further computation is done!


In [None]:
# elif and else are ways of dealing with many cases.


## Simulation

Let's play a game: we each roll a die. 

If my number is bigger: you pay me a dollar.

If they're the same: we do nothing.

If your number is bigger: I pay you a dollar.

Steps:
1. Find a way to simulate two dice rolls.
2. Compute how much money we win/lose based on the result.
3. Do steps 1 and 2 10,000 times.

### Conditional Statements

In [None]:
# Let's play a game where we each roll a die.  If my roll is higher than your roll, I win a dollar. 
# If your roll is higher, then I lose a dollar (to you).
# If our rolls are tied, then I win nothing and lose nothing.
# Work in progress:

def one_round(my_roll, your_roll):
    """Return my winnings for one round of the dice game (work in progress).

    Only the case where my roll is higher is handled so far: it returns 1.
    For a tie or a loss the function falls off the end and returns None.
    """
    i_rolled_higher = my_roll > your_roll
    if i_rolled_higher:
        return 1

In [None]:
one_round(4, 3)

In [None]:
one_round(2, 6)

In [None]:
# Final correct version
def one_round(my_roll, your_roll):
    """Return my net winnings, in dollars, for one round of the dice game.

    Returns 1 when my roll is higher, -1 when your roll is higher, and 0
    when the two rolls are equal.  The rolls are not range-checked, so any
    pair of comparable values is accepted.
    """
    # Remains None only if none of the three comparisons below is true.
    winnings = None
    if my_roll > your_roll:
        winnings = 1
    elif your_roll > my_roll:
        winnings = -1
    elif your_roll == my_roll:
        winnings = 0
    return winnings

In [None]:
one_round(1, 1)

In [None]:
one_round(6, 5)

In [None]:
one_round(7, -1)

### Random Selection

In [None]:
mornings = make_array('wake up', 'sleep in')

In [None]:
# np.random.choice will choose a random element from the array given as argument

In [None]:
# Each time np.random.choice is invoked, you will get a possibly different random output!

In [None]:
# We can also supply an argument to specify the number of times we want to make a random choice.
# All results are stored in an array.
np.random.choice(mornings, 7)

In [None]:
# Let's count the number of 'wake up' days in a week.


In [None]:
# Let's count the number of 'sleep in' days in a week


In [None]:
# If we want to count the number of 'wake up' days and the number of 'sleep in' days in the same week, 
# we should store the result of np.random.choice(mornings, 7)
morning_week = np.random.choice(mornings, 7)
morning_week

In [None]:
sum(morning_week == 'wake up')

In [None]:
sum(morning_week == 'sleep in')

### Simulating the roll of a die

In [None]:
# Make die_faces the numbers 1 through 6 in an array

In [None]:
# Now np.random.choice(die_faces) is simulating the roll of a die.

In [None]:
def simulate_one_round():
    """Simulate one round of the dice game and return my winnings.

    Draws my roll and then your roll, each with np.random.choice from
    die_faces, and passes the pair to one_round.
    """
    # Arguments are evaluated left to right, so my roll is drawn first.
    return one_round(
        np.random.choice(die_faces),
        np.random.choice(die_faces),
    )

In [None]:
simulate_one_round()

### Appending Arrays

In [None]:
# Here are two arrays
first = np.arange(4)
second = np.arange(10, 17)

In [None]:
# We can append first with a value.  This places the value at the end of the array:

In [None]:
# Notice though, that first didn't actually change in the above cell.

In [None]:
# We can also append arrays with arrays:

### Repeated Betting ###

In [None]:
# Let's make results an 'empty array'.  This is an array that has no entries in it!

In [None]:
# Let's append to results the simulation of one round of our dice roll game and store the outcome in results:

In [None]:
# We can play over and over just by repeatedly running the cell...

## `For` Statements

In [None]:
# But a better way to do things over and over is to use iteration.  Here is a for-loop:
for pet in make_array('cat', 'dog', 'rabbit'):
    print('I love my ' + pet)

In [None]:
# Here is a tiresome way to do the same thing:
pet = make_array('cat', 'dog', 'rabbit').item(0)
print('I love my ' + pet)

pet = make_array('cat', 'dog', 'rabbit').item(1)
print('I love my ' + pet)

pet = make_array('cat', 'dog', 'rabbit').item(2)
print('I love my ' + pet)

In [None]:
# Let's write a for-loop to play our dice game 5 times.

In [None]:
# Let's write a for-loop to play our dice game 10000 times.

In [None]:
len(game_outcomes)

In [None]:
results = Table().with_column('My winnings', game_outcomes)

In [None]:
results

In [None]:
results.group('My winnings').barh('My winnings')

### Another example: simulating heads in 100 coin tosses

In [None]:
# Let's simulate a coin flip.

In [None]:
# Let's flip the coin 100 times, then count the number of heads.

In [None]:
# Let's define a function num_heads() which flips a coin 100 times, then counts the number of heads.

In [None]:
# Decide how many times you want to repeat the experiment

In [None]:
# Simulate that many outcomes by writing a for-loop which will run the experiment the number of times specified.

In [None]:
# Let's make a histogram!