In [1]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
# NumPy 1.25 moved VisibleDeprecationWarning into np.exceptions, and NumPy 2.0
# removed the top-level np.VisibleDeprecationWarning alias.  Look the class up
# in whichever namespace this NumPy version provides so the cell runs on both.
warnings.simplefilter(
    action='ignore',
    category=getattr(np, 'exceptions', np).VisibleDeprecationWarning,
)

## Comparison ##

In [None]:
# A boolean value can be either True or False

In [1]:
# Capitalization matters!

In [None]:
# Comparison expressions evaluate to booleans

In [None]:
# Assignment and comparison are different!
3 = 3

In [2]:
# Sometimes, different types can be correctly compared:

3 == 3.0

True

In [None]:
10 != 2

In [None]:
# Named objects can be used in comparisons:
x = ...
y = ...

In [None]:
# For inequalities, you can do a < x < b.  This is equivalent to a < x AND x < b.


(The comparison `12 < x < 20` is equivalent to `12 < x and x < 20`.)

## Comparisons with arrays

In [5]:
# Let's assign an array of pets
pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')

In [None]:
# We can do comparison expressions for arrays.  This returns an array of booleans!


In [None]:
# 'True' is equal to 1, while 'False' is equal to 0
True == 1

2

In [None]:
# This counts the number of entries of pets which are 'dog'
sum(pets == 'dog')

True

In [22]:
# When we apply np.count_nonzero to an array of boolean values, we are really counting the number of True entries

<function numpy.count_nonzero(a, axis=None, *, keepdims=False)>

In [None]:
x = np.arange(20, 31)

In [None]:
# Can you imagine what the output will look like?
x > 28

In [None]:
# How about for this?
sum(x > 28)

## Rows & Apply

In [2]:
# Let's load the family heights data
heights = Table.read_table('family_heights.csv')
heights.show(3)

family,father,mother,child,children,order,sex
1,78.5,67,73.2,4,1,male
1,78.5,67,69.2,4,2,female
1,78.5,67,69.0,4,3,female


In [3]:
# Let's grab the first row:


Row(family='1', father=78.5, mother=67.0, child=73.200000000000003, children=4, order=1, sex='male')

In [4]:
# The data type of a row


datascience.tables.Row

In [5]:
# You can use indices to grab entries of a row using .item:


78.5

In [6]:
# Or you can use column labels:


78.5

In [7]:
# Let's define only_heights to be a table with only the heights of father, mother, and child
only_heights = ...

father,mother,child
78.5,67.0,73.2
78.5,67.0,69.2
78.5,67.0,69.0
78.5,67.0,69.0
75.5,66.5,73.5
75.5,66.5,72.5
75.5,66.5,65.5
75.5,66.5,65.5
75.0,64.0,71.0
75.0,64.0,68.0


In [8]:
# What if we want to see the average height of each family?
# Let's use apply with one argument to apply a function to each row.

array([ 72.9       ,  71.56666667,  71.5       ,  71.5       ,
        71.83333333,  71.5       ,  69.16666667,  69.16666667,
        70.        ,  69.        ,  69.83333333,  69.16666667,
        68.66666667,  67.83333333,  67.33333333,  68.5       ,
        67.5       ,  67.16666667,  66.66666667,  65.33333333,
        65.33333333,  70.5       ,  72.83333333,  72.        ,
        71.66666667,  71.66666667,  70.83333333,  68.66666667,
        70.33333333,  69.5       ,  68.83333333,  68.83333333,
        68.33333333,  70.        ,  68.66666667,  68.        ,
        67.66666667,  67.66666667,  67.33333333,  66.5       ,
        66.33333333,  66.66666667,  70.33333333,  67.33333333,
        69.33333333,  69.        ,  70.16666667,  70.        ,
        68.73333333,  70.        ,  69.5       ,  69.4       ,
        69.4       ,  69.06666667,  68.9       ,  68.16666667,
        67.5       ,  67.16666667,  70.5       ,  70.16666667,
        69.66666667,  66.66666667,  68.        ,  66.6 

In [9]:
# This is an array that we can use to add a column to our table!

father,mother,child,average
78.5,67.0,73.2,72.9
78.5,67.0,69.2,71.5667
78.5,67.0,69.0,71.5
78.5,67.0,69.0,71.5
75.5,66.5,73.5,71.8333
75.5,66.5,72.5,71.5
75.5,66.5,65.5,69.1667
75.5,66.5,65.5,69.1667
75.0,64.0,71.0,70.0
75.0,64.0,68.0,69.0


## Conditional Statements

In [None]:
x = 20

In [None]:
# Here's a conditional statement.  Notice that after if, we have a comparison expression.  
# It's either True or False.
# Because x >= 18 is True, code following the if line will run.

if x >= 18:    
    print('You can legally vote.')

In [None]:
# Because x >= 21 is False, code following the if line will not run!

if x >= 21:
    print('You can legally drink.')

In [None]:
# Conditional statements are useful in functions when you want the output behavior to be different
# depending on the input.


In [10]:
# Once functions return a value, no further computation is done!


'You can legally vote.'

'You can legally vote.'

In [None]:
# elif and else are ways of dealing with many cases.


In [None]:
# Let's work with bike trips
# read_table is called on the Table class itself (no throwaway empty table
# is built first), matching how the family heights data was loaded.
trip = Table.read_table('trip.csv')
trip.show(3)

In [None]:
# Let's write a function 'trip_kind' to classify bike trips as 'round trips' or 'one way'.

# Now apply trip_kind, using 'Start Station' and 'End Station' as arguments:


In [None]:
# For each start station, for trips under 10 minutes, how many are round trip and how many are one way?


## Simulation

Let's play a game: we each roll a die. 

If my number is bigger: you pay me a dollar.

If they're the same: we do nothing.

If your number is bigger: I pay you a dollar.

Steps:
1. Find a way to simulate two dice rolls.
2. Compute how much money we win/lose based on the result.
3. Do steps 1 and 2 10,000 times.

### Conditional Statements

In [None]:
# Let's play a game where we each roll a die.  If my roll is higher than your roll, I win a dollar.
# If your roll is higher, then I lose a dollar (to you).
# If our rolls are tied, then I win nothing and lose nothing.
# Work in progress:

def one_round(my_roll, your_roll):
    """Work-in-progress scoring: return 1 when my roll beats your roll.

    Ties and losses are not handled yet, so for those inputs the function
    falls off the end and returns None.
    """
    i_won = my_roll > your_roll
    if i_won:
        return 1

In [None]:
one_round(4, 3)

In [None]:
one_round(2, 6)

In [None]:
# Final correct version
def one_round(my_roll, your_roll):
    """Return my net winnings, in dollars, for one round of the dice game.

    my_roll, your_roll: the two numbers rolled (the values are not validated).

    Returns 0 for a tie, -1 when your roll is higher, and 1 when my roll is
    higher.  If none of the comparisons holds, no branch runs and the
    function returns None.
    """
    if my_roll == your_roll:
        # Tie: nobody pays.
        return 0
    elif my_roll < your_roll:
        # You rolled higher: I pay you a dollar.
        return -1
    elif my_roll > your_roll:
        # I rolled higher: you pay me a dollar.
        return 1

In [None]:
one_round(1, 1)

In [None]:
one_round(6, 5)

In [None]:
one_round(7, -1)

### Random Selection

In [None]:
mornings = make_array('wake up', 'sleep in')

In [None]:
np.random.choice(mornings)

In [None]:
np.random.choice(mornings)

In [None]:
np.random.choice(mornings)

We can also pass an argument that specifies how many times to make a random choice:

In [None]:
np.random.choice(mornings, 7)

In [None]:
sum(np.random.choice(mornings, 7) == 'wake up')

In [None]:
sum(np.random.choice(mornings, 7) == 'sleep in')

In [None]:
morning_week = np.random.choice(mornings, 7)
morning_week

In [None]:
sum(morning_week == 'wake up')

In [None]:
sum(morning_week == 'sleep in')

### Simulating the roll of a die

In [None]:
die_faces = np.arange(1, 7)

In [None]:
np.random.choice(die_faces)

In [None]:
def simulate_one_round():
    """Roll one die for each player and return my winnings for that round.

    Each roll is an independent random draw from die_faces; the pair is
    scored by one_round.
    """
    # Arguments are evaluated left to right, so my roll is drawn first and
    # your roll second.
    return one_round(
        np.random.choice(die_faces),
        np.random.choice(die_faces),
    )

In [None]:
simulate_one_round()

### Appending Arrays

In [None]:
first = np.arange(4)
second = np.arange(10, 17)

In [None]:
np.append(first, 6)

In [None]:
first

In [None]:
np.append(first, second)

In [None]:
first

In [None]:
second

### Repeated Betting ###

In [None]:
results = make_array()

In [None]:
results = np.append(results, simulate_one_round())
results

In [None]:
results = np.append(results, simulate_one_round())
results

## `For` Statements

In [None]:
for pet in make_array('cat', 'dog', 'rabbit'):
    print('I love my ' + pet)

In [None]:
pet = make_array('cat', 'dog', 'rabbit').item(0)
print('I love my ' + pet)

pet = make_array('cat', 'dog', 'rabbit').item(1)
print('I love my ' + pet)

pet = make_array('cat', 'dog', 'rabbit').item(2)
print('I love my ' + pet)

In [None]:
# Play 5 simulated rounds, collecting my winnings from each round in an array.
game_outcomes = make_array()

for i in np.arange(5):
    # np.append returns a new array instead of changing its argument,
    # so the name is reassigned to keep the extended result.
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

In [None]:
# Repeat the game 10,000 times, starting again from a fresh array.
game_outcomes = make_array()

for i in np.arange(10000):
    # np.append returns a new array instead of changing its argument,
    # so the name is reassigned to keep the extended result.
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

In [None]:
len(game_outcomes)

In [None]:
results = Table().with_column('My winnings', game_outcomes)

In [None]:
results

In [None]:
results.group('My winnings').barh('My winnings')

### Another example: simulating heads in 100 coin tosses

In [None]:
coin = make_array('heads', 'tails')

In [None]:
sum(np.random.choice(coin, 100) == 'heads')

In [None]:
# Simulate one outcome

def num_heads(num_tosses=100):
    """Simulate tossing the coin and return how many tosses came up heads.

    num_tosses: how many tosses to simulate in one experiment.  Defaults to
        100, the value this function previously hard-coded, so existing
        calls of num_heads() behave as before.

    Returns the number of simulated tosses equal to 'heads', as an integer.
    """
    tosses = np.random.choice(coin, num_tosses)
    # np.count_nonzero counts the True entries in one vectorized pass, rather
    # than adding the booleans one at a time with the built-in sum.
    return np.count_nonzero(tosses == 'heads')

In [None]:
# Decide how many times you want to repeat the experiment

repetitions = 10000

In [None]:
# Simulate that many outcomes

# Collect the number of heads from each repetition of the experiment.
outcomes = make_array()

for i in np.arange(repetitions):
    # np.append returns a new array instead of changing its argument,
    # so the name is reassigned to keep the extended result.
    outcomes = np.append(outcomes, num_heads())

In [None]:
heads = Table().with_column('Heads', outcomes)
heads.hist(bins = np.arange(29.5, 70.6))

## Optional: Advanced `where` ##

In [None]:
ages = make_array(16, 22, 18, 15, 19, 15, 16, 21)
age = Table().with_column('Age', ages)

In [None]:
age

In [None]:
age.where('Age', are.above_or_equal_to(18))

In [None]:
voter = ages >= 18

In [None]:
voter

In [None]:
age.where(voter)

In [None]:
is_voter = are.above_or_equal_to(18)

In [None]:
type(is_voter)

In [None]:
is_voter(22)

In [None]:
is_voter(3)

In [None]:
age.apply(is_voter, 'Age')

In [None]:
ages >= 18

In [None]:
voter

In [None]:
def my_voter_function(x):
    """Return whether the age x is at least 18."""
    minimum_age = 18
    return minimum_age <= x

In [None]:
age.where('Age', are.above_or_equal_to(18))

In [None]:
age.where(voter)

In [None]:
age.where('Age', my_voter_function)