In [1]:
# Example 2.2.1.
# Imagine I flip a coin, with two possible outcomes: heads (H) or tails (T).
# What is the sample space for this experiment? What about for three flips in a row?
import itertools


def make_sample_space(outcomes, flips):
    """Enumerate every possible sequence of ``flips`` repeated trials.

    Args:
        outcomes: Iterable holding the possible results of a single trial.
        flips: Number of times the trial is repeated.

    Returns:
        A lazy ``itertools.product`` iterator. Each item is a tuple of
        length ``flips`` describing one sequence of results. Wrap it in
        ``list()`` if it has to be traversed more than once.
    """
    sample_space = itertools.product(outcomes, repeat=flips)
    return sample_space

outcomes = ("H", "T")
# Join each tuple of flips into a compact string such as "HHT" before printing.
print(list(map("".join, make_sample_space(outcomes, 1))))
print(list(map("".join, make_sample_space(outcomes, 3))))

['H', 'T']
['HHH', 'HHT', 'HTH', 'HTT', 'THH', 'THT', 'TTH', 'TTT']


In [2]:
# Example 2.2.2.
# For the experiment where I flip a coin three times in a row, consider the
# event that I get exactly one T. Which outcomes are in this event?

# Keep only the three-flip outcomes that contain exactly one tail.
outcomes_with_one_tail = [
    flips for flips in make_sample_space(outcomes, 3) if flips.count("T") == 1
]
print(["".join(flips) for flips in outcomes_with_one_tail])
# outcomes_with_one_tail is already a list, so len() applies to it directly.
print(len(outcomes_with_one_tail))

['HHT', 'HTH', 'THH']
3


In [3]:
# Example 2.2.3.
# Suppose I have two bowls, each containing 100 balls numbered 1 through
# 100. I pick a ball at random from each bowl and look at the numbers on them.
# How many elements are in the sample space for this experiment?

outcomes = list(range(1, 101))
# Count the (ball from bowl 1, ball from bowl 2) pairs one at a time.
print(sum(1 for _ in make_sample_space(outcomes, 2)))
# or len(outcomes) ** number_of_experiments
# len(outcomes) ** 2

10000


In [4]:
# Example 2.2.4.
# Which set of outcomes defines the event that the two balls add up to 200?

# only (100, 100)

In [5]:
# Example 2.2.5.
# Let E be the event that the two balls add up to 201. Which outcomes are
# elements of E?

# None, empty list.

In [6]:
# Example 2.3.1.
# Imagine I flip a fair coin, with two possible outcomes: heads (H) or
# tails (T). What is the probability that I get exactly one T if I
# flip the coin once? What if I flip it three times?

# With a single flip there is only one outcome with a T, and two outcomes
# in the sample space, so 1 / 2 = 0.5

# if one throws three times, there are 3 outcomes with exactly one T, and 8
# outcomes in the sample space, so 3 / 8.
outcomes = ("H", "T")
# Materialise the sample space once: it is iterated and measured several times.
three_throws = list(make_sample_space(outcomes, 3))
three_throws_with_one_tail = [x for x in three_throws if x.count("T") == 1]
print("EVENTS WITH ONE TAIL:", ["".join(x) for x in three_throws_with_one_tail])
print("SAMPLE SPACE:", ["".join(x) for x in three_throws])
# three_throws is already a list, so no extra list() copy is needed for len().
print("PROBABILITY ONE TAIL:", len(three_throws_with_one_tail) / len(three_throws))

EVENTS WITH ONE TAIL: ['HHT', 'HTH', 'THH']
SAMPLE SPACE: ['HHH', 'HHT', 'HTH', 'HTT', 'THH', 'THT', 'TTH', 'TTT']
PROBABILITY ONE TAIL: 0.375


In [7]:
# Example 2.3.2.
# Suppose I have two bowls, each containing 100 balls numbered 1 through
# 100. I pick a ball at random from each bowl and look at the numbers on
# them. What is the probability that the numbers add up to 200?

# There is only one outcome in the sample space in which both balls add
# up to 200: (100, 100). The sample space contains 100**2 = 10000
# outcomes, so the probability is 1/10000

In [8]:
# Example 2.3.3.
# Let E be the event that the numbers on the balls in the previous example
# add up to exactly 51. What is the probability of E?

# The pairs that add up to num are (1, num - 1), (2, num - 2) ... (num - 1, 1).
# For num = 51 the first ball can be any number from 1 to 50, so 50
# outcomes add up to 51.
# 50/10000 = 0.005

In [9]:
# Example 2.3.4.
# Suppose I choose a PIN containing exactly 4 digits, where each digit is
# chosen at random and is equally likely to be any of the 10 digits 0-9.
# What is the probability that my PIN contains four different digits?
# Enumerate every 4-digit PIN, then keep the ones with no repeated digit.
sample_space = list(make_sample_space(range(10), 4))
all_diff_digits = [pin for pin in sample_space if len(set(pin)) == 4]
favourable, total = len(all_diff_digits), len(sample_space)
print(favourable, "/", total)
print(favourable / total)

# or, as the text points out, for the four digits to be different one
# can choose as the first digits 0-9, for the second 0-9 minus the
# number selected as first digit, for the third 0-9 minus the digits
# used for the first and second position, etc. So,
#   10 * 9 * 8 * 7 = 5040

5040 / 10000
0.504


In [10]:
# Example 2.3.5.
# Suppose I have a bowl with 3 green balls (g), 2 blue balls (b), and
# 1 red ball (r). I draw a single ball at random from the bowl and
# report its color. What is the probability I got a blue ball?

# sample space is ['g1', 'g2', 'g3', 'b1', 'b2', 'r']
# so 2/6 = 1/3

In [11]:
# Example 2.4.1.
# I have a jar with four different colored balls and I choose one
# at random. Is the distribution over the color I get uniform or not?

# If each ball has 0.25 probability (uniform distribution), then yes.

In [12]:
# Example 2.4.2.
# What is the distribution over the sum of two fair dice?
# Is it uniform or not?

# No, you can get 2 or 12 with only one outcome each,
# (1, 1) and (6, 6).
# Sums of 6-8 can be made with many more outcomes, e.g. for six:
# (1, 5), (2, 4), (3, 3), (4, 2), (5, 1)
from collections import defaultdict

# Tally how many of the 36 (die one, die two) outcomes produce each sum.
sample_space = make_sample_space(range(1, 7), 2)
sum_dict = defaultdict(int)
for die_one, die_two in sample_space:
    sum_dict[die_one + die_two] += 1
print(sum_dict)

# Probability for sums of two dice: frequency of the sum over all outcomes.
# The total is the same for every sum, so compute it once before the loop.
total_outcomes = sum(sum_dict.values())
for k_sum, v_freq in sum_dict.items():
    print(k_sum, ":", round(v_freq / total_outcomes, 3))

defaultdict(<class 'int'>, {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 5, 9: 4, 10: 3, 11: 2, 12: 1})
2 : 0.028
3 : 0.056
4 : 0.083
5 : 0.111
6 : 0.139
7 : 0.167
8 : 0.139
9 : 0.111
10 : 0.083
11 : 0.056
12 : 0.028


In [13]:
# Exercise 2.1
# Suppose I have a dictionary with 5000 words in it, and I decide
# to generate a 5-word “sentence” by choosing each word at random
# from the dictionary (all words are equally likely).
#    a) What is the size of the sample space in this experiment?
#    b) If E is the event “my sentence starts with the word the”,
#       how many outcomes are there in E?
#    c) What is P(E)?
#    d) Let A be the event “my sentence ends with the word the”.
#       Are A and E mutually exclusive? If so, explain why. If
#       not, give an example of an outcome that belongs to both
#       A and E.

# a) If a word can occur up to five times in a row, then 5000 ** 5.
#    If a word can only be used once in a sentence, then 5000 *
#    4999 * 4998 * 4997 * 4996.
# b) 5000 ** 4.
# c) 5000 ** 4 / 5000 ** 5
# d) No, both conditions can be met simultaneously, as long
#    as a word can be used more than once in a sentence.
#    See a). (the, the, the, the, the) is an outcome that
#    belongs to both A and E.

In [14]:
# Exercise 2.2
# Which of the following are possible probability distributions?
# For each distribution, state whether it is uniform or not,
# and whether the distribution includes a certain or impossible
# event.
#    a) (1.3, 2)
#    b) (0.2, 0.2, 0.2, 0.2)
#    c) (0.2, 0.2, 0.2, 0.2, -0.1, 0.3)
#    d) (0.2, 0.2, 0.2, 0.2, 0.2)
#    e) (0, 1, 0)
#    f) (0)
#    g) (1)
#    h) (-.5, -.5)
#    i) (1/2, 1/2)
#    j) (1/2, 1/4)
#    k) (1/8, 1/4, 5/8)
#    l) (3/16, 1/8, 7/16)

# Remember that there are two rules. Each P(Ei) must lie between 0 and 1
# (inclusive), and sum(P(E1), P(E2), .. P(En)) = 1. So,
# which are possible probability distributions?
#    d, e, g, i and k (f is not: its only probability is 0, so it sums to 0)
# Which are uniform?
#    d, g and i (g trivially, since it has a single outcome)
# Which have certain or impossible events?
#    e, g

In [15]:
# Exercise 2.3
# Write down the full distribution over the sum of two fair
# dice. That is, complete the example we started at the end
# of the last section (Example 2.4.2).

# See Example 2.4.2 above for full distribution.

In [16]:
# Exercise 2.4
# Suppose I have a bowl with 4 green balls (g), 2 blue balls
# (b), and 3 red balls (r). I draw a single ball uniformly
# at random from the bowl and report its color. What is the
# probability distribution over the different colors?

# [4/9, 2/9, 3/9] for green, blue and red respectively.

In [17]:
# Exercise 2.5
# Suppose I have a bowl with 10 balls in it, all of which
# are either blue, red, or green. I draw a single ball
# uniformly at random and report its color. The number
# of balls of each color is such that the probability
# of getting a blue ball is 0.4 and the probability of
# getting a green ball is 0.3. What is the probability
# of getting a red ball? How many balls of each color are
# there?

# 0.4 * 10 = 4 blue balls, 0.3 * 10 = 3 green balls.
# The probability of getting a red ball is 0.3, and
# there are 3 red balls.

In [18]:
# Exercise 2.6
# Suppose I have a spinner divided into 8 equal-sized sections,
# colored as shown:
#    (has 8 sections, 4 of them red, 3 of them yellow, and 1 blue)
# Assume the arrow on the spinner is carefully balanced so it is
# not more likely to end up in one place than another. I spin the
# arrow and report the color of the section it ends up in. What
# is the sample space of equally likely outcomes? What is the
# probability of ending up in a red section?

# What is the sample space of equally likely outcomes?
#    The arrow landing in each of the 8 equal-sized sections (probability 1/8 each).
# What is the probability of ending up in a red section?
#    4/8 = 0.5

In [19]:
# Exercise 2.7
# Suppose I replace the colored sections on the spinner in
# the previous problem with just three sections: a red one
# that covers 29% of the surface, a yellow one that covers
# 62%, and a blue one that covers 9%:
#   How can we use the tools of probability theory introduced
# so far to compute the probability that, if we spin the arrow,
# it will end up in the red section?

# Think of the sample space as 100 equally likely sections, each covering
# 1% of the board. The probability for red is the 29 sections it covers
# divided by all the sections (100).
#   p(red) = 29/100 = 0.29
#   p(yellow) = 62/100 = 0.62
#   p(blue) = 9/100 = 0.09

In [20]:
# Exercise 2.8
# Suppose I have a group containing the following first-
# and second-year university students from various countries.
# The first 3 are male, and the last 4 female:
#
#    Name          Home country     Year
#    Andrew        UK               1
#    Sebastian     Germany          1
#    Wei           China            1
#    Fiona         UK               1
#    Lea           Germany          2
#    Ajitha        UK               1
#    Sarah         UK               2 
#
# I choose a student uniformly at random from the group.
# For each set of events given below, answer the following
# questions:
#
#     (i) Are the events mutually exclusive?
#    (ii) If so, do they cover all possible outcomes in the
#         sample space?
#   (iii) What is the probability of each event?
#    (iv) Do these events and their probabilities, taken
#         together, form a probability distribution?
#
#      a) E1 = the student is male,
#         E2 = the student is
#         female
#      b) E1 = the student is from the UK,
#         E2 = the student is from China
#      c) E1 = the student’s name is Andrew or Sebastian,
#         E2 = the student is from Germany
#      d) E1 = the student is a first year student,
#         E2 = the student is from Germany
#      e) E1 = the student’s name starts with A,
#         E2 = the student is from outside the UK,
#         E3 = the student’s name is Fiona or Sarah

# a)   i) yes
#     ii) yes
#    iii) 3/7, 4/7
#     iv) yes
# b)   i) yes
#     ii) no
#    iii) 4/7, 1/7
#     iv) no
# c)   i) no
#     ii) no
#    iii) 2/7, 2/7
#     iv) no
# d)   i) no
#     ii) no
#    iii) 5/7, 2/7
#     iv) no
# e)   i) yes
#     ii) yes
#    iii) 2/7, 3/7, 2/7
#     iv) yes