In [None]:
import numpy as np
import matplotlib.pyplot as plt

## Problem 1: Function for an n-sided die
Write a function that simulates a single roll of an n-sided die. The input to the function is
the vector of probabilities for each side, p = [p1, p2, ..., pn].

The output is the face of the die that comes up after a single roll.

In [None]:
def nSidedDie(p):
    """Simulate a single roll of a loaded die with ``len(p)`` faces.

    Args:
        p: Sequence of face probabilities ``[p1, ..., pn]``; the entry at
            0-based position ``i`` is the probability of rolling ``i + 1``.

    Returns:
        The face that came up, a value between 1 and ``len(p)``.
    """
    # Faces are numbered from 1, so the last face equals the number of sides.
    faces = np.arange(1, len(p) + 1)
    return np.random.choice(faces, p=p)

In [None]:
# Face probabilities of the 7-sided loaded die used below (they sum to 1).
p = [0.10, 0.15, 0.20, 0.05, 0.30, 0.10, 0.10]

In [None]:
def rollNSideDie(p, N):
    """Roll an n-sided die ``N`` times and plot the results.

    Two stem plots are drawn: figure 3 shows how many times each face
    occurred, and figure 4 shows the relative frequencies (the empirical
    probability mass function).

    Args:
        p: Sequence of face probabilities ``[p1, ..., pn]``.
        N: Number of rolls to simulate.
    """
    n_faces = len(p)
    faces = np.arange(1, n_faces + 1)
    samples = np.random.choice(faces, size=N, p=p)

    # One unit-wide bin per face. The edges are derived from len(p) instead
    # of being fixed for a 7-sided die, so every face gets its own bin no
    # matter how many sides the die has.
    counts, _ = np.histogram(samples, bins=np.arange(1, n_faces + 2))

    plt.figure(3)
    plt.stem(faces, counts)
    plt.xlabel('The die value')
    plt.ylabel('die value occurrences')

    plt.figure(num=4)
    # Relative frequency of each face approximates its probability.
    plt.stem(faces, counts / N)
    plt.title('Stem plt - die: Probability mass function')
    plt.xlabel('die value')
    plt.ylabel('Probability')
    plt.show()
    

In [None]:
# Simulate 10,000 rolls of the die described by p and draw both stem plots.
rollNSideDie(p, 10000)

## Problem 2: Number of rolls needed to get a "7" with two dice

You roll a pair of fair dice and calculate the sum of the faces. You are interested in the number of rolls it takes until you get a sum of "7". The first time you get a "7", the experiment is considered a "success". You record the number of rolls and stop the experiment.

You repeat the experiment N = 100,000 times. Each time you keep track of the number of rolls it takes to have "success".

In [None]:
def single_trial():
    """Roll a pair of fair dice until the faces add up to 7.

    Returns:
        The number of rolls made, including the successful one.
    """
    rolls = 0
    while True:
        rolls += 1
        # randint's upper bound is exclusive, so each die shows 1..6.
        first = np.random.randint(1, 7)
        second = np.random.randint(1, 7)
        if first + second == 7:
            return rolls

def n_die_trials(N):
    """Repeat the roll-until-7 experiment ``N`` times and plot its PMF.

    Each experiment calls ``single_trial()`` and records how many rolls were
    needed. The relative frequency of each roll count from 1 to 24 is then
    drawn as a stem plot on figure 5.

    Args:
        N: Number of independent experiments to run.
    """
    rolls_needed = [single_trial() for _ in range(N)]

    # Only roll counts 1..24 are shown; longer runs are left off the plot
    # but still count toward N in the denominator.
    max_rolls = 24
    roll_values = np.arange(1, max_rolls + 1)
    # Half-integer edges (0.5, 1.5, ..., 24.5) centre one bin on each integer
    # roll count, so no two counts share a bin and counts[i] belongs to
    # roll_values[i]. The stem for "1 roll" is therefore drawn at x = 1.
    edges = np.arange(0.5, max_rolls + 1)
    counts, _ = np.histogram(rolls_needed, bins=edges)

    plt.figure(5)
    plt.stem(roll_values, counts / N)
    plt.title('Stem plot - Getting 7 from pair of dice: Probability mass function')
    plt.xlabel('Number of rolls to get 7')
    plt.ylabel('Probability')
    plt.show()

In [None]:
# Number of times the roll-until-7 experiment is repeated.
N = 100000
n_die_trials(N)

Note that the problem models a geometric distribution such that

$$
P_X(k) =
\begin{cases}
p(1-p)^{k-1} & \text{for } k = 1, 2, 3, \ldots \\
0 & \text{otherwise}
\end{cases}
$$
            
where p = 6/36 = 1/6 is the probability that a single roll of the pair of dice sums to 7.

## Problem 3: Getting 50 heads when tossing 100 coins

Consider the following experiment:
You toss 100 fair coins and record the number of "heads". This is considered a single experiment. If you get exactly 50 heads, the experiment is considered a "success"

You repeat the experiment N = 100,000 times. After the N experiments are completed count the total successes, and calculate the probability of success

In [None]:
def one_trial(num_coins=100, target_heads=50):
    """Toss fair coins once and check for an exact number of heads.

    Args:
        num_coins: How many fair coins are tossed in this experiment.
        target_heads: The exact number of heads that counts as a success.

    Returns:
        A truthy value if exactly ``target_heads`` coins came up heads,
        otherwise a falsy one.
    """
    # Each toss is 0 (tails) or 1 (heads); randint's upper bound is exclusive.
    tosses = np.random.randint(0, 2, num_coins)
    # ndarray.sum() adds in native code instead of looping in Python.
    return tosses.sum() == target_heads

In [None]:
def n_trials(n):
    """Run ``one_trial()`` ``n`` times and count how many runs succeeded.

    Args:
        n: Number of experiments to perform.

    Returns:
        The number of experiments for which ``one_trial()`` was true.
    """
    return sum(one_trial() for _ in range(n))

In [None]:
# Estimate the success probability as the fraction of the N experiments
# that produced exactly 50 heads.
N = 100000
print("The probability of getting exactly 50 heads in 100 coin tosses:",n_trials(N) / N)

## Problem 4: The Password Hacking Problem

Your computer system uses a 4-letter password for login. For our purposes the
password is restricted to lower case letters of the alphabet only. It is easy to
calculate that the total number of passwords which can be produced is $n = 26^4$.
- A hacker creates a list of $m$
random 4-letter words, as candidates for
matching the password. Note that it is possible that some of the words may
be duplicates. The number $m$ that you must use has been given to you.
- You are given your own 4-letter password and you are going to check
if the hacker’s list contains at least one word that matches your password.
This process of checking is considered one experiment. If a word in the list
matches your password, the experiment is considered a success. Repeat the
experiment N = 1000 times and find the probability that at least one of
the words in the hacker's list will match your password.
- The hacker creates a longer list of $k \cdot m$ random 4-letter words. The
numbers $k$ and $m$ have been given to you. Repeat the previous experiment
N = 1000 times and find the probability that at least one of the words in
the hacker's list will match your password.
- Repeat the previous experiment N = 1000 times to find the
approximate number $m$ of words that must be contained in the hacker's
list so that the probability of at least one word matching the password is
$p = 0.5$. You should do this by trial and error: assume a value for $m$ and
calculate the corresponding probability as you did in the previous part. The
answer will be the value of $m$ that makes this probability approximately
equal to $p = 0.5$.

In [None]:
import random
import string

In [None]:
def random_string(string_length = 4):
    """Build a random word of lowercase ASCII letters.

    Args:
        string_length: Number of letters in the word (4 by default).

    Returns:
        A string of ``string_length`` letters, each drawn independently
        from ``a``-``z``.
    """
    alphabet = string.ascii_lowercase
    picked = [random.choice(alphabet) for _ in range(string_length)]
    return ''.join(picked)

In [None]:
def single_trial(m):
    """Run one password-guessing experiment.

    A random 4-letter password is drawn first, followed by ``m`` random
    4-letter guesses for the hacker's list.

    Args:
        m: Number of words in the hacker's list.

    Returns:
        True if the password appears among the hacker's words, else False.
    """
    password = random_string(4)
    # Duplicate guesses do not affect the outcome, so a set is sufficient.
    guesses = {random_string(4) for _ in range(m)}
    return password in guesses

In [None]:
# Quick check of a single experiment. Note that N is passed as the size of
# the hacker's list here (single_trial's m), not as a number of repetitions.
N = 1000
single_trial(N)

In [None]:
def n_trials(n, m):
    """Repeat the password-guessing experiment and count the successes.

    Args:
        n: Number of experiments to run.
        m: Number of words in the hacker's list for each experiment.

    Returns:
        How many of the ``n`` experiments found the password in the list.
    """
    return sum(single_trial(m) for _ in range(n))

In [None]:
n = 1000  # number of experiments
m = 80000  # number of words in the hacker's list
k = 7  # multiplier for the longer list used in the next experiment
result = n_trials(n, m)
# Fraction of experiments in which the list contained the password.
print(result / n)

In [None]:
# Same experiment with the longer list of k * m words.
result = n_trials(n, k * m)
print(result / n)

In [None]:
# Trial-and-error guess for the list size that gives a match probability
# of about 0.5.
m = 80000 * 4
result = n_trials(n, m)
print(result / n)

In [None]:
# Report the list size found by trial and error; the target probability is 0.5.
print("The value of m so that the probability p is approximately 0.5 is", m)

1