## Assignment 1 - Module 2 Submission code

### Defining a function that simulates the waiting time for one randomly generated sequence - it takes the probability values of the letters and the target pattern to match

In [39]:
# Waiting time of finding a pattern in say 'n' (10) sequences

'''
Waiting Time:
The time it would take until a particular pattern is first encountered in a sequence.
By time, we usually mean how long a sequence has to be generated till we see our pattern of interest.
'''

# Step 1: Generate sequences letter by letter - given values of P(R) and P(Y)
# Step 2: Extend each sequence until it ends with the pattern, and store the length of the sequence at that point
# Step 3: Calculate the average of the stored lengths over the n runs
import random
import numpy as np


def waiting_time(probability, pattern):
    """Simulate one random sequence and return how long it took for `pattern` to appear.

    Letters are drawn one at a time according to `probability` and appended to
    a sequence until the sequence ends with `pattern`.

    Parameters
    ----------
    probability : dict
        Maps each letter to its probability. The values must add up to 1
        (within a tolerance of 0.01).
    pattern : str
        Target pattern. Every letter must be a key of `probability` and have
        a strictly positive probability.

    Returns
    -------
    int
        Length of the generated sequence at the moment it first ends with
        `pattern` (0 for an empty pattern).

    Raises
    ------
    AssertionError
        If the probabilities don't add up to 1, or if the pattern contains a
        letter that is missing from `probability` or can never be drawn.
    """
    # Check that the probability values add up to 1 (small tolerance for rounding)
    assert abs(sum(probability.values()) - 1) < 0.01, "Probability values don't add upto 1!!!!"

    # Sanity check on the pattern
    for letter in pattern:
        assert letter in probability, "Sequence contains letters that are not in composition/probability items"
        # A letter that is never drawn would keep the loop below running forever
        assert probability[letter] > 0, "Pattern contains a letter with zero probability - it would never be found"

    sequence = ''

    # Keep adding letters until the sequence ends with the pattern.
    # A sequence shorter than the pattern can never end with it, so the
    # minimum-length condition is covered by endswith().
    while not sequence.endswith(pattern):
        ran = random.random()  # Random value in the range [0, 1)
        cumulative = 0
        for letter, p in probability.items():
            cumulative += p  # Upper edge of this letter's slice of [0, 1)
            if ran < cumulative:  # e.g. with P(R)=0.5, 'ran' < 0.5 picks R
                sequence += letter
                break  # Letter found for this draw, stop scanning
        # If the probabilities sum to slightly less than 1, a draw can fall
        # past the last slice; no letter is added and we simply draw again.

    return len(sequence)
     


### Calculating expected waiting time for pattern 'RR' over 100 randomly generated sequences, with P(R) = 0.5

In [43]:
# Letter probabilities: P(R) is chosen, P(Y) is its complement
probability = {'R': 0.5}
probability['Y'] = 1 - probability['R']
print(probability)

# Target pattern
pattern = "RR"

# Number of independent sequences to simulate - the average waiting time is taken over all of them
n = 100

# One simulated waiting time per generated sequence
waiting_time_total = [waiting_time(probability, pattern) for _ in range(n)]

print("Number of sequences generated: {}".format(n))
# Use n in the message (instead of a hard-coded 100) so it stays correct if n changes
print("Waiting times for all {} sequences".format(n), waiting_time_total)
print("Average waiting time of {} sequences: {:.3f}".format(n, np.mean(waiting_time_total)))
    

{'R': 0.5, 'Y': 0.5}
Number of sequences generated: 100
Waiting times for all 100 sequences [5, 2, 6, 6, 4, 7, 5, 11, 9, 3, 4, 2, 2, 11, 2, 2, 16, 6, 5, 2, 3, 2, 13, 6, 3, 9, 9, 4, 13, 7, 2, 9, 2, 6, 7, 15, 4, 8, 6, 2, 7, 3, 21, 7, 14, 15, 4, 4, 8, 5, 2, 4, 5, 2, 4, 2, 2, 6, 2, 2, 5, 2, 3, 4, 8, 3, 6, 4, 15, 8, 12, 13, 7, 6, 3, 11, 6, 13, 2, 13, 5, 5, 3, 3, 2, 8, 28, 4, 3, 6, 8, 4, 2, 4, 5, 17, 4, 2, 2, 3]
Average waiting time of 100 sequences: 6.210


### As we see above, the expected waiting time for 'RR' when P(R)=0.5 is around 6 - which is similar to my calculated result

### Calculating expected waiting time for 'RY' over 100 randomly generated sequences, with P(R) = 0.5

In [46]:
# Letter probabilities: P(R) is chosen, P(Y) is its complement
probability = {'R': 0.5}
probability['Y'] = 1 - probability['R']
print(probability)

# Target pattern
pattern = "RY"

# Number of independent sequences to simulate - the average waiting time is taken over all of them
n = 100

# One simulated waiting time per generated sequence
waiting_time_total = [waiting_time(probability, pattern) for _ in range(n)]

print("Number of sequences generated: {}".format(n))
# Use n in the message (instead of a hard-coded 100) so it stays correct if n changes
print("Waiting times for all {} sequences".format(n), waiting_time_total)
print("Average waiting time of {} sequences: {:.3f}".format(n, np.mean(waiting_time_total)))
    

{'R': 0.5, 'Y': 0.5}
Number of sequences generated: 100
Waiting times for all 100 sequences [4, 4, 4, 3, 5, 2, 2, 5, 2, 2, 2, 3, 4, 4, 3, 2, 2, 12, 3, 6, 5, 4, 4, 2, 5, 5, 9, 3, 5, 6, 2, 4, 4, 5, 2, 2, 7, 3, 2, 2, 3, 4, 5, 6, 5, 4, 6, 2, 3, 3, 2, 4, 5, 3, 6, 6, 3, 3, 5, 4, 3, 6, 4, 2, 3, 4, 3, 5, 3, 4, 5, 6, 7, 3, 8, 4, 2, 3, 3, 3, 2, 6, 4, 2, 6, 4, 6, 3, 3, 2, 3, 5, 5, 6, 7, 3, 3, 3, 9, 4]
Average waiting time of 100 sequences: 4.040


### As we see above, the expected waiting time for 'RY' when P(R)=0.5 is around 4 - which is similar to my calculated result

### Calculating expected waiting time for 'RR' over 100 randomly generated sequences, with P(R) = 0.75

In [51]:
# Letter probabilities: P(R) is chosen, P(Y) is its complement
probability = {'R': 0.75}
probability['Y'] = 1 - probability['R']
print(probability)

# Target pattern
pattern = "RR"

# Number of independent sequences to simulate - the average waiting time is taken over all of them
n = 100

# One simulated waiting time per generated sequence
waiting_time_total = [waiting_time(probability, pattern) for _ in range(n)]

print("Number of sequences generated: {}".format(n))
# Use n in the message (instead of a hard-coded 100) so it stays correct if n changes
print("Waiting times for all {} sequences".format(n), waiting_time_total)
print("Average waiting time of {} sequences: {:.3f}".format(n, np.mean(waiting_time_total)))
    

{'R': 0.75, 'Y': 0.25}
Number of sequences generated: 100
Waiting times for all 100 sequences [2, 2, 11, 3, 3, 4, 2, 4, 6, 2, 3, 2, 2, 3, 6, 3, 2, 2, 2, 2, 2, 7, 3, 10, 2, 3, 2, 4, 4, 2, 2, 5, 3, 2, 2, 2, 2, 4, 2, 2, 2, 4, 5, 3, 2, 3, 2, 2, 6, 3, 2, 2, 2, 2, 2, 6, 2, 2, 4, 2, 4, 2, 2, 2, 2, 2, 2, 3, 2, 4, 3, 4, 2, 5, 2, 3, 2, 4, 2, 4, 2, 3, 2, 2, 2, 7, 3, 2, 2, 3, 2, 5, 2, 2, 5, 4, 3, 2, 2, 8]
Average waiting time of 100 sequences: 3.080


### As we see above, the simulated average waiting time for 'RR' when P(R)=0.75 is about 3.08 - which is close to my calculated result of around 3.11

### Calculating expected waiting time for 'RY' over 100 randomly generated sequences, with P(R) = 0.75

In [57]:
# Letter probabilities: P(R) is chosen, P(Y) is its complement
probability = {'R': 0.75}
probability['Y'] = 1 - probability['R']
print(probability)

# Target pattern
pattern = "RY"

# Number of independent sequences to simulate - the average waiting time is taken over all of them
n = 100

# One simulated waiting time per generated sequence
waiting_time_total = [waiting_time(probability, pattern) for _ in range(n)]

print("Number of sequences generated: {}".format(n))
# Use n in the message (instead of a hard-coded 100) so it stays correct if n changes
print("Waiting times for all {} sequences".format(n), waiting_time_total)
print("Average waiting time of {} sequences: {:.3f}".format(n, np.mean(waiting_time_total)))
    

{'R': 0.75, 'Y': 0.25}
Number of sequences generated: 100
Waiting times for all 100 sequences [3, 2, 11, 14, 3, 2, 3, 5, 3, 3, 5, 3, 3, 4, 3, 9, 2, 15, 5, 9, 2, 4, 2, 5, 14, 4, 16, 3, 5, 7, 10, 5, 2, 4, 6, 4, 6, 6, 4, 3, 3, 3, 3, 6, 5, 8, 4, 5, 10, 8, 3, 7, 5, 2, 4, 5, 3, 3, 5, 2, 3, 3, 4, 20, 11, 8, 9, 2, 5, 3, 3, 5, 6, 5, 2, 8, 2, 3, 16, 5, 6, 2, 7, 9, 2, 5, 2, 3, 2, 2, 8, 9, 2, 4, 10, 7, 4, 12, 10, 6]
Average waiting time of 100 sequences: 5.480


### As we see above, the simulated average waiting time for 'RY' when P(R)=0.75 is about 5.48 - which is close to my calculated result of around 5.33