# Handout 01
#### Sara Díaz del Ser  
#### _(in collaboration with Paula Romero)_

In [1]:
from math import factorial, sqrt, pi, e
from tqdm.notebook import tqdm
import random
from collections import defaultdict, Counter
import timeit
import numpy as np

### Ex. 1 _(5 pts)_ Approximations to π

#### (a) _(2 pts)_ **Srinivasa Ramanujan calculates π**

The mathematician Srinivasa Ramanujan found an infinite series that can be used to
generate a numerical approximation of $\pi$.

Write a function called estimate_pi that uses this formula to compute and return an
estimate of π. It should use a while loop to compute terms of the summation until the
last term is smaller than 1e-15 (which is Python notation for $10^{−15}$). (‘Term’ refers
to the expression after the summation sign, not the summation itself.) You can check
the result by comparing it to math.pi. The error should be less than $10^{−15}$.

In [2]:
def estimate_pi():
    """Estimate pi using Srinivasa Ramanujan's infinite series"""
    sum = 0
    k = 0
    while (k < 1e-15):
        sum += (factorial(4 * k) * (1103 + 26390 * k)) / ((factorial(k) ** 4) * (396 ** (4 * k)))
        k += 1

    pi_inverse = 2 * np.sqrt(2) * sum / 9801
    return (pi_inverse) ** (-1)


In [3]:
estimate_pi()


3.1415927300133055

In [4]:
# Calculate the error
error = np.pi - estimate_pi()
if error < 1e-15:
    print("This function's error is smaller than 1e-15.\nError: ", error)


This function's error is smaller than 1e-15.
Error:  -7.642351240733092e-08


#### (b) _(1 pt)_ **Leibniz’s alternat(-ive/-ing) series for π**

In [5]:
def leibniz_pi(max_iter):
    """Estimate pi using Leibniz’s alternative series"""
    sum = 0
    n = 0
    while (n < max_iter):
        sum += ((-1) ** n) / (2 * n + 1)
        n += 1
    return sum * 4

In [6]:
# Determine how many iterations leibniz_pi() needs to find pi
for i in tqdm(range(0, 10000), desc="Finding max iteration number"):
    if (round(leibniz_pi(max_iter=i), 2) == 3.14):
        print("Number found: ", i)
        break

Finding max iteration number:   0%|          | 0/10000 [00:00<?, ?it/s]

Number found:  152


In [7]:
# Calculate the error
error = np.pi - leibniz_pi(max_iter=152)
print("Pi: ", leibniz_pi(max_iter=152), "\nError for max_iter=152: ", error)

Pi:  3.1350137774059244 
Error for max_iter=152:  0.006578876183868765


#### (c) _(2 pts)_ **How a (bad but tenacious) dart player calculates $\pi$: the Monte-Carlo Technique**

In [8]:
def monte_carlo_pi(size: int):
    """Estimate pi using the Monte Carlo Technique"""
    circle = 0
    total = 0

    for i in range(size):
        # The randomly generated points must be inside the limit region
        x = random.uniform(-1, 1)
        y = random.uniform(-1, 1)

        # Check if each point is in the circle and add to that count
        if (x ** 2 + y ** 2 <= 1):
            circle += 1

        # Add to total
        total += 1

    return 4 * (circle / total)


In [9]:
# For each differently-sized sample of points
sizes = [10 ** (i) for i in range(1, 7)]

for size in tqdm(sizes):
    # Estimate pi
    estimated_pi = monte_carlo_pi(size=size)
    print(f"Sample of size {size} estimates pi as: {estimated_pi}.\nMargin error of {pi - estimated_pi}\n")

  0%|          | 0/6 [00:00<?, ?it/s]

Sample of size 10 estimates pi as: 4.0.
Margin error of -0.8584073464102069

Sample of size 100 estimates pi as: 2.92.
Margin error of 0.2215926535897932

Sample of size 1000 estimates pi as: 3.164.
Margin error of -0.02240734641020703

Sample of size 10000 estimates pi as: 3.1548.
Margin error of -0.01320734641020671

Sample of size 100000 estimates pi as: 3.14804.
Margin error of -0.006447346410206833

Sample of size 1000000 estimates pi as: 3.141372.
Margin error of 0.00022065358979306282



### Ex. 3 _(4 pts)_ The Birthday Paradox


#### (a) _(1 pts)_ **Write a function that checks for duplicates**

In [10]:
def has_duplicates(input_list:list) -> bool:
    """Checks if list has duplicates"""
    return (len(set(input_list)) != len(input_list))

In [11]:
# Example
list_1, list_2 = [2,3,4,2], [2,3,4]
print(f"Does the list: {list_1} have duplicates? {has_duplicates(list_1)}")
print(f"Does the list: {list_2} have duplicates? {has_duplicates(list_2)}")

Does the list: [2, 3, 4, 2] have duplicates? True
Does the list: [2, 3, 4] have duplicates? False


In [12]:
# Instead of using sets, we can build a function from scratch to find duplicates
def has_duplicates_from_scratch(input_list:list) -> bool:
    """Checks if list has duplicates without using set()"""
    unique = []
    for i in range(len(input_list)):
        if input_list[i] in unique:
            # has duplicates, return True
            return True
        else:
            # add to unique list
            unique.append(input_list[i])
    return False

In [13]:
# Example
list_1, list_2 = [2,3,4,2], [2,3,4]
print(f"Does the list: {list_1} have duplicates? {has_duplicates_from_scratch(list_1)}")
print(f"Does the list: {list_2} have duplicates? {has_duplicates_from_scratch(list_2)}")

Does the list: [2, 3, 4, 2] have duplicates? True
Does the list: [2, 3, 4] have duplicates? False


#### (b.i) _(1 pts)_ **Estimate the probability on the basis generating 10000 trials of n = 27 birthdays and determine the fraction of trials where at least two people share a birthday.**

In [14]:
def bday_probability(n:int=27,trials:int=10000) -> int:
    """Estimates probability of at least two people having the same birthday based on
    10000 randomly-generated trials"""
    matches = 0
    for k in range(trials):
        # Generate 365 random birthdays in a list
        birthdays = [ random.randint(1,365) for i in range(n)]
        # Check for duplicates
        if has_duplicates(birthdays):
            matches +=1
    return matches/trials

#### (b.ii) _(1 pts)_ **Compare your estimates to the approximated probability and the exact probabiliity**

In [15]:
def approx_bday_prob(n:int=27, m:int=365) -> int:
    """Approximates probability of two people having the same birthday"""
    return 1 - e**(-(n**2)/(2*m))

In [16]:
def exact_bday_prob(students:int=27, m:int=365) -> int:
    """Calculates the exact probability of at least two people sharing the
    same birthday"""
    prob = 1
    for n in range(students+1):
        prob *= (m - n)/m
    return 1-prob

In [17]:
# Comparison of all thrree methods
print(f"Comparison of the methods of determining the probability of two bithdays coinciding on the same day, "
      f"in a class of 27 students\n")
print(f"a) Generation of 10000 random trials: {bday_probability()}\n")
print(f"b) Approximated probability: {approx_bday_prob()}\nDifference with a): {abs(bday_probability() - approx_bday_prob())}\n")
print(f"c) Exact probability: {exact_bday_prob()}\nDifference with a): {abs(bday_probability() - exact_bday_prob())}\n")


Comparison of the methods of determining the probability of two bithdays coinciding on the same day, in a class of 27 students

a) Generation of 10000 random trials: 0.6233

b) Approximated probability: 0.6316162690635074
Difference with a): 0.003716269063507416

c) Exact probability: 0.6544614723423995
Difference with a): 0.02476147234239945



#### (b.iii) _(1 pts)_ **Estimate the probability of at least three people have a non-unique birthday**

In [18]:
def count_duplicates(input_list:list) -> bool:
    """Counts the number of non-unique items in a list"""
    # Find the non-unique instances in the list, and calc total
    return sum([each for each in Counter(input_list).values() if each > 1 ])

In [19]:
def birthday_paradox(n:int=27,trials:int=10000,non_unique:int=3) -> int:
    """Estimates probability of at least thee people having a non-unique birthday"""
    matches = 0
    for k in range(trials):
        # Generate 365 random birthdays in a list
        birthdays = [ random.randint(1,365) for i in range(n)]
        # Check if there are at least 3 non-unique birthdays
        if count_duplicates(input_list=birthdays)>=3:
            matches +=1
    return matches/trials

In [20]:
# Estimated probability of at least three people in a 27 person class sharing a non-unique birthday
birthday_paradox()

0.2413

### Ex. 5 _(4 pts)_ Anagrams

#### (a) _(3 pts)_ **Write a program to find anagrams in a list.** 
Limit your output to words having at least 6 anagrams.


In [21]:
def anagram_finder(file:str, limit:int=6) -> dict:
    """Find all anagrams in a list of words"""
    with open (file, 'r') as f:
        word_list = f.read().split()

        processed_list = [ ("".join(sorted(list(word))), word) for word in word_list ]

        anagrams = defaultdict(list)
        for key, word in processed_list:
            anagrams[key].append(word)

        return [ word_list for word_list in anagrams.values() if len(word_list)>limit ]

In [22]:
start = timeit.default_timer()
results = anagram_finder(file="words.txt")
stop = timeit.default_timer()
print('Time: ', stop - start)

Time:  0.3808194839999999


In [23]:
# Print out results 
print(f'\nResults:\n')
print_lst = [ print(each) for each in results ]



Results:

['abets', 'baste', 'bates', 'beast', 'beats', 'betas', 'tabes']
['acers', 'acres', 'cares', 'carse', 'escar', 'races', 'scare', 'serac']
['alerts', 'alters', 'artels', 'estral', 'laster', 'ratels', 'salter', 'slater', 'staler', 'stelar', 'talers']
['algins', 'aligns', 'lasing', 'liangs', 'ligans', 'lingas', 'signal']
['amens', 'manes', 'manse', 'means', 'mensa', 'names', 'nemas']
['anestri', 'nastier', 'ratines', 'retains', 'retinas', 'retsina', 'stainer', 'stearin']
['angriest', 'astringe', 'ganister', 'gantries', 'granites', 'ingrates', 'rangiest']
['apers', 'asper', 'pares', 'parse', 'pears', 'prase', 'presa', 'rapes', 'reaps', 'spare', 'spear']
['ardebs', 'bardes', 'beards', 'breads', 'debars', 'sabred', 'serdab']
['ares', 'arse', 'ears', 'eras', 'rase', 'sear', 'sera']
['aridest', 'astride', 'diaster', 'disrate', 'staider', 'tardies', 'tirades']
['ariled', 'derail', 'dialer', 'laired', 'railed', 'redial', 'relaid']
['arles', 'earls', 'lares', 'laser', 'lears', 'rales', 

#### (b) _(1 pts)_ **Modify your code so it prints the largest set of anagrams first**

In [24]:
def anagram_finder(file:str, limit:int=6,large_first:bool=False) -> dict:
    """Find all anagrams in a list of words"""
    with open (file, 'r') as f:
        word_list = f.read().split()

        processed_list = [ ("".join(sorted(list(word))), word) for word in word_list ]

        anagrams = defaultdict(list)
        for key, word in processed_list:
            anagrams[key].append(word)

        # Make sure it can be printed in order
        if large_first:
            anagrams = { x: anagrams[x] for x in sorted(anagrams, key=lambda x: len(anagrams[x]), reverse=True) }

        return [ word_list for word_list in anagrams.values() if len(word_list)>limit ]

In [25]:
start = timeit.default_timer()
# results = anagram_finder(file="words.txt", large_first=False)
results_sorted = anagram_finder(file="words.txt", large_first=True)
stop = timeit.default_timer()
print('Time: ', stop - start)

Time:  0.3163872000000003


In [26]:
# Print out results 
print(f'\nResults:\n')
print_lst = [ print(each) for each in results_sorted ]



Results:

['alerts', 'alters', 'artels', 'estral', 'laster', 'ratels', 'salter', 'slater', 'staler', 'stelar', 'talers']
['apers', 'asper', 'pares', 'parse', 'pears', 'prase', 'presa', 'rapes', 'reaps', 'spare', 'spear']
['least', 'setal', 'slate', 'stale', 'steal', 'stela', 'taels', 'tales', 'teals', 'tesla']
['capers', 'crapes', 'escarp', 'pacers', 'parsec', 'recaps', 'scrape', 'secpar', 'spacer']
['estrin', 'inerts', 'insert', 'inters', 'niters', 'nitres', 'sinter', 'triens', 'trines']
['acers', 'acres', 'cares', 'carse', 'escar', 'races', 'scare', 'serac']
['anestri', 'nastier', 'ratines', 'retains', 'retinas', 'retsina', 'stainer', 'stearin']
['arles', 'earls', 'lares', 'laser', 'lears', 'rales', 'reals', 'seral']
['aspers', 'parses', 'passer', 'prases', 'repass', 'spares', 'sparse', 'spears']
['ates', 'east', 'eats', 'etas', 'sate', 'seat', 'seta', 'teas']
['carets', 'cartes', 'caster', 'caters', 'crates', 'reacts', 'recast', 'traces']
['earings', 'erasing', 'gainers', 'reagins'

#### (c) _(1 pts)_ **Which set of 8 letters contains the most anagrams and what are they?**
Hint:the solution has seven anagrams


In [27]:
# Find the ones with 8 letters
eight_letters = [ each for each in results if len(each)== 8 ]

# Print the biggest one
print(f"Set of 8 letters that contains the most anagrams:\n{max(eight_letters, key=len)}")

Set of 8 letters that contains the most anagrams:
['acers', 'acres', 'cares', 'carse', 'escar', 'races', 'scare', 'serac']


### Ex. 4 _(2 pts)_ Making Triangles

#### _(3 pts)_ Imagine you are given a stick of length 1 (meter) and you break the stick randomly at two points breaking leaving you with 3 smaller sticks of random length. How likely is it that the three sticks can be combined to form 3 sides of a triangle?

Write Python code to simulate 1 000 000 trials splitting the stick randomly into 3 pieces and estimate the probability that a triangle can be formed from the three pieces

In [28]:
# Calculate probability of triangle
def is_triangle(list_num:list) -> bool:
    """Determine if three given sticks are a triangle"""
    angle = np.arccos(min(list_num[1],list_num[0])/max(list_num[1],list_num[0])) + \
            np.arccos(min(list_num[2],list_num[0])/max(list_num[2],list_num[0])) + \
            np.arccos(min(list_num[1],list_num[2])/max(list_num[1],list_num[2]))
    if round(angle) == round(np.pi):
        return True
    return False

def prob_is_triangle(trials:int=1e6)-> float:
    """Simulate 1 000 000 trials splitting the stick randomly into 3 pieces
    and estimate the probability that a triangle can be formed from the three pieces."""
    count = 0
    # Simulate 1e6 trials
    for i in tqdm(range(int(trials))):
        # Generate three random pieces of stick
        random_int = [ random.randint(1,100)/100 for i in range(3)]
        three = [ n/sum(random_int) for n in random_int ]
        if is_triangle(three):
            count+=1
    # Estimate the probability
    return count/trials


In [29]:
prob_is_triangle()

  0%|          | 0/1000000 [00:00<?, ?it/s]

0.465666

### Ex. 2 _(5 pts)_ Happy numbers

Happy numbers are defined by the following process: Start with a positive number. Replace the number with the sum of the squares of its digits and repeat until you reach the number 1 or the process enters a loop not involving the number 1. A number that reaches 1 is called a happy number all other numbers are unhappy. 

#### (a) _(2 pts)_ Find all happy numbers between 1 and 100 by writing a function `is_happy(n)` that checks whether a number is happy or unhappy. It should return `true` if the the number is happy and `false` otherwise.

In [30]:
def is_happy(n):
    """Checks if number is happy, using a WHILE LOOP"""
    num = set()
    while n != 1:
        n = sum(int(i)**2 for i in str(n))
        if n in num:
            return False
        num.add(n)
    return True

In [31]:
# Print out happy numbers from 1 to 100
happy_numbers = [ i for i in range(1,101) if is_happy(i) ]
happy_numbers

[1, 7, 10, 13, 19, 23, 28, 31, 32, 44, 49, 68, 70, 79, 82, 86, 91, 94, 97, 100]

#### (b) _(1 pt)_ Solve the problem (a) in two different ways using i. while-loops and ii. recursion.

In [32]:
def is_happy2(n):
    """Checks if number is happy, using a recursion"""
    n = sum(int(i)**2 for i in str(n))
    if n == 1:
        return True
    
    # If the result of the sum is not one, but it's a single digit [2,9], stop recursion
    elif n in range(2,10):
        return False
    
    # Continue recursion
    else:
        result = is_happy2(n)
        return result

In [33]:
happy_numbers = [ i for i in range(1,101) if is_happy2(i) ]
happy_numbers

[1, 7, 10, 13, 19, 23, 28, 31, 32, 44, 49, 68, 70, 79, 82, 86, 91, 94, 97, 100]

#### (c) _(2 pts)_ Modify (either the iterative or the recursive version of is_happy) to take an additional argument p (i.e. def how_happy(n,p): and instead squaring digits raise each digit to the power of p and then take the sum of the powers. 

In [34]:
def how_happy(n, p):
    """Checks how happy a number is, using a recursion"""
    n = sum(int(i)**p for i in str(n))
    if n == 1:
        return 'Happy'
    
    elif n == 4:
        return 'Almost happy'
    
    # If the result of the sum is not one, but it's a single digit [2,9], stop recursion
    elif (n in range(2,10)) and (n != 4):
        return 'Unhappy'
    
    # Continue recursion
    else:
        result = how_happy(n, p)
        return result

In [35]:
happy_numbers = [ how_happy(i, 2) for i in range(1,101) ]
happy_numbers

['Happy',
 'Almost happy',
 'Unhappy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Happy',
 'Almost happy',
 'Almost happy',
 'Happy',
 'Unhappy',
 'Unhappy',
 'Happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Happy',
 'Almost happy',
 'Unhappy',
 'Unhappy',
 'Happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Happy',
 'Almost happy',
 'Unhappy',
 'Happy',
 'Happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 'Almost happy',
 '