In [None]:
###
# Lab 2: Analysis of Runtimes
# http://rosalind.info/classes/581/
###

# borrowing functions from week 1
import numpy as np

def fibonacci(n):
    """Return the n-th Fibonacci number (1-indexed, F(1) = F(2) = 1) bottom-up.

    A length-n array is filled left to right, each entry being the sum of the
    two entries before it, so the work grows linearly with n.

    Parameters
    ----------
    n : int
        Position in the sequence; must be >= 1.

    Returns
    -------
    numpy.float64
        The n-th Fibonacci number. The table is created with np.ones(), i.e.
        as floating point, so the value is only exact while it fits in a
        double's mantissa (up to roughly n = 78).

    Raises
    ------
    ValueError
        If n < 1 (previously n = 0 failed with an IndexError on an empty array).
    """
    if n < 1:
        raise ValueError("fibonacci(n) requires n >= 1, got {}".format(n))
    f = np.ones(n)  # f[0] = f[1] = 1 are the two seed values
    for i in range(2, n):
        f[i] = f[i-1] + f[i-2]
    return f[n-1]

def recursive_fibonacci(n):
    """Return the n-th Fibonacci number (1-indexed, F(1) = F(2) = 1) recursively.

    Every call above the base case spawns two further calls, which is what
    makes this version the slow one in the doubling experiment.

    Parameters
    ----------
    n : int
        Position in the sequence; must be >= 1.

    Returns
    -------
    int
        The n-th Fibonacci number.

    Raises
    ------
    ValueError
        If n < 1. Without this guard such an n skips the base case and the
        recursion only stops when Python raises RecursionError.
    """
    if n < 1:
        raise ValueError("recursive_fibonacci(n) requires n >= 1, got {}".format(n))
    if n <= 2:  # base cases F(1) and F(2)
        return 1
    return recursive_fibonacci(n - 1) + recursive_fibonacci(n - 2)


In [None]:
'''
Example 1. Implementation of General Purpose Single-Array Integer-Based Doubling Experiment for Fibonacci and 
Any Other Single-Array Integer Function
Background: A quick in silico experiment one can conduct to test the complexity scaling of a particular function such
as fibonacci (which we call a 'Single-Array Integer-Based function') is to build a general purpose 'wrapper' function
that takes in that particular function and a desired maximum array size, iterates over array sizes that double 
(simulating growing problem size) and records how the runtime changes as a function of input size.
'Fun' reading: https://www.nayuki.io/page/fast-fibonacci-algorithms

Given: A desired starting array size (e.g. n = 2), a maximum bound (n_max) and a function func (e.g. fibonacci(n))

Return: A matrix/nested list/2D np array/dict where each row is an 'observation' i.e. the first entry is the problem
size and the second entry is the runtime.

Problem:
(a) Write a function time_trial(n, func) that takes in a particular problem size and function func and returns the time
it took for that problem to run. The function time.process_time() may be useful here.

(b) Write a function doubling_test(n, n_max, func) that takes in n, n_max and func and uses the helper function above to 
return a list of pairs [[2, 100],[4, 400],[8, 1600],...] where the first entry of each nested pair is the size of the 
array and the second entry is the run time. 


'''
import time

# time_trial() has two arguments
# 1. n - integer value of the problem size, passed as the argument to func()
# 2. func() - takes an integer and performs a calculation on an array generated from that integer
# ex. func() could be fibonacci(n) which generates the fibonacci sequence and returns the n-th fibonacci number
# returns the time it takes func() to generate and solve a problem of size n

def time_trial(n, func):
    """Time a single call of ``func(n)``.

    Parameters
    ----------
    n : int
        Problem size, handed to ``func`` unchanged.
    func : callable
        One-argument function to be timed.

    Returns
    -------
    float
        Difference between two time.process_time() readings taken
        immediately before and after the call, in seconds.
    """
    t0 = time.process_time()
    func(n)  # the result is discarded; only the elapsed time matters
    return time.process_time() - t0

# doubling_test() has three arguments
# 1. n - integer for problem size (starting, e.g. n = 2)
# 2. n_max - exclusive upper bound for problem size (doubling stops once n >= n_max, e.g. n_max = 128)
# 3. func - the function to be timed, passed on to time_trial()
# returns a data matrix of value pairs; the problem size and corresponding run time

def doubling_test(n, n_max, func):
    """Time ``func`` on problem sizes n, 2n, 4n, ... while the size is below n_max.

    Parameters
    ----------
    n : int
        Starting problem size; must be >= 1.
    n_max : int
        Exclusive upper bound: the loop stops as soon as the size reaches
        or exceeds it.
    func : callable
        One-argument function, timed through time_trial().

    Returns
    -------
    numpy.ndarray
        Float array of shape (k, 2); column 0 is the problem size, column 1
        the measured time. When no size is below n_max the shape is (0, 2),
        so ``result[:, 0]`` keeps working.

    Raises
    ------
    ValueError
        If n < 1. Doubling zero or a negative number never reaches n_max,
        so the loop would otherwise not terminate.
    """
    if n < 1:
        raise ValueError("doubling_test requires a starting size n >= 1, got {}".format(n))
    data = []
    while n < n_max: # n = 2, 4, 8, ... 
        current = time_trial(n, func)
        print(n, current)
        # [2, 1e-05], [4, 2e-05], [8, 4e-05] etc.
        data.append([n, current])
        n *= 2
    # reshape keeps the result two-dimensional even when data is empty
    return np.array(data, dtype=float).reshape(-1, 2)

# Recursive version: the bound is kept small (sizes stay below 50).
print('The doubling test for recursive fibonacci')
recursive_data = doubling_test(n=2, n_max=50, func=recursive_fibonacci)

# Array-based version: sizes stay below 2000.
# NOTE: the name `interative_data` (sic) is read by the plotting cell further
# down, so it is kept as spelled.
print('The doubling experiment for faster fibonacci')
interative_data = doubling_test(n=2, n_max=2000, func=fibonacci)


In [None]:
'''
Example 2a. Study of Fibonacci Complexity Scaling, Plotting Results Linear and Log-Log in Matplotlib
Here we plot the runtimes for the recursive data on linear and log-log plot scales.
'''
%matplotlib inline
import matplotlib.pyplot as plt

# column 0 = problem size, column 1 = measured runtime
n_data = recursive_data[:,0]
time_data = recursive_data[:,1]

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(n_data, time_data)
ax.set_xlabel('problem size n')
ax.set_ylabel('runtime (s)')
ax.set_title('recursive_fibonacci: runtime vs. n (linear scale)')
# keep the original viewing window, but widen it when a measurement would
# otherwise fall outside (e.g. on a slower machine)
ax.set_xlim(0, max(40, 1.1 * n_data.max()))
ax.set_ylim(-0.1, max(2, 1.1 * time_data.max()))

# log-log alternative: swap these in for the scatter/limits above
#ax.scatter(np.log(n_data), np.log(time_data))
#ax.set_xlim(0, 5)
#ax.set_ylim(-15, 10)

In [None]:
'''
Example 2b. Study of Fibonacci Complexity Scaling, Plotting Results Linear and Log-Log in Matplotlib
'''
# a handy one-liner for showing plots in the page; must be called before import
%matplotlib inline
import matplotlib.pyplot as plt

# slice data
# column 0 = problem size, column 1 = measured runtime
n_data = interative_data[:,0]
time_data = interative_data[:,1]

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(n_data, time_data)
ax.set_xlabel('problem size n')
ax.set_ylabel('runtime (s)')
ax.set_title('fibonacci (array version): runtime vs. n (linear scale)')
# keep the original viewing window, but widen it when a measurement would
# otherwise fall outside (e.g. on a slower machine)
ax.set_xlim(-10, max(1500, 1.1 * n_data.max()))
ax.set_ylim(-0.0001, max(0.005, 1.1 * time_data.max()))

# log-log alternative: swap these in for the scatter/limits above
#ax.scatter(np.log(n_data), np.log(time_data))
#ax.set_xlim(0, 8)
#ax.set_ylim(-12, 0)



In [None]:
'''
Example 3. Two-sum and improvements
Background: Recall the in class implementation of two-sum naive that seems intuitively inefficient and can be shown
to run in O(n^2) (why?). In the next problem, we will construct a faster version.
'''
# Seed NumPy's global random generator once so that the np.random.randint()
# draws made by the functions below repeat when the cells are re-run in order.
np.random.seed(0)

# visualize on 2D array
def two_sum_naive(n):
    """Count zero-sum pairs in a freshly drawn random array by brute force.

    Draws n integers from [-100, 100] with np.random.randint and compares
    every element with every element that follows it, i.e. each index pair
    (i, j) with i < j is inspected exactly once.

    Parameters
    ----------
    n : int
        Number of random integers to draw.

    Returns
    -------
    int
        Number of index pairs whose two values add up to 0.
    """
    values = np.random.randint(low = -100, high = 100 + 1, size = n)
    pairs = 0
    for i, first in enumerate(values):
        # only look to the right of position i so no pair is visited twice
        for second in values[i + 1:]:
            if first + second == 0:
                pairs += 1
    return pairs

print('The doubling time for two sum naive')
naive_data = doubling_test(2, 4000, two_sum_naive)

n_data = naive_data[:,0]
time_data = naive_data[:,1]

%matplotlib inline
plt.figure(figsize=(6, 4))
#plt.scatter(n_data, time_data)
plt.scatter(np.log(n_data), np.log(time_data))

In [None]:
'''
Problem 1. Two-sum and improvements (continued)
Given: an array size n

Return: a function two_sum_improved(n) that builds a random array of integers - here we use 
np.random.randint(low = -100, high = 100 + 1, size = n) 
- and returns the number of pairs that sum to 0 (excluding duplicates).

Problem:
(a) Construct a list lst of random integers from -100 to 100 of size n and sort them.
(b) Using the provided binarySearch function (that takes in a list and a particular item of interest for which we are 
searching), construct a function two_sum_improved(n) that takes in an array size n and uses the lines from (a) to return the 
number of pairs that sum to 0. 

'''
# binarySearch - take for granted now that this runs in O(log(n)) 
# example: binarySearch([1,2,4], 2) returns True
# where alist = [1,2,4] and item = 2

def binarySearch(alist, item):
    """Report whether ``item`` occurs in the ascending-sorted sequence ``alist``.

    The candidate index range is cut in half on every pass: the middle
    element is compared with ``item`` and the half that cannot contain it is
    discarded.

    Parameters
    ----------
    alist : sequence
        Indexable sequence, assumed to be sorted in ascending order.
    item
        Value to look for.

    Returns
    -------
    bool
        True if an element equal to ``item`` was hit, False once the range
        is empty.
    """
    lo, hi = 0, len(alist) - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        if alist[mid] == item:
            return True
        if item < alist[mid]:
            hi = mid - 1  # continue in the left half
        else:
            lo = mid + 1  # continue in the right half
    return False

# two_sum_improved - faster replacement for the quadratic pair scan
# 1. generate random list of integers size n
# 2. sort list
# 3. initialize counter
# 4. for every element, binary-search the part of the sorted list to its
#    right for the element's negation and add the number of matches

def two_sum_improved(n):
    """Count zero-sum pairs in a freshly drawn random array in O(n log n).

    Draws n integers from [-100, 100] and sorts them. Then, for each position
    i, it locates the run of values equal to ``-lst[i]`` among the elements
    after position i with two binary searches (np.searchsorted, left and
    right edge). Looking only to the right counts every index pair (i, j),
    i < j, exactly once -- the same quantity two_sum_naive() returns.

    np.searchsorted is used rather than a True/False membership search
    because the array contains repeated values, so the *number* of matches
    is needed, not just whether one exists.

    Parameters
    ----------
    n : int
        Number of random integers to draw.

    Returns
    -------
    int
        Number of index pairs whose two values add up to 0.
    """
    lst = np.random.randint(low = -100, high = 100 + 1, size = n)
    lst.sort() # in place, ascending order, O(n log n) (NumPy's default sort)
    count = 0
    for i in range(len(lst)):
        tail = lst[i + 1:]  # a view, no copy; still sorted
        target = -lst[i]
        first = np.searchsorted(tail, target, side='left')
        past_last = np.searchsorted(tail, target, side='right')
        count += past_last - first  # how many partners lst[i] has to its right
    return int(count)


print('The doubling time for two sum improved')
improved_data = doubling_test(2, 4000, two_sum_improved)

# column 0 = problem size, column 1 = measured runtime
n_data = improved_data[:,0]
time_data = improved_data[:,1]

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(n_data, time_data)
ax.set_xlabel('problem size n')
ax.set_ylabel('runtime (s)')
ax.set_title('two_sum_improved: runtime vs. n (linear scale)')
#ax.scatter(np.log(n_data), np.log(time_data))

In [None]:
'''
Example 4. 3-Sum Improvements
Recall the implementation of three_sum_naive in class that ran in O(n^3). Here we will use the same logic as for two-sum to
develop a faster 3-sum.
'''

def three_sum_naive(n):
    """Count zero-sum triples in a freshly drawn random array by brute force.

    Draws n integers from [-100, 100] with np.random.randint and inspects
    every index triple (i, j, k) with i < j < k exactly once.

    Parameters
    ----------
    n : int
        Number of random integers to draw.

    Returns
    -------
    int
        Number of index triples whose three values add up to 0.
    """
    values = np.random.randint(low = -100, high = 100 + 1, size = n)
    size = len(values)
    triples = 0
    for i, first in enumerate(values):
        for j in range(i + 1, size):
            # third element is taken strictly to the right of position j
            for third in values[j + 1:]:
                if first + values[j] + third == 0:
                    triples += 1
    return triples


# let us use the same logic to improve this; what is the new runtime?


print('The doubling time for three sum naive')
naive_data = doubling_test(2, 500, three_sum_naive)

# column 0 = problem size, column 1 = measured runtime
n_data = naive_data[:,0]
time_data = naive_data[:,1]

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(n_data, time_data)
ax.set_xlabel('problem size n')
ax.set_ylabel('runtime (s)')
ax.set_title('three_sum_naive: runtime vs. n (linear scale)')
#ax.scatter(np.log(n_data), np.log(time_data))




In [None]:
'''
Problem 2. 3-Sum Improvements
Use binary search and analogous logic to improve this; write a function three_sum_improved(n) that computes the number of
triplets in a list that sum to 0 with a runtime faster than O(n^3).
'''
def three_sum_improved(n):
    """Count zero-sum triples in a freshly drawn random array in O(n^2 log n).

    Draws n integers from [-100, 100] and sorts them. Then, for every index
    pair (i, j) with i < j, it uses two binary searches (np.searchsorted,
    left and right edge) to find how many elements after position j equal
    ``-(lst[i] + lst[j])``. Restricting the search to the right of j counts
    each index triple i < j < k exactly once -- the same quantity
    three_sum_naive() returns.

    Parameters
    ----------
    n : int
        Number of random integers to draw.

    Returns
    -------
    int
        Number of index triples whose three values add up to 0.
    """
    lst = np.random.randint(low = -100, high = 100 + 1, size = n)
    lst.sort() # in place, ascending order, O(n log n) (NumPy's default sort)
    count = 0
    size = len(lst)
    for i in range(size):
        for j in range(i + 1, size):
            tail = lst[j + 1:]  # a view, no copy; still sorted
            target = -(lst[i] + lst[j])
            first = np.searchsorted(tail, target, side='left')
            past_last = np.searchsorted(tail, target, side='right')
            count += past_last - first  # matches for this (i, j) pair
    return int(count)



print('The doubling time for three sum improved')
improved_data = doubling_test(2, 500, three_sum_improved)

# column 0 = problem size, column 1 = measured runtime
n_data = improved_data[:,0]
time_data = improved_data[:,1]

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(n_data, time_data)
ax.set_xlabel('problem size n')
ax.set_ylabel('runtime (s)')
ax.set_title('three_sum_improved: runtime vs. n (linear scale)')
#ax.scatter(np.log(n_data), np.log(time_data))




In [None]:
'''
Problem 3.
Provide the big-O times for the following code blocks.
The following formula may be useful: n + nr + nr^2 + nr^3 + ... = n / (1 - r)   (valid for |r| < 1)

a.
def problem_a(n):
    sum = 0 
    i = n
    while i > 0:
        for j in range(n):
            sum += 1        
        i = i // 2
    return sum 

b.
def problem_b(n):
    sum = 0
    k = n
    while k > 0:
        for i in range(k):
            sum += 1
        k = k // 2
    return sum
    
c.    
def problem_c(n):
    sum = 0
    i = n/2
    while i < n:
        j = 1
        while j < n:
            k = 1
            while k < n:
                sum += 1
                k = k * 2
            j = j * 2    
        i += 1
    return sum 
'''

In [None]:
'''
Problem 4.
Give tilde approximations for the following quantities:
a. 2n + 3
b. 2log(n) + n + 3n^3 + 3
c. 1/n + log(n)
d. n^4 / n^3
e. n^2 + 2^n
'''

In [None]:
'''
HW #9
Write a function birthday(n) that takes an integer n and uses np.random.randint(low = 0, high = n) to
generate a random sequence of integers between 0 and n-1. Run experiments to validate the hypothesis that the number 
of integers generated before the first repeated value is found is ~ sqrt ( pi * n / 2).

a. Write a command that generates a random number between 0 and n - 1.

b. Write a function birthday(n) that returns the number of values generated until a value is repeated as a function of the range of possible values n.

c. Write a function birthday_sim(n) that repeats birthday(n) a total of n_sim = 10000 times.

d. Show that as n increases (e.g. with a doubling experiment), from n = 2 to n = 1000,
the value of birthday_sim(n) approaches sqrt(pi * n / 2).
'''
 
    