# Coding 

### Convert a decimal number to binary 

In [None]:
def decimal_to_binary(N):
    """Return the binary digits of a non-negative integer.

    Args:
        N: Non-negative integer to convert.

    Returns:
        List of 0/1 digits ordered from least significant to most
        significant bit (e.g. ``6 -> [0, 1, 1]``). Zero yields ``[0]``.

    Raises:
        ValueError: If ``N`` is negative.
    """
    if N < 0:
        raise ValueError('Negative numbers not supported.')
    if N == 0:
        return [0]

    binary_digits = []
    while N > 0:
        # divmod uses floor division, so N stays an integer on both
        # Python 2 and Python 3 (plain "/" is true division on Python 3).
        N, bit = divmod(N, 2)
        binary_digits.append(bit)
    return binary_digits

### Write a program to print out the power set of a set.

The power set of a set S is the set of all possible subsets of S, including the empty set and S itself.

In [None]:
def get_powerset(S):
    """Return every subset of ``S`` as a list of lists.

    The power set is built incrementally: starting from the empty subset,
    each element doubles the collection by being prepended to every subset
    found so far. An empty input yields ``[[]]`` (the power set of the
    empty set contains only the empty set).

    Args:
        S: Any iterable of elements.

    Returns:
        List of ``2 ** len(S)`` lists, one per subset. The input is not
        modified.
    """
    powerset = [[]]
    for element in S:
        # The comprehension is evaluated in full before the extension, so
        # it only sees the subsets that do not yet contain ``element``.
        powerset += [[element] + subset for subset in powerset]
    return powerset

In [None]:
# Demo: list every subset of a three-element list
get_powerset([7,3,8])

### Find the optimal time to buy and sell a stock in order to maximize profit (or minimize loss) given a time series of stock prices. You must buy before you sell (can't be the same day).

In [None]:
def find_optimal_transaction_points(stock_vals):
    """Find the buy/sell indices that maximize profit in a price series.

    A single pass tracks the lowest price seen so far and the best
    (profit, buy index, sell index) triple; the buy index always precedes
    the sell index. The series must hold at least two values.

    Args:
        stock_vals: Indexable sequence of prices ordered in time.

    Returns:
        Tuple ``(max_profit, t_buy, t_sell)``. The profit is negative when
        prices only fall (the smallest possible loss is reported).
    """
    # Seed with the only transaction possible within the first two points
    best = (stock_vals[1] - stock_vals[0], 0, 1)
    lowest_price, lowest_day = stock_vals[0], 0

    for day in range(1, len(stock_vals)):
        price = stock_vals[day]

        # Selling today after buying at the cheapest earlier point
        gain = price - lowest_price
        if gain > best[0]:
            best = (gain, lowest_day, day)

        # Only afterwards may today's price become the new buy candidate,
        # so buying and selling never land on the same day
        if price < lowest_price:
            lowest_price, lowest_day = price, day

    return best

In [None]:
import numpy as np
# Ten random sample prices, scaled by 10
vals = np.random.random(10) * 10
# NOTE(review): find_max_diff is not defined in the visible part of this
# notebook; this line raises NameError unless it is defined elsewhere — confirm.
print(find_max_diff(vals))
print(find_optimal_transaction_points(vals))

### Find the square root of a number without using the square root function.

In [None]:
#######################
# Binary search method
#######################
def calc_sqrt_binary_search(n, tol=0.01):
    """Approximate the square root of ``n`` by bisection.

    The root is bracketed between ``factor1`` and ``factor2`` and the
    bracket is halved until the squared estimate is within a relative
    tolerance of ``n``.

    Args:
        n: Non-negative number whose square root is wanted.
        tol: Maximum allowed relative error ``|sqrt**2 - n| / n``.

    Returns:
        Tuple ``(sqrt, n_iter)``: the estimate and the number of bisection
        steps taken. ``n == 0`` returns ``(0.0, 0)`` immediately.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError('Negative numbers not supported.')
    if n == 0:
        # The relative-error test below divides by n, so zero is answered
        # directly instead of entering the loop.
        return 0.0, 0

    # The root of n lies between 1 and n (in either order)
    if n > 1:
        factor1, factor2 = 1.0, n
    else:
        factor1, factor2 = n, 1.0

    # Calculate initial guess
    n_iter = 0
    sqrt = (factor1 + factor2) / 2.0
    n_est = sqrt**2

    # Iterate
    while abs(n_est - n) / float(n) > tol:
        # Keep the half of the bracket that still contains the root
        if n_est > n:
            factor2 = sqrt
        else:
            factor1 = sqrt

        # Calculate new values
        sqrt = (factor1 + factor2) / 2.0
        n_est = sqrt**2
        n_iter += 1

    return sqrt, n_iter

In [None]:
#####################################
# Newton-Raphson root finding method
#####################################
def calc_sqrt_newton_raphson(n, tol=0.01):
    """Approximate the square root of ``n`` with Newton-Raphson iteration.

    Each step replaces the estimate with the mean of the estimate and
    ``n / estimate`` until the squared estimate is within a relative
    tolerance of ``n``.

    Args:
        n: Non-negative number whose square root is wanted.
        tol: Maximum allowed relative error ``|sqrt**2 - n| / n``.

    Returns:
        Tuple ``(sqrt, n_iter)``: the estimate and the number of iterations
        taken. ``n == 0`` returns ``(0.0, 0)`` immediately.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError('Negative numbers not supported.')
    if n == 0:
        # The relative-error test below divides by n, so zero is answered
        # directly instead of entering the loop.
        return 0.0, 0

    # Initialize variables
    n_iter = 0
    sqrt = 1.0
    factor = n
    n_est = sqrt**2

    # Iterate
    while abs(n_est - n) / float(n) > tol:
        # sqrt and factor always multiply to n; averaging them is the
        # Newton step x <- (x + n / x) / 2
        sqrt = (sqrt + factor) / 2.0
        factor = n / sqrt

        # Calculate new values
        n_est = sqrt**2
        n_iter += 1

    return sqrt, n_iter

### Find all "curious numbers". A curious number is a number that is equal to the sum of the factorial of each of its digits.

In [None]:
######################
# Calculate factorial
######################
# More or less language agnostic version
def calc_factorial1(n):
    """Return ``n!`` computed with a plain multiplication loop.

    Args:
        n: Integer whose factorial is wanted.

    Returns:
        The product ``1 * 2 * ... * n``; 1 when ``n`` is less than 2
        (the loop body never runs).
    """
    fac = 1
    # range (not the Python 2-only xrange) keeps this runnable on both
    # Python 2 and Python 3; multiplying by 1 is skipped as a no-op.
    for i in range(2, n + 1):
        fac *= i
    return fac

# Python/numpy specific version (overflows for bigger values of n)
import numpy as np
def calc_factorial2(n):
    """Return ``n!`` as the product of a numpy integer range.

    The product is accumulated in 64-bit integers, so the result silently
    wraps around for ``n > 20`` (21! exceeds the int64 range).

    Args:
        n: Integer whose factorial is wanted.

    Returns:
        ``numpy.int64`` product of 1..n; 1 when the range is empty.
    """
    # np.arange replaces the Python 2-only xrange; the explicit dtype makes
    # the overflow point independent of the platform's default integer.
    return np.arange(1, n + 1, dtype=np.int64).prod()

In [None]:
###############################################
# Determine whether or not a number is curious
###############################################
def is_curious(n):
    """Tell whether ``n`` equals the sum of the factorials of its digits.

    Digits are peeled off from the least significant end and their
    factorials (via ``calc_factorial1``) are accumulated. Zero and
    negative numbers are never considered curious.

    Args:
        n: Number to test.

    Returns:
        True if ``n`` is positive and equals its digit-factorial sum,
        otherwise False.
    """
    remaining = n
    digit_factorial_total = 0

    while remaining > 0:
        remaining, last_digit = divmod(remaining, 10)
        digit_factorial_total += calc_factorial1(last_digit)

    return bool(digit_factorial_total == n and n > 0)

In [None]:
###########################
# Find all curious numbers
###########################
def find_all_curious():
    """Return every curious number, found by brute force.

    The search is bounded because a d-digit number has a digit-factorial
    sum of at most 9! * d. For d = 8 that is 2,903,040, which is smaller
    than the smallest 8-digit number, so only numbers up to 9! * 7 need to
    be tested.

    Returns:
        List of all curious numbers in increasing order.
    """
    upper_bound = 7 * calc_factorial1(9)

    return [
        candidate
        for candidate in range(upper_bound + 1)
        if is_curious(candidate)
    ]

In [None]:
######################################
# Find all curious numbers, optimized
######################################
import math
def get_digit_list(n):
    """Return the decimal digits of ``n``, most significant first.

    The digits are peeled off with integer arithmetic only. Sizing the
    list from a floating-point log10 miscounts digits for values just
    below a power of ten (e.g. 10**15 - 1), which would leave a spurious
    leading zero.

    Args:
        n: Non-negative number to split into digits.

    Returns:
        List of digits, e.g. ``123 -> [1, 2, 3]``; ``0 -> [0]``.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError('Negative numbers not supported.')
    if n == 0:
        return [0]

    digit_list = []
    while n > 0:
        n, digit = divmod(n, 10)
        digit_list.append(digit)

    # Digits were collected least significant first
    digit_list.reverse()
    return digit_list

def find_all_curious2():
    """Return every curious number, using cached digit-combination sums.

    Compared with the brute-force search this version:
      1. pre-computes the factorial of each digit 0-9;
      2. skips a number when its sorted digit combination has already
         produced a curious number;
      3. caches the digit-factorial sum per sorted digit combination so
         permutations of the same digits reuse it.

    The search bound is 9! * 7 (a d-digit number's digit-factorial sum is
    at most 9! * d).

    Returns:
        List of all curious numbers in increasing order.
    """
    upper_bound = calc_factorial1(9) * 7

    # Lookup table: factorial_of[d] == d!
    factorial_of = [calc_factorial1(d) for d in range(10)]

    found = []
    matched_keys = set()   # digit combinations that already matched
    sum_by_key = {}        # digit combination -> digit-factorial sum

    for candidate in range(upper_bound + 1):
        # Sorting makes all permutations of the same digits share one key
        key = tuple(sorted(get_digit_list(candidate)))

        if key in matched_keys:
            continue

        if key not in sum_by_key:
            sum_by_key[key] = sum(factorial_of[d] for d in key)

        if sum_by_key[key] == candidate:
            found.append(candidate)
            matched_keys.add(key)

    return found

In [None]:
%%time

# NOTE(review): find_all_curious3 is not defined in the visible part of this
# notebook; this cell raises NameError unless it is defined elsewhere — confirm.
find_all_curious3()

In [None]:
%%time

# Time the cached-combination search
find_all_curious2()

In [None]:
%%time

# Time the brute-force search
find_all_curious()

### Given a list of potentially overlapping time periods, find the fraction of time that is unoccupied.

In [None]:
# Sample input: deliberately unsorted, with overlapping and nested periods
time_periods = [(7,11), (1,5), (20, 25), (9, 17), (8, 10)]  # (start_time, end_time)

In [None]:
def find_unoccupied_time(time_periods, as_fraction=False):
    """Measure the gaps between potentially overlapping time periods.

    Periods are visited in order of increasing start time while tracking
    the latest end time seen; whenever a period starts after that end, the
    difference is a gap.

    Args:
        time_periods: Iterable of ``(start_time, end_time)`` pairs in any
            order.
        as_fraction: When True, return the gap time divided by the overall
            span (earliest start to latest end) instead of the absolute
            gap time.

    Returns:
        Total unoccupied time between the earliest start and the latest
        end, or that amount as a fraction of the span when ``as_fraction``
        is True. Empty input (or a zero-length span) yields 0.
    """
    time_periods_sorted = sorted(time_periods, key=lambda x: x[0])
    if not time_periods_sorted:
        # Nothing scheduled, hence no gaps to measure
        return 0.0 if as_fraction else 0

    unoccupied_time = 0
    earliest_start_time = time_periods_sorted[0][0]
    latest_end_time = time_periods_sorted[0][1]

    # Loop over time periods in order of increasing start time
    for time_period in time_periods_sorted:
        current_start_time = time_period[0]
        current_end_time = time_period[1]

        # A period starting after everything seen so far has ended
        # leaves a gap
        if current_start_time > latest_end_time:
            unoccupied_time += current_start_time - latest_end_time

        # A nested period must not pull the end time backwards
        latest_end_time = max(latest_end_time, current_end_time)

    if not as_fraction:
        return unoccupied_time

    total_span = latest_end_time - earliest_start_time
    if total_span == 0:
        return 0.0
    return unoccupied_time / float(total_span)

In [None]:
# Demo: run on the sample periods defined above
find_unoccupied_time(time_periods)

### Given a list of potentially overlapping time periods, find the maximum number of overlaps at the same time. 

In [None]:
# Sample input: deliberately unsorted, with overlapping and nested periods
time_periods = [(7,11), (1,5), (20, 25), (9, 17), (8, 10)]  # (start_time, end_time)

In [None]:
def find_max_overlaps(time_periods):
    """Return the largest number of periods that are active at once.

    Periods are processed in order of increasing start time. The end
    times of the periods still running are kept in a min-heap; before
    counting, every period that has ended by the current start time is
    dropped. A period ending exactly when another starts does not count
    as overlapping it.

    Args:
        time_periods: Iterable of ``(start_time, end_time)`` pairs in any
            order.

    Returns:
        Maximum number of simultaneously active periods; 0 for empty
        input.
    """
    import heapq

    active_end_times = []  # min-heap of end times of running periods
    max_active_meetings = 0

    # The pruning below is only valid when start times are non-decreasing,
    # so iterate over the sorted periods rather than the raw input.
    for time_period in sorted(time_periods, key=lambda x: x[0]):
        current_start_time = time_period[0]
        current_end_time = time_period[1]

        # Add latest meeting to active list
        heapq.heappush(active_end_times, current_end_time)

        # Prune meetings that are no longer active
        while active_end_times and active_end_times[0] <= current_start_time:
            heapq.heappop(active_end_times)

        # Update max active meetings
        max_active_meetings = max(max_active_meetings, len(active_end_times))

    return max_active_meetings

In [None]:
# Demo: run on the sample periods defined above
find_max_overlaps(time_periods)

## Find unknown bit string 

You have an unknown, fixed bit string of length N (the secret). You can query it via an XOR function that, for a given query, computes sum(secret ^ query ^ noise), where query is a bit string of length N and noise is a random bit string that is mostly zeros but has 1's with small probability (sum counts the number of non-zero bits). Write a function that finds the secret.

In [102]:
import random

def xor_bits(b1, b2):
    """Return the element-wise XOR of two bit sequences.

    Elements are paired with ``zip``, so the result is as long as the
    shorter of the two inputs.
    """
    result = []
    for left, right in zip(b1, b2):
        result.append(left ^ right)
    return result

def xor_sum(b1, b2, noise_level=0):
    """Count the set bits of ``b1 ^ b2 ^ noise``.

    One noise bit is drawn per element of ``b1``; each is set when
    ``random.random()`` falls below ``noise_level``, so the default of 0
    adds no noise.
    """
    noise_bits = [random.random() < noise_level for _ in b1]
    clean_xor = xor_bits(b1, b2)
    return sum(xor_bits(clean_xor, noise_bits))

# Probably naive solution
def find_secret(secret, noise_level=0.1, num_avg=100):
    """Recover a bit string from noisy XOR-sum queries, one bit at a time.

    For each position two queries are compared: an all-zero query and the
    same query with only that position set. Setting the position lowers
    the XOR sum when the secret bit is 1 and raises it when the bit is 0,
    so the sign of the averaged difference reveals the bit.

    Args:
        secret: The bit string to recover; it is only accessed through
            ``len`` and ``xor_sum``.
        noise_level: Per-bit noise probability passed to ``xor_sum``.
        num_avg: Number of query pairs averaged per position.

    Returns:
        List of booleans, one recovered bit per position of ``secret``.
    """
    n_bits = len(secret)
    recovered = []

    for position in range(n_bits):
        total_difference = 0

        for _ in range(num_avg):
            probe = [False] * n_bits
            baseline = xor_sum(probe, secret, noise_level=noise_level)
            probe[position] = True
            with_bit_set = xor_sum(probe, secret, noise_level=noise_level)

            total_difference += baseline - with_bit_set

        mean_difference = total_difference / float(num_avg)
        recovered.append(mean_difference > 0)

    return recovered
        
# Draw a random 100-bit secret
secret = [random.random() > 0.5 for i in range(100)]
# Recover it with noise level 0.1, averaging 400 query pairs per bit
revealed = find_secret(secret, 0.1, 400)

# Count the positions where the recovered string differs from the secret
sum([b1 != b2 for b1, b2 in zip(secret, revealed)])

0

In [7]:
# Booleans are summed as integers, so this counts the True entries
sum([True, False, False])

1

### Perform an inner join on two arrays 

# Experimental design 

## Metrics 

- churn
- usage broken down by location and time of day
- monthly active users

## Methodologies

- A/B testing
- Multi-armed bandit

## Analysis

*A/B testing*
- Compare metrics between control and treatment group
- Identify useful effect size
  - Absolute effect size
  - Relative effect size (Cohen's d)
- Determine statistical power
  - Use desired power, effect size, statistical significance level to determine number of samples needed by experiment

# Math/statistics 

### Fit model given data set with incomplete/missing data  

- Use EM algorithm for maximum likelihood parameters.
 - Link to original EM paper http://web.mit.edu/6.435/www/Dempster77.pdf

### Show how maximum likelihood solution for linear model reduces to least square solution

- Formulate relationship between target and feature variables 
 - y = Bx + E, where E ~ N(0, sigma^2)
- Calculate likelihood of target given features and model:
 - p(D|theta) = p(y|x, B) = PROD_i->N{p(y_i|x_i, B)}
- Convert to negative log likelihood to turn product into sum:
 - NLL = -log(p(D|theta)) = -SUM_i->N{log(p(y_i|x_i, B))}
- Use PDF of N(0, sigma) to calculate probabilities of individual points
 - p(y_i|x_i, B) = (2*pi*sigma^2)^(-1/2) * exp(-(y_i - Bx_i)^2/(2*sigma^2))
- Plug probabilities into NLL and simplify:
 - NLL = -N/2 * log(1/(2*pi*sigma^2)) + (1/(2*sigma^2))*SUM_i->N{(y_i - Bx_i)^2}
- Differentiate NLL with respect to B, set equal to 0, and solve for B:
 - B = (X_tX)^-1X_ty
- Link to explanation https://www.quantstart.com/articles/Maximum-Likelihood-Estimation-for-Linear-Regression