# Problem statement

Let $X \in \{0,1\}$ be a random variable, that satisfies $$prob(X=1) = p = \frac {exp(a^{T}x+b)}{1+exp(a^{T}x+b)} $$  where $x \in \mathbb{R}^n$ is a vector of variables that affect the probability, and $a$ and $b$ are known parameters. We can think of $X = 1$ as the event that a consumer buys a product, and $x$ as a vector of variables that affect the probability, e.g., advertising effort, retail price, discounted price, packaging expense, and other factors. 
The variable $x$, which we are to optimize over, is subject to a set of linear constraints, $Fx \preceq g$.

Formulate the following problems as convex optimization problems.


1)  _Maximizing buying probability_ The goal is to choose $x$ to maximize $p$.

2)  _Maximizing expected profit._ Let $c^T x+d$ be the profit derived from selling the product, which we assume is positive for all feasible x. The goal is to maximize the expected profit, which is $p\cdot(c^T x + d)$.  


In [240]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sps
%matplotlib inline

In [275]:
a = np.matrix([1,2]).T
a
b = 1
c = np.matrix([2,1]).T
d = 2
start_a = np.matrix([5, 3]).T
start_b = np.matrix([4, 3]).T
stop_precision = 0.001
epsilon = 0.01

In [276]:
def grad_descent(grad, start_x, stop_precision=0.0001, epsilon=0.01):
    work_x = start_x
    prev_x = 0
    prev_precision = 1
    for i in range(1000):
        prev_x = work_x
        work_x = work_x - epsilon*grad(work_x)
        prev_precision = np.max(np.absolute(prev_x - work_x))
        if prev_precision < stop_precision:
            break
    return work_x

In [277]:
grad_descent(grad_b, start_b)

matrix([[39.13676488],
        [20.56847179]])

In [278]:
def grad_a(x):
    return -a

def grad_b(x):
    nom = np.exp(a.T*x + b)[0, 0]
    denom = 1 + nom
    if c.T*x + d <= 0:
        raise Exception('grad_b error: Divided by zero')
    return -a + nom*a/denom - c/(c.T*x + d)

In [285]:
# Hesse matrix
def hesse_a(x):
    return 0
def hesse_b(x):
    nom = np.exp(a.T*x + b)[0,0]
    if c.T*x + d <= 0:
        raise Exception('grad_b error: Divided by zero')
    return c*c.T/(c.T*x+d)**2 + nom/(nom+1)**2*a*a.T

In [309]:
def newton_method(grad, hesse, start_x, stop_precision=0.0001, step_size=1, iterations=1000):
    """
    Minimizes function using Newton method
    grad - gradient of a function to minimize
    start_x - starting point
    """
    curr_x = start_x
    for i in range(iterations):
        inverse_hesse = hesse(curr_x).I
        gradient = grad(curr_x)
        
        delta_x = - inverse_hesse * gradient
        lambda_square = gradient.T * inverse_hesse * gradient
        if lambda_square/2 < stop_precision:
            return curr_x + delta_x
        curr_x = curr_x + delta_x
    return curr_x

In [299]:
def quasi_newton_method(grad, start_x, stop_precision=0.0001, step_size=1, iterations=10000):
    """
    Minimizes function using quasi Newton method
    grad - gradient of a function to minimize
    start_x - starting point
    """
    def update_inverse_hesse(prev, s, y):
        v = s - prev*y
        if v.T*y == 0 or v.T*y != v.T*y:
             raise Exception('hesse error: Divided by zero %f' % v.T*y)
        return prev + v*v.T/(v.T*y)
    
    curr_x = start_x

    inverse_hesse = np.identity(start_x.size) #hesse aproximation
    for i in range(iterations):        
        delta_x = -inverse_hesse * grad(curr_x)
        
        s = step_size * delta_x
        y = grad(curr_x+s) - grad(curr_x)
        
        inverse_hesse = update_inverse_hesse(inverse_hesse, s, y)
        curr_x = curr_x + s
        
        precision = np.max(np.absolute(s))
        if precision < stop_precision:
            break

    return curr_x

In [291]:
quasi_newton_method(grad_b, start_x=start_b)

  if __name__ == '__main__':


TypeError: only size-1 arrays can be converted to Python scalars

In [296]:
def line_search(r, x, mu, z_p, z_d, c, beta):
    alpha = 1
    while norm(r(x + alpha * z_p, mu + alpha * z_d)) >= (1 - c * alpha) * norm(r(x, mu)): 
        alpha *= beta   
    return alpha

In [310]:
newton_method(grad_b, hesse_b, start_b)

LinAlgError: singular matrix

In [293]:
np.matrix(np.identity(2)).I

matrix([[ 1., -0.],
        [ 0.,  1.]])

In [294]:
hesse_b(start_b).I

matrix([[  6728.01574799, -13343.36482931],
        [-13343.36482931,  26630.39632529]])