In [None]:
import pandas as pd
import numpy as np
import scipy.optimize as opt
from itertools import count

In [None]:
# Toy dataset of 7 observations. 'choice' holds the code (0 or 1) of the chosen
# alternative; 'tt_1' and 'tt_2' are uniform random draws in [0, 1).
# The draws come from a locally seeded generator so that a fresh
# "Restart & Run All" reproduces the same data (and the same estimate)
# without touching NumPy's global random state.
rng = np.random.RandomState(42)
dataset = pd.DataFrame({'age': [1,2,1,1,1,2,1],
                        'gender': ['f','m','m','m','f','f','f'],
                        'choice': [0,0,1,1,1,1,0],
                        'tt_1': rng.rand(7),
                        'tt_2': rng.rand(7)})

In [None]:
# The optimizer passes the betas around as a plain positional array (it does
# not handle named data structures), so each beta is given an integer slot,
# handed out in sequence by this counter.
index = count()
TT = next(index)


def _scaled_by_tt(column):
    """Build a utility function that returns b[TT] times column `column` of d."""
    return lambda b, d: b[TT] * d[column]


# One utility function per alternative, keyed by the alternative's choice code.
utilities = {
    0: _scaled_by_tt('tt_1'),
    1: _scaled_by_tt('tt_2'),
}

def create_betas(**kwargs):
    """Pack named beta values into a float64 ``pd.Series``.

    Each keyword argument becomes one entry of the Series: the keyword is the
    index label and the argument is the value, in the order they were passed.
    """
    labels = kwargs.keys()
    values = list(kwargs.values())
    return pd.Series(data=values, index=labels, dtype=np.float64)
    
# Named starting values for the betas: a single 'tt' entry initialised to 0.
# NOTE(review): the minimize call visible in this notebook passes x0=[0]
# directly and does not reference `betas` - confirm whether it is still needed.
betas = create_betas(tt=0)

In [None]:
def neg_log_likelihood(betas, utilities, choices, df):
    """Return the negative log-likelihood of the observed choices.

    The probability of each alternative is the softmax (multinomial logit) of
    the alternatives' utilities; the log-likelihood of one observation is the
    utility of its chosen alternative minus the log-sum-exp over all
    alternatives.

    Parameters
    ----------
    betas : array-like
        Coefficient values, indexed positionally by the utility functions.
    utilities : dict
        Maps each alternative's label to a callable ``f(betas, df)`` returning
        that alternative's utility for every row of ``df``.
    choices : sequence
        Label of the chosen alternative for each row of ``df``, in row order.
        Every label must be a key of ``utilities``.
    df : pandas.DataFrame
        One row per observation.

    Returns
    -------
    float
        Minus the sum of the per-observation log-likelihoods (the quantity to
        hand to a minimizer).

    Raises
    ------
    ValueError
        If ``choices`` and ``df`` do not have the same number of entries.
    KeyError
        If a choice label is not a key of ``utilities``.
    """
    n_obs = df.shape[0]
    if len(choices) != n_obs:
        raise ValueError('number of choices {} is different from number of observations {}'.format(len(choices), n_obs))

    utility_values = pd.DataFrame({c: utilities[c](betas, df) for c in utilities})
    utility_matrix = utility_values.to_numpy(dtype=np.float64)

    # DataFrame.lookup was removed in pandas 2.0: translate the choice labels
    # to column positions and pick one entry per row positionally instead.
    # Rows are matched by position, so df does not need a 0..n-1 index.
    chosen_columns = utility_values.columns.get_indexer(np.asarray(choices))
    if (chosen_columns < 0).any():
        raise KeyError('one or more choices do not correspond to an alternative in utilities')
    chosen_utility = utility_matrix[np.arange(n_obs), chosen_columns]

    # Log-sum-exp with a per-row shift: subtracting each row's own maximum
    # prevents overflow, and (unlike a single global maximum) also prevents a
    # row lying far below the largest utility from underflowing to log(0).
    row_max = utility_matrix.max(axis=1, keepdims=True)
    logsums = row_max[:, 0] + np.log(np.exp(utility_matrix - row_max).sum(axis=1))
    loglikelihoods = chosen_utility - logsums

    # the function is passed to a *minimizer*, hence the minus sign
    return -float(np.sum(loglikelihoods))

In [None]:
# Minimise the negative log-likelihood over the beta array, starting from 0.
# Left as the cell's final expression so the optimizer's result is displayed.
opt.minimize(
    fun=neg_log_likelihood,
    x0=[0],
    args=(utilities, dataset['choice'], dataset),
)