In [25]:
import os
import functools
import itertools

import pandas as pd
import numpy as np
from patsy import dmatrices
from scipy import stats

import estimagic

# Ordered Logit Example
We use a multidimensional least squares problem.
The outcome depends on a number of fixed observables.
However, we only have conditional moments instead of individual-level outcomes.
This is a very stylized example, since the estimation problem is not really different and because we have moments for each set of
observables. In general, such problems will involve endogenous and dynamic choices. The example is, however, sufficient to show the basic
mechanics of standard error calculation.


## Functions

In [23]:
def ordered_logit_processing(formula, data):
    """Turn a formula and a dataset into the inputs of an ordered logit estimation.

    The intercept is removed from the model by appending ``" - 1"`` to the
    formula before the design matrices are built with ``dmatrices``.

    Args:
        formula: Model formula handed to ``dmatrices``.
        data: Dataset handed to ``dmatrices``.

    Returns:
        tuple: ``(start_params, y_arr, x_arr, constr)``.

        - ``start_params``: DataFrame indexed by ``("type", "name")`` with one
          ``"beta"`` row per regressor and one ``"cutoff"`` row per cutoff. The
          ``"value"`` column holds uniform draws from [-0.5, 0.5) for the betas
          and 0, 2, 4, ... for the cutoffs; ``"group"`` repeats the ``"type"``
          level.
        - ``y_arr``: first left-hand-side column as an integer numpy array.
        - ``x_arr``: right-hand-side design matrix as a numpy array.
        - ``constr``: list with one dict that marks the ``"cutoff"`` parameters
          as ``"increasing"``.

    Note:
        The function seeds numpy's global random number generator with 5471, so
        the start values are reproducible and subsequent ``np.random`` draws are
        affected as well.
    """
    # design matrices; only the first outcome column is used
    endog, exog = dmatrices(formula + " - 1", data, return_type="dataframe")
    endog = endog[endog.columns[0]]

    # dimensions: one cutoff less than there are distinct outcomes
    beta_names = list(exog.columns)
    num_betas = len(beta_names)
    num_cutoffs = len(endog.unique()) - 1

    # two-level index: parameter type first, parameter name second
    beta_keys = [("beta", name) for name in beta_names]
    cutoff_keys = [("cutoff", number) for number in range(num_cutoffs)]
    index = pd.MultiIndex.from_tuples(beta_keys + cutoff_keys, names=["type", "name"])

    # start values (global seed is set on purpose, see the docstring note)
    np.random.seed(5471)
    beta_start = np.random.uniform(low=-0.5, high=0.5, size=num_betas)
    cutoff_start = 2 * np.arange(num_cutoffs)

    start_params = pd.DataFrame(index=index)
    start_params["value"] = np.hstack([beta_start, cutoff_start])
    start_params["group"] = start_params.index.get_level_values("type")

    # the cutoffs have to be ordered
    constr = [{"loc": "cutoff", "type": "increasing"}]

    return start_params, endog.to_numpy().astype(int), exog.to_numpy(), constr

In [54]:
def _build_moments(x,y):
    im = pd.DataFrame(data=x.copy(),columns=["pared","public","gpa"])
    im["apply"] = y
    im["gpa"] = pd.cut(im.gpa,bins=5,labels=False)
    ix = pd.MultiIndex.from_tuples(itertools.product(range(2),range(2),range(5),range(3)))
    ix.names = ["pared", "public","gpa", "apply"]
    out = pd.Series(index=ix,data=0)
    rslt =  im.groupby(["pared","public","gpa"])["apply"].value_counts(normalize=True)
    out[rslt.index] = rslt.values
    return out
    

In [65]:
def ordered_logit_msm(
    params,
    x,
    moment_func,
    moments_obs,
    weighting=None,
    seed=None,
):
    """Method of simulated moments (MSM) criterion for an ordered logit model.

    Outcomes are simulated from the ordered logit choice probabilities implied
    by ``params``, turned into moments with ``moment_func`` and compared with
    ``moments_obs``.

    Args:
        params: DataFrame with a ``"value"`` column whose index allows
            ``params.loc["beta", "value"]`` and ``params.loc["cutoff", "value"]``.
        x: 2d numpy array of regressors (one row per observation, one column
            per beta).
        moment_func: Callable ``moment_func(x, labels)`` that returns the
            simulated moments as a pandas object aligned with ``moments_obs``.
        moments_obs: Observed moments (pandas object).
        weighting: Square weighting matrix. ``None`` (default) or an empty
            sequence selects the identity matrix.
        seed: Optional seed for the simulation draws. ``None`` (default) draws
            from numpy's global random state, as before. Passing an integer
            uses a private generator, so that repeated evaluations use the
            same draws and the global state is left untouched.

    Returns:
        float: The quadratic form ``dev @ weighting @ dev`` where ``dev`` is the
        difference between simulated and observed moments.
    """
    # parse the parameter vector into its quantities
    beta = params.loc["beta", "value"].to_numpy()
    cutoffs = params.loc["cutoff", "value"].to_numpy()

    # deterministic part of the latent index, as a column vector
    xb = x.dot(beta).reshape(len(x), 1)

    # choice probabilities: difference of the logistic cdf at adjacent cutoffs
    upper_cutoffs = np.hstack([cutoffs, np.inf])
    lower_cutoffs = np.hstack([-np.inf, cutoffs])
    upper_cdf = stats.logistic.cdf(upper_cutoffs - xb)
    lower_cdf = stats.logistic.cdf(lower_cutoffs - xb)

    prob_cumulative = (upper_cdf - lower_cdf).cumsum(axis=1)
    # The probabilities sum to one analytically, but the floating point cumsum
    # can end slightly below 1. A draw above that value would match no column
    # and argmax would then wrongly return category 0, so pin the last column.
    prob_cumulative[:, -1] = 1.0

    # inverse-cdf sampling: first category whose cumulative probability
    # exceeds the uniform draw
    rng = np.random if seed is None else np.random.RandomState(seed)
    draws = rng.rand(len(xb), 1)
    labels = (draws < prob_cumulative).argmax(axis=1)

    moments_sim = moment_func(x, labels)

    dev = (moments_sim - moments_obs).values

    if weighting is None or len(weighting) == 0:
        weighting = np.identity(len(moments_obs))

    return dev @ weighting @ dev

## Prepare data

In [56]:
# Load the example data and derive start values, data arrays and constraints.
# NOTE: the data set is read from a hard-coded location below the user's home
# directory; pickle files should only be loaded from trusted sources.
formula = "apply ~ pared + public + gpa"
data = pd.read_pickle("~/OpenSourceEconomics/estimagic/docs/source/getting_started/ologit.pickle")
start_params, y, x, constraints = ordered_logit_processing(formula, data)

Assume we are not allowed to keep the individual-level outcomes (the dependent variable) due to privacy concerns.
We are only allowed to extract moments at a certain level of granularity.

In [57]:
# Extract the observed moments while the individual outcomes are still available.
moments_obs = _build_moments(x, y)
# From here on we pretend to have left the secure work space, so y has to go.
del y

In [66]:
# A weighting matrix is needed as well; the identity matrix is our starting point.
weighting = np.eye(len(moments_obs))


In [67]:
# Fix the data, the moment function, the observed moments and the weighting
# matrix, so that the objective is a function of the parameters only.
objective = functools.partial(
    ordered_logit_msm,
    weighting=weighting,
    moments_obs=moments_obs,
    moment_func=_build_moments,
    x=x,
)

In [68]:
# We perform one evaluation to make sure our setup works.
# The value depends on the state of numpy's global random number generator,
# because ordered_logit_msm simulates outcomes with draws from np.random.
objective(start_params)

4.292995381283753

# Inference

In [None]:
def get_generic_standart_errors(objective, theta_hat, covariance_matrix):
    """Unfinished draft of a generic standard error calculation.

    NOTE(review): this function cannot run as written. ``H`` is set to ``None``
    and never replaced, so evaluating ``H.shape`` in the return statement raises
    an ``AttributeError``. ``objective`` and ``theta_hat`` are not used yet.

    Args:
        objective: Currently unused. Presumably the criterion function whose
            Hessian is needed -- TODO confirm.
        theta_hat: Currently unused. Presumably the parameter estimate at which
            the Hessian is to be evaluated -- TODO confirm.
        covariance_matrix: Matrix that is multiplied by ``H`` from both sides
            in the return expression.

    Returns:
        Intended: ``sqrt(1 / H.shape[0]) * (H @ covariance_matrix @ H)``. The
        scaling is marked as open in the comments below.
    """
    # Get Hessian Matrix
    # TODO: placeholder -- the Hessian is not computed yet.
    H = None
    
    # Think about correct scaling
    
    # Build wrapper
    return np.sqrt(1/H.shape[0]) * (H @ covariance_matrix @ H)
