# Parameter Distribution Plot

In [1]:
import sys
# Put the directory three levels above the working directory on the import path
# (presumably so the estimagic imports below resolve to the local checkout -- verify).
sys.path.append('../../../')

In [2]:
import pandas as pd
import numpy as np
from numpy.random import uniform
# Seed NumPy's global random state so the uniform() draws in this notebook are reproducible.
np.random.seed(89345)

In [3]:
from bokeh.plotting import output_notebook
# Configure Bokeh to display its output inline in notebook cells.
output_notebook()

In [4]:
from estimagic.optimization.utilities import index_element_to_string
from estimagic.visualization.parameter_distribution_plot import parameter_distribution_plot

## Generate some artificial results

We start by creating some fake results. 
Let's begin with a rather minimal version of the results 
that only contains the columns that are guaranteed 
to be in the final params DataFrame.

In [5]:
# Build the parameter table in one go. Every column has 19 rows:
# 16 coefficients (5 work, 7 home, 4 educ) followed by 3 cutoffs.
base_params = pd.DataFrame(
    {
        # the first three columns become the index levels below
        "vartype": ["coefficient"] * 16 + ["cutoff"] * 3,
        "choice": (
            ["work"] * 5 + ["home"] * 7 + ["educ"] * 4
            + ["home", "educ", "work"]
        ),
        "varname": (
            ["x{}".format(i) for i in range(5)]
            + ["x{}".format(i) for i in range(5)] + ["z1", "z2"]
            + ["x5", "x6", "z3", "z4"]
            + [None] * 3
        ),
        # a single group label, broadcast to all rows
        "group": "All Parameters",
        # random coefficients per choice (drawn in the order work, home, educ),
        # followed by three fixed cutoff values
        "value": np.concatenate(
            [
                uniform(-3, 3, 5),
                uniform(-1, 3, 7),
                uniform(-3, 1, 4),
                [0, 1.153, 4.037],
            ]
        ),
    },
    columns=["vartype", "choice", "varname", "group", "value"],
)

base_params = base_params.set_index(["vartype", "choice", "varname"])

# One string per index tuple, e.g. "coefficient_work_x0" for the first row.
base_params["name"] = list(map(index_element_to_string, base_params.index))
# All parameters are unbounded.
base_params["lower"] = -np.inf
base_params["upper"] = np.inf

In [6]:
base_params

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,group,value,name,lower,upper
vartype,choice,varname,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
coefficient,work,x0,All Parameters,2.16589,coefficient_work_x0,-inf,inf
coefficient,work,x1,All Parameters,1.573793,coefficient_work_x1,-inf,inf
coefficient,work,x2,All Parameters,2.713678,coefficient_work_x2,-inf,inf
coefficient,work,x3,All Parameters,-0.385969,coefficient_work_x3,-inf,inf
coefficient,work,x4,All Parameters,1.032033,coefficient_work_x4,-inf,inf
coefficient,home,x0,All Parameters,2.331855,coefficient_home_x0,-inf,inf
coefficient,home,x1,All Parameters,0.476059,coefficient_home_x1,-inf,inf
coefficient,home,x2,All Parameters,2.88351,coefficient_home_x2,-inf,inf
coefficient,home,x3,All Parameters,-0.359987,coefficient_home_x3,-inf,inf
coefficient,home,x4,All Parameters,2.616226,coefficient_home_x4,-inf,inf


In [7]:
def create_results(params, n_res=50, model_classes=None, add_model_name=False):
    """Create a list of randomly perturbed copies of a params DataFrame.

    Each copy has independent uniform noise on [-0.75, 0.75) added to its
    "value" column. The input DataFrame itself is left untouched.

    Args:
        params (pd.DataFrame): parameter table with a numeric "value" column.
        n_res (int): total number of results. Each model class receives
            ``int(n_res / len(model_classes))`` copies, so the total is
            rounded down if ``n_res`` is not divisible by the number of classes.
        model_classes (list or None): labels written to a "model_class" column.
            With None, a single unlabeled class is used and no "model_class"
            column is added.
        add_model_name (bool): if True, add a "model_name" column of the form
            "<model class>_<running number within the class>".

    Returns:
        list: the perturbed DataFrames, ordered by model class.

    """
    classes = [None] if model_classes is None else model_classes

    # The per-class count is evaluated inside the comprehension, i.e. only if
    # there is at least one class, so an empty list of classes yields [].
    labelled_runs = [
        (label, run)
        for label in classes
        for run in range(int(n_res / len(classes)))
    ]

    results = []
    for label, run in labelled_runs:
        noisy = params.copy()
        noisy["value"] += uniform(-0.75, 0.75, len(noisy))
        if label is not None:
            noisy["model_class"] = label
        if add_model_name:
            noisy["model_name"] = "{}_{}".format(label, run)
        results.append(noisy)
    return results

In [8]:
# All defaults: 50 perturbed copies of base_params, without model_class or
# model_name columns.
minimal_results = create_results(base_params)

## Generate the comparison plot

In [9]:
# Plot with all optional arguments left at their defaults; the call returns a
# pair that is unpacked into source and grid.
source, grid = parameter_distribution_plot(results=minimal_results)

  "The figure height you specified results in very small "


## Adding model_class and group_col

This plot is very large. We can use the group_col to leave out groups of parameters we're not interested in. 
Assume for the moment that we are only interested in the wage and education coefficients.

In [10]:
# Overwrite the single "All Parameters" group, following the row order of
# base_params: 5 work, 7 home and 4 educ coefficients, then 3 cutoffs.
# Rows we are not interested in get None as their group.
base_params["group"] = (
    ["Wage Coefficients"] * 5
    + [None] * 7
    + ["Education Coefficients"] * 4
    + [None] * 3
)

Furthermore, we might have different model classes we want to compare.

We might try different specifications, different optimization algorithms or different estimators.

Estimagic allows you to color code parameters from models of the same class in the same color. 

Assume we estimated our model with GMM and with maximum likelihood, each with different starting values. The comparison will show us how sensitive our estimates are to the additional assumptions we usually need to make when using maximum likelihood.

In [11]:
# With two model classes, create_results draws int(100 / 2) = 50 perturbed
# copies per class; add_model_name=True labels them "GMM_0", ..., "ML_49".
mixed_results = create_results(
    params=base_params, 
    n_res=100, 
    model_classes=["GMM", "ML"], 
    add_model_name=True
)

In [12]:
# Same call as for the minimal results, now on the results that carry
# model_class and model_name columns; source and grid are overwritten.
source, grid = parameter_distribution_plot(results=mixed_results)