# Teaching with and Learning from Demonstration model
This notebook generates simulation data for four models: the doing demonstrator, showing model, naive observer, and sophisticated observer. Visualizations of these simulations are in the Visualization notebook.

In [2]:
import copy
from itertools import product
import time
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from mdp_lib.domains.gridworld import GridWorld
from mdp_lib.util import sample_prob_dict
from planninginbeliefmodel import PlanningInObserverBeliefModel
from mdp_lib.domains.gridworldvis import visualize_trajectory, plot_text
from task import mdp_codes, mdp_params
from mdp_lib.util import sample_prob_dict
from util import mdp_to_feature_belief

np.random.seed(128374027)
np.seterr(under='ignore')

{'divide': 'raise', 'invalid': 'raise', 'over': 'raise', 'under': 'ignore'}

# Parameters for doing and showing demonstration models

In [3]:
# Doing model parameters.
# do_discount is written into each GridWorld's 'discount_rate'; do_softmax is
# the softmax temperature used when doing policies / rollouts are requested.
do_discount = .99
do_softmax = 0.08

# Showing model parameters.
# base_* are handed to PlanningInObserverBeliefModel as the base planner
# settings; obmdp_* are the discount and softmax temperature of the observer
# belief MDP (OBMDP) itself.
base_discount_rate = .99
base_softmax_temp = 3.0
obmdp_discount_rate = .9
obmdp_softmax_temp = 1

# Doing model
This code builds doing models for the different MDPs that are defined in `task.py`. It also generates seed trajectories for building a discretized observer belief MDP (OBMDP) transition function. Using trajectories guided by what a doing agent would do helps focus the approximation on parts of the world-belief state space that are relevant.

In [4]:
# Solve one doing model per entry of `mdp_params` and collect softmax rollouts
# from each to seed the OBMDP belief discretization.
doing_models = []
seed_trajs = []
for base_params in mdp_params:
    # Work on a copy so the shared task definition is not modified.
    params = copy.deepcopy(base_params)
    params['discount_rate'] = do_discount
    doing_model = GridWorld(**params)
    doing_model.solve()
    doing_models.append(doing_model)

    # generate seed trajectories for OBMDP discretization
    for _ in xrange(20):
        rollout = doing_model.run(temp=.7)
        # Keep only rollouts whose last action is '%'
        # (presumably the terminal action -- TODO confirm against GridWorld).
        if rollout[-1][1] == '%':
            seed_trajs.append([(w, a) for w, a, _, _ in rollout])

with open("./cached_values/seed_trajs.pkl", 'wb') as cache_file:
    pickle.dump(seed_trajs, cache_file)

# Showing model
This block builds showing models for the different possible OBMDPs using the `PlanningInObserverBeliefModel` class defined in `planninginbeliefmodel.py`. It discretizes the belief space using the `seed_trajs` generated in the previous block and solves the resulting (large) discrete MDP.

**Estimated running time: 6-8 mins**.

*Note that since the ground transition function is the same across all the ground MDPs considered here, the world-belief transitions are also the same. This means we can speed up computations slightly by reusing the transition function, `tf`.*

In [5]:
# Belief updates for doing model
# This model uses the *doing* parameters (do_discount, do_softmax) as its base
# planner settings. Its ob_mdp is kept as `showing_models_inner_temp`, which
# calc_obs_sobs_traj transitions through to obtain the naive observer's belief.
m = PlanningInObserverBeliefModel(
    base_discount_rate = do_discount,
    base_softmax_temp = do_softmax,
    obmdp_discount_rate = obmdp_discount_rate,
    obmdp_softmax_temp=obmdp_softmax_temp,
    true_mdp_code=mdp_codes[0],
    discretized_tf=None
)
m.seed_beliefs_with_trajs(seed_trajs)
m.build()
m.solve()
showing_models_inner_temp = m.ob_mdp

In [6]:
# Build and solve one showing model (OBMDP) per reward-function code, timing each.
showing_models = []
tf = None  # no discretized transition function yet; the first model is built without one
for i, rfc in enumerate(mdp_codes):
    starttime = time.time()
    print rfc,
    m = PlanningInObserverBeliefModel(
        base_discount_rate = base_discount_rate,
        base_softmax_temp = base_softmax_temp,
        obmdp_discount_rate = obmdp_discount_rate,
        obmdp_softmax_temp=obmdp_softmax_temp,
        true_mdp_code=rfc,
        discretized_tf=tf
    )
    m.seed_beliefs_with_trajs(seed_trajs)
    m.build()
    m.solve()
    showing_models.append(m.ob_mdp)
    # Pass this model's discretized transition function on to the next model.
    tf = m.ob_mdp.get_discretized_tf()
    print " %.2fs" % (time.time() - starttime)

ooo  35.37s
oox  37.94s
oxo  39.38s
oxx  41.37s
xoo  36.97s
xox  39.19s
xxo  39.79s
xxx  82.47s


# Generating trajectories and calculating observer beliefs

The following two blocks generate *doing* and *showing* world-state/action trajectories using the models defined above. For each sequence of world-states and actions, we then calculate the change in the observer models' beliefs over time. The two models are the *naive* and *sophisticated* observers, which correspond to the following equations, respectively:

\begin{align}
b^{\text{Obs}}_{t+1}(M_i) &= P(M_i \mid w_t, a_t, w_{t+1})\\
&\propto P(a_t, w_{t+1} \mid w_t, M_i)P(M_i)\\
&= P(a_t \mid w_t, M_i)P(w_{t+1} \mid w_t, a_t, M_i)P(M_i)\\
&= \pi_{i}^{\text{Do}}(a_t \mid w_t)T_{i}(w_{t+1} \mid w_t, a_t)b_t^{\text{Obs}}(M_i).
\end{align}

and

\begin{align}
b^{\text{S-Obs}}_{t+1}(M_i) &= P(M_i \mid w_t, b^{\text{Obs}}_t, a_t, w_{t+1}, b^{\text{Obs}}_{t+1}) \\
&\propto \pi_i^{\text{Show}}(a_t \mid w_t, b_t^{\text{Obs}})T_i(w_{t+1} \mid w_t, a_t)b_t^{\text{S-Obs}}(M_i).
\end{align}

Each trajectory and final belief state is recorded in the `model_obs_judgments` dataframe and cached.

**Estimated running time: 20 min**

In [7]:
def calc_obs_sobs_traj(wtraj, mixture_prob=1):
    """Track naive and sophisticated observer beliefs along one demonstration.

    Params:
        wtraj: iterable of (world_state, action) pairs
        mixture_prob: weight the sophisticated observer puts on the showing
            policy; (1 - mixture_prob) goes to the doing policy. The default
            of 1 uses the showing policy alone.

    Returns:
        dict with keys 'obs_traj' (naive) and 'sobs_traj' (sophisticated);
        each is a list holding the initial belief followed by one belief per
        step of wtraj.
    """
    b_sobs = np.array(showing_models[0].get_init_state()[0])
    s = showing_models_inner_temp.get_init_state()
    inner_s = s

    obs_traj = [s[0],]
    sobs_traj = [b_sobs,]
    doing_policies = [dm.get_softmax_function(do_softmax) for dm in doing_models]
    for w, a in wtraj:
        # get next naive belief
        ns = showing_models_inner_temp.transition(s=s, a=a)
        obs_traj.append(ns[0])

        # calc next sophisticated belief
        inner_ns = showing_models[0].transition(s=inner_s, a=a)
        action_likelihoods = []
        for dp, sm in zip(doing_policies, showing_models):
            # Kept in its own local: writing the result back into
            # `mixture_prob` would change the mixing weight for every
            # following model and step.
            action_prob = sm.get_softmax_actionprobs(s=inner_s, temp=obmdp_softmax_temp)[a]
            if mixture_prob < 1:
                dm_prob = dp[w][a]
                action_prob = action_prob*mixture_prob + dm_prob*(1-mixture_prob)
            action_likelihoods.append(action_prob)
        # Bayes update: prior times per-model action likelihood, renormalized.
        b_sobs = b_sobs*np.array(action_likelihoods)
        b_sobs = b_sobs/np.sum(b_sobs)
        sobs_traj.append(b_sobs)

        s = ns
        inner_s = inner_ns
    return {'obs_traj': obs_traj, 'sobs_traj': sobs_traj}

def is_correct(row):
    """Return True when the thresholded safety judgment matches the true code.

    `row['rf']` gives one code per color in the order orange, purple, cyan.
    An 'x' color is judged correctly when exp_safe < .5, an 'o' color when
    exp_safe >= .5; any other code yields False.
    """
    code_by_color = dict(zip(['orange', 'purple', 'cyan'], row['rf']))
    code = code_by_color[row['color']]
    if code == 'x':
        return bool(row['exp_safe'] < .5)
    if code == 'o':
        return bool(row['exp_safe'] >= .5)
    return False

def calc_correct_prob(row):
    """Return the probability mass placed on the true code of row['color'].

    For an 'o' color this is exp_safe, for an 'x' color it is 1 - exp_safe;
    any other code yields None.
    """
    code = dict(zip(['orange', 'purple', 'cyan'], row['rf']))[row['color']]
    if code == 'o':
        return row['exp_safe']
    if code == 'x':
        return 1 - row['exp_safe']
    return None

In [8]:
def gen_trajs(doing_models, showing_models, num_trajs=100):
    """Sample paired doing and showing rollouts for every model pair.

    Params:
        doing_models, showing_models: parallel lists of solved models
        num_trajs: rollouts to draw from each model

    Returns:
        (all_do_trajs, all_show_trajs): one list per model pair, each holding
        num_trajs trajectories of (world_state, action) pairs.
    """
    do_by_mdp = []
    show_by_mdp = []
    for idx, (doer, shower) in enumerate(zip(doing_models, showing_models)):
        print(idx)
        do_rollouts = []
        show_rollouts = []
        # Doing and showing rollouts are drawn alternately, one of each per pass.
        for _ in xrange(num_trajs):
            do_steps = doer.run(temp=do_softmax)
            do_rollouts.append([(world, act) for world, act, _nxt, _rew in do_steps])

            # Showing states are (belief, world) pairs; keep only the world part.
            show_steps = shower.run(temp=obmdp_softmax_temp)
            show_rollouts.append(
                [(world, act) for (_belief, world), act, _nxt, _rew in show_steps])
        do_by_mdp.append(do_rollouts)
        show_by_mdp.append(show_rollouts)
    return do_by_mdp, show_by_mdp

def gen_show_trajs(showing_models, num_trajs=100):
    """Sample showing rollouts from every showing model.

    Returns one list per model, each holding num_trajs trajectories of
    (world_state, action) pairs.
    """
    show_by_mdp = []
    for idx, shower in enumerate(showing_models):
        print(idx)
        rollouts = []
        for _ in xrange(num_trajs):
            steps = shower.run(temp=obmdp_softmax_temp)
            # Showing states are (belief, world) pairs; keep only the world part.
            rollouts.append(
                [(world, act) for (_belief, world), act, _nxt, _rew in steps])
        show_by_mdp.append(rollouts)
    return show_by_mdp

def combine_prob_dicts(doing_probs, showing_probs, mixture_prob):
    """Blend two action-probability dicts key by key.

    Each key of doing_probs maps to
    (1 - mixture_prob) * doing + mixture_prob * showing. Keys that appear only
    in showing_probs are ignored.
    """
    return {
        action: doing_probs[action] * (1 - mixture_prob) + showing_probs[action] * mixture_prob
        for action in doing_probs
    }

def get_mixture_traj(start_belief_state, doing_policy, showing_policy, 
                     doing_model, showing_model, mixture_prob, cond='show', max_steps=25):
    """Roll out one trajectory from a doing/showing action mixture.

    At each step the doing policy (indexed by world state) and the showing
    policy (indexed by belief state) are blended with weight mixture_prob on
    showing, an action is sampled, and the showing model is advanced. The
    rollout stops after max_steps or once the doing model reports the new
    world state as terminal. `cond` is accepted but not used here.

    Returns a list of (world_state, action) pairs.
    """
    state = start_belief_state
    world = state[1]
    steps = []
    for _ in range(max_steps):
        action_dist = combine_prob_dicts(
            doing_policy[world], showing_policy[state], mixture_prob)
        chosen = sample_prob_dict(action_dist)
        next_state = showing_model.transition(state, chosen)
        steps.append((world, chosen))
        state = next_state
        world = next_state[1]
        if doing_model.is_terminal(world):
            break
    return steps

def gen_mixture_trajs(mixtures, doing_models, showing_models, cond='show', num_trajs_per_rf=10):
    """Sample mixture-policy trajectories for every model pair and mixture weight.

    Params:
        mixtures: iterable of weights on the showing policy
        doing_models, showing_models: parallel lists of solved models
        cond: with 'show', each entry is a dict {'mixture': weight, 'traj': traj};
            with any other value, each entry is the bare trajectory
        num_trajs_per_rf: trajectories drawn per (model pair, mixture weight)

    Returns:
        list with one entry per model pair; each entry lists that pair's
        trajectories, grouped by mixture weight in the order given.
    """
    doing_policies = [dm.get_softmax_function(do_softmax) for dm in doing_models]
    showing_policies = [sm.get_softmax_function(obmdp_softmax_temp) for sm in showing_models]
    # Every rollout starts from the first showing model's initial state.
    start_belief_state = showing_models[0].get_init_state()
    trajs = []
    for doing_policy, showing_policy, doing_model, showing_model in zip(
            doing_policies, showing_policies, doing_models, showing_models):
        rf_trajs = []
        for mixture_prob in mixtures:
            for _ in range(num_trajs_per_rf):
                traj = get_mixture_traj(start_belief_state, doing_policy, showing_policy, 
                            doing_model, showing_model, mixture_prob, cond)
                if cond == 'show':
                    rf_trajs.append({'mixture': mixture_prob, 'traj': traj})
                else:
                    rf_trajs.append(traj)
        trajs.append(rf_trajs)
    return trajs

In [None]:
# Showing-condition trajectories tagged with their mixture weight.
# NOTE(review): `alphas` is not assigned anywhere above this cell; its only
# assignment in this notebook is the per-participant estimate in the
# "Human Traj Likelihood" section -- confirm which mixture grid was intended.
show_mixture_trajs = gen_mixture_trajs(alphas, doing_models, showing_models, num_trajs_per_rf=10)

In [None]:
# 100 trajectories per reward function at each of 11 mixture weights from 0 to 1.
show_mix_trajs_x = gen_mixture_trajs(np.linspace(0, 1, 11), doing_models, showing_models, num_trajs_per_rf=100)

In [None]:
# 100 trajectories per reward function from an even doing/showing mixture.
show_mix_trajs_05 = gen_mixture_trajs([0.5], doing_models, showing_models, num_trajs_per_rf=100)

In [None]:
# Pure doing trajectories: mixture weight 0 puts all mass on the doing policy,
# and cond='do' makes gen_mixture_trajs return bare trajectories.
do_trajs = gen_mixture_trajs([0], doing_models, showing_models, 'do', num_trajs_per_rf=1100)

In [None]:
def simulate(filename, do_trajs, show_trajs, mixture=False, 
             infer_with_mixture=False, infer_with_mixture_prob=1):
    """
    Params:
        filename: filename to save to
        do_trajs
        show_trajs
        mixture[bool]: whether or not the show_trajs are generated from mixture
        infer_with_mixture[bool]: whether or not the pedagogic robot should infer using the mixture model
    """
    forder = ['orange', 'purple', 'cyan']
    model_obs_judgments = []
    num_trajs = 0
    for mi, (do_m, show_m, rf_do_trajs, rf_show_trajs) in enumerate(zip(doing_models, showing_models, do_trajs, show_trajs)):
        print mi,
        starttime = time.time()
        assert(len(rf_do_trajs) >= len(rf_show_trajs))
        for do_traj, show_traj in zip(rf_do_trajs, rf_show_trajs):
            num_trajs += 1
            if mixture:
                mixture_prob = show_traj['mixture']
                show_traj = show_traj['traj']
            else:
                mixture_prob = 1
            if infer_with_mixture:
                belief_trajs = calc_obs_sobs_traj(do_traj, mixture_prob=infer_with_mixture_prob)
            else:
                belief_trajs = calc_obs_sobs_traj(do_traj)
            obs_judg = mdp_to_feature_belief(belief_trajs['obs_traj'][-1], mdp_codes, forder)
            obs_judg['rf'] = mdp_codes[mi]
            obs_judg['observer'] = 'naive'
            obs_judg['demonstrator'] = 'doing'
            obs_judg['traj'] = do_traj
            obs_judg['belief_traj'] = belief_trajs['obs_traj']
            obs_judg['mixture'] = 0
            model_obs_judgments.append(obs_judg)

            sobs_judg = mdp_to_feature_belief(belief_trajs['sobs_traj'][-1], mdp_codes, forder)
            sobs_judg['rf'] = mdp_codes[mi]
            sobs_judg['observer'] = 'sophisticated'
            sobs_judg['demonstrator'] = 'doing'
            sobs_judg['traj'] = do_traj
            sobs_judg['belief_traj'] = belief_trajs['sobs_traj']
            sobs_judg['mixture'] = 0
            model_obs_judgments.append(sobs_judg)

            if infer_with_mixture:
                belief_trajs = calc_obs_sobs_traj(show_traj, mixture_prob=infer_with_mixture_prob)
            else:
                belief_trajs = calc_obs_sobs_traj(show_traj)
            obs_judg = mdp_to_feature_belief(belief_trajs['obs_traj'][-1], mdp_codes, forder)
            obs_judg['rf'] = mdp_codes[mi]
            obs_judg['observer'] = 'naive'
            obs_judg['demonstrator'] = 'showing'
            obs_judg['traj'] = show_traj
            obs_judg['belief_traj'] = belief_trajs['obs_traj']
            obs_judg['mixture'] = mixture_prob
            model_obs_judgments.append(obs_judg)

            sobs_judg = mdp_to_feature_belief(belief_trajs['sobs_traj'][-1], mdp_codes, forder)
            sobs_judg['rf'] = mdp_codes[mi]
            sobs_judg['observer'] = 'sophisticated'
            sobs_judg['demonstrator'] = 'showing'
            sobs_judg['traj'] = show_traj
            sobs_judg['belief_traj'] = belief_trajs['sobs_traj']
            sobs_judg['mixture'] = mixture_prob
            model_obs_judgments.append(sobs_judg)
        print " %.2fs" % (time.time() - starttime)
    print(num_trajs)

    model_obs_judgments = pd.DataFrame(model_obs_judgments)
    model_obs_judgments = pd.melt(model_obs_judgments,
        id_vars=['demonstrator', 'rf', 'observer', 'traj', 'belief_traj', 'mixture'], 
        value_name='exp_safe', 
        var_name='color')

    model_obs_judgments['confidence'] = model_obs_judgments['exp_safe'].apply(lambda v: abs(.5-v))
    model_obs_judgments['correct'] = model_obs_judgments.apply(is_correct, axis=1)
    model_obs_judgments['correct_prob'] = model_obs_judgments.apply(calc_correct_prob, axis=1)
    model_obs_judgments.to_pickle('./cached_values/{0}.pkl'.format(filename))

In [None]:
# get_human_trajs() takes no arguments and returns a dict keyed by reward
# function; the loader that accepts an rf order and returns the
# (do_trajs, show_trajs) pair of per-rf lists is get_human_do_show_trajs,
# defined in the "Human Demonstrations" section (run that cell first).
# assumes mdp_codes supports .index(), e.g. is a list -- TODO confirm
human_do_trajs, human_show_trajs = get_human_do_show_trajs(mdp_codes)

In [None]:
# NOTE(review): despite the 'infer_with_mix' filename, infer_with_mixture is
# left at its default (False) here -- confirm whether it should be enabled.
simulate('infer_with_mix', human_do_trajs, human_show_trajs)

In [None]:
# Observer judgments on the human demonstrations, cached as human_new_func.pkl.
simulate('human_new_func', human_do_trajs, human_show_trajs)

In [None]:
# Observer judgments on simulated trajectories; mixture=True because each
# showing entry is a dict carrying its mixture weight.
simulate('mixture', do_trajs, show_mixture_trajs, mixture=True)

# Human Demonstrations

In [9]:
def get_human_do_show_trajs(rf_order):
    """Load human demonstrations as per-reward-function lists.

    Params:
        rf_order: sequence of reward-function codes fixing the output order

    Returns:
        (do_trajs, show_trajs): each a list aligned with rf_order whose
        entries list one trajectory per participant in that condition.
    """
    trials = pd.read_pickle('./data/teaching-trials.pd.pkl')
    demo_trials = trials[trials['phase'] == 'demonstration']
    by_cond = {
        'do': [[] for _ in range(len(rf_order))],
        'show': [[] for _ in range(len(rf_order))],
    }
    for (rf, cond, p), ptrials in demo_trials.groupby(['rf', 'cond', 'participant']):
        rf_ind = rf_order.index(rf)
        # Order each participant's steps by timestep before extracting them.
        traj = list(ptrials.sort_values('timestep')['stateaction'])
        if cond in by_cond:
            by_cond[cond][rf_ind].append(traj)
    return by_cond['do'], by_cond['show']

def get_human_trajs():
    """Load human demonstrations keyed by reward-function code.

    Returns a dict mapping each of the eight codes to [do_trajs, show_trajs],
    where each inner list holds one trajectory per participant.
    """
    trials = pd.read_pickle('./data/teaching-trials.pd.pkl')
    demo_trials = trials[trials['phase'] == 'demonstration']
    # Position of each condition inside the per-rf [do, show] pair.
    slot_of = {'do': 0, 'show': 1}
    trajs = dict(
        (rf, [[], []])
        for rf in ['ooo', 'xoo', 'oox', 'oxo', 'xox', 'xxo', 'oxx', 'xxx'])
    for (rf, cond, p), ptrials in demo_trials.groupby(['rf', 'cond', 'participant']):
        traj = list(ptrials.sort_values('timestep')['stateaction'])
        if cond in slot_of:
            trajs[rf][slot_of[cond]].append(traj)
    return trajs

def get_human_results():
    wtrajs = get_human_trajs()
    forder = ['orange', 'purple', 'cyan']
    model_obs_judgments = []
    for mi, (do_m, show_m) in enumerate(zip(doing_models, showing_models)):
        do_wtrajs = []
        show_wtrajs = []

        rf = mdp_codes[mi]
        print mi,
        starttime = time.time()
        human_trajs = wtrajs[rf]
        for (do_traj, show_traj) in zip(human_trajs[0], human_trajs[1]):
            # interpret DOING trajectory
            belief_trajs = calc_obs_sobs_traj(do_traj)
            obs_judg = mdp_to_feature_belief(belief_trajs['obs_traj'][-1], mdp_codes, forder)
            obs_judg['rf'] = rf
            obs_judg['observer'] = 'naive'
            obs_judg['demonstrator'] = 'doing'
            obs_judg['traj'] = do_traj
            obs_judg['belief_traj'] = belief_trajs['obs_traj']
            model_obs_judgments.append(obs_judg)

            sobs_judg = mdp_to_feature_belief(belief_trajs['sobs_traj'][-1], mdp_codes, forder)
            sobs_judg['rf'] = rf
            sobs_judg['observer'] = 'sophisticated'
            sobs_judg['demonstrator'] = 'doing'
            sobs_judg['traj'] = do_traj
            sobs_judg['belief_traj'] = belief_trajs['sobs_traj']
            model_obs_judgments.append(sobs_judg)

            # interpret SHOWING trajectory
            belief_trajs = calc_obs_sobs_traj(show_traj)
            obs_judg = mdp_to_feature_belief(belief_trajs['obs_traj'][-1], mdp_codes, forder)
            obs_judg['rf'] = rf
            obs_judg['observer'] = 'naive'
            obs_judg['demonstrator'] = 'showing'
            obs_judg['traj'] = show_traj
            obs_judg['belief_traj'] = belief_trajs['obs_traj']
            model_obs_judgments.append(obs_judg)

            sobs_judg = mdp_to_feature_belief(belief_trajs['sobs_traj'][-1], mdp_codes, forder)
            sobs_judg['rf'] = rf
            sobs_judg['observer'] = 'sophisticated'
            sobs_judg['demonstrator'] = 'showing'
            sobs_judg['traj'] = show_traj
            sobs_judg['belief_traj'] = belief_trajs['sobs_traj']
            model_obs_judgments.append(sobs_judg)
        print " %.2fs" % (time.time() - starttime)

    model_obs_judgments = pd.DataFrame(model_obs_judgments)
    model_obs_judgments = pd.melt(model_obs_judgments,
        id_vars=['demonstrator', 'rf', 'observer', 'traj', 'belief_traj'], 
        value_name='exp_safe', 
        var_name='color')

    model_obs_judgments['confidence'] = model_obs_judgments['exp_safe'].apply(lambda v: abs(.5-v))
    model_obs_judgments['correct'] = model_obs_judgments.apply(is_correct, axis=1)
    model_obs_judgments['correct_prob'] = model_obs_judgments.apply(calc_correct_prob, axis=1)
    model_obs_judgments.to_pickle('./cached_values/human_results.pkl')

In [10]:
# Writes ./cached_values/human_results.pkl; prints one timing line per reward function.
get_human_results()

0  0.36s
1  0.37s
2  0.39s
3  0.39s
4  0.41s
5  0.41s
6  0.40s
7  0.37s


# Human Traj Likelihood

In [None]:
def get_human_trajs_by_person():
    """Load human demonstrations indexed as trajs[participant][cond][rf].

    Returns a nested defaultdict, so looking up a participant or condition
    that has no data yields an empty mapping instead of raising.
    """
    trials = pd.read_pickle('./data/teaching-trials.pd.pkl')
    demo_trials = trials[trials['phase'] == 'demonstration']
    by_person = defaultdict(lambda: defaultdict(dict))
    for group_key, rows in demo_trials.groupby(['rf', 'cond', 'participant']):
        rf, cond, person = group_key
        ordered = rows.sort_values('timestep')
        by_person[person][cond][rf] = list(ordered['stateaction'])
    return by_person

In [None]:
# Dict keyed by reward-function code; each value is [do_trajs, show_trajs].
human_trajs = get_human_trajs()

In [None]:
def get_doshow_probs(wtraj, do_policy, showing_model):
    """Per-step probability of each demonstrated action under both policies.

    Params:
        wtraj: iterable of (world_state, action) pairs
        do_policy: mapping world_state -> {action: probability}
        showing_model: model providing get_init_state, get_softmax_actionprobs
            and transition

    Returns:
        (do_action_probs, show_action_probs) as numpy arrays, one entry per step.
    """
    state = showing_model.get_init_state()
    do_seq = []
    show_seq = []
    for world, action in wtraj:
        do_seq.append(do_policy[world][action])
        action_dist = showing_model.get_softmax_actionprobs(
            s=state, temp=obmdp_softmax_temp)
        show_seq.append(action_dist[action])
        # Advance the showing model's state along the demonstrated action.
        state = showing_model.transition(s=state, a=action)
    return np.array(do_seq), np.array(show_seq)
    
def get_human_traj_likelihood(human_trajs, mixtures=[0, 1], traj_type='show'):
    """
    Calculates mean per-trajectory log likelihood of human trajs under a
    doing/showing action mixture, for each mixture weight.
    
    Params:
        human_trajs: dict keyed by reward-function code whose values are
            [do_trajs, show_trajs]
        mixtures: list of the probability that the human is pedagogic in mixture model
        traj_type: 'show' or 'do', selecting which human trajectories to score

    Returns:
        numpy array with one mean log likelihood per entry of mixtures

    Raises:
        ValueError: if traj_type is neither 'show' nor 'do'
    """
    if traj_type == 'show':
        cond_index = 1
    elif traj_type == 'do':
        cond_index = 0
    else:
        raise ValueError("traj_type must be 'show' or 'do', got %r" % (traj_type,))

    lls = np.zeros(len(mixtures))
    total_trajs = 0.0
    for rf, doing_model, showing_model in zip(mdp_codes, doing_models, showing_models):
        do_policy = doing_model.get_softmax_function(do_softmax)
        wtrajs = human_trajs[rf][cond_index]
        total_trajs += len(wtrajs)
        for wtraj in wtrajs:
            # get_doshow_probs needs the policy and model for this reward function.
            do_action_probs, show_action_probs = get_doshow_probs(
                wtraj, do_policy, showing_model)
            for i, mixture_prob in enumerate(mixtures):
                probs = do_action_probs*(1-mixture_prob) + show_action_probs*(mixture_prob)
                ll = np.sum(np.log(probs))
                lls[i] += ll
    lls = lls/total_trajs
    return lls

def get_do_vs_show(trajs_by_person, cond='show'):
    """Fraction of people whose demonstrations the doing model explains better.

    For each person with trajectories in `cond`, log likelihoods under the
    doing and the showing policy are summed over all reward functions. The
    per-person means and the overall sums are printed along the way.

    Returns the share of those people whose doing log likelihood exceeds
    their showing log likelihood.
    """
    do_wins = 0.0
    total_do_ll = 0
    total_show_ll = 0
    people_counted = 0
    for i, person_trajs in enumerate(trajs_by_person.values()):
        trajs = person_trajs[cond]
        if len(trajs) == 0:
            # Nothing recorded for this person in this condition.
            continue
        print(i)
        people_counted += 1
        person_do_ll = 0
        person_show_ll = 0
        for rf, doing_model, showing_model in zip(mdp_codes, doing_models, showing_models):
            do_probs, show_probs = get_doshow_probs(
                trajs[rf], doing_model.get_softmax_function(do_softmax), showing_model)
            person_do_ll += np.sum(np.log(do_probs))
            person_show_ll += np.sum(np.log(show_probs))
        print(person_do_ll/len(trajs), person_show_ll/len(trajs))
        total_do_ll += person_do_ll
        total_show_ll += person_show_ll
        do_wins += (person_do_ll > person_show_ll)
    print(total_do_ll, total_show_ll)
    return do_wins/people_counted

def get_mixture_by_person(mixtures, trajs_by_person, cond='show'):
    """Pick, for each person, the mixture weight that best explains their demos.

    Params:
        mixtures: indexable sequence of candidate weights on the showing policy
        trajs_by_person: mapping person -> cond -> rf -> trajectory
        cond: which condition's trajectories to score

    Returns:
        list with one maximum-likelihood mixture weight per person who has
        trajectories in `cond`; people without any are skipped.
    """
    alphas = []
    for i, person_trajs in enumerate(trajs_by_person.values()):
        trajs = person_trajs[cond]
        if len(trajs) == 0:
            continue
        print(i)
        # Kept as plain lists: trajectories differ in length, and stacking
        # ragged arrays with np.array raises on recent NumPy versions.
        all_do_probs, all_show_probs = [], []
        for rf, doing_model, showing_model in zip(mdp_codes, doing_models, showing_models):
            do_policy = doing_model.get_softmax_function(do_softmax)
            traj = trajs[rf]
            do_probs, show_probs = get_doshow_probs(traj, do_policy, showing_model)
            all_do_probs.append(do_probs)
            all_show_probs.append(show_probs)
        mixture_ll = np.zeros(len(mixtures))
        for mix_idx, mixture_prob in enumerate(mixtures):
            ll = 0
            for do_probs, show_probs in zip(all_do_probs, all_show_probs):
                probs = (1-mixture_prob)*do_probs + mixture_prob*show_probs
                ll += np.sum(np.log(probs))
            mixture_ll[mix_idx] = ll
        optimal_alpha = mixtures[np.argmax(mixture_ll)]
        alphas.append(optimal_alpha)
    return alphas

def plot_alphas(alphas, save_path='/Users/smitha/proj/ml/human_misspec/figs/pedagogy/ind_alphas.pdf'):
    """Print summary counts and plot a histogram of per-person alpha estimates.

    Params:
        alphas: sequence of estimated mixture weights, one per individual
        save_path: where the figure is written. The default is the location
            this notebook originally hard-coded; pass another path to save
            elsewhere, or None to skip saving.
    """
    alphas = np.array(alphas)
    print(len(alphas))
    print(alphas)
    # Which individuals, and how many, have an estimate above 0.5.
    print(np.where(alphas > 0.5))
    print(len(np.where(alphas > 0.5)[0]))
    fig = plt.figure(figsize=(10, 10))
    sns.distplot(alphas, kde=False, bins=5, rug=True)
    plt.title('Individual Estimates of ' + r'$\alpha$')
    plt.ylabel('Num Individuals')
    plt.xlabel('Estimated Probability of Pedagogic Action (' + r'$\alpha$' + ')')
    plt.yticks([0, 4, 8])
    if save_path is not None:
        fig.savefig(save_path)

In [None]:
# Nested mapping: participant -> condition -> reward-function code -> trajectory.
human_trajs_by_person = get_human_trajs_by_person()

In [None]:
# Best-fitting mixture weight per participant, searched over 101 values from 0 to 1.
alphas = get_mixture_by_person(np.linspace(0, 1, 101), human_trajs_by_person)

In [None]:
# Histogram of the per-participant estimates; also saves the figure to disk.
plot_alphas(alphas)

In [None]:
# Share of showing-condition participants better explained by the doing policy.
get_do_vs_show(human_trajs_by_person, 'show')

In [None]:
# Mean log-likelihood of the human *doing* demonstrations at each mixture weight.
mixtures = np.linspace(0, 1, 101)
do_lls = get_human_traj_likelihood(human_trajs, mixtures, traj_type='do')

In [None]:
# Mean log-likelihood of the human *showing* demonstrations (traj_type defaults to 'show').
mixtures = np.linspace(0, 1, 101)
lls = get_human_traj_likelihood(human_trajs, mixtures)

In [None]:
# One mean log-likelihood per mixture weight, for the showing demonstrations.
print(lls)

In [None]:
# Global plotting style for the figures below.
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('white')
sns.set_context("paper", font_scale=2.5, rc={"lines.linewidth": 3.0})
sns.set_style({'lines.linewidth': 12, 'lines.markersize': 10, 
               'lines.markeredgewidth': 2, 'errorbar.capsize': 2,
               'axes.titlesize': 24
})
from matplotlib import rc
rc("font", **{"family": "serif", "serif": ["Palatino"]})
# usetex=True makes matplotlib render text with LaTeX, so a LaTeX install is required.
rc("text", usetex = True)
rc("axes", **{"titlesize": 36, "labelsize": 30})

In [None]:
def plot_lls(mixtures, lls, title, filename=None,
             fig_dir='/Users/smitha/proj/ml/human_misspec/figs/pedagogy'):
    """Plot a likelihood curve against the mixture weight.

    Params:
        mixtures: x values, the probability of a pedagogic action
        lls: y values; the axis is labelled as mean negative log-likelihood,
            so callers pass negated log-likelihoods
        title: figure title
        filename: base name (without extension) for the saved PDF; nothing is
            saved when None
        fig_dir: directory the PDF is written to. The default is the location
            this notebook originally hard-coded.
    """
    fig = plt.figure()
    fig.set_size_inches((10, 10))
    plt.plot(mixtures, lls, marker='o', markerfacecolor='white')
    plt.xlabel('Probability of Pedagogic Action (' + r'$\alpha$' + ')')
    plt.ylabel('Mean Negative Log-Likelihood')
    plt.title(title)
    if filename is not None:
        fig.savefig('{0}/{1}.pdf'.format(fig_dir, filename))

In [None]:
# One mean log-likelihood per mixture weight, for the doing demonstrations.
print(do_lls)

In [None]:
# The grid of mixture weights that lls and do_lls were evaluated on.
print(mixtures)

In [None]:
# Mixture weight with the highest mean log-likelihood on the showing demonstrations.
print(mixtures[np.argmax(lls)])

In [None]:
# Mixture weight with the highest mean log-likelihood on the doing demonstrations.
print(mixtures[np.argmax(do_lls)])

In [None]:
# Negated so the curve matches plot_lls's "Mean Negative Log-Likelihood" axis label.
plot_lls(mixtures, -do_lls, title='Demonstrations from Literal Condition', filename='do_mixture_lls')

In [None]:
# Negated so the curve matches plot_lls's "Mean Negative Log-Likelihood" axis label.
plot_lls(mixtures, -lls, title='Action Mixture Model', filename='ped_mix_lls')