In [1]:
import pymc as pm
import pytensor.tensor as pt
import pandas as pd, numpy as np
import arviz as az

In [None]:
# Import data and transform answers to scale 0-2

df = pd.read_csv('./data/first_meeting.csv')

# All familiarity questions share this column-name prefix.
cols = [c for c in df.columns if c.startswith('What about these?')]

remap = {'Never heard of it': 0, 'Roughly familiar': 1, 'Could explain it': 2}


def _encode_answers(column):
    """Translate one column of answer texts into the numeric codes of `remap`.

    Values that are not keys of `remap` (e.g. missing answers) pass through
    unchanged, as they did with `DataFrame.replace`. `pd.to_numeric` then
    guarantees a numeric dtype and raises `ValueError` if unparseable answer
    text is left over.
    """
    return pd.to_numeric(column.map(lambda answer: remap.get(answer, answer)))


# Encode explicitly instead of `df[cols].replace(remap)`: replace() only produced a
# numeric array through implicit object downcasting, which pandas 2.2 deprecates.
responses = df[cols].apply(_encode_answers).to_numpy()  # rows: respondents, columns: questions

used_py = (df['Have you ever used Python for data analysis?'] == 'Yes')

# Registry of fitted traces, keyed by model name (filled in by the model cells).
models = {}

In [None]:
# Binomial IRT model
# Each answer is treated as a number of "successes" out of `max_score` trials, with
# success probability invlogit(knowledge * q_mult + q_baseline).

# Answers are coded 0..max(remap.values()), so that maximum is the number of
# binomial trials. A larger n would put probability mass on scores nobody can give.
max_score = max(remap.values())

with pm.Model(coords={'respondents': np.arange(responses.shape[0]), 'questions': cols}) as model:

    # Latent knowledge per respondent (PyMC's default Normal: mu=0, sigma=1).
    knowledge = pm.Normal('knowledge', dims='respondents')
    # Per-question intercept on the logit scale.
    q_baseline = pm.Normal('q_baseline', sigma=3, dims='questions')
    # Per-question slope; LogNormal keeps it positive.
    q_multiplier = pm.LogNormal('q_mult', dims='questions')

    # Broadcast to a (respondents, questions) grid of probabilities.
    pv = pm.math.invlogit(knowledge[:, None] * q_multiplier[None, :] + q_baseline[None, :])
    pm.Binomial('responses', n=max_score, p=pv, observed=responses)

    # Fixed seed so that Restart & Run All reproduces the same draws.
    idata = pm.sample(random_seed=42)
    # Pointwise log-likelihood is required by az.compare.
    pm.compute_log_likelihood(idata, extend_inferencedata=True)
    models['IRT_base'] = idata

az.plot_trace(idata);

In [None]:
# Binomial IRT model with regressor
# Same likelihood as the base IRT model, but a respondent's knowledge is shifted by
# `py_effect` when they report having used Python for data analysis.

# Answers are coded 0..max(remap.values()), so that maximum is the number of
# binomial trials. A larger n would put probability mass on scores nobody can give.
max_score = max(remap.values())

# Plain 0/1 float vector: the tensor product below should not rely on how
# PyTensor and pandas dispatch operators between a TensorVariable and a Series.
py_indicator = np.asarray(used_py, dtype=float)

with pm.Model(coords={'respondents': np.arange(responses.shape[0]), 'questions': cols}) as model:
    # Shift in latent knowledge associated with prior Python use.
    py_effect = pm.Normal('py_effect', sigma=1)
    # Respondent-level knowledge before the Python shift is added.
    knowledge_base = pm.Normal('knowledge_base', dims='respondents')
    knowledge = pm.Deterministic(
        'knowledge', knowledge_base + py_effect * py_indicator, dims='respondents'
    )

    # Per-question intercept on the logit scale.
    q_baseline = pm.Normal('q_baseline', sigma=3, dims='questions')
    # Per-question slope; LogNormal keeps it positive.
    q_multiplier = pm.LogNormal('q_mult', dims='questions')

    # Broadcast to a (respondents, questions) grid of probabilities.
    pv = pm.math.invlogit(knowledge[:, None] * q_multiplier[None, :] + q_baseline[None, :])
    pm.Binomial('responses', n=max_score, p=pv, observed=responses)

    # Fixed seed so that Restart & Run All reproduces the same draws.
    idata = pm.sample(random_seed=42)
    # Pointwise log-likelihood is required by az.compare.
    pm.compute_log_likelihood(idata, extend_inferencedata=True)
    models['IRT_pyreg'] = idata

az.plot_trace(idata);

In [None]:
# Multiple factors model
# Each respondent has `n_factors` latent knowledge dimensions and each question has
# one loading per factor. The logit is knowledge @ loadings.T + q_baseline.

n_factors = 3

# Answers are coded 0..max(remap.values()), so that maximum is the number of
# binomial trials. A larger n would put probability mass on scores nobody can give.
max_score = max(remap.values())

coords = {
    'respondents': np.arange(responses.shape[0]),
    'questions': cols,
    'factors': np.arange(n_factors)
}

with pm.Model(coords=coords) as model:

    knowledge = pm.Normal('knowledge', dims=('respondents', 'factors'))
    q_baseline = pm.Normal('q_baseline', sigma=3, dims='questions')
    q_multiplier = pm.Normal('q_mult', dims=('questions', 'factors'))

    # Identification: make the first n_factors rows of the loading matrix
    # lower-triangular with a positive diagonal.
    factor_idx = np.arange(n_factors)
    q_diag = pm.HalfNormal('q_diag', dims='factors')  # Positive values for diagonal
    q_multiplier = pt.set_subtensor(q_multiplier[factor_idx, factor_idx], q_diag)
    # Entries above the diagonal are pinned to zero.
    q_multiplier = pt.set_subtensor(q_multiplier[np.triu_indices(n_factors, k=1)], 0.0)
    pm.Deterministic('loadings', q_multiplier, dims=('questions', 'factors'))

    # (respondents, factors) x (factors, questions) -> (respondents, questions).
    # A matrix product gives the same sum over factors without materialising the
    # intermediate (respondents, questions, factors) tensor.
    pv = pm.math.invlogit(pt.dot(knowledge, q_multiplier.T) + q_baseline[None, :])
    pm.Binomial('responses', n=max_score, p=pv, observed=responses)

    # Fixed seed so that Restart & Run All reproduces the same draws.
    idata = pm.sample(random_seed=42)
    # Pointwise log-likelihood is required by az.compare.
    pm.compute_log_likelihood(idata, extend_inferencedata=True)
    models[f'IRT_{n_factors}f'] = idata

az.plot_trace(idata);

In [None]:
# Posterior-mean loadings of the multi-factor model. The trace is looked up in
# `models` by key rather than through `idata`, which every model cell rebinds, so
# the result does not depend on which cell happened to run last.
models[f'IRT_{n_factors}f'].posterior['loadings'].mean(['chain', 'draw'])

In [None]:
# Compare all fitted models stored in `models`; the table is the cell output.
model_ranking = az.compare(models)
model_ranking