In [2]:
import numpy as np
import pandas as pd
import pymc3 as pm
import arviz as az
import theano.tensor as tt
import scipy as sp
from scipy import stats

In [6]:
# getting the data

data = pd.read_csv('cleaned_data.csv' , sep = ' ' , header = None)
data

Unnamed: 0,0,1,2,3,4
0,0,0,0,0,0
1,0,0,0,0,0
2,0,0,0,0,0
3,0,0,0,0,1
4,0,0,0,0,1
...,...,...,...,...,...
995,1,1,1,1,1
996,1,1,1,1,1
997,1,1,1,1,1
998,1,1,1,1,1


Rasch Model

$$\Pr(X_{ij}=1) = \frac{e^{\alpha_j - \beta\times\theta_i}}{1 + e^{\alpha_j - \beta\times\theta_i}}$$

$\beta_n$ = ability of student n

$\delta_i$ = difficulty of item i

https://towardsdatascience.com/a-bayesian-approach-to-rasch-models-item-response-theory-cc08805cbb37

In [223]:
# define some constants according to the constraints in the question
N = 1000
Q = 5
sigma_alpha = 100
mean_alpha = 0
mean_beta = 0
sigma_beta = 1000
mean_theta = 0
sigma_theta = 1

On doing some simple mathematics, we get the likelihood to be:

$\mathcal{L}(x | \{\alpha_j\}_{j=1}^Q , \{\theta_i\}_{i=1}^N , \beta) = \frac{\exp\left(\beta\sum\limits_{i=1}^N \theta_i r_i - \sum\limits_{j=1}^Q\alpha_j s_j\right)}{\prod\limits_{i=1}^Q\prod\limits_{j=1}^N 1 + \exp\left({\beta\theta_i - \alpha_j}\right)}$

where $r_i$ is the score of the $i^{th}$ student and $s_j$ is the score achieved on the $j^{th}$ question.


We also have our priors (all the means are zero):

$\pi(\alpha_j) = \frac{1}{\sqrt{2\pi\sigma_\alpha^2}} \exp\left({-\frac{\alpha_j^2}{2\sigma_\alpha^2}}\right)$

$\pi(\theta_i) = \frac{1}{\sqrt{2\pi\sigma_\theta^2}} \exp\left({-\frac{\theta_i^2}{2\sigma_\theta^2}}\right)$

$\pi(\beta) = \frac{1}{\sqrt{2\pi\sigma_\beta^2}} \exp\left({-\frac{\beta^2}{2\sigma_\beta^2}}\right) \times \mathbf{1}\left(\beta \geq 0\right)$

Thus, the posterior distribution is given by:

$\pi(\{\alpha_j\}_{j=1}^Q , \{\theta_i\}_{i=1}^N , \beta | x) = \mathcal{L}(x | \{\alpha_j\}_{j=1}^Q , \{\theta_i\}_{i=1}^N , \beta) \times \prod\limits_{j=1}^Q\pi(\alpha_j) \times \prod\limits_{i=1}^N\pi(\theta_i) \times \pi(\beta)$

For more details about the mathematics, please refer to the paper.

Since we wish to sample for $\{\alpha_j\}_{j=1}^Q , \{\theta_i\}_{i=1}^N , \beta$ , we would have to obtain the  conditional distributions of $\alpha_j , \theta_i , \beta$.

$\pi(\alpha_j | \alpha_1 , \ldots , \alpha_{j-1} , \alpha_{j+1} , \ldots, \alpha_Q , \{\theta_i\}_{i=1}^N , \beta, x) = \frac{\exp\left(-\alpha_j s_j - \frac{\alpha_j^2 }{2\sigma_\alpha^2}\right)}{\prod\limits_{i=1}^Q\prod\limits_{j=1}^N 1 + \exp\left({\beta\theta_i - \alpha_j}\right)}$

$\pi(\beta | \{\alpha_j\}_{j=1}^Q , \{\theta_i\}_{i=1}^N , x) = \frac{\exp\left(\beta\sum\limits_{i=1}^N \theta_i r_i  - \frac{\beta^2 }{2\sigma_\beta^2}\right)}{\prod\limits_{i=1}^Q\prod\limits_{j=1}^N 1 + \exp\left({\beta\theta_i - \alpha_j}\right)}$

$\pi(\theta_i | \theta_1 , \ldots , \theta_{i-1} , \theta_{i+1} , \ldots, \theta_N , \{\alpha_j\}_{j=1}^Q , \beta, x) = \frac{\exp\left(\beta\theta_i r_i  - \frac{\theta_i^2 }{2\sigma_\theta^2}\right)}{\prod\limits_{i=1}^Q\prod\limits_{j=1}^N 1 + \exp\left({\beta\theta_i - \alpha_j}\right)}$

In [7]:
score_per_question = np.sum(data, axis = 0)
score_per_student = np.sum(data, axis = 1)

In [145]:
def initialize_chain():
    alpha = np.random.normal(mean_alpha , sigma_alpha , size = Q)
    alpha -= alpha.mean()

    # halfFlat is a uniform distribution between 0 and infinity
    # beta = 1

    beta = np.random.normal(mean_beta , sigma_beta) * (beta > 0 and beta < sigma_beta)

    theta = np.random.normal(mean_theta , sigma_theta , size = N)
    return alpha, beta, theta

In [222]:
def mcmc_sampler(num_chains , num_steps, burn_in):
    eps = 1e-6
    alpha_samples = []
    beta_samples = []
    theta_samples = []

    for chain_num in range(num_chains):
        alpha , beta , theta = initialize_chain()

        alpha_chain = []
        beta_chain = []
        theta_chain = []

        # we assume proposal distribution to be symmetric
        for step in range(num_steps):
            alpha_ratio = alpha_ratio_num - alpha_ratio_den
            print('(Chain: {}, Step: {}/{})=> Old Likelihood: {:.4f}, New Likelihood: {:.4f} , Alpha = {:.4f}'.format(chain_num+1, step+1, num_steps, likelihood, likelihood_new , alpha_ratio), end='\r')

            u = np.random.rand()
            if u < min(1 , alpha_ratio):
                if step > burn_in-1:
                    alpha_chain.append(alpha_new)
                    beta_chain.append(beta_new)
                    theta_chain.append(theta_new)
                
                alpha = alpha_new
                beta = beta_new
                theta = theta_new

            else:
                alpha_chain.append(alpha)
                beta_chain.append(beta)
                theta_chain.append(theta)

        alpha_samples.append(alpha_chain)
        beta_samples.append(beta_chain)
        theta_samples.append(theta_chain)
        print('\n')
    return alpha_samples , beta_samples , theta_samples


In [219]:
s_alpha, s_beta, s_theta = mcmc_sampler(1, 50, 5)

(Chain: 1, Step: 50/50)=> Old Likelihood: -107.1321, New Likelihood: -106.4113 , Alpha = 0.72080



In [220]:
means = np.zeros(5)
for i in range(len(s_alpha)):
    means+=np.mean(s_alpha[i], axis = 0)
print(means/len(s_alpha))

[ 36.33212485 -43.2552371  -73.07147787 -52.43069594 101.58690643]


In [221]:
np.array(s_beta).mean()

0.8461716373604699