In [19]:
import pystan
import arviz
import random

In [13]:
def do_exprinment(is_cheater):
    """Produce one survey answer following a two-coin-flip protocol.

    A first call to ``flip_coin()`` decides which answer is given: when it
    is truthy the respondent's real status ``is_cheater`` is returned
    unchanged; otherwise the result of a second ``flip_coin()`` call is
    returned instead.

    Args:
        is_cheater: The respondent's true status; returned as-is when the
            first flip is truthy.

    Returns:
        Either ``is_cheater`` or the outcome of the second ``flip_coin()``.
    """
    answer_truthfully = flip_coin()
    if not answer_truthfully:
        # First flip was falsy: the answer is decided by a fresh flip.
        return flip_coin()
    return is_cheater

def flip_coin():
    """Simulate a single fair coin flip.

    Returns:
        bool: True when a uniform draw from [0.0, 1.0) is greater than 0.5,
        False otherwise.
    """
    # `random` is the imported module, so the draw has to come from
    # random.random(); calling the module object itself raises TypeError.
    return random.random() > 0.5

def compute_result_to_list(num_of_cheaters, num_of_people):
    """Build a 0/1 list marking which members of a population are cheaters.

    ``num_of_cheaters`` distinct indices are drawn at random (without
    replacement) from ``range(num_of_people)``; the returned list holds 1 at
    those indices and 0 everywhere else.

    Args:
        num_of_cheaters: How many positions should be marked with 1.
        num_of_people: Length of the returned list.

    Returns:
        list[int]: ``num_of_people`` entries, exactly ``num_of_cheaters`` of
        which are 1.

    Raises:
        ValueError: Propagated from ``random.sample`` when
            ``num_of_cheaters`` is negative or larger than ``num_of_people``.
    """
    all_population = range(num_of_people)
    # A set gives O(1) membership checks; the original list lookup made the
    # loop quadratic in the population size.
    cheaters = set(random.sample(all_population, k=num_of_cheaters))
    return [1 if person in cheaters else 0 for person in all_population]
    

In [14]:
# Stan program that models the survey with an explicit per-respondent "coin"
# parameter: depending on the coin, an answer is drawn from Bernoulli(theta)
# or from Bernoulli(0.5).
#
# Fix: coin_results[i] is constrained to [0, 1], so the previous
# `normal(0, 1)` prior combined with a `>= 0` test made the first branch
# always true and the Bernoulli(0.5) branch unreachable. A uniform(0, 1) prior
# with a 0.5 threshold gives each branch probability 0.5 a priori.
generative_model = """

data {
    int<lower=0> N; // number of people answered the survey
    int<lower=0, upper = 1> y[N]; // boolean array of answers
}

parameters {
    real<lower=0, upper=1> theta; // the latent variable we want to infer
    real<lower=0, upper=1> coin_results[N]; // helper coin results buffer
}


model {
    theta ~ beta(0.5, 0.5); // beta prior
    for (i in 1:N){
        coin_results[i] ~ uniform(0, 1); // fair coin encoded on [0, 1]
        if (coin_results[i] >= 0.5){
            y[i] ~ bernoulli(theta);
        }
        else{
            y[i] ~ bernoulli(0.5);
        }
    }
    
}
"""

In [15]:
# Stan program for the survey written as a two-component mixture: each answer
# y[i] contributes log_mix(0.5, Bernoulli(y[i] | theta), Bernoulli(y[i] | 0.5))
# to the target, with a Beta(0.5, 0.5) prior on theta. Unlike the explicit
# per-respondent formulation, theta is the only parameter here.
mixture_model = """

data {
    int<lower=0> N; // number of people answered the survey
    int<lower=0, upper = 1> y[N]; // boolean array of answers
}

parameters {
    real<lower=0, upper=1> theta; // the latent variable we want to infer
}


model {
    theta ~ beta(0.5, 0.5); // beta prior
    for (i in 1:N){
        target +=
            log_mix(0.5, bernoulli_lpmf(y[i] | theta), bernoulli_lpmf(y[i] | 0.5));
    }
    
}
"""

In [16]:
# Build a pystan StanModel from the Stan source held in `generative_model`.
g_sm = pystan.StanModel(model_code=generative_model)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_253b7b21ad7780ba0df8d0570d3407f2 NOW.


In [17]:
# Build a pystan StanModel from the Stan source held in `mixture_model`.
m_sm = pystan.StanModel(model_code=mixture_model)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_1faba59f95facd8bf45fceea85f1353e NOW.


In [20]:
# Simulated survey data sets: the same population size for every experiment,
# with a growing number of cheaters. Replaces six copy-pasted stanzas.
SEED = 42  # fixed so a fresh "Restart & Run All" regenerates the same data
NUM_PEOPLE = 100
CHEATER_COUNTS = (15, 30, 45, 60, 75, 90)


def _make_experiment(num_of_cheaters, num_of_people=NUM_PEOPLE):
    """Return the Stan data dict ({'N': ..., 'y': ...}) for one experiment."""
    answers = compute_result_to_list(num_of_cheaters, num_of_people)
    return {
        'N': len(answers),
        'y': answers
    }


random.seed(SEED)

# Built in the same order as the original cell (15, 30, ..., 90 cheaters).
experiments = [_make_experiment(count) for count in CHEATER_COUNTS]

# Keep the original variable names so later cells keep working.
exp_1, exp_2, exp_3, exp_4, exp_5, exp_6 = experiments
y1, y2, y3, y4, y5, y6 = (experiment['y'] for experiment in experiments)

In [24]:
# Sample from the compiled generative model using the first experiment's data
# (exp_1), with iter=1000, chains=4 and adapt_delta raised to 0.9.
g_fit = g_sm.sampling(data=exp_1, iter=1000, chains=4, control = {"adapt_delta" : 0.9})

In [None]:
# Print the text representation of the generative-model fit
# (presumably the posterior summary table — confirm against PyStan output).
print(g_fit)