# Make up some data for the unfair casino

In [28]:
import numpy as np
import pandas as pd
import os

In [20]:
# Fair die: six equally weighted faces, scaled so the weights sum to 1.
fair_dice_emission_vector = np.ones(6, dtype=float)
fair_dice_emission_vector = fair_dice_emission_vector / np.sum(fair_dice_emission_vector)

In [21]:
# Show the normalized fair-die probabilities (each face should be 1/6).
fair_dice_emission_vector

array([ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
        0.16666667])

In [22]:
# Loaded die: faces 5 and 6 carry 100x the weight of faces 1-4;
# dividing by the total turns the weights into probabilities.
unfair_dice_emission_vector = np.asarray([1.0] * 4 + [100.0] * 2, dtype=float)
unfair_dice_emission_vector = unfair_dice_emission_vector / np.sum(unfair_dice_emission_vector)

In [23]:
# Show the normalized loaded-die probabilities (faces 5 and 6 dominate).
unfair_dice_emission_vector

array([ 0.00490196,  0.00490196,  0.00490196,  0.00490196,  0.49019608,
        0.49019608])

In [24]:
# Latent states: 0 = fair, 1 = unfair.
# Row = current state, column = next state.
latent_model_transition_matrix = np.array([
    [0.9, 0.1],  # fair   -> fair, fair   -> unfair
    [0.3, 0.7],  # unfair -> fair, unfair -> unfair
])

In [25]:
def roll_dice(emission_probs, rng=None):
    """Draw one roll of a six-sided die by inverse-CDF sampling.

    Parameters
    ----------
    emission_probs : sequence of 6 floats
        Probability of each face, in face order 1..6. Expected to be
        non-negative and to sum to (approximately) 1.
    rng : object with a ``random()`` method, optional
        Source of the uniform draw in [0, 1), e.g. ``np.random.default_rng(seed)``.
        When omitted, NumPy's global generator (``np.random.random``) is used.

    Returns
    -------
    int
        The face that was rolled, between 1 and 6.

    Raises
    ------
    AssertionError
        If ``emission_probs`` does not contain exactly six entries.
    """
    assert len(emission_probs) == 6, "a die needs exactly six face probabilities"
    cumulative_probs = np.cumsum(emission_probs)
    roll = np.random.random() if rng is None else rng.random()
    # side="right" gives the first index i with roll < cumulative_probs[i],
    # which is exactly the face a left-to-right scan would select.
    face_index = int(np.searchsorted(cumulative_probs, roll, side="right"))
    # Floating-point rounding can leave cumulative_probs[-1] slightly below 1
    # (np.cumsum([1/6] * 6)[-1] is 0.9999999999999999), so a draw at the very
    # top of [0, 1) may match no face. Attribute it to the last face instead
    # of returning None.
    face_index = min(face_index, len(cumulative_probs) - 1)
    return face_index + 1


def do_the_transition(current_state, transition_matrix, rng=None):
    """Sample the next latent state of the Markov chain.

    Parameters
    ----------
    current_state : int
        Row index of the state the chain is currently in.
    transition_matrix : 2-D array-like
        ``transition_matrix[i][j]`` is the probability of moving from state
        ``i`` to state ``j``; each row is expected to sum to (approximately) 1.
    rng : object with a ``random()`` method, optional
        Source of the uniform draw in [0, 1), e.g. ``np.random.default_rng(seed)``.
        When omitted, NumPy's global generator (``np.random.random``) is used.

    Returns
    -------
    int
        Index of the next state.

    Raises
    ------
    ValueError
        If the selected row of ``transition_matrix`` is empty.
    """
    # this is the 'roll of the dice'
    transition_prob = np.random.random() if rng is None else rng.random()
    # get the transition probability data for the current state
    probs = np.asarray(transition_matrix)[current_state, :]
    cum_probs = np.cumsum(probs)
    if cum_probs.size == 0:
        raise ValueError('transition_matrix has no target states')
    # side="right" gives the first index i with transition_prob < cum_probs[i],
    # which is exactly the state a left-to-right scan would select.
    next_state = int(np.searchsorted(cum_probs, transition_prob, side="right"))
    # If rounding leaves cum_probs[-1] slightly below 1, a draw at the very top
    # of [0, 1) matches no state; fall back to the last state rather than None.
    return min(next_state, cum_probs.size - 1)
    

In [26]:
# Simulate the casino: at every step record (latent state, observed roll),
# then advance the latent chain by one transition.
RANDOM_SEED = 42  # fixed so that re-running this cell regenerates the same data
N_ROLLS = 10000   # number of simulated dice rolls
np.random.seed(RANDOM_SEED)

# emission probabilities indexed by latent state (0: fair, 1: unfair)
emission_vectors_by_state = (fair_dice_emission_vector, unfair_dice_emission_vector)

current_latent_state = 0 # start in the fair state

data = []

for i in range(N_ROLLS):
    if current_latent_state not in (0, 1):
        raise ValueError('You somehow screwed up the latent state value')

    # roll the die that belongs to the current latent state
    dice_value = roll_dice(emission_vectors_by_state[current_latent_state])
    data.append((current_latent_state, dice_value)) # state we were in, what we rolled

    # now let's get the next state
    current_latent_state = do_the_transition(current_latent_state, latent_model_transition_matrix)
    
        

In [27]:
# Peek at the first 50 (latent state, dice value) records.
data[:50]

[(0, 6),
 (1, 6),
 (1, 6),
 (1, 5),
 (1, 6),
 (1, 6),
 (0, 3),
 (0, 2),
 (0, 2),
 (0, 3),
 (0, 4),
 (0, 1),
 (0, 6),
 (0, 1),
 (0, 4),
 (1, 6),
 (1, 6),
 (0, 5),
 (0, 6),
 (0, 2),
 (0, 4),
 (0, 6),
 (0, 4),
 (0, 3),
 (0, 4),
 (0, 1),
 (0, 6),
 (0, 6),
 (0, 2),
 (0, 6),
 (0, 6),
 (0, 3),
 (0, 4),
 (0, 4),
 (0, 6),
 (0, 2),
 (0, 1),
 (0, 1),
 (0, 4),
 (0, 6),
 (0, 5),
 (0, 4),
 (0, 1),
 (0, 6),
 (0, 4),
 (0, 2),
 (1, 6),
 (1, 6),
 (1, 6),
 (1, 6)]

In [31]:
# Simulated rolls: one row per step with the latent state and the observed face.
data_df = pd.DataFrame(data, columns=['state', 'dice_value'])

# Emission probabilities: one row per die type, one column per face value 1..6.
emission_df = pd.DataFrame(
    data=[fair_dice_emission_vector, unfair_dice_emission_vector],
    index=['fair', 'unfair'],
    columns=list(range(1, 7)),
)

# Transition probabilities: row = current state, column = next state.
transition_df = pd.DataFrame(
    data=latent_model_transition_matrix,
    index=['fair', 'unfair'],
    columns=['fair', 'unfair'],
)

In [32]:
# Display the emission probability table (rows: die type, columns: face value).
emission_df

Unnamed: 0,1,2,3,4,5,6
fair,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667
unfair,0.004902,0.004902,0.004902,0.004902,0.490196,0.490196


In [33]:
# Display the state-transition table (rows: current state, columns: next state).
transition_df

Unnamed: 0,fair,unfair
fair,0.9,0.1
unfair,0.3,0.7


In [34]:
# Write the simulated rolls and the model parameters to ../data as CSV files.
data_dir = os.path.join('..', 'data')
# to_csv does not create missing directories, so make sure the target exists
# before writing (a no-op when the directory is already there).
os.makedirs(data_dir, exist_ok=True)

data_file = os.path.join(data_dir, 'casino_data.csv')
emission_file = os.path.join(data_dir, 'dice_probs.csv')
transition_file = os.path.join(data_dir, 'casino_transitions.csv')

data_df.to_csv(data_file)
emission_df.to_csv(emission_file)
transition_df.to_csv(transition_file)