# Import

In [None]:
# importing
import numpy as np
from scipy.special import binom

import itertools

import matplotlib.pyplot as plt
import matplotlib

# showing figures inline
%matplotlib inline

In [None]:
# plotting options
font = {'size': 20}

# set all defaults in one go: font size, LaTeX rendering of text,
# and the default figure size (width, height) in inches
plt.rcParams.update( {
    'font.size': font['size'],
    'text.usetex': True,
    'figure.figsize': (18, 6),
} )

# Here we go

### define parameters

In [None]:
# parameters

# die with N_dice sides; p_dice[ i ] is the probability of rolling side i+1
N_dice = 3
p_dice = np.array( [ .5, .25, .25 ] )

# sequence length
N_sequence = 7

# sanity checks: the two die parameters are given independently, so make sure they
# agree and that p_dice is a probability vector before anything is computed from them
assert p_dice.shape == ( N_dice, ), 'p_dice needs exactly one probability per side'
assert np.all( p_dice >= 0 ) and np.isclose( p_dice.sum(), 1.0 ), 'p_dice has to be a probability vector'
assert N_sequence >= 1, 'sequence length has to be positive'


### get all sequences and their probability

In [None]:
# helper function for getting all sequences
def cartesian_product_nfold(n, k):
    """
    Enumerate the n-fold Cartesian product {1,...,k}^n.

    IN: n: number of factors, i.e., the length of every tuple
        k: largest value; every coordinate ranges over 1,...,k

    OUT: integer array of shape (k**n, n); each row is one tuple and the
         last coordinate varies fastest
    """

    values = np.arange( 1, k + 1 )

    # one n-dimensional grid per coordinate ('ij' keeps the axes in argument order)
    coordinate_grids = np.meshgrid( *( values for _ in range( n ) ), indexing='ij' )

    # flatten every grid and use it as one column of the result
    columns = [ grid.ravel() for grid in coordinate_grids ]

    return np.stack( columns, axis=1 )

In [None]:
# enumerate all sequences over the sides 1..N_dice and get the probability of each one
numb_sequences = N_dice ** N_sequence

# every tuple of N_sequence sides, in the order in which itertools.product yields them
all_outcomes = list( itertools.product( np.arange( 1, N_dice + 1, dtype=int ), repeat=N_sequence ) )

# one row per sequence, stored as floats
sequences = np.array( all_outcomes, dtype=float ).reshape( numb_sequences, N_sequence )

# probability of a sequence = product of the probabilities of its rolls
# (sides are 1-based, p_dice is indexed from 0)
probability_of_sequences = np.array( [ np.prod( p_dice[ np.array( outcome ) - 1 ] ) for outcome in all_outcomes ] )


In [None]:
# second route to the same quantities: enumerate the sequences with the numpy helper
# and recompute their probabilities (this overwrites probability_of_sequences)
sequence_np = cartesian_product_nfold( N_sequence, N_dice )

# look up the probability of every single roll at once (sides are 1-based, p_dice is
# indexed from 0) and multiply along each row to get the probability of the sequence
probability_of_sequences = np.prod( p_dice[ sequence_np - 1 ], axis=1 )

### reduce to types and find size of type classes

In [None]:
# now identify the different types

# sort the entries within each sequence (along the last axis), so that all sequences
# containing the same sides equally often turn into the same row
sorted_numpy_array = sequences.copy()
sorted_numpy_array.sort( axis=-1 )

# distinct rows are the types; how often a row occurs is the size of its type class
type_classes_sorted, size_of_type_class = np.unique( sorted_numpy_array, return_counts=True, axis=0 )

print( 'Total number of sequences: \t', numb_sequences)
print( 'Different types: \t\t', len( type_classes_sorted ) )


### get bounds for size of type classes

In [None]:
# entropy-based lower and upper bound for the size of every type class
size_of_type_class_lower_bound = np.zeros( len( type_classes_sorted ) )
size_of_type_class_upper_bound = np.zeros( len( type_classes_sorted ) )

for ind_type_class, val_type_class in enumerate( type_classes_sorted ):

    # relative frequency of every side 1..N_dice within the representative sequence
    empirical_distribution = np.array( [ np.count_nonzero( val_type_class == side ) / N_sequence for side in range( 1, N_dice + 1 ) ] )

    # entropy in bits; sides that do not occur are left out of the sum
    empirical_distribution_non_zero = empirical_distribution[ np.nonzero( empirical_distribution ) ]
    H_type_class = - ( empirical_distribution_non_zero * np.log2( empirical_distribution_non_zero ) ).sum()

    # upper bound 2^(n H); the lower bound is the same term scaled by 1 / (n+1)^N_dice
    two_to_nH = 2**( N_sequence * H_type_class )
    size_of_type_class_upper_bound[ ind_type_class ] = two_to_nH
    size_of_type_class_lower_bound[ ind_type_class ] = 1 / ( N_sequence + 1 )**N_dice * two_to_nH



In [None]:
# compare the exact sizes of the type classes with their bounds (logarithmic y-axis)
fig, ax = plt.subplots()

ax.semilogy( size_of_type_class, label='elements in type class' )
ax.semilogy( size_of_type_class_lower_bound, label='lower bound')
ax.semilogy( size_of_type_class_upper_bound, label='upper bound')

ax.set_xlabel('type classes in lexicographic order')
ax.set_ylabel('number of sequences')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# probability of type classes and bounds
prob_of_type_class = np.zeros( len( type_classes_sorted ) )
prob_of_type_class_lower_bound = np.zeros( len( type_classes_sorted ) )
prob_of_type_class_upper_bound = np.zeros( len( type_classes_sorted ) )


# pass through type classes; get the probability of the class, its empirical
# distribution, and the KL divergence between that distribution and p_dice
for ind_type_class, val_type_class in enumerate( type_classes_sorted ):

    # prob of a type class: probability of the representative sequence
    # times the number of sequences in the class
    prob_of_type_class_pointwise = p_dice[ np.array( val_type_class - 1 ).astype(int) ]
    prob_of_type_class[ ind_type_class ] = np.prod( prob_of_type_class_pointwise ) * size_of_type_class[ ind_type_class ]


    # get empirical distribution of a type class:
    # relative frequency of every side 1..N_dice in the representative sequence
    empirical_distribution = np.array( [ np.count_nonzero( val_type_class == side ) for side in range( 1, N_dice + 1 ) ] ) / N_sequence

    # find non-zero values and slice the corresponding prob vectors
    # (sides that do not occur are left out of the divergence sum)
    indices_empirical_distribution_non_zero = np.where( empirical_distribution != 0 )
    empirical_distribution_non_zero = empirical_distribution[ indices_empirical_distribution_non_zero ]
    p_dice_non_zero = p_dice[ indices_empirical_distribution_non_zero ]

    # determine KL divergence (in bits)
    D_PQ = np.sum( empirical_distribution_non_zero * np.log2( empirical_distribution_non_zero / p_dice_non_zero ) )

    # get bounds as described in the lecture
    prob_of_type_class_lower_bound[ ind_type_class ] = 1 / ( N_sequence + 1 )**N_dice * 2**( - N_sequence * D_PQ )
    prob_of_type_class_upper_bound[ ind_type_class ] = 2**( - N_sequence * D_PQ )



In [None]:
# compare the exact probabilities of the type classes with their bounds (logarithmic y-axis)
fig, ax = plt.subplots()

ax.semilogy( prob_of_type_class, label='prob. of type class' )
ax.semilogy( prob_of_type_class_lower_bound, label='lower bound')
ax.semilogy( prob_of_type_class_upper_bound, label='upper bound')

ax.set_xlabel('type classes in lexicographic order')
ax.set_ylabel('probability')
ax.legend()
ax.grid(True)
plt.show()

In [None]:

# print(result)
#print(result)

In [None]:
# show the enumerated sequences (one row per sequence)
print(sequences)