In [1]:
# import sys
# sys.path.append("../../")
import libspn as spn
import numpy as np
import tensorflow as tf

In [2]:
# --- Dense SPN structure -------------------------------------------------
# These values are forwarded as keyword arguments to
# spn.DenseSPNGeneratorLayerNodes when the graph is built.
num_decomps = 2
num_subsets = 2
num_input_mixtures = 2
num_roots = 1  # NOTE(review): not read by any cell visible in this notebook
num_mixtures = 2
input_dist = spn.DenseSPNGeneratorLayerNodes.InputDist.RAW

# --- Additive smoothing during learning ----------------------------------
# The training loop uses
#     max(exp(-epoch * smoothing_decay) * additive_smoothing, min_additive_smoothing)
# as the smoothing value for each epoch.
additive_smoothing = 10
min_additive_smoothing = 1
smoothing_decay = 0.2

# --- Weight initialization -----------------------------------------------
# Passed to spn.generate_weights as init_value.
# Alternative that was tried: weight_init_value = 1
weight_init_value = spn.ValueType.RANDOM_UNIFORM(1, 2)

# --- Inference type used during the upward pass of learning --------------
value_inference_type = spn.InferenceType.MARGINAL

# --- Training length ------------------------------------------------------
# The training loop iterates `range(num_epochs)`, but no visible cell defined
# it, so a fresh kernel failed with NameError.
# NOTE(review): the original value is not recorded in the notebook; 20 is a
# placeholder choice -- adjust as needed.
num_epochs = 20

In [3]:
# Training file of the spoken Arabic digit data (absolute path kept from the
# original cell).
# NOTE(review): machine-specific path -- consider a configurable data directory.
train_file = '/home/jos/datasets/arabic/Train_Arabic_Digit.txt'

# The parser below assumes utterances come in alternating groups of 330:
# first a male group, then a female group, repeated once per digit.
blocks_per_group = 330
num_digits = 10

# One label per utterance: 330 zeros, 330 ones, ..., 330 nines (float array).
male_data = []
male_labels = np.concatenate([np.ones(blocks_per_group) * i for i in range(num_digits)])
female_data = []
female_labels = np.concatenate([np.ones(blocks_per_group) * i for i in range(num_digits)])


def _store_block(block, block_index):
    """Append a finished utterance to male_data or female_data.

    block_index is the zero-based position of the utterance in the file;
    even-numbered groups of `blocks_per_group` go to male_data, odd ones to
    female_data.
    """
    if (block_index // blocks_per_group) % 2 == 0:
        male_data.append(block)
    else:
        female_data.append(block)


with open(train_file) as f:
    # `count` is the number of utterances stored so far.  It must only advance
    # when a block is actually stored: advancing it on every separator line
    # (e.g. a leading blank/whitespace-only line, or two separators in a row)
    # shifts the male/female boundary and misaligns data and labels.
    count = 0
    block = []
    for line in f:  # iterate lazily instead of materialising f.readlines()
        words = line.split(' ')
        if len(words) == 13 and all(words):
            # A frame line: exactly 13 non-empty space-separated fields.
            block.append([float(w) for w in words])
        elif block:
            # Any other line terminates the utterance collected so far.
            _store_block(block, count)
            count += 1
            block = []
    if block:
        # File ended without a trailing separator: flush the last utterance
        # using the same rule as every other block.
        _store_block(block, count)
        count += 1

# The label arrays are built independently of the file, so verify they line up.
assert len(male_data) == len(male_labels), \
    "expected %d male utterances, parsed %d" % (len(male_labels), len(male_data))
assert len(female_data) == len(female_labels), \
    "expected %d female utterances, parsed %d" % (len(female_labels), len(female_data))
            
# import pandas as pd

# train_data = pd.read_csv('/home/jos/datasets/arabic/pendigits.tra', delimiter=',', header=None)

In [4]:
# All utterances in one list: male utterances first, then female ones.
# train_labels is later built in the same order (male_labels, then female_labels).
all_data = male_data + female_data


In [5]:
def sequence_lens(sequences):
    """Return a list holding ``len(seq)`` for each item of ``sequences``, in order."""
    return list(map(len, sequences))

def pad_sequences(sequences, sequence_maxlen, padding_val=-1):
    """Left-pad every sequence to ``sequence_maxlen`` steps and stack the results.

    Args:
        sequences: iterable of array-likes; each is converted with np.asarray.
        sequence_maxlen: target length along the first axis of each sequence.
        padding_val: value used for the padding rows placed *before* the data.

    Returns:
        np.ndarray built from the list of padded sequences.
    """
    def _left_pad(seq):
        values = np.asarray(seq)
        # Padding has the same trailing dimensions as the sequence itself.
        pad_shape = (sequence_maxlen - len(seq),) + values.shape[1:]
        filler = padding_val * np.ones(pad_shape)
        return np.concatenate([filler, values])

    return np.asarray([_left_pad(seq) for seq in sequences])



In [6]:
# Concatenate the frames of every utterance along axis 0 into one 2-D array
# (the recorded output shows shape (263256, 13)). It is later passed to
# spn.GaussianLeaf through its `data` argument.
data_stacked = np.concatenate(all_data, axis=0)
print(data_stacked.shape)
# Left-pad the first two utterances to 93 steps (the same number used as
# max_steps for the leaf); the result is only printed in the next cell.
padded = pad_sequences(all_data[:2], 93)

(263256, 13)


In [None]:
# Show the two padded utterances; leading rows filled with -1 are padding.
print(padded)

In [None]:
# NOTE(review): this cell has no execution count and repeats the start of the
# executed In [17] cell, except that GaussianLeaf is created without
# total_counts_init. It looks like a leftover draft -- confirm and delete.
tf.reset_default_graph()
# Generator for the dense SPN structure, configured from the In [2] settings.
dense_gen = spn.DenseSPNGeneratorLayerNodes(num_decomps=num_decomps, num_subsets=num_subsets, num_mixtures=num_mixtures, 
                                            input_dist=input_dist, num_input_mixtures=num_input_mixtures,
                                            node_type=spn.DenseSPNGeneratorLayerNodes.NodeType.LAYER)

# Gaussian leaf over 13 variables and up to 93 steps, given the stacked frames
# as `data`.
contvars = spn.GaussianLeaf(
    max_steps=93, num_vars=13, num_components=2, data=data_stacked, dynamic=True, use_prior=True,
    learn_dist_params=True)

In [None]:
# Generate an SPN on top of the leaf and keep the root's inputs for inspection.
# NOTE(review): unexecuted draft cell; `root` is rebuilt in the In [17] cell.
root = dense_gen.generate(contvars)
template_heads = root.values

In [None]:
# Debug output for the draft graph.
# NOTE(review): reads the private attribute `_num_or_size_prods`, whereas the
# In [17] cell uses `num_or_size_prods` -- prefer the public one if equivalent.
print(template_heads[0].node._num_or_size_prods)
print(root)

In [17]:
# Build the full model: one generated sub-SPN per digit, each rewired with a
# DynamicInterface, all joined under a single top-level Sum.
tf.reset_default_graph()
# Generator for the dense SPN structure, configured from the In [2] settings.
dense_gen = spn.DenseSPNGeneratorLayerNodes(num_decomps=num_decomps, num_subsets=num_subsets, num_mixtures=num_mixtures, 
                                            input_dist=input_dist, num_input_mixtures=num_input_mixtures, 
                                            node_type=spn.DenseSPNGeneratorLayerNodes.NodeType.LAYER)

# Gaussian leaf shared by all ten sub-SPNs: 13 variables, up to 93 steps,
# given the stacked training frames as `data`.
contvars = spn.GaussianLeaf(
    max_steps=93, num_vars=13, num_components=2, data=data_stacked, dynamic=True, use_prior=True,
    learn_dist_params=True, total_counts_init=100)

# Roots of the per-digit sub-SPNs, in digit order 0..9.
top_per_digit = []
for d in range(10):
    # NOTE: `root` is reused here for the per-digit root and is overwritten
    # with the global root after the loop.
    root = dense_gen.generate(contvars)
    # The node feeding the generated root; it exposes num_prods,
    # num_or_size_prods and set_prod_sizes, which are used below.
    template_head = root.values[0].node
    
    # Interface node whose source is the template head itself.
    interface = spn.DynamicInterface(name="Interface")
    interface.set_source(template_head)
    
    # One sum over the interface per product of the template head.
    interface_mixtures = spn.ParSums(interface, num_sums=template_head.num_prods, name="InterfaceMixtures", interface_head=True)
    
    # Rebuild the head's input list: after the inputs of product i, append
    # output i of interface_mixtures, so every product grows by one input.
    prod_sizes = template_head.num_or_size_prods
    offset = 0
    new_inputs, new_prod_sizes = [], []
    for i, size in enumerate(prod_sizes):
        # Original inputs of product i occupy inputs[offset:offset+size].
        new_inputs.extend(template_head.inputs[offset:offset+size])
        new_inputs.append(spn.Input(node=interface_mixtures, indices=i))
        new_prod_sizes.append(size + 1)
        offset += size
    template_head.set_values(*new_inputs)
    template_head.set_prod_sizes(new_prod_sizes)
    
    top_per_digit.append(root)
    # Alternative that was tried:
    #     top_per_digit.append(spn.Sum(*template_heads, name="Top{}".format(d)))
    
# Global root over the ten per-digit roots; its IVs (`latent`) are fed with the
# digit labels in the training cell.
root = spn.Sum(*top_per_digit)
spn.generate_weights(root, init_value=weight_init_value)
latent = root.generate_ivs()

print(root.get_num_nodes())


154


In [9]:
%load_ext autoreload
%autoreload 2

In [18]:
# 1-D int32 placeholder; the training loop feeds it a slice of seq_lens.
sequence_len_ph = tf.placeholder(tf.int32, [None])
# Smoothing is held in a variable so the training loop can change it over time.
additive_smoothing_var = tf.Variable(additive_smoothing, dtype=spn.conf.dtype)
# EM learner for the global root, using the inference type chosen in In [2].
learning = spn.EMLearning(root, log=True, value_inference_type = value_inference_type,
                          additive_smoothing=additive_smoothing_var, sequence_lens=sequence_len_ph,
                          initial_accum_value=100)
# Ops run by the training loop: weight initialisation, accumulator reset,
# per-batch accumulation, and the SPN update.
init_weights = spn.initialize_weights(root)
reset_accumulators = learning.reset_accumulators()
accumulate_updates = learning.accumulate_updates()
update_spn = learning.update_spn()
# Likelihood tensor from the learner and its mean, reported per batch.
train_likelihood = learning.likelihood()
avg_train_likelihood = tf.reduce_mean(train_likelihood)

In [24]:
# Number of frames in each utterance, in the same order as all_data.
seq_lens = [len(utterance) for utterance in all_data]
# Pad every utterance to 93 steps, then swap the first two axes so the step
# axis comes first (the recorded shape is (93, 6600, 13)).
train_set = np.transpose(pad_sequences(all_data, 93), (1, 0, 2))
# Column vector of digit labels, male utterances first (matches all_data).
train_labels = np.concatenate([male_labels, female_labels])[:, np.newaxis]

In [25]:
# Sanity check: lengths, padded data and labels must agree on the number of
# utterances (6600 in the recorded output).
print(np.asarray(seq_lens).shape, train_set.shape, train_labels.shape)

(6600,) (93, 6600, 13) (6600, 1)


In [20]:
# Session handle shared with the training cell, which closes any existing
# session before opening a new one; None means no session is open yet.
sess = None
from tqdm import tqdm_notebook
from sklearn.utils import shuffle

In [None]:

# EM training loop.
#
# Reads from earlier cells: the learning ops (init_weights, reset_accumulators,
# accumulate_updates, update_spn, train_likelihood, avg_train_likelihood), the
# feeds (contvars, latent, sequence_len_ph), the data (train_set, train_labels,
# seq_lens) and the smoothing settings. `num_epochs` must also be defined
# before this cell runs.

# Build the smoothing update once. Calling `.assign(...)` inside the loop adds
# a new op to the graph on every call, so the graph would keep growing.
smoothing_value_ph = tf.placeholder(additive_smoothing_var.dtype.base_dtype, shape=[])
assign_smoothing = additive_smoothing_var.assign(smoothing_value_ph)

# Start from a fresh session; close the one left over from a previous run.
if sess is not None:
    sess.close()
sess = tf.Session()

sess.run(init_weights)
sess.run(reset_accumulators)

num_batches = 64
num_samples = train_set.shape[1]
# Integer division: the last `num_samples % num_batches` samples of each
# epoch's permutation are not visited in that epoch.
batch_size = num_samples // num_batches
# Array form so lengths can be indexed with the same permutation as the data.
seq_lens_arr = np.asarray(seq_lens, dtype=np.int32)

prev_likelihood = 100
likelihood = 0
for epoch in range(num_epochs):
    prev_likelihood = likelihood
    likelihoods = []

    # Smoothing depends only on the epoch: exponential decay with a floor.
    ads = max(np.exp(-epoch * smoothing_decay) * additive_smoothing, min_additive_smoothing)
    sess.run(assign_smoothing, feed_dict={smoothing_value_ph: ads})

    # One permutation per epoch, applied to data, labels AND sequence lengths.
    # Shuffling only data and labels (as before) paired every sample with the
    # sequence length of a different sample. Indexing through the permutation
    # also leaves train_set / train_labels untouched, so the cell can be re-run.
    order = np.random.permutation(num_samples)

    pbar = tqdm_notebook(range(num_batches))
    for batch in pbar:
        batch_idx = order[batch * batch_size:(batch + 1) * batch_size]
        # Accumulate EM statistics for this batch and read its likelihood.
        train_likelihoods_arr, avg_train_likelihood_val, _ = sess.run(
            [train_likelihood, avg_train_likelihood, accumulate_updates],
            feed_dict={contvars: train_set[:, batch_idx],
                       latent: train_labels[batch_idx],
                       sequence_len_ph: seq_lens_arr[batch_idx]})
        # Likelihood of this batch under the weights from the previous update.
        likelihoods.append(avg_train_likelihood_val)
        pbar.set_description("lh: {}".format(avg_train_likelihood_val))
        # Update weights
        sess.run(update_spn)
    likelihood = sum(likelihoods) / len(likelihoods)
    print("Avg likelihood: %s" % (likelihood))
    sess.run(reset_accumulators)
    

HBox(children=(IntProgress(value=0, max=64), HTML(value='')))



Avg likelihood: -737.9169101715088


HBox(children=(IntProgress(value=0, max=64), HTML(value='')))