# Comparison of Partial and Loc-scale Flows

## Imports

In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import time

In [2]:
import edward2 as ed

In [3]:
import sys
sys.path.append('../mdnf')

In [4]:
import aux
import flows
import flows_edward2 as fed
import flows_edward2_made as made
from flows_factorized import DiscreteFactorizedFlowPartial

from flows_transformations import CopiableMADELocScale

## Configuration

In [5]:
# When executed as a script, settings may be overridden on the command line
# using the format KEY=VAL,KEY=[STRVAL],...
args = aux.parse_script_args()

In [6]:
# Flow family switch: 1 = use partial flows / 0 = use loc-scale flows.
# NOTE(review): bool() is applied to whatever the parser returns; this assumes
# numeric values (a non-empty string such as "0" would be truthy) - confirm
# against aux.parse_script_args.
partial_flows_flag = args.get("PARTIAL_FLOWS", 1)
PARTIAL_FLOWS = bool(partial_flows_flag)


In [7]:
# Experiment settings; each one falls back to the default shown here when it
# is not supplied through `args`.
SEED = args.get("SEED", 12411)          # seed for the numpy / tensorflow RNGs
K = args.get("K", 5)                    # number of categories
NSAMPLES = args.get("NSAMPLES", 1000)   # samples drawn per evaluation
NITER = args.get("NITER", 10000)        # upper bound on optimization iterations
t = args.get("TEMP", 1.0)               # temperature handed to the flow layers
B = args.get("B", 10)                   # number of loc-scale layers

# Default results file name encodes K, the flow type, B and the seed.
default_out = "PARTIAL_%s_%s_%s_%s.csv" % (K, int(PARTIAL_FLOWS), B, SEED)
OUT = args.get("OUT", default_out)
print("Results output file: %s" % OUT)

Results output file: PARTIAL_5_1_10_12411.csv


## Target distribution (sorted)

In [8]:
# Seed both random number generators so the distributions built below are
# reproducible for a given SEED.
tf.random.set_seed(SEED)
np.random.seed(SEED)

In [9]:
# 'True' data generating distribution: probabilities proportional to 1..K,
# hence already in ascending order.
probs = np.arange(1, K + 1).astype(float)
probs = probs / probs.sum()

target = tfp.distributions.OneHotCategorical(probs=[probs])
np.round(target.probs, 2)

array([[0.07, 0.13, 0.2 , 0.27, 0.33]], dtype=float32)

## Base distribution (shuffled)

In [10]:
# Start from the target probabilities in reverse order (copied, so the
# in-place shuffle below does not touch `probs`), then permute them randomly.
source_probs = probs[::-1].copy()
np.random.shuffle(source_probs)

base = tfp.distributions.OneHotCategorical(probs=[source_probs])
rounded_base_probs = np.round(base.probs, 2)
print("Base probs: %s" % rounded_base_probs)

Base probs: [[0.2  0.07 0.33 0.27 0.13]]


## Flows

In [11]:
# Re-seed both generators before the flow layers are created.
tf.random.set_seed(SEED)
np.random.seed(SEED)

In [12]:
if PARTIAL_FLOWS:
    # Partial flows: one layer per pair of adjacent categories. The pairs are
    # enumerated in passes of decreasing length: (0,1)..(K-2,K-1), then
    # (0,1)..(K-3,K-2), and so on down to the single pair (0,1).
    category_pairs = [
        [left, left + 1]
        for upper in range(K - 1, 0, -1)
        for left in range(upper)
    ]
    layers = [
        DiscreteFactorizedFlowPartial(1, K, pair, temperature=t)
        for pair in category_pairs
    ]
else:
    # Loc-scale Tran's flows: B autoregressive layers, each wrapping its own
    # MADE loc-scale network without hidden layers.
    layers = [
        ed.layers.DiscreteAutoregressiveFlow(
            CopiableMADELocScale(K*2, hidden_dims=[], hidden_order="left-to-right"),
            temperature=t,
        )
        for _ in range(B)
    ]
    # Alternative kept for reference (class is not imported in this notebook):
    #layers = [DiscreteFactorizedFlowLocScale(1, K, temperature=t) for _ in range(B)]

In [13]:
# Report how many layers were built, with a truncated preview of the list.
layers_preview = str(layers)[:100]
print("Using %s layers: %s..." % (len(layers), layers_preview))

# Chain the layers into a single flow.
flow = flows.DiscreteFlow(layers=layers)

# Initial output distribution of the flow: push base samples through it and
# average the one-hot outputs over the sample axis.
initial_base_samples = tf.cast(base.sample(NSAMPLES), 'float32')
outprobs = tf.reduce_mean(flow(initial_base_samples), 0)
print("Initial probs: %s" % outprobs)

Using 10 layers: [<flows_factorized.DiscreteFactorizedFlowPartial object at 0x7f4242277d10>, <flows_factorized.Discre...
Initial probs: tf.Tensor([[0.069 0.186 0.329 0.262 0.154]], shape=(1, 5), dtype=float32)


## Optimization

In [14]:
# Fit the flow by maximum likelihood and stop as soon as the distribution it
# produces from base samples has its category probabilities in ascending order
# (the ordering of the target distribution).
#
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
is_sorted = False
start = time.time()
for i in range(NITER):
    # Only the loss computation is recorded on the tape.
    with tf.GradientTape() as tape:
        # our 'features'
        target_samples = tf.cast(target.sample(NSAMPLES), 'float32') # cast to the right type

        # move samples to the space where we know how to evaluate probabilities
        reversed_target_samples = flow.reverse(target_samples)

        # evaluate log-probs of the samples
        # (i.e., log_probs = base.log_prob(reversed_target_samples) )
        # Named `sample_probs` so the notebook-level `probs` array defined
        # earlier is not overwritten by a tensor.
        sample_probs = tf.reduce_sum(reversed_target_samples*base.probs, -1)
        log_probs = tf.math.log(sample_probs+1e-12)  # epsilon guards against log(0)

        # independent variables -> we just sum up log-probs
        # to get joint log prob of a N-dim sample
        log_probs = tf.reduce_sum(log_probs, -1)

        # loss = minus average log-likelihood
        loss = -tf.reduce_mean(log_probs)

    # Monitoring (not part of the loss, so kept outside the tape): estimate the
    # current output distribution from fresh base samples.
    outprobs = tf.reduce_mean(flow(tf.cast(base.sample(NSAMPLES), 'float32')), 0)
    recovered = outprobs.numpy()
    # True when the estimated probabilities are in non-decreasing order.
    is_sorted = bool(np.all(recovered[0][:-1] <= recovered[0][1:]))

    if i%10==0 or i<10 or is_sorted:
        print("[%.1fs] iter=%i loss=%.3f recovered=%s" % \
              (time.time()-start, i, loss, np.round(recovered, 2)))

    if is_sorted:
        # Stop before applying another update, as soon as the order is recovered.
        print("Recovered in %i iterations" % (i+1))
        break

    gradients = tape.gradient(loss, flow.trainable_variables)
    optimizer.apply_gradients(zip(gradients, flow.trainable_variables))
    

[0.0s] iter=0 loss=1.624 recovered=[[0.07 0.2  0.33 0.26 0.14]]
[0.1s] iter=1 loss=1.672 recovered=[[0.2  0.08 0.32 0.13 0.27]]
[0.2s] iter=2 loss=1.564 recovered=[[0.06 0.2  0.33 0.14 0.27]]
[0.3s] iter=3 loss=1.605 recovered=[[0.06 0.21 0.33 0.13 0.27]]
[0.4s] iter=4 loss=1.616 recovered=[[0.07 0.31 0.21 0.14 0.27]]
[0.5s] iter=5 loss=1.627 recovered=[[0.06 0.34 0.18 0.15 0.26]]
[0.6s] iter=6 loss=1.663 recovered=[[0.06 0.34 0.2  0.28 0.13]]
[0.7s] iter=7 loss=1.690 recovered=[[0.06 0.33 0.2  0.29 0.12]]
[0.8s] iter=8 loss=1.682 recovered=[[0.06 0.34 0.19 0.27 0.14]]
[0.9s] iter=9 loss=1.732 recovered=[[0.32 0.06 0.22 0.16 0.25]]
[0.9s] iter=10 loss=1.688 recovered=[[0.22 0.07 0.29 0.15 0.27]]
[1.8s] iter=20 loss=1.659 recovered=[[0.15 0.19 0.05 0.32 0.28]]
[2.8s] iter=30 loss=1.527 recovered=[[0.07 0.2  0.14 0.31 0.27]]
[3.6s] iter=40 loss=1.536 recovered=[[0.08 0.2  0.12 0.34 0.26]]
[4.5s] iter=50 loss=1.817 recovered=[[0.33 0.21 0.07 0.12 0.27]]
[5.3s] iter=60 loss=1.797 recovered

## Store results

In [15]:
# Write a single CSV row with the run configuration and outcome. Columns:
# K, NSAMPLES, NITER, SEED, temperature, B, PARTIAL_FLOWS (0/1),
# seconds elapsed since `start`, zero-based index of the last iteration run.
# NOTE(review): the elapsed time is taken when this cell executes, so it
# includes any delay between the end of the optimization and this cell.
fmt = ("%s,"*8+"%s\n")
row = (K,NSAMPLES,NITER,SEED,t,B,int(PARTIAL_FLOWS),time.time()-start,i)
# Build the row first and use a context manager so the file is always closed
# and is not left truncated if formatting the row fails.
with open(OUT, "w") as f:
    f.write(fmt % row)