<a href="https://colab.research.google.com/github/tanthongtan/ptm/blob/master/vptm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hyperparameters

In [None]:
# Experiment identity: topic count, corpus name handed to load_data,
# and the method label handed to print_summary.
num_topic = 50
dataset = 'nytimes'
method = 'vptm'

# vptm hyperparameters
# alpha_scalar fills the (1, num_topic) alpha tensor given to the joint distribution.
alpha_scalar = 50.0 / num_topic
# v and c are passed, together with mu0, to the vptm joint distribution
# (presumably conjugate-prior settings for mu/kappa -- TODO confirm in distributions.py).
v, c = 0.025, 0.02

# GMC hyperparameters
# Posterior draws are summed on iterations i >= num_burn and later divided by num_samples.
num_samples, num_burn = 1, 5000
# S is the number of documents per minibatch (also the chunk size of the training diagnostics).
S = 25000
# L is the single argument given to GeodesicMonteCarlo (presumably steps per transition -- TODO confirm).
L = 20
# Per-parameter initial step size (eta, decayed over iterations by the sampling loop)
# and the rho value handed to each parameter's geodesic.
eta_theta, rho_theta = 4e-2, 1e-1
eta_mu, rho_mu = 4e-4, 1e-1
eta_kappa, rho_kappa = 2e-1, 1e-1

# Run Geodesic Monte Carlo (GMC) Inference

In [None]:
#only for google colab
import sys
import os
if 'google.colab' in sys.modules:
    #lets see what gpu we were given
    !nvidia-smi
    #get repository
    !git clone https://github.com/tanthongtan/ptm.git
    %cd '/content/ptm'
    #get ref corp if doesn't exist
    if not os.path.isdir('wiki_final'):
        !unzip -q "/content/drive/My Drive/wiki_final.zip"
 
import torch
import torch.nn.functional as F
from geodesic import GeodesicMonteCarlo
from dataset import load_data, csr_to_torchsparse
import geodesic as g
import distributions as D
from tqdm.notebook import tqdm
import torch.distributions as dist
import numpy as np
from utils import print_topics, get_topics, vmf_perplexity, clustering_metrics_20news, print_summary
 
#new tensors default to CUDA float32 when a GPU is available, otherwise to CPU float64;
#`gpu` records which case applies and is passed on to csr_to_torchsparse
gpu = torch.cuda.is_available()
torch.set_default_tensor_type(torch.cuda.FloatTensor if gpu else torch.DoubleTensor)
 
#Load Data
data_tr, data_te, vocab, vocab_size, num_tr = load_data(use_tfidf = True, sublinear = False, normalize = True, dataset = dataset)
tensor_te = csr_to_torchsparse(data_te, gpu)

#declare tensor hyperparameters
#alpha: (1, num_topic) tensor filled with alpha_scalar
alpha = torch.full((1,num_topic), alpha_scalar)
#mu0: unit-norm vector of length vocab_size whose entries are all equal and negative
mu0 = F.normalize(torch.full((vocab_size,),-1.0),dim=-1)

#randomly initialize model parameters
#theta has num_topic-1 columns per training document; the sampling loop maps it to
#topic proportions with StickBreakingTransform
theta = torch.randn(num_tr,num_topic-1)
#mu: one unit-norm row per topic, a small gaussian perturbation around mu0
mu = F.normalize(torch.randn(num_topic, vocab_size) / (vocab_size ** 0.5) + mu0, p=2, dim=-1)
#kappa: one value per topic, centred at 50*sqrt(vocab_size) with standard deviation 50
kappa = torch.randn(num_topic,1)*50 + 50. * (vocab_size ** 0.5)

#declare GMC transition kernels
kernel = GeodesicMonteCarlo(L)
params = {'theta': theta, 'mu':mu, 'kappa': kappa}
init_etas = {'theta': eta_theta, 'mu':eta_mu, 'kappa': eta_kappa}
geodesics = {'theta': g.RnGeodesic(eta = eta_theta, rho = rho_theta), 'mu': g.SphericalGeodesic(eta = eta_mu, rho = rho_mu), 'kappa': g.PositiveGeodesic(eta = eta_kappa, rho = rho_kappa)}
#initial velocities: i.i.d. standard-normal noise with the same shape as each parameter,
#passed through that parameter's geodesic projection. randn_like draws from the same
#distribution as MultivariateNormal(0, I) sampled once per row, but without building and
#factorising a dim x dim identity covariance (vocab_size x vocab_size in the case of mu)
vs = {name: geodesics[name].projection(params[name], torch.randn_like(params[name])) for name in params}
 
#start sampling loop
t = tqdm(range(num_samples+num_burn))
#running sums of the post-burn-in draws; they start as the int 0 and become tensors
#on the first accumulation
theta_samples = 0
mu_samples = 0
kappa_samples = 0
for i in t:

    #draw a random minibatch of (at most) S training documents and temporarily swap the
    #full theta / theta-velocity tensors for just the rows of that minibatch
    idx = torch.randperm(num_tr)[:S]
    x_batch = csr_to_torchsparse(data_tr[idx.cpu()], gpu)
    theta = params['theta']
    params['theta'] = theta[idx]
    v_theta = vs['theta']
    vs['theta'] = v_theta[idx]

    #decay every step size as init_eta * (i+1)^(-1/5)
    for name in geodesics:
        geodesics[name].eta = init_etas[name] * ((i+1) ** (-1./5.))
    params, vs = kernel.stochastic_transition(params, vs, geodesics, D.VptmJointDistributionWithStickDirConjugatePrior(x_batch, alpha, c, mu0, v))

    #write the updated minibatch rows back into the full-size tensors and restore them
    theta[idx] = params['theta']
    v_theta[idx] = vs['theta']
    params['theta'] = theta
    vs['theta'] = v_theta

    kappa = params['kappa']
    mu = params['mu']

    #stop as soon as kappa contains NaN, and report it instead of exiting silently:
    #the sample accumulators may then be empty or incomplete
    if torch.isnan(kappa).any():
        print("\nNaN detected in kappa at iteration", i, "- stopping sampling early")
        break

    #after burn-in, add the current state to the running sums
    if i >= num_burn:
        theta_samples += theta
        mu_samples += mu
        kappa_samples += kappa

    #periodic diagnostics
    if i % 100 == 0:
        print("\ncurrent iteration:", i)
        print("kappa mean",kappa.mean())
        print("kappas",kappa)
        #map theta to per-document topic proportions
        pi = dist.StickBreakingTransform()(theta)
        print("mu norms", mu.norm(dim=-1).sum(), num_topic)
        print("sparsity",(torch.abs(mu)**2.).norm(dim=-1))
        print("sparsitymean",(torch.abs(mu)**2.).norm(dim=-1).mean())
        print("pi sums", pi.sum(dim=-1).sum(), num_tr)

        #training log likelihood and cosine similarity, accumulated over consecutive
        #chunks of S documents so the whole training set is never densified at once
        sum_ll = 0.0
        sum_cs = 0.0
        for j in range(int(np.ceil(num_tr/S))):
            curr_pi = pi[j*S:j*S+S]
            curr_tensor_tr = csr_to_torchsparse(data_tr[j*S:j*S+S], gpu)
            #per-document mixture of the topics' kappa*mu vectors
            curr_avg = torch.matmul(curr_pi,kappa*mu)
            sum_ll += D.log_prob_von_mises_fisher(curr_avg, curr_tensor_tr).sum()
            curr_avg = F.normalize(curr_avg,dim=-1)
            sum_cs += D.sparse_dense_dot(curr_tensor_tr, curr_avg).sum()

        print("log likelihood", sum_ll / num_tr)
        print("cosine similarity", sum_cs / num_tr)
        print("perplexity", vmf_perplexity(tensor_te, mu, kappa, alpha, N=1000),"\n")

    #less frequently, print the current topics
    if i % 1000 == 0:
        emb = mu.cpu().numpy()
        print_topics(get_topics(emb,vocab))
        print("")


Tue Oct 13 01:59:31 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.23.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

HBox(children=(FloatProgress(value=0.0, max=5001.0), HTML(value='')))


current iteration: 0
kappa mean tensor(5948.0962)
kappas tensor([[5765.4355],
        [5757.1357],
        [5867.1987],
        [5913.7695],
        [5902.4829],
        [5881.2642],
        [5875.5059],
        [5946.0610],
        [5908.0615],
        [5753.5024],
        [5895.1226],
        [5920.4360],
        [5870.4253],
        [5917.7070],
        [5871.7588],
        [5796.0103],
        [5739.5879],
        [5875.3789],
        [5988.4458],
        [5967.6880],
        [5937.4082],
        [5896.0190],
        [5993.0781],
        [5877.8770],
        [5905.3594],
        [5946.0488],
        [5909.6367],
        [5977.3628],
        [5929.0693],
        [6050.0078],
        [5979.5151],
        [6032.4678],
        [5982.9385],
        [5956.3340],
        [5985.7534],
        [5998.7295],
        [5962.0776],
        [5922.4927],
        [6017.2534],
        [5990.8071],
        [6197.8960],
        [6044.3188],
        [6070.0381],
        [6006.3379],
        [5977.2373

# Get Topic Coherence

In [None]:
#the accumulators only become tensors once a post-burn-in draw has been added; if the
#sampling loop stopped early on NaN before that point they are still the int 0, so fail
#here with a clear message rather than with an obscure error further down
if not torch.is_tensor(mu_samples):
    raise RuntimeError("no post-burn-in samples were collected; re-run the sampling cell before computing final metrics")

#average the accumulated post-burn-in draws
#NOTE(review): for num_samples > 1 the averaged rows of mu are in general no longer
#unit-norm -- confirm that vmf_perplexity / get_topics tolerate this
mu_final = mu_samples / num_samples
kappa_final = kappa_samples / num_samples
theta_final = theta_samples / num_samples
print("final perplexity", vmf_perplexity(tensor_te, mu_final, kappa_final, alpha, N=1000))

#extract the topics from the averaged mu and print the summary table
emb = mu_final.cpu().numpy()
topics = get_topics(emb, vocab)
print_summary(topics,method,dataset)

#clustering metrics are only computed for the 20news dataset
if dataset == '20news':
    pi = dist.StickBreakingTransform()(theta_final)
    pi = pi.cpu().numpy()
    clustering_metrics_20news(pi)

final perplexity tensor(-51383.1719)

Method  = vptm
Number of topics = 50
Dataset = nytimes 

 NPMI       TU         Topic
 0.28958    1.00000    en la el que lo una del por se para
 0.07547    0.95000    cup tablespoon teaspoon pepper salt recipe cook gram minute sauce
 0.18237    0.92500    golf woods tournament hole tour par play round pga putt
 0.12772    0.95000    race horse win derby cup racing olympic finish track winner
 0.17049    1.00000    music song album band rock sing record jazz musical singer
 0.04249    0.83333    woman book man life write black war novel world like
 0.11855    0.82000    lakers neal point game nba bryant rebound laker coach jackson
 0.16175    0.86667    iraq saddam iraqi weapon war military hussein united baghdad nuclear
 0.00792    0.95000    bc nyt bloom locator graphic map bos tk chart weekly
 0.21569    0.85000    vote party republican election dole campaign voter senate republicans candidate
 0.11964    0.92500    film movie star actor charact