<a href="https://colab.research.google.com/github/tanthongtan/ptm/blob/master/sam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hyperparameters

In [None]:
# Experiment configuration.
num_topic = 50        # number of topics (rows of mu)
dataset = 'kos'       # dataset name handed to load_data
method = 'sam'        # label handed to print_summary

# Model hyperparameters.
alpha_scalar = 50.0 / num_topic   # value used to fill the `alpha` tensor
c0 = 1000.0                       # passed to the SAM joint distribution
kappa1 = 10000.0                  # passed to the SAM joint distribution and the vMF evaluation

# GMC (geodesic Monte Carlo) hyperparameters.
num_samples = 1       # iterations accumulated after burn-in
num_burn = 5000       # iterations run before accumulation starts
S = 25000             # upper bound on the minibatch size (rows per chunk)
L = 20                # argument of GeodesicMonteCarlo -- presumably the number of integration steps; confirm
eta_theta = 0.04      # initial `eta` for the theta geodesic (decayed every iteration)
rho_theta = 0.1       # `rho` for the theta geodesic
eta_mu = 0.0004       # initial `eta` for the mu geodesic (decayed every iteration)
rho_mu = 0.1          # `rho` for the mu geodesic

# Run GMC Inference

In [None]:
#only for google colab
import sys
import os
# Colab-only bootstrap: skipped entirely when the notebook runs outside Google Colab.
if 'google.colab' in sys.modules:
    # Show which GPU this runtime was assigned.
    !nvidia-smi
    # Fetch the project repository and make it the working directory so the
    # project-local imports below resolve.
    !git clone https://github.com/tanthongtan/ptm.git
    %cd '/content/ptm'
    # Unpack the reference corpus unless it has already been extracted.
    # NOTE(review): the archive path is on Google Drive, so this assumes Drive
    # has already been mounted at /content/drive -- confirm.
    if not os.path.isdir('wiki_final'):
        !unzip -q "/content/drive/My Drive/wiki_final.zip"

import torch
import torch.nn.functional as F
from geodesic import GeodesicMonteCarlo
from dataset import load_data, csr_to_torchsparse
import geodesic as g
import distributions as D
from tqdm.notebook import tqdm
import torch.distributions as dist
import numpy as np
from utils import print_topics, get_topics, vmf_perplexity, clustering_metrics_20news, print_summary

# Create every new tensor on the GPU when one is available, otherwise on the
# CPU. Both branches use single precision (FloatTensor), not double.
gpu = torch.cuda.is_available()
torch.set_default_tensor_type(torch.cuda.FloatTensor if gpu else torch.FloatTensor)

# Load the train/test matrices and vocabulary for the selected dataset.
data_tr, data_te, vocab, vocab_size, num_tr = load_data(
    use_tfidf=True,
    sublinear=False,
    normalize=True,
    dataset=dataset,
)
# Test split as a torch sparse tensor; used by the perplexity evaluation.
tensor_te = csr_to_torchsparse(data_te, gpu)

# Tensor-valued hyperparameters.
# alpha: one row with num_topic entries, each equal to alpha_scalar.
alpha = torch.full((1, num_topic), alpha_scalar)
# mu0: unit-norm vector whose vocab_size entries are all equal and negative.
mu0 = F.normalize(torch.full((vocab_size,), -1.0), dim=-1)

# Random initial state.
# theta has num_topic - 1 columns; it is mapped to topic proportions with a
# stick-breaking transform further down.
theta = torch.randn(num_tr, num_topic - 1)
# mu: small Gaussian perturbations of mu0, projected back onto the unit sphere.
mu_noise = torch.randn(num_topic, vocab_size) / (vocab_size ** 0.5)
mu = F.normalize(mu_noise + mu0, p=2, dim=-1)

# Declare the GMC transition kernel and the per-parameter state it operates on.
kernel = GeodesicMonteCarlo(L)
# Current value of each sampled parameter, keyed by name.
params = {'theta': theta, 'mu': mu}
# Initial `eta` per parameter; the sampling loop rescales it every iteration.
init_etas = {'theta': eta_theta, 'mu': eta_mu}
# One geodesic object per parameter: Rn for theta, spherical for the rows of mu.
geodesics = {
    'theta': g.RnGeodesic(eta=eta_theta, rho=rho_theta),
    'mu': g.SphericalGeodesic(eta=eta_mu, rho=rho_mu),
}
# Initial velocities: i.i.d. standard-normal noise with the same shape, dtype
# and device as each parameter, passed through that parameter's geodesic
# projection. torch.randn_like draws the same distribution as a
# MultivariateNormal with identity covariance, without materialising (and
# factorising) a d x d identity matrix -- for mu, d is vocab_size.
vs = {
    name: geodesics[name].projection(params[name], torch.randn_like(params[name]))
    for name in params
}

# Sampling loop: num_burn burn-in iterations followed by num_samples
# iterations whose theta / mu values are summed into the accumulators.
t = tqdm(range(num_samples+num_burn))
theta_samples = 0
mu_samples = 0
for i in t:
    # Draw a random minibatch of at most S training documents.
    idx = torch.randperm(num_tr)[:S]
    x_batch = csr_to_torchsparse(data_tr[idx.cpu()], gpu)

    # Temporarily swap the full per-document state for the minibatch rows so
    # the kernel only updates the documents in this batch.
    theta = params['theta']
    v_theta = vs['theta']
    params['theta'] = theta[idx]
    vs['theta'] = v_theta[idx]

    # Decay every geodesic's eta as (i + 1) ** (-1/5).
    step_decay = (i + 1) ** (-1. / 5.)
    for name in geodesics:
        geodesics[name].eta = init_etas[name] * step_decay
    params, vs = kernel.stochastic_transition(params, vs, geodesics, D.SamJointDistributionWithStickDir(x_batch, alpha, c0, mu0, kappa1))

    # Scatter the updated minibatch rows back into the full tensors and
    # restore them as the live state.
    theta[idx] = params['theta']
    v_theta[idx] = vs['theta']
    params['theta'] = theta
    vs['theta'] = v_theta

    mu = params['mu']

    # Stop as soon as the sampler diverges, and say so: after an early exit
    # the accumulators below hold fewer than num_samples draws (possibly none).
    if torch.isnan(mu).any():
        print("NaN detected in mu at iteration", i, "- stopping sampling early")
        break

    # Accumulate post-burn-in draws; they are averaged after the loop.
    if i >= num_burn:
        theta_samples += theta
        mu_samples += mu

    # Periodic diagnostics on the full training set and the test set.
    if i % 100 == 0:
        print("\ncurrent iteration:", i)
        # Rows of mu should have unit norm, so the sum should equal num_topic.
        print("mu norms", mu.norm(dim=-1).sum(), num_topic)
        sparsity = (torch.abs(mu)**2.).norm(dim=-1)
        print("sparsity", sparsity)
        print("sparsitymean", sparsity.mean())
        # Rows of pi should sum to one, so the total should equal num_tr.
        pi = dist.StickBreakingTransform()(theta)
        print("pi sums", pi.sum(dim=-1).sum(), num_tr)

        # Evaluate the training set in chunks of at most S documents.
        sum_ll = 0.0
        sum_cs = 0.0
        for start in range(0, num_tr, S):
            curr_pi = pi[start:start+S]
            curr_tensor_tr = csr_to_torchsparse(data_tr[start:start+S], gpu)
            # Unit-norm mixture of the topic directions for each document.
            curr_avg = F.normalize(torch.matmul(curr_pi, mu), p=2, dim=-1)
            sum_ll += D.log_prob_von_mises_fisher(kappa1 * curr_avg, curr_tensor_tr).sum()
            sum_cs += D.sparse_dense_dot(curr_tensor_tr, curr_avg).sum()

        print("log likelihood", sum_ll / num_tr)
        print("cosine similarity", sum_cs / num_tr)
        print("perplexity", vmf_perplexity(tensor_te, mu, kappa1, alpha, N=1000),"\n")

    # Less frequently, print the current topics.
    if i % 1000 == 0:
        emb = mu.cpu().numpy()
        print_topics(get_topics(emb,vocab))
        print("")

Sun Oct  4 10:46:35 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.23.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    24W / 300W |      0MiB / 16130MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

HBox(children=(FloatProgress(value=0.0, max=5001.0), HTML(value='')))


current iteration: 0
mu norms tensor(50.) 50
sparsity tensor([0.0413, 0.1120, 0.0445, 0.0320, 0.0814, 0.3311, 0.0458, 0.0364, 0.1070,
        0.0591, 0.0551, 0.0452, 0.0607, 0.0738, 0.0807, 0.0666, 0.0662, 0.0845,
        0.0906, 0.0930, 0.0847, 0.0990, 0.1007, 0.1053, 0.1057, 0.1104, 0.1011,
        0.0976, 0.1006, 0.1028, 0.0937, 0.0993, 0.1006, 0.0873, 0.0914, 0.0856,
        0.0867, 0.0820, 0.0789, 0.0715, 0.0632, 0.0626, 0.0523, 0.0482, 0.0441,
        0.0400, 0.0338, 0.0296, 0.0290, 0.0254])
sparsitymean tensor(0.0784)
pi sums tensor(2916.) 2916
log likelihood tensor(18123.4531)
cosine similarity tensor(0.2080)
perplexity tensor(-18006.6777) 

bush iraq people time president war party states campaign democratic
november kerry poll bush senate polls house account electoral governor
bush kerry iraq campaign republican bushs administration party president security
poll november war kerry iraq vote dean percent american senate
november bush kerry poll polls iraq account campaign ele

# Get Topic Coherence

In [None]:
# Average the draws accumulated after burn-in.
mu_final = mu_samples / num_samples
theta_final = theta_samples / num_samples

# Held-out perplexity of the averaged topic directions.
final_perplexity = vmf_perplexity(tensor_te, mu_final, kappa1, alpha, N=1000)
print("final perplexity", final_perplexity)

# Extract the topics from the averaged mu and print the summary table.
emb = mu_final.cpu().numpy()
topics = get_topics(emb, vocab)
print_summary(topics, method, dataset)

# Clustering metrics are only computed for the 20news dataset.
if dataset == '20news':
    pi = dist.StickBreakingTransform()(theta_final).cpu().numpy()
    clustering_metrics_20news(pi)

final perplexity tensor(-18619.0996)

Method  = sam
Number of topics = 50
Dataset = kos 

 NPMI       TU         Topic
 0.17908    0.90000    iraqi baghdad fallujah iraq forces killed iraqis shiite city troops
-0.00857    0.95000    november turnout duderino parecommend trouble ground exit zogby con west
 0.05006    0.67500    administration intelligence commission officials white report cia attacks clarke house
 0.06646    0.58333    states bin united laden america air space saddam terrorists south
 0.04781    0.76667    sunday john sens edition adviser sun cbs cnn powell fox
 0.02665    0.63667    november voting account electoral governor sunzoo house polls senate liberalrakkasan
 0.09055    0.82500    marriage gay amendment rights republicans issue ban hate samesex unions
 0.08701    0.60333    district race seat schrader house gop alexander elections democrat republican
 0.10113    0.88333    jobs job growth economy report economic employment labor workers numbers
 0.03327    0.68