In [1]:
import numpy as np
import pandas as pd


This function uses the baseline log-probabilities of the word types to initialise kappa, creates the covariates for the parameter list, and returns kappa.

## Construction of the topical content variable kappa

- kappa_t has dimension KxV 
- kappa_c has dimension AxV
- kappa_i has dimension AxKxV

The initialisation of kappa depends therefore on the number of topics (K), the number of content covariates (A) 
and the vocabulary length (V). 

In [None]:
# Kappa initialization

### Python implementation
def kappa_init(documents, K, V, A, interactions):
    """Initialise the topical content structure ``kappa``.

    Python port of the R reference ``kappa.init``.  The values stored in the
    result are the same as in the R version; in particular the covariate
    labels in ``covar`` keep R's 1-based numbering, and R's ``NA`` is
    represented by ``None``.

    Parameters
    ----------
    documents : sequence of array-like
        One entry per document, each of shape ``(2, n_terms)``: row 0 holds
        the word-type identifiers and row 1 the matching counts (the layout
        the R reference reads with ``matrix(unlist(documents), nrow=2)``).
    K : int
        Number of topics.
    V : int
        Vocabulary length.
    A : int
        Number of content covariates.
    interactions : bool
        Whether topic-by-covariate interaction parameters are requested.
        Only honoured when ``A > 1``.

    Returns
    -------
    dict
        ``'m'``        -- 1-D array, baseline log-probability per word type,
                          centred on the log of the mean probability.
        ``'params'``   -- list of ``K + A*(A > 1) + K*A*interact`` zero
                          vectors of length ``V``.
        ``'kappasum'`` -- list of ``A`` independent ``(K, V)`` arrays, every
                          row equal to ``m``.
        ``'covar'``    -- dict with lists ``'k'``, ``'a'`` and ``'type'``,
                          one element per entry of ``'params'``
                          (type 1 = topic, 2 = covariate, 3 = interaction).

    Raises
    ------
    ValueError
        If ``documents`` is empty, a document is not a ``2 x n_terms`` array,
        or the number of distinct word types found differs from ``V``.
    """
    # Stack all (word id, count) columns side by side into one 2 x N matrix.
    term_blocks = []
    for doc in documents:
        block = np.asarray(doc)
        if block.ndim == 1 and block.size == 2:
            # A single (word id, count) pair given as a flat pair.
            block = block.reshape(2, 1)
        if block.ndim != 2 or block.shape[0] != 2:
            raise ValueError(
                "each document must be a 2 x n_terms array of (word id, count) columns"
            )
        term_blocks.append(block)
    if not term_blocks:
        raise ValueError("documents must contain at least one document")
    freq = np.concatenate(term_blocks, axis=1)

    # Sum the counts per word type; np.unique orders the word types
    # ascending, matching R's split() on the word identifiers.
    _, inverse = np.unique(freq[0], return_inverse=True)
    m = np.bincount(inverse.ravel(), weights=freq[1].astype(float))
    if m.size != V:
        raise ValueError(
            f"found {m.size} distinct word types in documents but V={V}"
        )

    # Baseline log-probability, centred on the log of the mean probability.
    m = m / m.sum()
    m = np.log(m) - np.log(np.mean(m))

    # Interactions are only possible with more than one content covariate.
    aspectmod = bool(A > 1)
    interact = bool(interactions) if aspectmod else False

    # One zero vector per parameter: topics, then covariates, then interactions.
    par_length = int(K + A * aspectmod + (K * A) * interact)
    params = [np.zeros(V) for _ in range(par_length)]

    # Running sum of the kappa parameters, starting from m in every row.
    # np.tile builds a fresh array each time, so the A entries do not alias.
    kappasum = [np.tile(m, (K, 1)) for _ in range(A)]

    # Covariates: one element per item in the parameter list.
    topics = list(range(1, K + 1))
    aspects = list(range(1, A + 1))
    if not aspectmod:
        covar = {
            'k': topics,
            'a': [None] * par_length,
            'type': [1] * K,
        }
    elif not interact:
        covar = {
            'k': topics + [None] * A,
            'a': [None] * K + aspects,
            'type': [1] * K + [2] * A,
        }
    else:
        covar = {
            # rep(1:K, A) in R cycles through the topics A times ...
            'k': topics + [None] * A + topics * A,
            # ... while rep(1:A, each=K) repeats each covariate K times.
            'a': [None] * K + aspects + [a for a in aspects for _ in range(K)],
            'type': [1] * K + [2] * A + [3] * (K * A),
        }

    return {'m': m, 'params': params, 'kappasum': kappasum, 'covar': covar}

### R implementation
kappa.init <- function(documents, K, V, A, interactions) {
  # Initialise the topical content structure kappa.
  #
  # Args:
  #   documents:    list of 2-row matrices; row 1 holds word-type identifiers,
  #                 row 2 the matching counts.
  #   K:            number of topics.
  #   V:            vocabulary length.
  #   A:            number of content covariates.
  #   interactions: logical; request topic-by-covariate interaction
  #                 parameters (only honoured when A > 1).
  #
  # Returns:
  #   A list with elements
  #     m        - baseline log-probability per word type, centred on the
  #                log of the mean probability,
  #     params   - list of K + A*(A > 1) + K*A*interact zero vectors of
  #                length V,
  #     kappasum - list of A matrices (K x V), every row equal to m,
  #     covar    - list(k, a, type) with one element per entry of params
  #                (type 1 = topic, 2 = covariate, 3 = interaction).
  kappa.out <- list()

  # Calculate the baseline log-probability (m)
  freq <- matrix(unlist(documents), nrow=2)  # one (word id, count) column per term
  freq <- split(freq[2,], freq[1,])          # group the counts by word type
  m <- unlist(lapply(freq, sum))             # total count per word type
  m <- m/sum(m)
  m <- log(m) - log(mean(m))                 # centre on the log of the mean probability
  kappa.out$m <- m

  # Interactions are only possible with more than one content covariate
  aspectmod <- A > 1
  interact <- if (aspectmod) interactions else FALSE

  # Create the parameters object: one zero vector of length V per parameter.
  # replicate() yields an empty list when parLength is 0, whereas looping
  # over 1:length(x) would visit the indices 1 and 0.
  parLength <- K + A*aspectmod + (K*A)*interact
  kappa.out$params <- replicate(parLength, rep(0, V), simplify=FALSE)

  # Create a running sum of the kappa parameters starting with m.
  # seq_len(A) is empty for A == 0, unlike 1:A.
  kappa.out$kappasum <- lapply(seq_len(A), function(a) {
    matrix(m, nrow=K, ncol=V, byrow=TRUE)
  })

  # Create covariates: one element per item in the parameter list.
  # Generated by type because it is conceptually simpler. interact can only
  # be TRUE when aspectmod is TRUE, so the three cases are mutually exclusive.
  if (interact) {
    kappa.out$covar <- list(k=c(1:K, rep(NA, A), rep(1:K, A)),
                            a=c(rep(NA, K), 1:A, rep(1:A, each=K)),
                            type=c(rep(1, K), rep(2, A), rep(3, K*A)))
  } else if (aspectmod) {
    kappa.out$covar <- list(k=c(1:K, rep(NA, A)),
                            a=c(rep(NA, K), 1:A),
                            type=c(rep(1, K), rep(2, A)))
  } else {
    kappa.out$covar <- list(k=1:K, a=rep(NA, parLength), type=rep(1, K))
  }
  return(kappa.out)
}