In [17]:
import numpy as np
import pandas as pd
import scipy.stats as st
import pickle

In [31]:
# Seed NumPy's global generator so that a fresh "Restart & Run All"
# regenerates exactly the same synthetic data set.
SEED = 42
np.random.seed(SEED)

# Given parameters
I = 100  # Number of customers
J = 100  # Number of transactions per customer
K = 2    # Number of types
R = 3    # Regression dimensionality

eta = 1000  # Scale of the exponential distribution phi_alpha is drawn from
zeta = 500  # Scale of the exponential distribution phi_beta is drawn from

# Customer covariates: one R-dimensional standard-normal vector per customer.
b = np.random.normal(0, 1, size=(I, R))

# One non-negative value per (type, regression dimension); used further down
# as the standard deviation of the normal draw that produces the weights w.
lambdas = np.random.exponential(scale=0.2, size=(K, R))

In [32]:
# Type-specific regression weights, same shape as lambdas (K, R): each entry
# is a zero-mean normal draw whose standard deviation is the matching lambda.
w = np.random.normal(0, scale=lambdas)

# (K, R) @ (R, I) -> (K, I), transposed so that row i holds customer i's K logits.
logits = np.matmul(w, b.T).T

# Row-wise softmax over types. Subtracting each row's maximum leaves the
# result mathematically unchanged but keeps np.exp from overflowing to inf
# (which would turn theta into nan) when a logit is large.
shifted = logits - logits.max(axis=1, keepdims=True)
exp_logits = np.exp(shifted)
theta = exp_logits / exp_logits.sum(axis=1, keepdims=True)

# One value per type, drawn from exponentials with scales eta and zeta; the
# data-generation loop mixes them with theta before calling np.random.gamma.
phi_alpha = np.random.exponential(eta, K)
phi_beta = np.random.exponential(zeta, K)

In [33]:
# Flat, transaction-level records: customer id, sampled type, sampled amount.
cid, y, x = [], [], []
for i, type_probs in enumerate(theta[:I]):
    # Customer-level gamma parameters: per-type values weighted by this
    # customer's type probabilities.
    alpha = np.dot(phi_alpha, type_probs)
    beta = np.dot(phi_beta, type_probs)
    for j in range(J):
        cid += [i]
        # Draw order (type first, then amount) is kept as-is so the random
        # stream is consumed in the same sequence.
        y += [np.random.choice(K, p=type_probs)]
        # NOTE(review): np.random.gamma takes (shape, scale). If beta is meant
        # to be a rate, this should be 1 / beta -- confirm against the model.
        x += [np.random.gamma(alpha, beta)]

In [34]:
# Turn each flat list of scalars into a single-column 2-D array so the three
# can be placed side by side in the next step.
x, y, cid = (np.vstack(values) for values in (x, y, cid))

In [35]:
# Join the three column arrays side by side as (cid, y, x). The result has one
# common dtype, so the integer ids and type labels are stored as floats here.
data = np.concatenate((cid, y, x), axis=1)

In [36]:
# Wrap the raw array in a labelled frame: customer id, type label, amount.
column_names = ['cid', 'y', 'x']
data = pd.DataFrame(data, columns=column_names)

In [37]:
# Split the covariate matrix into a list with one row (1-D array) per customer.
bs = list(b)

In [38]:
# Regroup the flat transaction table into one list per customer:
# x[i] holds customer i's amounts, y[i] the matching integer type labels.
x, y = [], []
for i in range(I):
    customer_rows = data.loc[data['cid'] == i]
    x.append(customer_rows['x'].tolist())
    # Labels were stored as floats in the frame; cast them back to int.
    y.append(customer_rows['y'].astype(int).tolist())

In [39]:
# Bundle the fixed dimensions, hyper-parameters, covariates and generated
# observations into one dictionary.
model_data = dict(I=I, J=J, K=K, R=R, eta=eta, zeta=zeta, b=bs, x=x, y=y)
# `lambda` is a Python keyword, so this key cannot be passed to dict() by name.
model_data['lambda'] = lambdas

In [40]:
# Persist the model inputs next to the notebook as a binary pickle.
with open('./model_data.pickle', 'wb') as model_data_file:
    pickle.dump(model_data, model_data_file)

In [41]:
# Collect the values the data was generated from (weights are stored under
# the key 'omega'), keyed by name.
init_dict = dict(
    omega=w,
    phi_alpha=phi_alpha,
    phi_beta=phi_beta,
    theta=theta,
)

In [42]:
# Persist the generating values next to the notebook as a binary pickle.
with open('./init_dict.pickle', 'wb') as init_dict_file:
    pickle.dump(init_dict, init_dict_file)