In [1]:
#####Modelling Reciprocating Relationships with Hawkes Processes#####
import numpy as np
import pandas as pd
import matplotlib.pyplot  as plt
import numpy.matlib
import scipy.linalg
import itertools
import seaborn as sns
from scipy import sparse
from scipy.stats import norm
from pandas.tools.plotting import scatter_matrix
from numpy.random import *
from scipy import optimize

#np.random.seed(98537)

In [2]:
## Draw category indices from row-wise multinomial (categorical) distributions
def rmnom(pr, n, k, no, pattern):
    """Sample one category per row of the probability matrix ``pr``.

    Each row is sampled by inverse-CDF: the first column whose cumulative
    probability reaches a uniform draw is selected.

    Parameters
    ----------
    pr : array_like, shape (n, k)
        Category probabilities, one distribution per row.
    n : int
        Number of rows (draws).
    k : int
        Number of categories; used as the column count of the indicator matrix.
    no : array_like of int, shape (n,)
        Row position of each draw in the indicator matrix.
    pattern : int
        When 1, the sparse one-hot indicator matrix is returned as well.

    Returns
    -------
    z_id : ndarray of int, shape (n,)
        Sampled category index per row (returned alone when ``pattern != 1``).
    (z_id, Z) : tuple
        Returned when ``pattern == 1``; ``Z`` is an (n, k) ``coo_matrix`` with a
        single 1 per row at column ``z_id``.
    """
    u = np.random.uniform(0, 1, n)[:, np.newaxis]
    z_id = np.argmax(np.cumsum(pr, axis=1) >= u, axis=1)
    # The original returned before this check, so the indicator matrix was
    # never built; the branch is now reachable.
    if pattern == 1:
        Z = sparse.coo_matrix((np.repeat(1, n), (no, np.array(z_id))), shape=(n, k))   # sparse one-hot matrix
        return z_id, Z
    return z_id

In [3]:
#### Generate the synthetic data ####
## Problem dimensions
hh, item, k = 5000, 3000, 10          # users, items, latent factors
k_vec = np.ones(k, dtype=int)         # ones vector; dotting with it sums over the k factors

## Number of records per user: Poisson counts with gamma-distributed rates
Lambda = np.random.gamma(shape=30.0, scale=1 / 0.2, size=hh)
pt = np.random.poisson(lam=Lambda, size=hh)
hhpt = pt.sum()                       # total number of records over all users

In [4]:
## Build record IDs and index tables
# Record-level IDs: the user of every record and its sequence number within that user.
d_id = np.repeat(np.arange(hh), pt)
n_obs = d_id.shape[0]
block_end = np.cumsum(pt)                                   # one past the last record of each user
pt_id = np.arange(n_obs) - np.repeat(block_end - pt, pt)    # 0, 1, ..., pt[i]-1 inside each user

# Index tables.
# d_id is sorted by construction (np.repeat), so each user's records form one
# contiguous run. Splitting at the run boundaries yields the same index arrays
# as scanning `d_id == i` for every user, without rescanning all records per user.
# Note: the entries of d_list are views of a single index array.
d_list = np.split(np.arange(n_obs), block_end[:-1])         # d_list[i]: record indices of user i
d_vec = [np.repeat(1, pt[i]) for i in range(hh)]            # d_vec[i]: ones vector of length pt[i]

# pt_n[j]: number of records with sequence number j.
# pt_list[j]: their record indices in ascending order (the stable sort keeps them ordered).
pt_n = np.bincount(pt_id, minlength=np.max(pt))
pt_list = np.split(np.argsort(pt_id, kind="stable"), np.cumsum(pt_n)[:-1])

# Last record index of each user (raises if a user has no records, as before).
max_index = np.array([np.max(d_list[i]) for i in range(hh)])

In [18]:
## Generate the item attached to every record
# Draw one latent segment (topic) per user
topic = 25
phi = np.random.dirichlet(np.repeat(0.5, item), topic)   # one distribution over items per topic
theta = np.random.dirichlet(np.repeat(2.5, topic), hh)   # one distribution over topics per user
# One-hot multinomial draw per user; the dot product with range(topic) turns it into a topic index
z = np.dot(np.array([np.random.multinomial(1, theta[i, :], 1) for i in range(hh)]).reshape(hh, topic), range(topic))

# Draw the items of each user from the item distribution of that user's topic
item_id1 = np.zeros(hhpt, dtype='int')
for i in range(hh):
    if i%1000==0:
        print(i)   # progress indicator every 1000 users
    # pt[i] one-hot draws, converted to item indices by the dot product with range(item)
    item_id1[d_list[i]] = np.dot(np.random.multinomial(1, phi[z[i], :], pt[i]), range(item))
    
# Build the per-item index tables
item_list1 = [j for j in range(item)]   # placeholder; entry j becomes the record indices where item j occurs
item_vec1 = [j for j in range(item)]    # placeholder; entry j becomes a ones vector of matching length
item_n1 = np.repeat(0, item)            # number of records per item
for j in range(item):
    item_list1[j] = np.array(np.where(item_id1==j)[0], dtype="int")
    item_vec1[j] = np.repeat(1, item_list1[j].shape[0])
    item_n1[j] = len(item_list1[j])

0
1000
2000
3000
4000


In [241]:
## Define the item purchase history of every record
# Settings
get = 20   # number of most recent previous records kept as history
# Filled with the sentinel value `item` (one past the largest item index) meaning "no history in this slot".
# int16 storage requires item to fit in int16.
item_id2 = np.array(np.full((hhpt, get), item), dtype="int16")

# Store, for every record, the items of the same user's preceding records
for i in range(hh):
    x = d_list[i]
    for j in range(x.shape[0]):
        # Positions j-1, j-2, ..., j-get within the user (most recent first), dropping negative positions
        index = np.arange(j-get, j)[::-1]; index = index[index >= 0]
        item_id2[x[j], np.arange(index.shape[0])] = item_id1[x[index]]
# Slot weights get/get, (get-1)/get, ..., 1/get (slot 0 = most recent), zeroed where the slot holds the sentinel
Z = ((np.arange(get) + 1)[::-1] / get) * np.array(item_id2!=item, dtype="int")

# Build the index tables
index_history = [j for j in range(get)]   # placeholder; entry j becomes the records with a real item in slot j
item_list21 = [i for i in range(item)]    # placeholder; entry i becomes a per-slot list of records whose slot holds item i
item_list22 = [i for i in range(item)]    # placeholder; entry i becomes the unique records with item i in any slot
d_list2 = [i for i in range(item)]        # placeholder; entry i becomes a per-slot list of the user ids of those records
for j in range(get):
    index_history[j] = np.array(np.where(item_id2[:, j]!=item)[0], dtype="int")
for i in range(item):
    temp_list1 = [j for j in range(get)]
    temp_list2 = [j for j in range(get)]
    for j in range(get):
        temp_list1[j] = np.array(np.where(item_id2[:, j]==i)[0], dtype="int")
        temp_list2[j] = d_id[temp_list1[j]]
    item_list21[i] = temp_list1
    item_list22[i] = np.array(np.unique(np.where(item_id2==i)[0]), dtype="int")
    d_list2[i] = temp_list2

In [20]:
## Repeat until an acceptable set of parameters has been generated
rp = 0   # attempt counter
while True:
    rp = rp + 1
    
    ## Generate the model parameters
    # Hyperparameters of the generating distributions (same values on every attempt)
    alpha11 = 0.3; beta11 = 0.5
    alpha12 = 0.3; beta12 = 0.5
    alpha21 = 0.1; beta21 = 0.75
    alpha22 = 0.1; beta22 = 0.75
    tau = 0.25

    # Draw the model parameters (gamma draws use shape alpha and scale 1/beta)
    theta_u1 = np.random.gamma(alpha11, 1/beta11, k*hh).reshape(hh, k)
    theta_v1 = np.random.gamma(alpha12, 1/beta12, k*item).reshape(item, k)
    theta_u2 = np.random.gamma(alpha21, 1/beta21, k*hh).reshape(hh, k)
    theta_v2 = np.random.gamma(alpha22, 1/beta22, k*item).reshape(item, k)
    gamma = np.random.normal(0, tau, hh)
    # Keep copies of the generating values under the *t names
    thetat_u1 = theta_u1.copy(); thetat_v1 = theta_v1.copy()
    thetat_u2 = theta_u2.copy(); thetat_v2 = theta_v2.copy()
    gammat = gamma.copy()
    
    ## Generate the response variable
    # Expected value of the model
    # uv1: user x current-item term, summed over the k factors
    uv1 = np.dot(theta_u1[d_id, ] * theta_v1[item_id1, ], k_vec)
    gamma_vec = gamma[d_id]; theta_user2 = theta_u2[d_id, ]
    # uv2: sum over history slots of (user x past-item term) * exp(gamma * slot weight)
    uv2 = np.repeat(0.0, hhpt)
    for j in range(get):
        index = index_history[j]   # records that have a real item in slot j
        uv2[index] += np.dot(theta_user2[index, ] * theta_v2[item_id2[index, j], ], k_vec) * np.exp(gamma_vec[index] * Z[index, j])
    mu = uv1 + uv2

    # Draw the response from an exponential distribution with scale mu
    y = np.random.exponential(mu, hhpt)
    y_vec = y[:, np.newaxis]
    print([rp, np.round(np.max(y), 1)])
    # Accept this attempt only when the largest response is below 400
    if np.max(y) < 400:
        break

[1, 448.6]
[2, 599.6]
[3, 499.3]
[4, 408.7]
[5, 884.3]
[6, 431.8]
[7, 613.9]
[8, 532.1]
[9, 522.8]
[10, 454.3]
[11, 589.1]
[12, 585.4]
[13, 483.2]
[14, 733.6]
[15, 402.4]
[16, 481.2]
[17, 556.0]
[18, 441.5]
[19, 483.4]
[20, 527.3]
[21, 536.0]
[22, 524.3]
[23, 652.5]
[24, 736.9]
[25, 518.0]
[26, 530.6]
[27, 830.3]
[28, 495.8]
[29, 892.2]
[30, 704.5]
[31, 403.6]
[32, 572.7]
[33, 558.9]
[34, 722.7]
[35, 454.0]
[36, 451.6]
[37, 957.0]
[38, 605.9]
[39, 553.7]
[40, 918.3]
[41, 418.5]
[42, 500.6]
[43, 521.5]
[44, 677.6]
[45, 497.6]
[46, 825.0]
[47, 409.6]
[48, 456.8]
[49, 562.5]
[50, 424.5]
[51, 492.7]
[52, 494.1]
[53, 563.5]
[54, 795.5]
[55, 405.1]
[56, 574.1]
[57, 838.9]
[58, 381.4]


In [21]:
#### Estimate the parameters with Markov chain Monte Carlo ####
## Random draws from a truncated exponential distribution
def rtexp(gamma, a, b):
    """Draw from an exponential distribution with scale ``gamma`` truncated to ``[a, b]``.

    Parameters
    ----------
    gamma : float or array_like
        Scale (mean) of the untruncated exponential distribution.
    a : float or array_like, shape (n,)
        Lower truncation points. One draw is made per element; a scalar gives one draw.
    b : float or array_like
        Upper truncation points (``np.inf`` is allowed).

    Returns
    -------
    par : ndarray, shape (n,)
        One draw per element of ``a``, each lying in ``[a, b]``.

    Notes
    -----
    Inverting the truncated CDF gives
    ``x = a - gamma * log(1 - u * (1 - exp(-(b - a) / gamma)))``.
    This is algebraically the same as ``ppf(u * (F(b) - F(a)) + F(a))`` but does
    not lose precision when ``F(a)`` and ``F(b)`` both round to 1.0 far in the
    tail, where the CDF-based form returns ``inf``.
    """
    # The exponential has support [0, inf); bounds below 0 act like 0.
    a = np.maximum(np.atleast_1d(np.asarray(a, dtype=float)), 0.0)
    b = np.maximum(np.asarray(b, dtype=float), 0.0)
    u = np.random.uniform(0, 1, a.shape[0])
    # Probability that a draw already known to exceed a also falls below b: 1 - exp(-(b - a)/gamma)
    mass = -np.expm1(-(b - a) / gamma)
    par = a - gamma * np.log1p(-u * mass)
    return par

In [22]:
## Sampler settings
# NOTE(review): R, keep, burnin and disp look like iteration count, thinning
# interval, burn-in (in kept draws) and display interval; e1, e2 and L are not
# used in the cells shown here — confirm their meaning against the sampling loop.
R, keep = 2000, 4
burnin = 500 // keep
iter = 0   # NOTE(review): shadows the built-in iter()
disp = 10
e1, e2 = 0.001, 0.0025
L = 3

In [74]:
## Data preparation
# Per-slot lookup tables, restricted to the records that have a real item in
# history slot j: item_get[j] holds their slot-j items, z_get[j] their slot-j weights.
item_get = []
z_get = []
for j in range(get):
    index = index_history[j]
    item_get.append(item_id2[index, j])
    z_get.append(Z[index, j])

In [75]:
## Prior settings
# alpha and beta are added to the gamma shape and rate terms in the sampling steps;
# s0 and v0 are not used in the cells shown here.
alpha = beta = 1.0
s0 = v0 = 1.0

In [433]:
## Reset the parameters to their true (generating) values
# Hyperparameters of the generating gamma / normal distributions
alpha11, beta11 = 0.3, 0.5
alpha12, beta12 = 0.3, 0.5
alpha21, beta21 = 0.1, 0.75
alpha22, beta22 = 0.1, 0.75
tau = 0.25

# Restore the model parameters from the saved copies
theta_u1 = thetat_u1.copy()
theta_v1 = thetat_v1.copy()
theta_u2 = thetat_u2.copy()
theta_v2 = thetat_v2.copy()
gamma = gammat.copy()

# Expected value of the model
theta_user2 = theta_u2[d_id, ]
gamma_vec = gamma[d_id]
uv1 = np.dot(theta_u1[d_id, ] * theta_v1[item_id1, ], k_vec)
uv2 = np.zeros(hhpt)
for j in range(get):
    index = index_history[j]
    # Weight exp(gamma * slot weight) applied to the user x past-item term of slot j
    decay = np.exp(gamma_vec[index] * Z[index, j])
    affinity = np.dot(theta_user2[index, ] * theta_v2[item_id2[index, j], ], k_vec)
    uv2[index] += affinity * decay
mu = uv1 + uv2

In [None]:
## Initial values of the parameters
# Hyperparameters used to draw the starting values
alpha11, beta11 = 1.0, 1.5
alpha12, beta12 = 1.0, 1.5
alpha21, beta21 = 0.25, 1.0
alpha22, beta22 = 0.25, 1.0
tau = 0.2

# Random starting values of the model parameters (gamma draws use shape alpha, scale 1/beta)
theta_u1 = np.random.gamma(alpha11, 1/beta11, size=(hh, k))
theta_v1 = np.random.gamma(alpha12, 1/beta12, size=(item, k))
theta_u2 = np.random.gamma(alpha21, 1/beta21, size=(hh, k))
theta_v2 = np.random.gamma(alpha22, 1/beta22, size=(item, k))
gamma = np.random.normal(0, tau, size=hh)

# Expected value of the model at the starting values
theta_user2 = theta_u2[d_id, ]
gamma_vec = gamma[d_id]
uv1 = np.dot(theta_u1[d_id, ] * theta_v1[item_id1, ], k_vec)
uv2 = np.zeros(hhpt)
for j in range(get):
    index = index_history[j]
    # Weight exp(gamma * slot weight) applied to the user x past-item term of slot j
    decay = np.exp(gamma_vec[index] * Z[index, j])
    affinity = np.dot(theta_user2[index, ] * theta_v2[item_id2[index, j], ], k_vec)
    uv2[index] += affinity * decay
mu = uv1 + uv2

In [None]:
#### Sample the parameters with Gibbs sampling ####

## Sample the user feature matrix
# Update the auxiliary variable
theta_user1 = theta_u1[d_id, ]
theta_item1 = theta_v1[item_id1, ]
uv_dt = theta_user1 * theta_item1
# Share of each factor's user x item product in the expected value mu, per record.
# mu is taken as left by the previously executed cell.
Lambda = uv_dt / mu[:, np.newaxis]

# Parameters of the gamma distribution
lambda_y = Lambda * y_vec
W11 = np.zeros((hh, k)); W12 = np.zeros((hh, k))
for i in range(hh):
    index = d_list[i]
    # Dotting with the ones vector sums over the records of user i, per factor
    W11[i, ] = np.dot(lambda_y[index, ].T, d_vec[i])
    W12[i, ] = np.dot(theta_item1[index, ].T, d_vec[i])
W11 = W11 + alpha; W12 = W12 + beta

# Sample the parameters (W11 is the shape, 1/W12 the scale)
theta_u1 = np.random.gamma(W11.reshape(-1), 1/W12.reshape(-1), hh*k).reshape(hh, k)
theta_user1 = theta_u1[d_id, ]
uv1 = np.dot(theta_user1 * theta_item1, k_vec)   # refresh the user x item term with the new draw

In [42]:
## Sample the item feature matrix
# Update the auxiliary variable
mu = uv1 + uv2
uv_dt = theta_user1 * theta_item1
# Share of each factor's user x item product in the expected value mu, per record
Lambda = uv_dt / mu[:, np.newaxis]

# Parameters of the gamma distribution
lambda_y = Lambda * y_vec
H11 = np.zeros((item, k)); H12 = np.zeros((item, k))
for i in range(item):
    index = item_list1[i]
    # Dotting with the ones vector sums over the records of item i, per factor
    H11[i, ] = np.dot(lambda_y[index, ].T, item_vec1[i])
    H12[i, ] = np.dot(theta_user1[index, ].T, item_vec1[i])
H11 = H11 + alpha; H12 = H12 + beta

# Sample the parameters (H11 is the shape, 1/H12 the scale)
theta_v1 = np.random.gamma(H11.reshape(-1), 1/H12.reshape(-1), item*k).reshape(item, k)
theta_item1 = theta_v1[item_id1, ]
uv1 = np.dot(theta_user1 * theta_item1, k_vec)   # refresh the user x item term with the new draw

In [434]:
## Sample the trigger user feature matrix
# Update the auxiliary variable
mu = uv1 + uv2
theta_user2 = theta_u2[d_id, ]
theta_dt = np.zeros((hhpt, k))        # per record: sum over slots of (past-item factor * weight)
uv_dt = np.zeros((hhpt, k))           # per record: sum over slots of (user factor * past-item factor * weight)
gamma_dt = np.zeros((hhpt, get))      # per record and slot: exp(gamma * slot weight); reused by the next step
for j in range(get):
    index = index_history[j]
    theta_target = theta_user2[index, ]
    gamma_dt[index, j] = np.exp(gamma_vec[index] * z_get[j])
    uv_target = (theta_target * theta_v2[item_get[j], ]) * gamma_dt[index, j][:, np.newaxis]
    # NOTE(review): dividing the product by theta_target recovers theta_v2 * weight;
    # this yields nan wherever theta_target is exactly 0 — confirm that cannot occur.
    theta_dt[index, ] += uv_target / theta_target  
    uv_dt[index, ] += uv_target
Lambda = uv_dt / mu[:, np.newaxis]

# Parameters of the gamma distribution
lambda_y = Lambda * y_vec
W21 = np.zeros((hh, k)); W22 = np.zeros((hh, k))
for i in range(hh):
    index = d_list[i]
    # Dotting with the ones vector sums over the records of user i, per factor
    W21[i, ] = np.dot(lambda_y[index, ].T, d_vec[i])
    W22[i, ] = np.dot(theta_dt[index, ].T, d_vec[i])
W21 = W21 + alpha; W22 = W22 + beta

# Sample the parameters (W21 is the shape, 1/W22 the scale)
theta_u2 = np.random.gamma(W21.reshape(-1), 1/W22.reshape(-1), hh*k).reshape(hh, k)
theta_user2 = theta_u2[d_id, ]

# Update the expected value of the trigger term with the new draw
uv2 = np.repeat(0.0, hhpt)
for j in range(get):
    index = index_history[j]
    uv2[index] += np.dot(theta_user2[index, ] * theta_v2[item_get[j], ], k_vec) * np.exp(gamma_vec[index] * z_get[j])

In [435]:
## Sample the trigger item feature matrix
# Refresh the expected value first. The preceding step redrew theta_u2 and
# recomputed uv2, so a mu left over from before that draw would be inconsistent
# with the uv_dt numerator built below from the new theta_user2.
mu = uv1 + uv2

# Work arrays and storage for the posterior parameters
theta_dt = np.zeros((hhpt, k))   # per record: (user factor * weight) summed over the slots holding item i
uv_dt = np.zeros((hhpt, k))      # per record: (user factor * item factor * weight) summed over those slots
H21 = np.zeros((item, k))
H22 = np.zeros((item, k))

## Posterior parameters, item by item
for i in range(item):
    # Update the auxiliary variable: accumulate the contribution of item i over every history slot
    for j in range(get):
        index = item_list21[i][j]   # records whose slot j holds item i
        theta_target = theta_user2[index, ]
        # gamma_dt holds exp(gamma * slot weight) as computed in the trigger user step
        uv_target = (theta_target * theta_v2[i, ]) * gamma_dt[index, j][:, np.newaxis]
        theta_dt[index, ] += uv_target / theta_v2[i, ]
        uv_dt[index, ] += uv_target
    index = item_list22[i]          # all records with item i anywhere in their history
    Lambda = uv_dt[index, ] / mu[index, np.newaxis]

    # Parameters of the gamma distribution
    lambda_y = Lambda * y_vec[index]
    H21[i, ] = np.sum(lambda_y, axis=0)
    H22[i, ] = np.sum(theta_dt[index, ], axis=0)
    # Clear the rows touched for this item so the work arrays can be reused for the next one
    theta_dt[index, ] = 0; uv_dt[index, ] = 0
H21 = H21 + alpha; H22 = H22 + beta

# Sample the parameters (H21 is the shape, 1/H22 the scale).
# The draw is stored in `par`; theta_v2 itself is not overwritten here.
par = np.random.gamma(H21.reshape(-1), 1/H22.reshape(-1), item*k).reshape(item, k)

In [439]:
# Compare the sampled trigger item features of one item with the generating values
i = 15
for row in (par[i, ], thetat_v2[i, ]):
    print(np.round(row, 3))

[0.038 0.001 0.003 0.003 0.001 0.    0.002 0.002 0.399 0.002]
[0.036 0.001 0.    0.    0.    0.    0.    0.    0.406 0.   ]


In [432]:
# Scratch check: the trigger item step zeroes the rows of uv_dt it touched after each item,
# so the total is expected to be 0.
np.sum(uv_dt)

0.0

In [406]:
# NOTE(review): theta_dt0 is not defined in any cell shown here; this appears to rely on
# leftover kernel state and would raise NameError on a fresh run — confirm and remove.
theta_dt0

0

In [383]:
# Scratch: one index per record (0 .. hhpt-1); `b` is not used in the cells shown here.
b = np.arange(hhpt)