In [22]:
#####線形回帰ベース潜在因子モデル#####
##ライブラリの読み込み
import numpy as np
import pandas as pd
import matplotlib.pyplot  as plt
import numpy.matlib
import scipy
import scipy.stats as ss
from numpy.random import random
from scipy import optimize
from scipy.stats import norm

In [26]:
####任意の相関行列(分散共分散行列)を作成する関数####
##任意の相関行列を作る関数
def CorM(col, lower, upper, eigen_lower, eigen_upper):
    """Generate a random correlation matrix whose negative eigenvalues are repaired.

    The off-diagonal correlations are first drawn uniformly from
    [lower, upper).  If the resulting symmetric matrix has negative
    eigenvalues, each of them is replaced by a uniform draw from
    [eigen_lower, eigen_upper); the matrix is then rebuilt from the
    eigen-decomposition and rescaled to a unit diagonal.  After such a repair
    the entries are no longer guaranteed to lie inside [lower, upper).

    Parameters
    ----------
    col : int
        Dimension of the square matrix.
    lower, upper : float
        Range of the initial correlation draws.
    eigen_lower, eigen_upper : float
        Range of the replacement values for negative eigenvalues.

    Returns
    -------
    numpy.ndarray
        Symmetric (col, col) matrix with ones on the diagonal.
    """
    # Draw col*col candidates (same amount of randomness as before) and keep
    # only the strictly lower triangle as the correlation candidates.
    draws = (upper - lower) * np.random.rand(col * col) + lower
    rho = np.tril(np.reshape(draws, (col, col)), k=-1)

    # Mirror to a symmetric matrix with exact ones on the diagonal.  The
    # previous "diag(...) - (rho + rho.T)" form also flipped the sign of every
    # off-diagonal entry, so the requested range was mirrored around zero.
    Sigma = rho + rho.T + np.eye(col)

    # Eigen-decomposition of the symmetric candidate matrix.
    eigen_val, eigen_vec = scipy.linalg.eigh(Sigma)

    # Replace every negative eigenvalue (the mask covers all of them, including
    # the last one) with a small positive draw.  Drawing a vector of the right
    # length avoids assigning a shape-(1,) array to a scalar element, which is
    # deprecated in recent NumPy releases.
    negative = eigen_val < 0
    n_negative = int(np.sum(negative))
    if n_negative > 0:
        eigen_val[negative] = (eigen_upper - eigen_lower) * np.random.rand(n_negative) + eigen_lower

    # Rebuild the matrix and rescale it so that the diagonal is one again.
    Sigma = np.dot(np.dot(eigen_vec, np.diag(eigen_val)), eigen_vec.T)
    scale = np.sqrt(np.diag(Sigma))
    Cor = Sigma / np.outer(scale, scale)
    return Cor

##相関行列から分散共分散行列に変換する関数
def covmatrix(Cor, sigma_lower, sigma_upper):
    """Turn a correlation matrix into a covariance matrix with random scales.

    One standard deviation per diagonal element of ``Cor`` is drawn uniformly
    from [sigma_lower, sigma_upper); entry (i, j) of ``Cor`` is then
    multiplied by sd[i] * sd[j].

    Parameters
    ----------
    Cor : numpy.ndarray
        Correlation matrix.
    sigma_lower, sigma_upper : float
        Range of the standard-deviation draws.

    Returns
    -------
    numpy.ndarray
        Element-wise product of ``Cor`` and the outer product of the drawn
        standard deviations.
    """
    n_dim = np.diag(Cor).shape[0]
    width = sigma_upper - sigma_lower
    sd = width * np.random.rand(n_dim) + sigma_lower
    # Broadcasting a column against a row yields the sd[i] * sd[j] table.
    return Cor * (sd[:, np.newaxis] * sd[np.newaxis, :])

In [3]:
#### Data generation ####
## Problem dimensions
# NOTE(review): no random seed is set in the cells shown here, so every run
# simulates different data.
k = 10        # number of latent bases
hh = 5000     # number of users
item = 2000   # number of items

## ID vectors for the full user x item grid (hh * item rows)
# Each user index is repeated `item` times in a row ...
user_id0 = np.repeat(np.arange(hh), item)
# ... while the item indices 0..item-1 are cycled once per user.
item_id0 = np.tile(np.arange(item), hh)

In [4]:
## Explanatory variables
## Feature vectors: one row per (user, item) pair
k1, k2, k3 = 2, 3, 4   # number of continuous / binary / categorical columns

# Continuous features, uniform on [0, 1)
x1 = np.random.random((hh*item, k1))

# Binary features, each column with its own success probability in [0.25, 0.55)
x2 = np.zeros((hh*item, k2))
for j in range(k2):
    prob = np.random.uniform(low=0.25, high=0.55, size=1)
    x2[:, j] = np.random.binomial(n=1, p=prob, size=hh*item)

# One-hot categorical features with Dirichlet(3, ..., 3) category probabilities
x3 = np.random.multinomial(1, np.random.dirichlet(np.full(k3, 3.0), size=1)[0], size=hh*item)
x3 = np.delete(x3, np.argmin(x3.sum(axis=0)), axis=1)   # drop the least frequent category as the redundant column

# Design matrix: constant column followed by the three feature groups
x = np.hstack((np.ones((hh*item, 1), dtype=int), x1, x2, x3))

In [5]:
## Explanatory variables of the hierarchical model
# User-level covariates (one row per user)
k1, k2, k3 = 1, 3, 5   # number of continuous / binary / categorical columns
u1 = np.random.random((hh, k1))
u2 = np.zeros((hh, k2))
for j in range(k2):
    prob = np.random.uniform(low=0.25, high=0.55, size=1)
    u2[:, j] = np.random.binomial(n=1, p=prob, size=hh)
u3 = np.random.multinomial(1, np.random.dirichlet(np.full(k3, 3.0), size=1)[0], size=hh)
u3 = np.delete(u3, np.argmin(u3.sum(axis=0)), axis=1)   # drop the least frequent category as the redundant column
u = np.hstack((np.ones((hh, 1), dtype=int), u1, u2, u3))   # constant column first

# Item-level covariates (one row per item)
k1, k2, k3 = 2, 3, 4   # number of continuous / binary / categorical columns
v1 = np.random.random((item, k1))
v2 = np.zeros((item, k2))
for j in range(k2):
    prob = np.random.uniform(low=0.25, high=0.55, size=1)
    v2[:, j] = np.random.binomial(n=1, p=prob, size=item)
v3 = np.random.multinomial(1, np.random.dirichlet(np.full(k3, 3.0), size=1)[0], size=item)
v3 = np.delete(v3, np.argmin(v3.sum(axis=0)), axis=1)   # drop the least frequent category as the redundant column
v = np.hstack((np.ones((item, 1), dtype=int), v1, v2, v3))   # constant column first

In [8]:
## Regression coefficients of the feature vector
# One coefficient per column of x, uniform on [-0.6, 1.6).  A single
# vectorised draw replaces the element-wise loop, which assigned a shape-(1,)
# array to each scalar slot (deprecated in recent NumPy releases).
beta = np.random.uniform(-0.6, 1.6, x.shape[1])

# Keep an independent copy (presumably the true values to compare estimates
# against): a plain `betat = beta` would be overwritten by any later in-place
# update of beta.
betat = beta.copy()

In [30]:
## Parameters of the hierarchical model
## User-side hierarchical model
# Variance / covariance settings.  The *_ut names receive independent copies
# (presumably the true values kept for later comparison) so that in-place
# updates of the working objects cannot silently change them.
sigma_u = 0.4
sigma_ut = sigma_u
Cov_u = covmatrix(CorM(k, -0.6, 0.8, 0.05, 0.2), 0.0025, 0.25)
Cov_ut = Cov_u.copy()

# Regression coefficients: one row per column of u, k+1 outcome columns
alpha_u = np.zeros((u.shape[1], k+1))
for j in range(u.shape[1]):
    # NOTE(review): j == 1 is the second column of u (column 0 is the
    # constant).  Confirm the wider range is not meant for the intercept row.
    if j==1:
        alpha_u[j, :] = np.random.uniform(-0.55, 1.3, k+1)
    else:
        alpha_u[j, :] = np.random.uniform(-0.4, 0.5, k+1)
alpha_ut = alpha_u.copy()

# Draw the user-specific parameters from the multivariate regression model:
# column 0 gets independent N(0, sigma_u) noise, the other k columns get
# correlated noise with covariance Cov_u.
er = np.concatenate((np.random.normal(0, sigma_u, hh).reshape(hh, 1),
                     np.random.multivariate_normal(np.repeat(0, k), Cov_u, hh)), axis=1)
theta_u = np.dot(u, alpha_u) + er

# Random-effect parameter (column 0 of theta_u)
theta_u1 = theta_u[:, 0]
theta_ut1 = theta_u1.copy()

# Matrix-factorisation parameters (all remaining columns)
theta_u2 = pd.DataFrame(np.delete(theta_u, 0, 1))
theta_ut2 = theta_u2.copy()

(10, 10)

In [54]:
# Inspect column 1 of theta_u, i.e. the first column that remains once
# column 0 (the random-effect parameter) has been split off.
theta_u[:, 1]

array([ 0.11225798, -0.21596567,  0.42546748, ...,  0.23624111,
        0.11570023, -0.21095558])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.191729,0.302995,-0.305915,0.250215,0.530499,0.158490,0.065318,1.828296,0.120200,0.202602
1,-0.211361,0.394862,0.763736,0.313932,1.313244,-0.218540,0.779017,-0.073175,0.676021,-0.202359
2,0.350129,0.360383,0.521584,0.674762,1.485671,-0.510447,0.474792,1.269478,0.353786,0.774193
3,0.359563,0.293090,0.400167,0.040802,1.402205,-0.830641,0.766108,1.035428,0.218320,0.411017
4,-0.118392,0.046840,0.768526,0.351307,1.115453,0.221093,0.859159,1.246883,0.920051,-0.527063
5,0.463227,0.115499,0.610565,0.791936,1.089459,-0.546075,1.177336,0.981448,-0.061298,-0.106683
6,0.370873,0.416329,0.133768,0.022047,0.765009,-0.395559,0.800930,0.386044,0.210116,-0.174132
7,0.016395,0.636891,0.426637,0.663741,1.194605,-0.116035,1.292109,0.317370,0.626284,-0.581518
8,0.121717,0.711344,0.212615,0.426279,0.842087,0.532483,1.135362,1.197979,0.361412,-0.377669
9,0.214749,0.481419,-0.091798,0.382156,0.725992,-0.071345,0.311114,1.058022,-0.035832,0.140718


In [64]:
# Display theta_u as a DataFrame: column 0 is the random-effect parameter,
# the remaining columns are the matrix-factorisation parameters.
pd.DataFrame(theta_u)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.557682,0.191729,0.302995,-0.305915,0.250215,0.530499,0.158490,0.065318,1.828296,0.120200,0.202602
1,0.648102,-0.211361,0.394862,0.763736,0.313932,1.313244,-0.218540,0.779017,-0.073175,0.676021,-0.202359
2,1.724344,0.350129,0.360383,0.521584,0.674762,1.485671,-0.510447,0.474792,1.269478,0.353786,0.774193
3,1.711911,0.359563,0.293090,0.400167,0.040802,1.402205,-0.830641,0.766108,1.035428,0.218320,0.411017
4,1.381458,-0.118392,0.046840,0.768526,0.351307,1.115453,0.221093,0.859159,1.246883,0.920051,-0.527063
5,1.196492,0.463227,0.115499,0.610565,0.791936,1.089459,-0.546075,1.177336,0.981448,-0.061298,-0.106683
6,1.489344,0.370873,0.416329,0.133768,0.022047,0.765009,-0.395559,0.800930,0.386044,0.210116,-0.174132
7,1.333659,0.016395,0.636891,0.426637,0.663741,1.194605,-0.116035,1.292109,0.317370,0.626284,-0.581518
8,1.142022,0.121717,0.711344,0.212615,0.426279,0.842087,0.532483,1.135362,1.197979,0.361412,-0.377669
9,0.651044,0.214749,0.481419,-0.091798,0.382156,0.725992,-0.071345,0.311114,1.058022,-0.035832,0.140718
