In [23]:
import time
import numpy as np
from collections import defaultdict

## config


In [1]:
# Simulation configuration.
item_count = 100000   # number of items: one hundred thousand
user_count = 1000000  # number of users: one million
cluster = 8           # number of clusters shared by users and items
traffic = 200000      # traffic volume; divided by 24*60*60 where the per-window rate is computed
timing = 15           # length of one simulated window, in seconds
# Plain index lists 0..count-1 for items and users.
item_idx = list(range(item_count))
user_idx = list(range(user_count))

## data 생성(dirichlet 분포)

In [2]:
# Flat (all-ones) Dirichlet concentration over the clusters.
uniform_cluster_alpha = [1] * cluster

# One Dirichlet draw over the clusters per item, transposed to (cluster, item_count).
p_item_cluster = np.random.dirichlet(uniform_cluster_alpha, item_count).T
# One Dirichlet draw over the clusters per user: (user_count, cluster).
p_cluster_user = np.random.dirichlet(uniform_cluster_alpha, user_count)
# A single Dirichlet draw over all users: (1, user_count).
p_user = np.random.dirichlet([1] * user_count, 1)

# Show the three shapes as the cell output.
(p_item_cluster.shape, p_cluster_user.shape, p_user.shape)

((8, 100000), (1000000, 8), (1, 1000000))


# step 1
## 유저 수 생성(포아송 분포)

In [3]:
# Poisson-distributed number of users for one window. The mean is `traffic`
# divided down by 24, 60 and 60, then multiplied by the window length `timing`.
user_num = np.random.poisson(traffic / 24 / 60 / 60 * timing)
user_num

37

## 유저 리스트 생성


In [40]:
# Sample `user_num` user indices, weighted by the per-user probabilities p_user[0].
# Passing the integer population size makes NumPy sample as if from
# np.arange(user_count), without first converting a user_count-element range
# into an array. Sampling is with replacement (the default), so an index may
# appear more than once.
u_idxs = np.random.choice(user_count, size=user_num, p=p_user[0])
u_list = {'user_id': u_idxs}

In [41]:
# Display the sampled user indices.
u_list

{'user_id': array([797704, 272643, 854691,  12392, 986051, 240735, 403620, 195892,
         19619, 504805, 739274, 329237, 337138, 298244, 400527, 851552,
        525059, 719231, 764620, 193445, 221639, 597830,    862, 510765,
        737436, 620290, 180560, 551864, 444315, 304187, 634210, 482123,
        517029, 798174, 748788, 816135, 291962])}

# step 2 - case 1 그냥 아이템 선택

In [34]:
#p_cluster_user[u_idxs[11]]

In [42]:
# For every sampled user, draw one cluster index from that user's own
# cluster distribution (the matching row of p_cluster_user).
u_ks = []
for sampled_user in u_idxs:
    cluster_probs = p_cluster_user[sampled_user]
    u_ks.append(np.random.choice(range(cluster), p=cluster_probs))

In [43]:
# Rows of p_item_cluster do not sum to 1, so normalise each cluster's row once
# into a distribution over items. Doing it here, vectorised, avoids running
# Python's builtin sum over a NumPy row for every single draw.
p_item_given_cluster = p_item_cluster / p_item_cluster.sum(axis=1, keepdims=True)

# Build the item index array once instead of converting range(item_count)
# into an array inside every np.random.choice call.
item_ids = np.arange(item_count)

# One item per sampled user, drawn from the item distribution of the cluster
# that was drawn for that user.
i_idxs = [np.random.choice(item_ids, p=p_item_given_cluster[u_k]) for u_k in u_ks]

In [44]:
# Map each sampled user index to the item drawn for it. If the same user
# index occurs more than once in u_idxs, only its last item is kept.
u_chioce = {user_id: item_id for user_id, item_id in zip(u_idxs, i_idxs)}

In [45]:
# Display the user index -> chosen item index mapping.
u_chioce

{797704: 38832,
 272643: 13611,
 854691: 64934,
 12392: 27693,
 986051: 23741,
 240735: 55464,
 403620: 86988,
 195892: 87369,
 19619: 11989,
 504805: 55471,
 739274: 16396,
 329237: 11359,
 337138: 21354,
 298244: 32124,
 400527: 19183,
 851552: 5415,
 525059: 85842,
 719231: 26665,
 764620: 9079,
 193445: 98567,
 221639: 4570,
 597830: 34244,
 862: 21071,
 510765: 81109,
 737436: 45448,
 620290: 95881,
 180560: 64663,
 551864: 87618,
 444315: 65746,
 304187: 2809,
 634210: 7392,
 482123: 98214,
 517029: 52622,
 798174: 81634,
 748788: 74742,
 816135: 64158,
 291962: 90883}

# step 2 - case 2 추천된 아이템 선택

In [46]:
# For every sampled user, score all items by the dot product of the user's
# cluster weights with p_item_cluster, then keep the indices of the 100
# highest-scoring items, best first.
u_interest = {}
for u_idx in u_idxs:
    item_scores = np.dot(p_cluster_user[u_idx], p_item_cluster)
    ranked_items = np.argsort(-item_scores)  # negated scores -> descending order
    u_interest[u_idx] = list(ranked_items[:100])


In [47]:
# Display the full user index -> top-item list mapping.
# NOTE(review): this prints up to 100 item indices per sampled user, so the output is long.
u_interest

{797704: [32239,
  77106,
  71568,
  82770,
  64158,
  42090,
  93840,
  66128,
  99314,
  46486,
  37140,
  18831,
  59377,
  14258,
  84483,
  88627,
  38482,
  87542,
  47816,
  63245,
  2847,
  94146,
  33929,
  32448,
  82584,
  93638,
  16427,
  3897,
  92496,
  66461,
  61075,
  55059,
  91106,
  65744,
  27801,
  73029,
  2730,
  12716,
  63696,
  50671,
  80762,
  97281,
  21684,
  53373,
  60828,
  83459,
  96977,
  4163,
  18078,
  84036,
  94461,
  52516,
  97967,
  17377,
  99027,
  96608,
  94383,
  63280,
  77310,
  57363,
  58178,
  70915,
  67490,
  87304,
  99656,
  40409,
  62496,
  71220,
  1869,
  82621,
  8350,
  17040,
  90238,
  55514,
  27039,
  5547,
  34858,
  17005,
  46468,
  41177,
  5721,
  89728,
  87795,
  5089,
  70753,
  91455,
  19615,
  76077,
  81838,
  3981,
  57355,
  87815,
  90522,
  86635,
  94673,
  95885,
  8603,
  60351,
  45323,
  19009],
 272643: [87249,
  57468,
  76694,
  64232,
  23090,
  38562,
  53574,
  14929,
  86301,
  13108,
  36