In [2]:
import time
import numpy as np
from collections import defaultdict

## config


In [3]:
item_count = 100000 # 십만개
user_count = 1000000 # 십만개
cluster = 8
traffic = 200000
timing = 15 #s
item_idx = [i for i in range(item_count)]
user_idx = [u for u in range(user_count)]

## data 생성(dirichlet 분포)

In [4]:
p_item_cluster = np.random.dirichlet([1 for _ in range(cluster)], item_count).transpose()
p_cluster_user = np.random.dirichlet([1 for _ in range(cluster)], user_count)
p_user = np.random.dirichlet([1 for _ in range(user_count)], 1)

p_item_cluster.shape, p_cluster_user.shape, p_user.shape

((8, 100000), (1000000, 8), (1, 1000000))


# step 1
## 유저 수 생성(포아송 분포)

In [5]:
user_num = traffic/24/60/60*timing
user_num = np.random.poisson(user_num)
user_num

39

## 유저 리스트 생성


In [6]:
u_idxs = np.random.choice(range(user_count),user_num,p=p_user[0])
u_list={'user_id':u_idxs}

In [7]:
u_list

{'user_id': array([696358, 657329, 815073, 258590, 743971, 565277, 481875, 308828,
        903909, 256715, 879879, 506973, 476560,  26038, 425696, 496862,
        691675, 351150, 329302, 868205, 715902,  66631, 459377, 530559,
        661385, 478193, 648824, 102315,  42156, 786281, 458193, 830134,
        395714, 646829,  61081, 919080, 236100, 229231,  47059])}

# step 2 - case 1 그냥 아이템 선택

In [8]:
#p_cluster_user[u_idxs[11]]

In [9]:
u_ks = [np.random.choice(range(cluster),p=p_cluster_user[u_idx]) for u_idx in u_idxs]
#u_ks = map(lambda x : np.random.choice(range(cluster),p=p_cluster_user[x]), u_idxs)

In [10]:
i_idxs = [np.random.choice(range(item_count),p=p_item_cluster[u_k]/sum(p_item_cluster[u_k])) for u_k in u_ks]
#u_chioce = list(map(lambda x : np.random.choice(range(item_count),p=p_item_cluster[x]/sum(p_item_cluster[x])), u_ks))

In [11]:
u_chioce = dict(zip(u_idxs, i_idxs))

In [12]:
u_chioce

{696358: 45622,
 657329: 3743,
 815073: 50703,
 258590: 10754,
 743971: 38000,
 565277: 55713,
 481875: 39620,
 308828: 96221,
 903909: 73165,
 256715: 40622,
 879879: 6272,
 506973: 35716,
 476560: 15568,
 26038: 75253,
 425696: 23702,
 496862: 93455,
 691675: 54466,
 351150: 36644,
 329302: 19692,
 868205: 20971,
 715902: 10242,
 66631: 58954,
 459377: 95833,
 530559: 69471,
 661385: 63736,
 478193: 53637,
 648824: 15125,
 102315: 11290,
 42156: 51316,
 786281: 80458,
 458193: 75847,
 830134: 11920,
 395714: 22664,
 646829: 31480,
 61081: 52200,
 919080: 47826,
 236100: 45086,
 229231: 78682,
 47059: 31100}

# step 2 - case 2 추천된 아이템 선택

In [13]:
u_interest = {u_idx: list(np.argsort(-np.dot(p_cluster_user[u_idx],p_item_cluster))[:100]) for u_idx in u_idxs}


In [14]:
u_interest

{696358: [12493,
  22865,
  69891,
  53736,
  29008,
  66634,
  33916,
  7073,
  32810,
  13554,
  90215,
  21980,
  49324,
  44765,
  16857,
  58088,
  16519,
  38702,
  71059,
  47949,
  96879,
  52886,
  46736,
  39252,
  11057,
  68511,
  99095,
  34518,
  84057,
  19032,
  54862,
  10067,
  88640,
  26657,
  64098,
  17778,
  66677,
  2253,
  83495,
  48638,
  64410,
  16490,
  32448,
  23959,
  27068,
  28452,
  67412,
  95794,
  59287,
  16040,
  41729,
  16989,
  94641,
  6552,
  15342,
  31047,
  98067,
  96023,
  76232,
  43357,
  68667,
  1314,
  33719,
  11751,
  33196,
  66656,
  24134,
  8378,
  68388,
  9131,
  40842,
  54232,
  60964,
  42188,
  75101,
  84705,
  80480,
  88477,
  74748,
  4224,
  65092,
  80271,
  49893,
  3044,
  40330,
  74965,
  32436,
  49156,
  31279,
  6333,
  61925,
  86743,
  3624,
  9471,
  60514,
  30269,
  93281,
  93956,
  7610,
  97481],
 657329: [55245,
  39380,
  63455,
  33574,
  19919,
  98599,
  69993,
  35965,
  57422,
  44436,
  806

In [1]:
import _pickle

In [21]:
with open("test.pickle","wb") as f:
    _pickle.dump(p_cluster_user, f)




In [22]:
with open("test.pickle","rb") as f:
    a = _pickle.load(f)

In [23]:
a.shape

(1000000, 8)

In [24]:
import gzip

In [25]:
with gzip.open("test.pickle.gzip","wb") as f:
    _pickle.dump(p_cluster_user, f)



In [26]:
with gzip.open("test.pickle.gzip","rb") as f:
    b=_pickle.load(f)

In [1]:
a = {}

In [3]:
type(a)

dict

In [1]:
a = {1,2,3,4,5,6,7,8}
b = {7,8,9,0}

In [17]:
c = a & b

In [13]:
np.random.choice(list(a & b))

7

In [18]:
np.random.choice(c)

ValueError: a must be 1-dimensional or an integer

In [20]:
a = [{1:2,3:4},{3:4}]

In [21]:
a[0]

{1: 2, 3: 4}

In [22]:
a[1]

{3: 4}