# 시나리오 기반 보상 시스템 테스트 계획서

## 개요

보상 로그를 다양한 방식으로 생성하고 보상룰을 설정하고 테스트한다.

In [24]:
from datetime import datetime, timedelta
import random

**시작 시간** ISO시간으로 설정 '2021-04-01T00:00:00+09:00'  
**로그 기간** 30일로 설정 `days=30`

In [25]:
#start_dt = datetime.fromisoformat('2021-04-01T00:00:00+09:00')
start_dt = datetime.fromisoformat('2021-04-01T00:00:00')
t_total = timedelta(days=30)
end_dt = start_dt + t_total
print(start_dt.isoformat(), end_dt.isoformat())

2021-04-01T00:00:00 2021-05-01T00:00:00


## 시나리오 1 

**소규모 데이터셋 생성**

In [26]:
n_user = 10            # 총 사용자 수
n_news = 100           # 뉴스 글 갯수
n_post = 30            # 일반 글 갯수

n_view = 100           # 총 조회수
n_reply = 100          # 총 댓글수
n_rereply = 100        # 총 대댓글 수
n_reaction = 100       # 총 reaction 수(본문)
n_reaction_reply = 100 # 댓글 reaction 갯수

n_best = 10            # 우수 콘텐츠 수
n_bookmark = 10        # bookmark
n_share = 10           # bookmark

# group 생성
groups = ['press', 'community']
group_share = [ 0.2, 0.8 ]

# 그룹 별 뉴스 생성 비율, post생성 비율
news_share = [ 0.9, 0.1 ]
post_share = [ 0.1, 0.9 ]


n_login = 100          # 로그인 횟수
n_invitation = 10      # 초대 횟수
n_recommendation = 10  # 추천 횟수

## 시나리오 2 

**대규모 데이터셋 생성**


In [5]:
n_user = 100            # 총 사용자 수
n_news = 1000           # 뉴스 글 갯수
n_post = 300            # 일반 글 갯수

n_view = 30000          # 총 조회수
n_reply = 1000          # 총 댓글수
n_rereply = 1000        # 총 대댓글 수
n_reaction = 1000       # 총 reaction 수(본문)
n_reaction_reply = 1000 # 댓글 reaction 갯수

n_best = 10             # 우수 콘텐츠 수
n_bookmark = 100        # bookmark
n_share = 100           # bookmark

# group 생성
groups = ['press', 'community']
group_share = [ 0.2, 0.8 ]

# 그룹 별 뉴스 생성 비율, post생성 비율
news_share = [ 0.9, 0.1 ]
post_share = [ 0.1, 0.9 ]


n_login = 100          # 로그인 횟수
n_invitation = 10      # 초대 횟수
n_recommendation = 10  # 추천 횟수

**사용자 생성**

In [27]:
# user 생성
def make_tower(weights):
    #tower_total = sum(weights)
    tower_total = weights[0]
    for i in range(1, len(weights)):
        tower_total += weights[i]
    total = weights[0]
    tower = [total/tower_total]
    for i in range(1, len(weights)):
        total += weights[i]
        tower.append(total/tower_total)
    return tower

def select_tower(tower):
    a = random.uniform(0.0, 1.0)
    for i in range(len(tower)):
        if a < tower[i]:
            return i
    
def create_users(n, group_share):
    users = []
    tower = make_tower(group_share)
    for i in range(n):
        group = groups[select_tower(tower)]
        user = { 'user_id': 'u%06i'%(i+1), 
                 'user_type': group}
        users.append(user)
    return users

users = create_users(n_user, group_share)

In [28]:
users_by_group = []
for i in range(len(groups)):
    users_by_group.append([])

for user in users:
    for i in range(len(groups)):
        if user['user_type'] == groups[i]:
            break
    users_by_group[i].append(user)

In [29]:
users_by_group[0]

[{'user_id': 'u000008', 'user_type': 'press'},
 {'user_id': 'u000009', 'user_type': 'press'},
 {'user_id': 'u000010', 'user_type': 'press'}]

In [30]:
users_by_group[1]

[{'user_id': 'u000001', 'user_type': 'community'},
 {'user_id': 'u000002', 'user_type': 'community'},
 {'user_id': 'u000003', 'user_type': 'community'},
 {'user_id': 'u000004', 'user_type': 'community'},
 {'user_id': 'u000005', 'user_type': 'community'},
 {'user_id': 'u000006', 'user_type': 'community'},
 {'user_id': 'u000007', 'user_type': 'community'}]

**본문 글쓰기 로그 생성**

In [31]:

def create_article(start_dt, t_total, n_news, n_post, news_share, post_share, users_by_group):
    list_t = []
    n = n_news + n_post
    articles = []
    news_group_tower = make_tower(news_share)
    post_group_tower = make_tower(post_share)
    for i in range(n):
        if (i < n_news):
            article_type = 0 # news
            group = select_tower(news_group_tower)
            prefix = 'COAT'
            content_type = 'article'
        else:
            article_type = 1 # post
            group = select_tower(post_group_tower)
            prefix = 'COPT'
            content_type = 'post'
                   
        user = random.choice(users_by_group[article_type])
        a = random.uniform(0.0, 1.0)
        t = a*t_total
        dt = start_dt + t
        articles.append({
            'timestamp': dt.isoformat(),
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'action_group': user['user_type'], # action group과 user_type의 차이?
            'action': 'create',
            'action_key': user['user_type']+'/create1',
            'log_type': 'content',
            'type_id': prefix,
            'content_type': content_type,
        })
        
    articles = sorted(articles, key=lambda article: (article['timestamp']))
    for i in range(n):
       articles[i]['content_id'] = prefix + '.%04i'%(i+1)
    
        
    return articles

In [32]:
post_log = create_article(start_dt, t_total, n_news, n_post, news_share, post_share, users_by_group)

In [33]:
post_log

[{'timestamp': '2021-04-01T12:02:56.993322',
  'user_id': 'u000010',
  'user_type': 'press',
  'action_group': 'press',
  'action': 'create',
  'action_key': 'press/create1',
  'log_type': 'content',
  'type_id': 'COAT',
  'content_type': 'article',
  'content_id': 'COPT.0001'},
 {'timestamp': '2021-04-02T00:40:07.197579',
  'user_id': 'u000008',
  'user_type': 'press',
  'action_group': 'press',
  'action': 'create',
  'action_key': 'press/create1',
  'log_type': 'content',
  'type_id': 'COAT',
  'content_type': 'article',
  'content_id': 'COPT.0002'},
 {'timestamp': '2021-04-02T13:25:07.996260',
  'user_id': 'u000010',
  'user_type': 'press',
  'action_group': 'press',
  'action': 'create',
  'action_key': 'press/create1',
  'log_type': 'content',
  'type_id': 'COAT',
  'content_type': 'article',
  'content_id': 'COPT.0003'},
 {'timestamp': '2021-04-02T14:28:15.091120',
  'user_id': 'u000009',
  'user_type': 'press',
  'action_group': 'press',
  'action': 'create',
  'action_key': 'p

**댓글 생성 로그**

In [34]:
def create_reply(start_dt, t_total, n_reply, post_log, users, depth='1'):
    end_dt = start_dt + t_total
    n_post = len(post_log)
    t_post_list = []
    t_post_total = timedelta(seconds=0)
    for i in range(n_post):
        t_post = datetime.fromisoformat(post_log[i]['timestamp'])
        t_remain = end_dt - t_post
        t_post_list.append(t_remain)
    post_tower = make_tower(t_post_list)
    reply_log_list = []
    for i in range(n_reply):
        post_i = select_tower(post_tower)
        t_post = datetime.fromisoformat(post_log[post_i]['timestamp'])
        t_remain = end_dt - t_post
        t_reply = t_post + random.uniform(0.0, 1.0)*t_remain
        user = random.choice(users)
        reply_log = {
            'timestamp': t_reply.isoformat(),
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'action_group': user['user_type'],
            'action': 'create',
            'action_key':  user['user_type']+'/create3',
            'parent_id': post_log[post_i]['content_id'],
            'parent_user_id': post_log[post_i]['user_id'],
            'type_id': 'CMMT', # comment
            'val': '1',        # reply
            'log_type': 'content',
            'content_type': 'comment',
            'voting_power': 'y',
        }
        reply_log_list.append(reply_log)
        
    reply_log_list = sorted(reply_log_list, key=lambda article: (article['timestamp']))
    for i in range(n_reply):
       reply_log_list[i]['content_id'] = reply_log_list[i]['type_id']+'%i.%04i'%(1,i+1)
    return reply_log_list
        
        

In [35]:
reply_log = create_reply(start_dt, t_total, n_reply, post_log, users)

In [36]:
reply_log

[{'timestamp': '2021-04-06T15:08:36.911592',
  'user_id': 'u000007',
  'user_type': 'community',
  'action_group': 'community',
  'action': 'create',
  'action_key': 'community/create3',
  'parent_id': 'COPT.0012',
  'parent_user_id': 'u000010',
  'type_id': 'CMMT',
  'val': '1',
  'log_type': 'content',
  'content_type': 'comment',
  'voting_power': 'y',
  'content_id': 'CMMT.0001'},
 {'timestamp': '2021-04-06T17:41:09.583586',
  'user_id': 'u000004',
  'user_type': 'community',
  'action_group': 'community',
  'action': 'create',
  'action_key': 'community/create3',
  'parent_id': 'COPT.0001',
  'parent_user_id': 'u000010',
  'type_id': 'CMMT',
  'val': '1',
  'log_type': 'content',
  'content_type': 'comment',
  'voting_power': 'y',
  'content_id': 'CMMT.0002'},
 {'timestamp': '2021-04-06T23:12:22.104760',
  'user_id': 'u000002',
  'user_type': 'community',
  'action_group': 'community',
  'action': 'create',
  'action_key': 'community/create3',
  'parent_id': 'COPT.0020',
  'parent

**대댓글 생성로그**

In [51]:
def create_rereply(start_dt, t_total, n_rereply, reply_log, users):
    end_dt = start_dt + t_total
    n_reply = len(reply_log)
    t_reply_list = []
    t_reply_total = timedelta(seconds=0)
    for i in range(n_reply):
        t_reply = datetime.fromisoformat(reply_log[i]['timestamp'])
        t_remain = end_dt - t_reply
        t_reply_list.append(t_remain)
    reply_tower = make_tower(t_reply_list)
    rereply_log_list = []
    for i in range(n_rereply):
        reply_i = select_tower(reply_tower)
        t_reply = datetime.fromisoformat(reply_log[reply_i]['timestamp'])
        t_remain = end_dt - t_reply
        t_rereply = t_reply + random.uniform(0.0, 1.0)*t_remain
        user = random.choice(users)
        rereply_log = {
            'timestamp': t_rereply.isoformat(),
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'action_group': user['user_type'],
            'action': 'create',
            'action_key': user['user_type']+'/create4',
            'parent_id': reply_log[reply_i]['content_id'],
            'parent_user_id': reply_log[reply_i]['user_id'],
            'type_id': 'CMMT', # comment
            'val': '2',        # re-reply
            'log_type': 'content',
            'content_type': 'comment',
            'voting_power': 'y',
        }
        rereply_log_list.append(rereply_log)
        
    rereply_log_list = sorted(rereply_log_list, key=lambda article: (article['timestamp']))
    for i in range(n_rereply):
       rereply_log_list[i]['content_id'] = rereply_log_list[i]['type_id']+'.%i%04i'%(2,i+1)
    return rereply_log_list

In [52]:
rereply_log = create_rereply(start_dt, t_total, n_rereply, reply_log, users)

In [None]:
rereply_log

**조회 로그**

In [40]:
def create_view(start_dt, t_total, n_view, post_log, users):
    end_dt = start_dt + t_total
    n_post = len(post_log)
    t_post_list = []
    t_post_total = timedelta(seconds=0)
    for i in range(n_post):
        t_post = datetime.fromisoformat(post_log[i]['timestamp'])
        t_remain = end_dt - t_post
        t_post_list.append(t_remain)
    post_tower = make_tower(t_post_list)
    view_log_list = []
    for i in range(n_view):
        post_i = select_tower(post_tower)
        t_post = datetime.fromisoformat(post_log[post_i]['timestamp'])
        t_remain = end_dt - t_post
        t_reply = t_post + random.uniform(0.0, 1.0)*t_remain
        #user = random.choice(users)
        view_log = {
            'timestamp': t_reply.isoformat(),
            'log_type': 'info',
            'action': 'view',
            'action_key': post_log[post_i]['action_group'] + '/view',
            'action_group': post_log[post_i]['action_group'],
            'user_id': post_log[post_i]['user_id'],
            'user_type': post_log[post_i]['user_type'],
            'content_type': post_log[post_i]['content_type'],
            'content_id': post_log[post_i]['content_id'],
            'value': 1,             # TO-DO 1시간 간격
        }
        view_log_list.append(view_log)
        
    view_log_list = sorted(view_log_list, key=lambda article: (article['timestamp']))
    return view_log_list

view_log = create_view(start_dt, t_total, n_view, post_log, users)
view_log

[{'timestamp': '2021-04-05T18:56:35.320921',
  'log_type': 'info',
  'action': 'view',
  'action_key': 'community/view',
  'action_group': 'community',
  'user_id': 'u000004',
  'user_type': 'community',
  'content_type': 'post',
  'content_id': 'COPT.0006',
  'value': 1},
 {'timestamp': '2021-04-07T07:22:45.425774',
  'log_type': 'info',
  'action': 'view',
  'action_key': 'press/view',
  'action_group': 'press',
  'user_id': 'u000009',
  'user_type': 'press',
  'content_type': 'article',
  'content_id': 'COPT.0018',
  'value': 1},
 {'timestamp': '2021-04-08T08:15:31.787699',
  'log_type': 'info',
  'action': 'view',
  'action_key': 'press/view',
  'action_group': 'press',
  'user_id': 'u000010',
  'user_type': 'press',
  'content_type': 'article',
  'content_id': 'COPT.0021',
  'value': 1},
 {'timestamp': '2021-04-08T08:48:52.370491',
  'log_type': 'info',
  'action': 'view',
  'action_key': 'press/view',
  'action_group': 'press',
  'user_id': 'u000009',
  'user_type': 'press',
  'c

**우수 콘텐츠 로그 생성**

In [41]:
def create_best(start_dt, t_total, n_best, post_log, users):
    end_dt = start_dt + t_total
    n_post = len(post_log)
    
    best_log_list = []
    for i in range(n_best):
        post = random.choice(post_log)
        t_post = datetime.fromisoformat(post['timestamp'])
        t_remain = end_dt - t_post
        t_reply = t_post + random.uniform(0.0, 1.0)*t_remain
        user = random.choice(users)
        best_log = {
            'timestamp': t_reply.isoformat(),
            'log_type': 'info',
            'action': 'best',
            'action_key': post['action_group'] + '/best',
            'action_group': post['action_group'],
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'content_type': post['content_type'],
            'content_id': post['content_id'],
            'value': 1,             # TO-DO 1시간 간격
        }
        best_log_list.append(best_log)
        
    best_log_list = sorted(best_log_list, key=lambda article: (article['timestamp']))
    return best_log_list

In [43]:
best_log = create_best(start_dt, t_total, n_best, post_log, users)

In [44]:
best_log

[{'timestamp': '2021-04-10T18:59:28.763662',
  'log_type': 'info',
  'action': 'best',
  'action_key': 'press/best',
  'action_group': 'press',
  'user_id': 'u000009',
  'user_type': 'press',
  'content_type': 'article',
  'content_id': 'COPT.0012',
  'value': 1},
 {'timestamp': '2021-04-20T09:51:35.021456',
  'log_type': 'info',
  'action': 'best',
  'action_key': 'press/best',
  'action_group': 'press',
  'user_id': 'u000009',
  'user_type': 'press',
  'content_type': 'article',
  'content_id': 'COPT.0036',
  'value': 1},
 {'timestamp': '2021-04-23T17:03:20.630781',
  'log_type': 'info',
  'action': 'best',
  'action_key': 'press/best',
  'action_group': 'press',
  'user_id': 'u000008',
  'user_type': 'press',
  'content_type': 'article',
  'content_id': 'COPT.0003',
  'value': 1},
 {'timestamp': '2021-04-25T16:51:17.212205',
  'log_type': 'info',
  'action': 'best',
  'action_key': 'press/best',
  'action_group': 'press',
  'user_id': 'u000001',
  'user_type': 'community',
  'conten

**리액션 로그(본문)**

In [45]:
def create_reaction(start_dt, t_total, n_reaction, post_log, users):
    end_dt = start_dt + t_total
    n_post = len(post_log)
    t_post_list = []
    t_post_total = timedelta(seconds=0)
    for i in range(n_post):
        t_post = datetime.fromisoformat(post_log[i]['timestamp'])
        t_remain = end_dt - t_post
        t_post_list.append(t_remain)
    post_tower = make_tower(t_post_list)
    reaction_log_list = []
    for i in range(n_reaction):
        post_i = select_tower(post_tower)
        t_post = datetime.fromisoformat(post_log[post_i]['timestamp'])
        t_remain = end_dt - t_post
        t_reply = t_post + random.uniform(0.0, 1.0)*t_remain
        user = random.choice(users)
        reaction_log = {
            'timestamp': t_reply.isoformat(),
            'log_type': 'relation', # reaction?
            'action': 'like',
            'action_key': post_log[post_i]['action_group'] + '/like',
            'action_group': post_log[post_i]['action_group'],
            'value': random.choice([1, 2]), # like, dislike
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'content_type': post_log[post_i]['content_type'],
            'content_id': post_log[post_i]['content_id'],
            'parent_user_id': post_log[post_i]['user_id'],
            'voting_power': 'y',
        }
        reaction_log_list.append(reaction_log)
        
    reaction_log_list = sorted(reaction_log_list, key=lambda article: (article['timestamp']))
    return reaction_log_list

reaction_log = create_reaction(start_dt, t_total, n_view, post_log, users)
reaction_log

[{'timestamp': '2021-04-03T08:48:17.503786',
  'log_type': 'relation',
  'action': 'like',
  'action_key': 'community/like',
  'action_group': 'community',
  'value': 2,
  'user_id': 'u000003',
  'user_type': 'community',
  'content_type': 'post',
  'content_id': 'COPT.0006',
  'parent_user_id': 'u000004',
  'voting_power': 'y'},
 {'timestamp': '2021-04-05T03:38:00.518283',
  'log_type': 'relation',
  'action': 'like',
  'action_key': 'press/like',
  'action_group': 'press',
  'value': 2,
  'user_id': 'u000006',
  'user_type': 'community',
  'content_type': 'article',
  'content_id': 'COPT.0010',
  'parent_user_id': 'u000009',
  'voting_power': 'y'},
 {'timestamp': '2021-04-06T07:12:37.868717',
  'log_type': 'relation',
  'action': 'like',
  'action_key': 'press/like',
  'action_group': 'press',
  'value': 2,
  'user_id': 'u000002',
  'user_type': 'community',
  'content_type': 'article',
  'content_id': 'COPT.0018',
  'parent_user_id': 'u000009',
  'voting_power': 'y'},
 {'timestamp':

**리액션(댓글) 로그 생성**

In [46]:
def create_reaction_reply(start_dt, t_total, n_reaction_reply, reply_log, users):
    end_dt = start_dt + t_total
    n_reply = len(reply_log)
    t_reply_list = []
    t_reply_total = timedelta(seconds=0)
    for i in range(n_reply):
        t_reply = datetime.fromisoformat(reply_log[i]['timestamp'])
        t_remain = end_dt - t_reply
        t_reply_list.append(t_remain)
    reply_tower = make_tower(t_reply_list)
    reaction_log_list = []
    for i in range(n_view):
        reply_i = select_tower(reply_tower)
        t_reply = datetime.fromisoformat(reply_log[reply_i]['timestamp'])
        t_remain = end_dt - t_reply
        t_reply = t_reply + random.uniform(0.0, 1.0)*t_remain
        user = random.choice(users)
        reaction_log = {
            'timestamp': t_reply.isoformat(),
            'log_type': 'relation', # reaction?
            'action': 'like',
            'action_key': reply_log[reply_i]['action_group'] + '/like',
            'action_group': reply_log[reply_i]['action_group'],
            'value': random.choice([1, 2]), # like, dislike
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'content_type': reply_log[reply_i]['content_type'],
            'content_id': reply_log[reply_i]['content_id'],
            'parent_user_id': reply_log[reply_i]['user_id'],
            'voting_power': 'y',
        }
        reaction_log_list.append(reaction_log)
        
    reaction_log_list = sorted(reaction_log_list, key=lambda article: (article['timestamp']))
    return reaction_log_list

reaction_reply_log = create_reaction_reply(start_dt, t_total, n_view, post_log, users)
reaction_reply_log

[{'timestamp': '2021-04-02T15:34:33.774208',
  'log_type': 'relation',
  'action': 'like',
  'action_key': 'press/like',
  'action_group': 'press',
  'value': 1,
  'user_id': 'u000001',
  'user_type': 'community',
  'content_type': 'article',
  'content_id': 'COPT.0002',
  'parent_user_id': 'u000008',
  'voting_power': 'y'},
 {'timestamp': '2021-04-04T21:21:22.088503',
  'log_type': 'relation',
  'action': 'like',
  'action_key': 'press/like',
  'action_group': 'press',
  'value': 1,
  'user_id': 'u000007',
  'user_type': 'community',
  'content_type': 'article',
  'content_id': 'COPT.0008',
  'parent_user_id': 'u000008',
  'voting_power': 'y'},
 {'timestamp': '2021-04-05T03:24:22.099691',
  'log_type': 'relation',
  'action': 'like',
  'action_key': 'press/like',
  'action_group': 'press',
  'value': 1,
  'user_id': 'u000006',
  'user_type': 'community',
  'content_type': 'article',
  'content_id': 'COPT.0003',
  'parent_user_id': 'u000010',
  'voting_power': 'y'},
 {'timestamp': '202

**Bookmark 로그 생성**

In [47]:
def create_bookmark(start_dt, t_total, n_bookmark, post_log, users):
    end_dt = start_dt + t_total
    n_post = len(post_log)
    
    bookmark_log_list = []
    for i in range(n_bookmark):
        post = random.choice(post_log)
        t_post = datetime.fromisoformat(post['timestamp'])
        t_remain = end_dt - t_post
        t_reply = t_post + random.uniform(0.0, 1.0)*t_remain
        user = random.choice(users)
        bookmark_log = {
            'timestamp': t_reply.isoformat(),
            'log_type': 'relation',
            'action': 'bookmark',
            'action_key': post['action_group'] + '/bookmark',
            'action_group': post['action_group'],
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'content_type': post['content_type'],
            'content_id': post['content_id'],
        }
        bookmark_log_list.append(bookmark_log)
        
    bookmark_log_list = sorted(bookmark_log_list, key=lambda article: (article['timestamp']))
    return bookmark_log_list

bookmark_log = create_bookmark(start_dt, t_total, n_bookmark, post_log, users)
bookmark_log

[{'timestamp': '2021-04-14T20:12:36.673864',
  'log_type': 'relation',
  'action': 'bookmark',
  'action_key': 'press/bookmark',
  'action_group': 'press',
  'user_id': 'u000002',
  'user_type': 'community',
  'content_type': 'article',
  'content_id': 'COPT.0021'},
 {'timestamp': '2021-04-17T15:19:05.945631',
  'log_type': 'relation',
  'action': 'bookmark',
  'action_key': 'community/bookmark',
  'action_group': 'community',
  'user_id': 'u000007',
  'user_type': 'community',
  'content_type': 'post',
  'content_id': 'COPT.0033'},
 {'timestamp': '2021-04-19T18:13:15.371065',
  'log_type': 'relation',
  'action': 'bookmark',
  'action_key': 'community/bookmark',
  'action_group': 'community',
  'user_id': 'u000009',
  'user_type': 'press',
  'content_type': 'post',
  'content_id': 'COPT.0065'},
 {'timestamp': '2021-04-21T00:06:16.327446',
  'log_type': 'relation',
  'action': 'bookmark',
  'action_key': 'press/bookmark',
  'action_group': 'press',
  'user_id': 'u000008',
  'user_type'

**공유 로그**

In [48]:
def create_share(start_dt, t_total, n_share, post_log, users):
    end_dt = start_dt + t_total
    n_post = len(post_log)
    
    share_log_list = []
    for i in range(n_share):
        post = random.choice(post_log)
        t_post = datetime.fromisoformat(post['timestamp'])
        t_remain = end_dt - t_post
        t_reply = t_post + random.uniform(0.0, 1.0)*t_remain
        user = random.choice(users)
        share_log = {
            'timestamp': t_reply.isoformat(),
            'log_type': 'relation',
            'action': 'share',
            'action_key': post['action_group'] + '/share',
            'action_group': post['action_group'],
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'content_type': post['content_type'],
            'content_id': post['content_id'],
        }
        share_log_list.append(share_log)
        
    share_log_list = sorted(share_log_list, key=lambda article: (article['timestamp']))
    return share_log_list

share_log = create_share(start_dt, t_total, n_share, post_log, users)
share_log

[{'timestamp': '2021-04-17T19:04:11.255288',
  'log_type': 'relation',
  'action': 'share',
  'action_key': 'community/share',
  'action_group': 'community',
  'user_id': 'u000007',
  'user_type': 'community',
  'content_type': 'post',
  'content_id': 'COPT.0046'},
 {'timestamp': '2021-04-18T03:18:29.522614',
  'log_type': 'relation',
  'action': 'share',
  'action_key': 'press/share',
  'action_group': 'press',
  'user_id': 'u000002',
  'user_type': 'community',
  'content_type': 'article',
  'content_id': 'COPT.0058'},
 {'timestamp': '2021-04-18T08:03:16.303754',
  'log_type': 'relation',
  'action': 'share',
  'action_key': 'press/share',
  'action_group': 'press',
  'user_id': 'u000005',
  'user_type': 'community',
  'content_type': 'article',
  'content_id': 'COPT.0056'},
 {'timestamp': '2021-04-26T07:00:35.643993',
  'log_type': 'relation',
  'action': 'share',
  'action_key': 'press/share',
  'action_group': 'press',
  'user_id': 'u000006',
  'user_type': 'community',
  'content

**Login 로그 생성**

In [49]:
def create_login(start_dt, t_total, n_login, users):
    login_list = []
    for i in range(n_login):
        user = random.choice(users)
        a = random.uniform(0.0, 1.0)
        t = a*t_total
        dt = start_dt + t
        login_list.append({
            'timestamp': dt.isoformat(),
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'action_group': 'common', # action group과 user_type의 차이?
            'action': 'login',
            'action_key': 'common/login',
            'log_type': 'activity',
            'value': 1,
        })
        
    login_list = sorted(login_list, key=lambda article: (article['timestamp']))
        
    return login_list

login_log = create_login(start_dt, t_total, n_login, users)
login_log

[{'timestamp': '2021-04-01T01:29:42.298475',
  'user_id': 'u000010',
  'user_type': 'press',
  'action_group': 'common',
  'action': 'login',
  'action_key': 'common/login',
  'log_type': 'activity',
  'value': 1},
 {'timestamp': '2021-04-01T05:21:27.431553',
  'user_id': 'u000008',
  'user_type': 'press',
  'action_group': 'common',
  'action': 'login',
  'action_key': 'common/login',
  'log_type': 'activity',
  'value': 1},
 {'timestamp': '2021-04-01T07:48:51.975471',
  'user_id': 'u000008',
  'user_type': 'press',
  'action_group': 'common',
  'action': 'login',
  'action_key': 'common/login',
  'log_type': 'activity',
  'value': 1},
 {'timestamp': '2021-04-01T08:03:16.747507',
  'user_id': 'u000001',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'login',
  'action_key': 'common/login',
  'log_type': 'activity',
  'value': 1},
 {'timestamp': '2021-04-01T11:29:29.982820',
  'user_id': 'u000010',
  'user_type': 'press',
  'action_group': 'common',
  'action': 'lo

**초대 로그**

In [50]:
def create_invitation(start_dt, t_total, n_invitation, users):
    invitation_list = []
    for i in range(n_invitation):
        user = random.choice(users)
        a = random.uniform(0.0, 1.0)
        t = a*t_total
        dt = start_dt + t
        invitation_list.append({
            'timestamp': dt.isoformat(),
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'action_group': 'common', # action group과 user_type의 차이?
            'action': 'invitation',
            'action_key': 'common/invitation',
            'log_type': 'social',
            'invite_code': 'user.3.xxxxx', #TO-DO
        })
        
    invitation_list = sorted(invitation_list, key=lambda article: (article['timestamp']))
        
    return invitation_list

invitation_log = create_invitation(start_dt, t_total, n_invitation, users)
invitation_log

[{'timestamp': '2021-04-01T18:13:43.821218',
  'user_id': 'u000002',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'invitation',
  'action_key': 'common/invitation',
  'log_type': 'social',
  'invite_code': 'user.3.xxxxx'},
 {'timestamp': '2021-04-03T14:40:51.390822',
  'user_id': 'u000007',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'invitation',
  'action_key': 'common/invitation',
  'log_type': 'social',
  'invite_code': 'user.3.xxxxx'},
 {'timestamp': '2021-04-04T15:06:04.512360',
  'user_id': 'u000008',
  'user_type': 'press',
  'action_group': 'common',
  'action': 'invitation',
  'action_key': 'common/invitation',
  'log_type': 'social',
  'invite_code': 'user.3.xxxxx'},
 {'timestamp': '2021-04-08T03:28:09.863646',
  'user_id': 'u000008',
  'user_type': 'press',
  'action_group': 'common',
  'action': 'invitation',
  'action_key': 'common/invitation',
  'log_type': 'social',
  'invite_code': 'user.3.xxxxx'},
 {'timestamp': '2021-04-

**추천 로그**

In [54]:
def create_recommend(start_dt, t_total, n_recommend, users):
    recommend_list = []
    for i in range(n_recommend):
        user = random.choice(users)
        a = random.uniform(0.0, 1.0)
        t = a*t_total
        dt = start_dt + t
        recommend_list.append({
            'timestamp': dt.isoformat(),
            'user_id': user['user_id'],
            'user_type': user['user_type'],
            'action_group': 'common', # action group과 user_type의 차이?
            'action': 'recommender',
            'action_key': 'common/recommender',
            'log_type': 'social',
            'invite_code': 'user.3.xxxxx', #TO-DO
        })
        
    recommend_list = sorted(recommend_list, key=lambda article: (article['timestamp']))
        
    return recommend_list

recommend_log = create_recommend(start_dt, t_total, n_recommendation, users)
recommend_log

[{'timestamp': '2021-04-05T03:29:36.908988+09:00',
  'user_id': 'u000004',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'recommender',
  'action_key': 'common/recommender',
  'log_type': 'social',
  'invite_code': 'user.3.xxxxx'},
 {'timestamp': '2021-04-05T18:11:19.618859+09:00',
  'user_id': 'u000007',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'recommender',
  'action_key': 'common/recommender',
  'log_type': 'social',
  'invite_code': 'user.3.xxxxx'},
 {'timestamp': '2021-04-08T13:04:25.667181+09:00',
  'user_id': 'u000010',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'recommender',
  'action_key': 'common/recommender',
  'log_type': 'social',
  'invite_code': 'user.3.xxxxx'},
 {'timestamp': '2021-04-10T01:03:04.039254+09:00',
  'user_id': 'u000010',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'recommender',
  'action_key': 'common/recommender',
  'log_type': 'social',
  'invite_code': '

[{'timestamp': '2021-04-04T01:59:07.384292+09:00',
  'user_id': 'u000010',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'recommender',
  'action_key': 'common/recommender',
  'log_type': 'social',
  'invite_code': 'user.3.xxxxx'},
 {'timestamp': '2021-04-04T10:50:38.622007+09:00',
  'user_id': 'u000010',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'recommender',
  'action_key': 'common/recommender',
  'log_type': 'social',
  'invite_code': 'user.3.xxxxx'},
 {'timestamp': '2021-04-06T07:44:13.943625+09:00',
  'user_id': 'u000001',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'recommender',
  'action_key': 'common/recommender',
  'log_type': 'social',
  'invite_code': 'user.3.xxxxx'},
 {'timestamp': '2021-04-10T22:07:07.303581+09:00',
  'user_id': 'u000001',
  'user_type': 'community',
  'action_group': 'common',
  'action': 'recommender',
  'action_key': 'common/recommender',
  'log_type': 'social',
  'invite_code': '

In [55]:
log_list = post_log + reply_log + rereply_log + view_log + best_log + reaction_log + reaction_reply_log
log_list += bookmark_log + share_log
log_list += login_log + invitation_log + recommend_log
print(len(log_list))

780


In [59]:
#for log in log_list:
#    if log['action_key'] == 'press/like':
#        print(log)

**후처리**

In [60]:
# 없는 필드 채우기
def fill_keys(log_list):
    keys = [
        'log_type',
        'user_type',
        'action',
        'value',
        'action_group',
        'action_key',
        'content_type',
        'type_id',
        'content_id',
        'parent_user_id',
        'parent_type',
        'parent_id',
        'voting_power',
        'invite_code',
        'reward_user_id',
        'reward_value',
        'timestamp',
        'voting_power',
        'invite_code',
    ]
    for log in log_list:
        for key in keys:
            if key not in log.keys():
                log[key] = ''

fill_keys(log_list)

In [156]:
log_list = sorted(log_list, key=lambda article: (article['timestamp']))
log_list

**파일에 로그 데이터 저장**

In [61]:
import json
with open('log.json', 'w') as fp:
    json.dump(log_list, fp)

## MongoDB 등록
**주의: DB에 기록됨**

In [None]:
from pymongo import MongoClient
from bson.objectid import ObjectId
import pymysql
import random
import time


db_host = '182.237.86.231'
db_port = 27217
db_name = 'newmingDB'
db_user = 'newming'
db_pwd = 'u8Yto93qrAgW'

client = MongoClient(db_host, db_port)
db = client['newmingDB']

for log in log_list:
    col = db[log['action_group']]
    col.insert_one(log)

In [62]:
import json

with open("log.json", "r") as st_json:
    log2 = json.load(st_json)

In [63]:
log2

[{'timestamp': '2021-04-01T03:34:01.095162+09:00',
  'user_id': 'u000009',
  'user_type': 'press',
  'action_group': 'press',
  'action': 'create',
  'action_key': 'press/create1',
  'log_type': 'content',
  'type_id': 'COAT',
  'content_type': 'article',
  'content_id': 'COPT.0001',
  'value': '',
  'parent_user_id': '',
  'parent_type': '',
  'parent_id': '',
  'voting_power': '',
  'invite_code': '',
  'reward_user_id': '',
  'reward_value': ''},
 {'timestamp': '2021-04-01T05:47:38.853937+09:00',
  'user_id': 'u000009',
  'user_type': 'press',
  'action_group': 'press',
  'action': 'create',
  'action_key': 'press/create1',
  'log_type': 'content',
  'type_id': 'COAT',
  'content_type': 'article',
  'content_id': 'COPT.0002',
  'value': '',
  'parent_user_id': '',
  'parent_type': '',
  'parent_id': '',
  'voting_power': '',
  'invite_code': '',
  'reward_user_id': '',
  'reward_value': ''},
 {'timestamp': '2021-04-01T17:04:36.415728+09:00',
  'user_id': 'u000008',
  'user_type': 'c

# 이전 소스

In [None]:
from pymongo import MongoClient
from bson.objectid import ObjectId
import pymysql
import random
import time


db_host = '182.237.86.231'
db_port = 27217
db_name = 'newmingDB'
db_user = 'newming'
db_pwd = 'u8Yto93qrAgW'


def str_time_prop(start, end, format, prop):
    """Get a time at a proportion of a range of two formatted times.

    start and end should be strings specifying times formated in the
    given format (strftime-style), giving an interval [start, end].
    prop specifies how a proportion of the interval to be taken after
    start.  The returned time will be in the specified format.
    """

    stime = time.mktime(time.strptime(start, format))
    etime = time.mktime(time.strptime(end, format))

    ptime = stime + prop * (etime - stime)

    return time.strftime(format, time.localtime(ptime))


def random_date(start, end, prop):
    return str_time_prop(start, end, '%Y-%m-%dT%H:%I:%S', prop)



client = MongoClient(db_host, db_port)
db = client['newmingDB']

for i in range(17):

    log_type = ''
    user_type = ''
    action = ''
    value = ''
    action_group = ''
    action_key = ''
    content_type = ''
    type_id = ''
    content_id = ''
    parent_user_id = ''
    parent_type = ''
    parent_id = ''
    voting_power = ''
    invite_code = ''
    reward_user_id = ''
    reward_value = ''
    timestamp = ''

    if i == 0:
        col = db['press']
        log_type = 'content'
        user_type = 'press'
        action = 'create'
        action_group = 'press'
        action_key = 'press/create1'
        content_type = 'article'
        type_id = 'COAT'
        content_id = 'COAT.key'

    elif i == 1:
        col = db['press']
        log_type = 'info'
        action = 'view'
        value = random.randrange(1, 100)
        action_group = 'press'
        action_key = 'press/view'
        content_type = 'article'
        type_id = 'COAT'
        content_id = 'COAT.key'
    elif i == 2:
        col = db['press']
        log_type = 'content'
        user_type = random.choice(['user', 'expert'])
        action = 'create'
        value = random.randrange(1, 100)
        action_group = 'press'
        action_key = 'press/create2'
        content_type = 'comment'
        type_id = 'CMMT'
        content_id = 'CMMT.seq'
        parent_type = 'article'
        parent_id = 'COAT.key'
        voting_power = random.choice(['y', 'n'])
    elif i == 3:
        col = db['press']
        log_type = 'relation'
        user_type = random.choice(['user', 'expert'])
        action = 'like'
        value = random.randrange(1, 100)
        action_group = 'press'
        action_key = 'press/like'
        content_type = 'article'
        type_id = 'COAT'
        content_id = 'COAT.key'
        voting_power = random.choice(['y', 'n'])
    elif i == 4:
        col = db['press']
        log_type = 'relation'
        user_type = random.choice(['user', 'expert'])
        action = 'bookmark'
        value = random.randrange(1, 100)
        action_group = 'press'
        action_key = 'press/bookmark'
        content_type = 'article'
        type_id = 'COAT'
        content_id = 'COAT.key'
        voting_power = random.choice(['y', 'n'])
    elif i == 5:
        col = db['press']
        log_type = 'relation'
        user_type = random.choice(['user', 'expert'])
        action = 'share'
        action_group = 'press'
        action_key = 'press/share'
        content_type = 'article'
        type_id = 'COAT'
        content_id = 'COAT.key'
    elif i == 6:
        col = db['community']
        log_type = 'content'
        user_type = random.choice(['user', 'expert'])
        action = 'create'
        action_group = 'community'
        action_key = 'community/create1'
        content_type = 'post'
        type_id = 'COPT'
        content_id = 'COPT.key'
    elif i == 7:
        col = db['community']
        log_type = 'content'
        user_type = random.choice(['user', 'expert'])
        action = 'create'
        action_group = 'community'
        action_key = 'community/create2'
        content_type = 'post'
        type_id = 'COPT'
        content_id = 'COPT.key'
        parent_user_id = random.randrange(1, 100)
        parent_type = 'article'
        parent_id = 'COAT.key'
    elif i == 8:
        col = db['community']
        log_type = 'info'
        action = 'view'
        value = random.randrange(1, 100)
        action_group = 'community'
        action_key = 'community/view'
        content_type = 'post'
        type_id = 'COPT'
        content_id = 'COPT.key'
    elif i == 9:
        col = db['community']
        log_type = 'content'
        user_type = random.choice(['press', 'user', 'expert'])
        action = 'create'
        action_group = 'community'
        action_key = 'community/create3'
        content_type = 'comment'
        type_id = 'CMMT'
        content_id = 'CMMT.seq'
        parent_user_id = random.randrange(1, 100)
        parent_type = 'post'
        parent_id = 'COAT.key'
        voting_power = random.choice(['y', 'n'])
    elif i == 10:
        col = db['community']
        log_type = 'relation'
        user_type = random.choice(['user', 'expert'])
        action = 'like'
        value = random.randrange(1, 100)
        action_group = 'community'
        action_key = 'community/like'
        content_type = 'post'
        type_id = 'COPT'
        content_id = 'COPT.key'
        voting_power = random.choice(['y', 'n'])
    elif i == 11:
        col = db['community']
        log_type = 'relation'
        user_type = random.choice(['user', 'expert'])
        action = 'bookmark'
        value = random.randrange(1, 100)
        action_group = 'community'
        action_key = 'community/bookmark'
        content_type = 'post'
        type_id = 'COPT'
        content_id = 'COPT.key'
        voting_power = random.choice(['y', 'n'])
    elif i == 12:
        col = db['community']
        log_type = 'relation'
        user_type = random.choice(['user', 'expert'])
        action = 'share'
        action_group = 'community'
        action_key = 'community/share'
        content_type = 'post'
        type_id = 'COPT'
        content_id = 'COPT.key'
    elif i == 13:
        col = db['community']
        log_type = 'content'
        user_type = random.choice(['user', 'expert'])
        action = 'create'
        value = random.randrange(1, 100)
        action_group = 'community'
        action_key = 'community/create4'
        content_type = 'post'
        type_id = 'CMMT'
        content_id = 'CMMT.seq'
        parent_type = 'comment'
        parent_id = 'CMMT.seq'
        voting_power = random.choice(['y', 'n'])
    elif i == 14:
        col = db['common']
        log_type = 'social'
        user_type = random.choice(['user', 'expert'])
        action = 'recommender'
        action_group = 'common'
        action_key = 'common/recommender'
        invite_code = 'user.3.xxxxxx'
    elif i == 15:
        col = db['common']
        log_type = 'social'
        user_type = random.choice(['user', 'expert'])
        action = 'invitation'
        action_group = 'common'
        action_key = 'common/invitation'
    elif i == 16:
        col = db['common']
        log_type = 'activity'
        user_type = random.choice(['press', 'user', 'expert'])
        action = 'login'
        value = random.randrange(1, 100)
        action_group = 'common'
        action_key = 'common/login'

    for j in range(100):
        if i in [2, 8]:
            user_id = ''
        else:
            user_id = i

        d = {
            'log_type': log_type,
            'user_type': user_type,
            'user_id': j,
            'action': action,
            'action_key': action_key,
            'value': value,
            'action_group': action_group,
            'content_type': content_type,
            'content_id': content_id,
            'parent_user_id': parent_user_id,
            'parent_type': parent_type,
            'parent_id': parent_id,
            'voting_power': voting_power,
            'invite_code': invite_code,
            'reward_user_id': reward_user_id,
            'reward_value': reward_value,
            'timestamp' : random_date("2021-03-01T01:01:01", "2021-04-30T01:01:01", random.random()),
        }

        #x = col.insert_one(d)