## Objectives
- Main quest와 달리 side quests(보스 토벌, 주간 던전 등)는 플레이 시작과 종료 타임스탬프가 찍혀있다.
- 이들을 활용하여 각 level에서 side quests를 총 몇 시간 플레이했는지 feature를 생성한다.
- 주의할 점은 두 가지이다: (1) 시작/종료 타임스탬프 중 일부가 누락(missing)되어 있고, (2) side quests 플레이 도중에도 레벨업이 가능하다.

In [1]:
from config import *

In [2]:
# Load the level-up panel produced by the previous step and order it chronologically.
levelups_ref = (
    pd.read_csv("../data/levelups_panel_1.csv", encoding="utf-8")
    .assign(event_datetime=lambda panel: pd.to_datetime(panel["event_datetime"]))
    # several level-ups can share one timestamp, so level_from breaks the tie
    .sort_values(by=["event_datetime", "level_from"])
    .reset_index(drop=True)
)

In [3]:
# Load the raw stage log and parse its timestamp column.
stages = pd.read_csv("../data/stages.csv", encoding="utf-8")
stages["event_datetime"] = stages["event_datetime"].pipe(pd.to_datetime)

In [4]:
# Number of log rows per stage category.
stages["stage_category_slug"].value_counts()

stage_category_slug
보스 토벌     15257496
메인 퀘스트    12469843
주간 던전      3920684
천공의 탑      2170426
PVP         720005
파밍 필드       346556
허상의 결계      109545
Name: count, dtype: int64

### Side quests playing time in hours

* <code>playtime_dungeon</code>: Actual playing time of 주간 던전 in hours before progressing to the next level
    - 주의: <u><b>플레이 도중 레벨업 가능</b></u>

In [5]:
def calculate_playtime_side_quests(stages, q_name, new_col_name, levelups=None):
    """Compute hours spent on one side-quest category per character level.

    Start and finish log rows of the category are paired, each finished
    session is attributed to the level the character held while playing,
    and the session lengths are summed per level.

    Parameters
    ----------
    stages : pandas.DataFrame
        Stage log. Columns read here: ``stage_category_slug``,
        ``stage_status``, ``event_datetime`` (datetime), ``player_slug``,
        ``character_slug``, ``stage_level``; the columns ``stage_type``,
        ``stage_slug``, ``stage_score`` and ``stage_playtime`` must exist and
        are dropped from the working copy. The input frame is not modified.
    q_name : str
        Value of ``stage_category_slug`` to process.
    new_col_name : str
        Name given to the summed playtime column in the aggregated result.
    levelups : pandas.DataFrame, optional
        Level-up panel with ``player_slug``, ``character_slug``, ``level_to``,
        ``event_datetime_prev`` and ``event_datetime`` (datetime). It must
        already be sorted by ``event_datetime`` (required by ``merge_asof``).
        When omitted, the notebook-level ``levelups_ref`` is used.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``stages_sub``: one row per retained session, with ``playtime_sub`` in
        hours. ``stages_sub_g``: ``playtime_sub`` summed per
        (``player_slug``, ``character_slug``, ``level_from``) and renamed to
        ``new_col_name``.
    """
    if levelups is None:
        # Backward-compatible default: fall back to the notebook-level panel.
        levelups = levelups_ref

    pair_keys = ["player_slug", "character_slug", "stage_category_slug", "stage_level"]

    stages_sub = (
        stages[stages["stage_category_slug"] == q_name]
        .sort_values(by="event_datetime")
        .drop(columns=["stage_type", "stage_slug", "stage_score", "stage_playtime"])
    )

    # Separate start rows from finish rows (anything that is not "start").
    is_start = stages_sub["stage_status"] == "start"
    stages_sub_s = stages_sub[is_start].reset_index(drop=True)
    stages_sub_f = stages_sub[~is_start].reset_index(drop=True)
    # Keep the finish time in its own column; the merge key keeps the start time.
    stages_sub_f = stages_sub_f.assign(event_datetime_y=stages_sub_f["event_datetime"])

    # Pair every start with the nearest finish at or after it, for the same
    # player / character / category / stage level.
    stages_sub = pd.merge_asof(stages_sub_s, stages_sub_f, on="event_datetime",
                               by=pair_keys, direction="forward")
    # Starts that never found a finish have no finish timestamp.
    stages_sub = stages_sub.dropna(subset=["event_datetime_y"])
    # Several starts can map to one finish when a finish row is missing from
    # the log; rows are in start-time order, so keep="last" keeps the latest start.
    stages_sub = stages_sub.drop_duplicates(
        subset=pair_keys + ["stage_status_y", "event_datetime_y"], keep="last")
    stages_sub = stages_sub.rename(columns={"event_datetime": "event_datetime_x"})

    # Match levels: look up the last level-up strictly before each finish.
    stages_sub = stages_sub.sort_values(by="event_datetime_y").reset_index(drop=True)
    level_lookup = (
        levelups[["player_slug", "character_slug", "level_to",
                  "event_datetime_prev", "event_datetime"]]
        .reset_index(drop=True)
        .assign(levelup_datetime=lambda panel: panel["event_datetime"])
        .rename(columns={"event_datetime": "event_datetime_y",
                         "event_datetime_prev": "levelup_datetime_prev"})
    )
    stages_sub = pd.merge_asof(stages_sub, level_lookup,
                               on="event_datetime_y", by=["player_slug", "character_slug"],
                               direction="backward", allow_exact_matches=False)

    # Confine to sessions during which no level-up occurred: the most recent
    # level-up must not be later than the session start. Sessions with no
    # earlier level-up at all (missing levelup_datetime) are dropped as well.
    no_levelup_in_play = stages_sub["levelup_datetime"] <= stages_sub["event_datetime_x"]
    stages_sub = stages_sub[no_levelup_in_play].reset_index(drop=True)
    # The level reached by that level-up is the level being played.
    stages_sub = stages_sub.rename(columns={"level_to": "level_from"})

    # Play time in hours. One second is subtracted from every session and
    # sessions shorter than one second count as zero (rationale for the
    # one-second offset is not visible here -- TODO confirm).
    elapsed_sec = (stages_sub["event_datetime_y"] - stages_sub["event_datetime_x"]).dt.total_seconds()
    playtime_hours = ((elapsed_sec - 1) / 3600).where(elapsed_sec >= 1, 0.0)

    # Implausibly long sessions are zeroed out (original note: "some errors
    # updating"): over 5 hours for "파밍 필드", over 15 minutes otherwise.
    max_hours = 5 if q_name == "파밍 필드" else 0.25
    stages_sub["playtime_sub"] = playtime_hours.mask(playtime_hours > max_hours, 0.0)

    # Summation of playing time per level.
    stages_sub_g = (
        stages_sub.groupby(["player_slug", "character_slug", "level_from"])["playtime_sub"]
        .sum()
        .reset_index()
        .rename(columns={"playtime_sub": new_col_name})
    )

    return stages_sub, stages_sub_g

In [6]:
# Attach one playtime column per side-quest category to the level-up panel.
for q_name, new_col_name in [("보스 토벌","playtime_boss"),("주간 던전","playtime_dungeon"),
                             ("천공의 탑","playtime_tower"),("PVP","playtime_pvp"),
                             ("파밍 필드","playtime_farm"),("허상의 결계","playtime_survival")]:
    print(q_name)
    stages_sub, stages_sub_g = calculate_playtime_side_quests(stages, q_name, new_col_name)

    # Explicit keys: these are the columns the two frames share on a fresh run.
    levelups_ref = pd.merge(levelups_ref, stages_sub_g, how="left",
                            on=["player_slug", "character_slug", "level_from"])
    # Levels with no recorded session get 0 hours. Assign the result back:
    # `df[col].fillna(..., inplace=True)` acts on a temporary Series and does
    # not update the frame under pandas copy-on-write.
    levelups_ref[new_col_name] = levelups_ref[new_col_name].fillna(0)

보스 토벌
주간 던전
천공의 탑
PVP
파밍 필드
허상의 결계


In [7]:
# Names of all side-quest playtime columns added to the panel.
cols_side = list(filter(lambda col: "playtime_" in col, levelups_ref.columns))

In [8]:
# Summary statistics of the per-level playtime columns (hours).
levelups_ref.loc[:, cols_side].describe()

Unnamed: 0,playtime_boss,playtime_dungeon,playtime_tower,playtime_pvp,playtime_farm,playtime_survival
count,2466997.0,2466997.0,2466997.0,2466997.0,2466997.0,2466997.0
mean,0.0003789942,0.0003576743,0.0003302678,0.0001082405,0.0009467574,8.241678e-05
std,0.01877821,0.007293312,0.008515553,0.002990918,0.04391786,0.002435827
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0
max,19.6975,3.384722,7.333056,0.3941667,10.07361,0.7216667


In [9]:
# Final error check: list any level whose total side-quest playtime exceeds
# the time it took to clear that level (expected to be empty).
side_total = levelups_ref[cols_side].sum(axis=1)
levelups_ref[side_total > levelups_ref["interval"]]

Unnamed: 0,player_slug,character_slug,level_from,level_to,event_datetime_prev,event_datetime,interval,interval_cum,purchase,ads,playtime_boss,playtime_dungeon,playtime_tower,playtime_pvp,playtime_farm,playtime_survival


* Export

In [10]:
# Total side-quest playtime per level: row-wise sum over all category columns.
levelups_ref["playtime_side"] = levelups_ref.loc[:, cols_side].sum(axis="columns")

In [11]:
# Preview the level-up panel with the side-quest playtime columns attached.
levelups_ref

Unnamed: 0,player_slug,character_slug,level_from,level_to,event_datetime,death,ad_revival,ad_item,purchase,event_datetime_prev,interval,interval_cum,playtime_boss,playtime_dungeon,playtime_tower,playtime_pvp,playtime_farm,playtime_survival,playtime_side
0,2021-11-01T07:31:49.366Z,레드스완,1,2,2021-11-01 16:33:55,0.0,0.0,0.0,0.0,2021-11-01 16:33:19,0.010278,0.010278,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2021-11-01T07:31:49.366Z,레드스완,2,3,2021-11-01 16:34:12,0.0,0.0,0.0,0.0,2021-11-01 16:33:55,0.005000,0.015278,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2021-11-01T07:31:49.366Z,레드스완,3,4,2021-11-01 16:36:50,0.0,0.0,0.0,0.0,2021-11-01 16:34:12,0.044167,0.059444,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2021-11-01T07:31:49.366Z,레드스완,4,5,2021-11-01 16:38:30,0.0,0.0,0.0,0.0,2021-11-01 16:36:50,0.028056,0.087500,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2021-11-01T07:31:49.366Z,레드스완,5,6,2021-11-01 16:39:43,0.0,0.0,0.0,0.0,2021-11-01 16:38:30,0.020556,0.108056,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2466992,2022-03-30T14:13:23.546Z,sheal,19,20,2022-03-30 23:56:33,0.0,0.0,0.0,0.0,2022-03-30 23:55:16,0.021667,0.705556,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2466993,2022-03-30T06:19:53.643Z,반백살,36,37,2022-03-30 23:56:47,0.0,0.0,0.0,0.0,2022-03-30 23:46:38,0.169444,8.593889,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2466994,2022-03-30T14:13:23.546Z,sheal,20,21,2022-03-30 23:57:47,0.0,0.0,0.0,0.0,2022-03-30 23:56:33,0.020833,0.726389,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2466995,2022-03-30T14:13:23.546Z,sheal,21,22,2022-03-30 23:58:07,0.0,0.0,0.0,0.0,2022-03-30 23:57:47,0.005833,0.732222,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Persist the enriched panel for the next step; the row index is not written.
levelups_ref.to_csv("../data/levelups_panel_2.csv", index=False, encoding="utf-8")