In [1]:
# NOTE(review): `load_data_mart` is called by later cells but is not imported anywhere
# in this cell — presumably it leaked in from another session; add the proper import
# here (source module to be confirmed) so the notebook survives Restart & Run All.
from pubg_util import mysql, load_schema, notifier
from sphynx import sphynx, NODE_SMALL, NODE_MEDIUM, NODE_LARGE
# Wildcard import: later cells call sum()/min()/max() with column names, i.e. they rely
# on the Spark column functions from this module replacing the Python builtins.
from pyspark.sql.functions import *
import pandas as pd
import pickle

# Ask sphynx for a Spark session: 4 executors on the NODE_LARGE node spec.
spark = sphynx.get_spark(executor_count=4, app_name='west0_mclaren', node_spec=NODE_LARGE)

Spark cluster not assigned. creating a new one...
Node spec: 4 executors with 48G RAM each
Job Port 4049 is assigned for requested cluster
Waiting for Spark master to be available...
Spark master launched!
Creating new Spark session, name: west0_mclaren...
Waiting for all executors ready...
All executors connected!
Complete! elapsed time: 00:00:24


In [77]:
# NOTE(review): this teardown sits right after session creation, but its execution
# count (In [77]) shows it was run late. In a top-to-bottom run it would stop the Spark
# session before any analysis cell executes — move this cell to the end of the notebook.
sphynx.stop()

Stopping Spark session...
Destroying Spark cluster...
Done!


In [81]:
# Single-day window (start == end) for the WorkshopCrateIncreased lobby logs.
date = "2022-08-24"
crate_increased = load_schema.lobby(
    spark, "pc", "live", "WorkshopCrateIncreased", date, date
)

In [84]:
# Sum of Amount per Reason for item itemdesc.14300002.
(
    crate_increased
    .filter(col("ItemDescId") == "itemdesc.14300002")
    .groupBy("ItemDescId", "Reason")
    .agg(sum("Amount"))
    .toPandas()
)

Unnamed: 0,ItemDescId,Reason,sum(Amount)
0,itemdesc.14300002,buy-item,47838
1,itemdesc.14300002,buy-cash-item,130244
2,itemdesc.14300002,open-crate,-409923
3,itemdesc.14300002,by-support,18204
4,itemdesc.14300002,battlepass-level-reward,9140
5,itemdesc.14300002,open-crate-bonus,41098


In [85]:
# Same single-day window as above, this time for the WorkshopCrateOpened lobby logs.
crate_opened = load_schema.lobby(
    spark, "pc", "live", "WorkshopCrateOpened", date, date
)

In [86]:
# Inspect the columns available on the WorkshopCrateOpened frame before aggregating.
crate_opened.printSchema()

root
 |-- date: string (nullable = true)
 |-- AccountId: string (nullable = true)
 |-- CurrencyObtained: struct (nullable = true)
 |    |-- currencydesc.artisanstoken: long (nullable = true)
 |    |-- currencydesc.credit: long (nullable = true)
 |-- EventName: string (nullable = true)
 |-- ItemDescId: string (nullable = true)
 |-- ItemsObtained: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- OpenAmount: long (nullable = true)
 |-- Provider: string (nullable = true)
 |-- Time: string (nullable = true)
 |-- TransactionId: string (nullable = true)
 |-- Type: string (nullable = true)



In [88]:
# Sum of OpenAmount per Provider for item itemdesc.14300002.
(
    crate_opened
    .filter(col("ItemDescId") == "itemdesc.14300002")
    .groupBy("Provider")
    .agg(sum("OpenAmount"))
    .show()
)

+--------+---------------+
|Provider|sum(OpenAmount)|
+--------+---------------+
|     bro|          26833|
|   steam|         383090|
+--------+---------------+



In [4]:
# Analysis window reused by most cells below, and the gcoin_use data mart for that window.
start_date, end_date = "2022-09-07", "2022-09-13"
gcoin = load_data_mart("pc", start_date, end_date, "gcoin_use")

In [80]:
# gcoin_use rows whose event_name contains "workshop", for two consecutive 7-day windows.
_workshop_event = col("event_name").like("%workshop%")
wk_before_mclaren = load_data_mart('pc', "2022-08-31", "2022-09-06", "gcoin_use").filter(_workshop_event)
wk_after_mclaren = load_data_mart('pc', "2022-09-07", "2022-09-13", "gcoin_use").filter(_workshop_event)

In [5]:
from pyspark.sql.types import * 

def classify_country(country_os, country_ip):
    """Choose the country code to use for an account.

    Returns ``country_os`` only when it equals ``'CN'``; for any other value
    (including ``None``) the ``country_ip`` argument is returned instead.
    """
    return country_os if country_os == 'CN' else country_ip

# Still defined so any other reference to the UDF keeps working; the enrichment below
# no longer routes every row through Python.
country_type_udf = udf(classify_country, StringType())

# Region lookup table; joined below on its country_code_iso2 column.
meta_region = mysql.read_table(spark, 'metainfo', 'meta_bi_regions')

user = load_data_mart("pc", end_date, end_date, "user_master")
# Native column expression equivalent to classify_country: keep country_os only when it
# is 'CN', otherwise use country_ip. Written as `== "CN"` (not `!= "CN"`) so a NULL
# country_os falls through to country_ip, exactly like the Python function does for None.
user = user.withColumn(
    "country_new",
    when(col("country_os") == "CN", col("country_os")).otherwise(col("country_ip")),
)
# Left join keeps users whose country has no region row; accountid is renamed so the
# frame can be joined to the gcoin marts on account_id.
user = user.join(meta_region, user.country_new == meta_region.country_code_iso2, "left").withColumnRenamed("accountid", "account_id")

In [7]:
# Attach each spender's pubg_region. Dropping any existing `pubg_region` first makes this
# self-reassigning cell safe to re-run: without it, a second execution joins the column in
# again and later references to `pubg_region` become ambiguous. drop() is a no-op when the
# column is absent, so the first run is unchanged.
# NOTE(review): this is an inner join, so gcoin rows without a matching user row are dropped.
gcoin = gcoin.drop("pubg_region").join(user.select("account_id", "pubg_region"), "account_id")

In [6]:
# Distinct accounts per region whose lastlogindate is on or after start_date.
(
    user
    .filter(col("lastlogindate") >= start_date)
    .groupBy("pubg_region")
    .agg(countDistinct("account_id").alias("au"))
    .toPandas()
)

Unnamed: 0,pubg_region,au
0,CN,3233533
1,,117465
2,SA,106221
3,KR,621740
4,SEA,203621
5,Undefined,313
6,JP,13554
7,TW/HK,28736
8,CIS,300753
9,EMEA,339667


In [5]:
# Restrict G-Coin usage to the event named "202209_season_workshop".
mclaren_gcoin = gcoin.filter(
    col("event_name") == "202209_season_workshop"
)

In [6]:
# gcoin_master data mart for the single day 2022-09-13; later cells read first_use_date from it.
gcoin_master = load_data_mart(
    'pc', "2022-09-13", "2022-09-13", "gcoin_master"
)

In [8]:
# NPU count: distinct accounts with a McLaren row whose date equals their first_use_date.
(
    mclaren_gcoin
    .join(gcoin_master.select("account_id", "first_use_date"), "account_id")
    .filter(col("first_use_date") == col("date"))
    .select(countDistinct("account_id").alias("npu"))
    .show(truncate=False)
)

+-----+
|npu  |
+-----+
|23606|
+-----+



In [9]:
# Spend on rows where date == first_use_date: paid G-Coin, paid+free G-Coin, and the
# number of distinct accounts with paid_use > 0 (paid_account_id is NULL otherwise, so
# countDistinct skips those rows).
(
    mclaren_gcoin
    .join(gcoin_master.select("account_id", "first_use_date"), "account_id")
    .filter(col("first_use_date") == col("date"))
    .withColumn("paid_account_id", when(col("paid_use") > 0, col("account_id")).otherwise(lit(None)))
    .select(
        sum("paid_use").alias("paid_use"),
        sum(col("paid_use") + col("free_use")).alias("total_use"),
        countDistinct("paid_account_id").alias("paid_pu"),
    )
    .show(truncate=False)
)

+--------+---------+-------+
|paid_use|total_use|paid_pu|
+--------+---------+-------+
|91624860|114668410|5106   |
+--------+---------+-------+



In [16]:
# Distinct account_ids that have at least one McLaren row dated on their first_use_date.
mclaren_npu = (
    mclaren_gcoin
    .join(gcoin_master.select("account_id", "first_use_date"), "account_id")
    .filter(col("first_use_date") == col("date"))
    .select("account_id")
    .distinct()
)

In [18]:
# Same three metrics as the first-day cell, but over every McLaren row of the accounts
# in mclaren_npu rather than only the rows dated on first_use_date.
(
    mclaren_gcoin
    .join(mclaren_npu, "account_id")
    .withColumn("paid_account_id", when(col("paid_use") > 0, col("account_id")).otherwise(lit(None)))
    .select(
        sum("paid_use").alias("paid_use"),
        sum(col("paid_use") + col("free_use")).alias("total_use"),
        countDistinct("paid_account_id").alias("paid_pu"),
    )
    .show(truncate=False)
)

+---------+---------+-------+
|paid_use |total_use|paid_pu|
+---------+---------+-------+
|100037830|125731470|5457   |
+---------+---------+-------+



In [20]:
# Share (%) of the overall total G-Coin spend that comes from NPU accounts.
# Numerator: NPU total_use shown by the cell above; denominator equals the sum of the
# per-date total_use values in this notebook's daily table.
npu_total_use, all_total_use = 125731470, 1873343180
npu_total_use / all_total_use * 100

6.711609028304147

In [19]:
# Share (%) of the overall paid G-Coin spend that comes from NPU accounts.
# Numerator: NPU paid_use from the NPU window cell; denominator equals the sum of the
# per-date paid_use values in this notebook's daily table.
npu_paid_use, all_paid_use = 100037830, 1564717324
npu_paid_use / all_paid_use * 100

6.393348400097346

In [13]:
# Paid share (%) of NPU spend on rows dated on first_use_date:
# paid_use / total_use from the first-day NPU cell.
npu_first_day_paid, npu_first_day_total = 91624860, 114668410
npu_first_day_paid / npu_first_day_total * 100

79.90418634042278

In [14]:
# Share (%) of NPU accounts with paid_use > 0: paid_pu (5106) over the NPU count (23606).
npu_paid_pu, npu_accounts = 5106, 23606
npu_paid_pu / npu_accounts * 100

21.630094043887148

In [43]:
# Daily paid and paid+free G-Coin spend on the McLaren event, ordered by date.
(
    mclaren_gcoin
    .groupBy("date")
    .agg(
        sum('paid_use').alias('paid_use'),
        sum(col('paid_use') + col('free_use')).alias("total_use"),
    )
    .orderBy('date')
    .toPandas()
)

Unnamed: 0,date,paid_use,total_use
0,2022-09-07,476573575,593070310
1,2022-09-08,307528540,365256740
2,2022-09-09,221905145,260472620
3,2022-09-10,180901484,211473270
4,2022-09-11,143542520,168413710
5,2022-09-12,128094440,150236580
6,2022-09-13,106171620,124419950


In [24]:
# gcoin_master data mart for the single day end_date.
gcoin_master = load_data_mart(
    "pc", end_date, end_date, "gcoin_master"
)

In [26]:
# Attach first_use_date to every McLaren row (left join keeps rows without a master row).
# Dropping any existing `first_use_date` first makes this self-reassigning cell safe to
# re-run: otherwise a second execution adds a duplicate column and later references to
# `first_use_date` become ambiguous. drop() is a no-op when the column is absent.
mclaren_gcoin = mclaren_gcoin.drop("first_use_date").join(
    gcoin_master.select("account_id", "first_use_date"), "account_id", "left"
)

In [10]:
# Paid and paid+free G-Coin spend on the McLaren event per region.
(
    mclaren_gcoin
    .groupBy("pubg_region")
    .agg(
        sum('paid_use'),
        sum(col("paid_use")+col("free_use")).alias("total"),
    )
    .toPandas()
)

Unnamed: 0,pubg_region,sum(paid_use),total
0,CN,1110377925,1334715990
1,,31431140,37818340
2,SA,5514390,9822250
3,KR,330074290,376557580
4,SEA,15775280,21112470
5,JP,3110190,3812970
6,TW/HK,11589770,13501280
7,CIS,14097939,20419530
8,EMEA,37007320,48773620
9,OC,5739080,6809150


In [11]:
# paid_pu holds the account_id on rows with paid_use > 0 and NULL elsewhere, so that
# countDistinct("paid_pu") counts only accounts that spent paid G-Coin.
mclaren_gcoin = mclaren_gcoin.withColumn(
    "paid_pu",
    when(col("paid_use") > 0, col("account_id")).otherwise(lit(None)),
)

In [13]:
# Distinct paying accounts (paid_use > 0) and distinct purchasing accounts per region.
(
    mclaren_gcoin
    .groupBy("pubg_region")
    .agg(
        countDistinct("paid_pu").alias("paid_pu"),
        countDistinct("account_id").alias("total_pu"),
    )
    .toPandas()
)

Unnamed: 0,pubg_region,paid_pu,total_pu
0,CN,141432,211835
1,,5208,6780
2,SA,2629,5112
3,KR,30470,38456
4,SEA,3352,5586
5,JP,474,630
6,TW/HK,1250,1624
7,CIS,5235,9008
8,EMEA,11263,16876
9,OC,1165,1458


In [27]:
# Distinct accounts per region with a McLaren row dated on their first_use_date.
(
    mclaren_gcoin
    .filter(col("date") == col("first_use_date"))
    .groupBy("pubg_region")
    .agg(countDistinct("account_id").alias("npu"))
    .toPandas()
)

Unnamed: 0,pubg_region,npu
0,CN,17082
1,,542
2,SA,390
3,KR,2404
4,SEA,442
5,JP,36
6,TW/HK,89
7,CIS,1086
8,EMEA,1448
9,OC,87


In [31]:
# Per-product buyers, paying buyers, units sold and G-Coin spend for the McLaren event.
(
    mclaren_gcoin
    .groupBy("product_name")
    .agg(
        countDistinct("account_id").alias("pu"),
        countDistinct("paid_pu").alias("paid_pu"),
        sum("qty").alias("unit_sold"),
        sum('paid_use').alias("paid_use"),
        sum(col("paid_use") + col("free_use")).alias("total_use"),
    )
    .toPandas()
)

Unnamed: 0,product_name,pu,paid_pu,unit_sold,paid_use,total_use
0,CRAFTER PASS: McLAREN TOKEN & LEVEL PACK,50495,49719,50495,146662625,165118650
1,McLAREN LOOT CACHE PACK (x11),46936,45738,79639,177509370,199097500
2,McLaren Level UP,55409,44753,996031,83187875,99603100
3,McLAREN LOOT CACHE PACK (x27),37743,37374,52201,296402710,326256250
4,McLAREN LOOT CACHE PACK (x55),39570,39372,64309,745149925,803862500
5,CRAFTER PASS: McLAREN TOKEN PACK,193707,110060,193707,77990879,191769930
6,McLAREN LOOT CACHE PACK (x1),104932,55477,350541,37813940,87635250


### 기존 스킨제작소 구매자 vs McLAREN 구매자

In [14]:
# gcoin_use rows for the event named "202207_workshop" from 2022-07-13 through end_date,
# then the earliest date per account.
workshop_gcoin = (
    load_data_mart("pc", "2022-07-13", end_date, "gcoin_use")
    .filter(col("event_name") == "202207_workshop")
)
workshop_first_buy_by_user = (
    workshop_gcoin
    .groupBy("account_id")
    .agg(min("date").alias("first_date"))
)

In [16]:
# Earliest McLaren purchase date per account.
mclaren_first_buy_by_user = (
    mclaren_gcoin
    .groupBy("account_id")
    .agg(min("date").alias("mclaren_first_date"))
)

In [17]:
# Full outer join keeps accounts that appear on only one side; the missing side's date
# column is NULL for them.
first_buy_by_user = workshop_first_buy_by_user.join(
    mclaren_first_buy_by_user, "account_id", "full_outer"
)

In [22]:
# a: existing-workshop buyer who later bought the seasonal product
# b: bought the seasonal product and the existing workshop on the same day
# c: bought the seasonal product first, then the existing workshop
# d: bought only the seasonal product
# e: bought only the existing workshop
# A comparison with a NULL date evaluates to NULL, so accounts missing one side skip the
# first three branches and are labelled by the isNull() checks.
_workshop_first = col("first_date")
_mclaren_first = col("mclaren_first_date")
first_buy_by_user = first_buy_by_user.withColumn(
    "case",
    when(_workshop_first < _mclaren_first, "a")
    .when(_workshop_first == _mclaren_first, "b")
    .when(_workshop_first > _mclaren_first, "c")
    .when(_workshop_first.isNull(), "d")
    .when(_mclaren_first.isNull(), "e")
    .otherwise(lit(None)),
)

In [23]:
# Number of distinct accounts in each purchase-order case (a-e).
(
    first_buy_by_user
    .groupBy("case")
    .agg(countDistinct("account_id").alias("user_cnt"))
    .orderBy("case")
    .toPandas()
)

Unnamed: 0,case,user_cnt
0,a,152029
1,b,5525
2,c,2990
3,d,136821
4,e,872576


### 맥라렌 제작소 사용률

In [56]:
# Read the pass_user_meta parquet data stored under the end_date path segment.
pass_user_meta = spark.read.parquet(
    "s3a://pubg-log-labs/data_mart/pass_user_meta/"+end_date
)

In [38]:
# Preview of the McLaren pass rows.
# NOTE(review): the rendered output contains raw account IDs — consider clearing or
# redacting it before sharing the notebook.
(
    pass_user_meta
    .filter(col("pass_name") == "mclaren")
    .show(truncate=False)
)

+----------------------------------------+---------+---------+---------------+
|accountid                               |pass_type|pass_name|pass_start_date|
+----------------------------------------+---------+---------+---------------+
|account.24590f854bc44496998977577c73955a|season   |mclaren  |2022-09-08     |
|account.ece1c2c554b64f85affd2d0b1de7b300|season   |mclaren  |2022-09-10     |
|account.6ff71f978feb46a480d214fa46125e1c|season   |mclaren  |2022-09-11     |
|account.5d319c8627b14ca4be97bc854aa5e52c|season   |mclaren  |2022-09-11     |
|account.86d47624adbd4577b086804fcb928dbf|season   |mclaren  |2022-09-12     |
|account.7e88411c656c48cf8a5e91517ac58abd|season   |mclaren  |2022-09-09     |
|account.cfe7015a561340908196e7d236254170|season   |mclaren  |2022-09-08     |
|account.88de7fd3fe1d41e1b1b6e378a9c8fff7|season   |mclaren  |2022-09-08     |
|account.3a6aec62807f4ddfb6c4dba89e2c399b|season   |mclaren  |2022-09-09     |
|account.7849d9b7a71d41c497290f150cb818cd|season   |

In [58]:
# Distinct pass names with pass_start_date on or after 2022-01-12.
(
    pass_user_meta
    .filter(col("pass_start_date") >= "2022-01-12")
    .select("pass_name")
    .distinct()
    .show()
)

+-----------+
|  pass_name|
+-----------+
|    mclaren|
|galaxysquad|
|     deston|
|   punkwave|
+-----------+



In [61]:
# Distinct punkwave pass accounts whose pass_start_date is on or before 2022-03-22.
(
    pass_user_meta
    .filter((col("pass_name") == "punkwave") & (col("pass_start_date") <= "2022-03-22"))
    .select(lit("punkwave"), countDistinct("accountid").alias("pass_user_cnt"))
    .show()
)

+--------+-------------+
|punkwave|pass_user_cnt|
+--------+-------------+
|punkwave|       415938|
+--------+-------------+



In [63]:
# Distinct galaxysquad pass accounts whose pass_start_date is on or before 2022-05-17.
(
    pass_user_meta
    .filter((col("pass_name") == "galaxysquad") & (col("pass_start_date") <= "2022-05-17"))
    .select(lit("galaxysquad"), countDistinct("accountid").alias("pass_user_cnt"))
    .show()
)

+-----------+-------------+
|galaxysquad|pass_user_cnt|
+-----------+-------------+
|galaxysquad|       236867|
+-----------+-------------+



In [65]:
# Distinct deston pass accounts whose pass_start_date is on or before 2022-07-19.
(
    pass_user_meta
    .filter((col("pass_name") == "deston") & (col("pass_start_date") <= "2022-07-19"))
    .select(lit("deston"), countDistinct("accountid").alias("pass_user_cnt"))
    .show()
)

+------+-------------+
|deston|pass_user_cnt|
+------+-------------+
|deston|       182415|
+------+-------------+



In [66]:
# Distinct mclaren pass accounts whose pass_start_date is on or before end_date.
(
    pass_user_meta
    .filter((col("pass_name") == "mclaren") & (col("pass_start_date") <= end_date))
    .select(lit("mclaren"), countDistinct("accountid").alias("pass_user_cnt"))
    .show()
)

+-------+-------------+
|mclaren|pass_user_cnt|
+-------+-------------+
|mclaren|       130101|
+-------+-------------+



In [70]:
def _reward_level_logs(first_day, last_day):
    """Load the "RewardLevel" lobby schema for ("pc", "live") between the two given dates."""
    return load_schema.lobby(spark, "pc", "live", "RewardLevel", first_day, last_day)


# One RewardLevel frame per pass, each over its own 7-day date range.
punkwave_pass_level = _reward_level_logs("2022-03-16", "2022-03-22")
galaxysquad_pass_level = _reward_level_logs("2022-05-11", "2022-05-17")
deston_pass_level = _reward_level_logs("2022-07-13", "2022-07-19")
mclaren_pass_level = _reward_level_logs(start_date, end_date)

In [71]:
def _max_level_per_account(reward_levels, pass_name):
    """Reduce a RewardLevel frame to one row per AccountId holding its highest Level,
    tagged with the given pass_name."""
    return (
        reward_levels
        .groupBy("AccountId")
        .agg(max("Level").alias("max_level"))
        .withColumn("pass_name", lit(pass_name))
    )


# Stack the four per-pass frames (same columns, same order as before).
pass_level = (
    _max_level_per_account(punkwave_pass_level, "punkwave")
    .union(_max_level_per_account(galaxysquad_pass_level, "galaxysquad"))
    .union(_max_level_per_account(deston_pass_level, "deston"))
    .union(_max_level_per_account(mclaren_pass_level, "mclaren"))
)

In [76]:
# Distinct accounts with at least one RewardLevel row, per pass.
(
    pass_level
    .groupBy("pass_name")
    .agg(countDistinct("AccountId").alias("user_cnt"))
    .toPandas()
)

Unnamed: 0,pass_name,user_cnt
0,mclaren,1201077
1,galaxysquad,1344438
2,deston,858901
3,punkwave,1663923


In [73]:
# Distribution of accounts over their highest reached level, per pass.
pass_level_df = (
    pass_level
    .groupBy("pass_name", "max_level")
    .agg(countDistinct("AccountId").alias("user_cnt"))
)

In [74]:
# Persist the per-pass level distribution via mysql.insert_table.
# NOTE(review): "labs" / "pass_level_df" are presumably the target database and table —
# confirm against the helper; re-running may duplicate or overwrite rows depending on it.
mysql.insert_table(pass_level_df, "labs", "pass_level_df")

In [None]:
# McLaren rows for the two product ids itemdesc.13000628 and itemdesc.13000629.
pass_user = mclaren_gcoin.filter(
    col("product_id").isin(["itemdesc.13000628", "itemdesc.13000629"])
)

In [None]:
def pass_user_rate(start_date, end_date, purchase_start_date=None, purchase_end_date=None):
    """Return the active-user count and the pass-user count for a date window.

    Despite the name, no ratio is computed; the two counts are returned so the
    caller can divide them.

    Args:
        start_date: First day of the window (string date as used elsewhere in
            this notebook, e.g. "2022-09-07").
        end_date: Last day of the window; also the user_master snapshot day.
        purchase_start_date: First day of the PurchaseResult logs to scan.
            Defaults to ``start_date``.
        purchase_end_date: Last day of the PurchaseResult logs to scan.
            Defaults to ``end_date``.

    Returns:
        Tuple ``(au, user_cnt)``: distinct accounts in user_master with
        ``lastlogindate >= start_date``, and distinct accounts that either have
        a RewardLevel row in the window or a PurchaseResult row whose
        AnalyticEventType contains "pass".

    Note:
        The purchase window used to be hard-coded to 2022-07-13 regardless of
        the arguments; pass ``purchase_start_date="2022-07-13",
        purchase_end_date="2022-07-13"`` to reproduce that.
    """
    if purchase_start_date is None:
        purchase_start_date = start_date
    if purchase_end_date is None:
        purchase_end_date = end_date

    active_users = load_data_mart("pc", end_date, end_date, "user_master").where(col('lastlogindate') >= start_date)
    level_accounts = load_schema.lobby(spark, "pc", "live", "RewardLevel", start_date, end_date).select("AccountId").distinct()
    purchase_accounts = (
        load_schema.lobby(spark, "pc", "live", "PurchaseResult", purchase_start_date, purchase_end_date)
        .where(col("AnalyticEventType").like("%pass%"))
        .select("AccountId")
        .distinct()
    )
    # union() keeps duplicates across the two sources; countDistinct below removes them.
    pass_accounts = level_accounts.union(purchase_accounts)

    au = active_users.select(countDistinct("accountid").alias("au")).collect()[0][0]
    user_cnt = pass_accounts.select(countDistinct("AccountId")).collect()[0][0]
    return au, user_cnt