In [1]:
from pubg_util import mysql, load_schema, notifier
from sphynx import sphynx, NODE_SMALL, NODE_MEDIUM, NODE_LARGE
from pyspark.sql.functions import *
import pandas as pd
import pickle

# Request the Spark session that every analysis cell below runs on.
spark = sphynx.get_spark(
    app_name='west0_mclaren_pass',
    node_spec=NODE_LARGE,
    executor_count=8,
)

Spark cluster not assigned. creating a new one...
Node spec: 8 executors with 48G RAM each
Job Port 4049 is assigned for requested cluster
Waiting for Spark master to be available...
Spark master launched!
Creating new Spark session, name: west0_mclaren_pass...
Waiting for all executors ready...
All executors connected!
Complete! elapsed time: 00:00:27


In [15]:
# Shut down the Spark session and its cluster.
# NOTE(review): this cell sits before the analysis cells, and its execution count
# shows it was not run in document order. In a top-to-bottom run it would destroy
# the cluster before any analysis cell executes — presumably it belongs at the end
# of the notebook; confirm.
sphynx.stop()

Stopping Spark session...
Destroying Spark cluster...
Done!


In [5]:
# Date bounds of the analysis; the start date and the G-Coin end date are the
# two date arguments passed to load_data_mart when the gcoin frame is loaded.
post_start_date = "2022-09-07"
post_gcoin_end_date = "2022-11-02"
# NOTE(review): not referenced by any cell visible in this notebook.
post_craft_end_date = "2022-11-07"

In [6]:
# Load the "gcoin_use" mart for the analysis window and keep only the rows of
# the 202209 season workshop event.
# NOTE(review): load_data_mart is not among the names imported in the first
# cell — presumably it came from a cell that is no longer in the notebook; confirm.
gcoin = (
    load_data_mart("pc", post_start_date, post_gcoin_end_date, "gcoin_use")
    .filter(col("event_name") == "202209_season_workshop")
)

In [7]:
# Per-row helper columns used by the P-1 / P-2 / P-3 split.
# A `when` without `otherwise` yields NULL for unmatched rows, which is what the
# NULL-valued columns below rely on.
_row_spend = col("paid_use") + col("free_use")  # paid + free usage of the row
_is_level_up = col("product_id") == "battlepasslevelup.202209"

gcoin = (
    gcoin
    # Levels gained: the purchased qty for level-up rows, a flat 30 for itemdesc.13000628.
    .withColumn(
        "level_up",
        when(_is_level_up, col("qty"))
        .when(col("product_id") == "itemdesc.13000628", lit(30)),
    )
    .withColumn("level_up_gcoin", when(_is_level_up, _row_spend))
    # 1/0 flag and spend for the four product ids listed here.
    .withColumn(
        "is_product",
        when(
            col("product_id").isin(
                ["itemdesc.13000637", "itemdesc.13000638", "itemdesc.13000639", "itemdesc.13000640"]
            ),
            lit(1),
        ).otherwise(lit(0)),
    )
    .withColumn("product_gcoin", when(col("is_product") == 1, _row_spend))
    # 1/0 flag and spend for the two pass product ids.
    .withColumn(
        "is_pass",
        when(col("product_id").isin(["itemdesc.13000628", "itemdesc.13000629"]), lit(1)).otherwise(lit(0)),
    )
    .withColumn("pass_gcoin", when(col("is_pass") == 1, _row_spend))
)

## Pass User

In [8]:
# Accounts that bought either of the two pass products, one row per account.
pass_user = (
    gcoin
    .filter(col("product_id").isin(["itemdesc.13000628", "itemdesc.13000629"]))
    .select("account_id")
    .distinct()
)
# All event rows belonging to those accounts (inner join on account_id).
pass_user_gcoin = gcoin.join(pass_user, on="account_id")

In [197]:
# Number of pass-buying accounts.
pass_user.agg(count("*").alias("pass_user_cnt")).show(truncate=False)

+-------------+
|pass_user_cnt|
+-------------+
|481597       |
+-------------+



### P-1

In [9]:
# P-1: pass users with exactly one event row. Since every pass user has at
# least one pass row, that single row is the pass purchase itself.
# groupBy("account_id") already yields one row per account, so the trailing
# distinct() of the original was redundant and has been dropped.
p1_user = (
    pass_user_gcoin
    .groupBy("account_id")
    .agg(count("*").alias("buy_cnt"))
    .filter(col("buy_cnt") == 1)
    .select("account_id")
)

In [199]:
# Number of P-1 accounts.
p1_user.agg(count("*").alias("p1_user_cnt")).show(truncate=False)

+-----------+
|p1_user_cnt|
+-----------+
|147774     |
+-----------+



In [10]:
# Event rows of the P-1 accounts (inner join on account_id).
p1_gcoin = gcoin.join(p1_user, on="account_id")

In [11]:
# Row count and distinct-account count per product among P-1 accounts.
(
    p1_gcoin
    .groupBy("product_name")
    .agg(count("*"), countDistinct("account_id"))
    .show(truncate=False)
)

+----------------------------------------+--------+--------------------------+
|product_name                            |count(1)|count(DISTINCT account_id)|
+----------------------------------------+--------+--------------------------+
|CRAFTER PASS: McLAREN TOKEN & LEVEL PACK|7816    |7816                      |
|CRAFTER PASS: McLAREN TOKEN PACK        |139958  |139958                    |
+----------------------------------------+--------+--------------------------+



################################

In [201]:
# Total number of P-1 rows; expected to equal the P-1 account count.
p1_gcoin.agg(count("*")).show()

+--------+
|count(1)|
+--------+
|  147774|
+--------+



In [202]:
# Sanity check that every P-1 account bought only the pass:
# count rows per (account_id, product_id) pair ...
p1_check = (
    p1_gcoin
    .groupBy("account_id", "product_id")
    .agg(count("*").alias("cnt"))
)
# ... then the number of pairs and the total number of rows must be equal.
p1_check.agg(count("*"), sum("cnt")).show(truncate=False)

+--------+--------+
|count(1)|sum(cnt)|
+--------+--------+
|147774  |147774  |
+--------+--------+



In [203]:
# Only "itemdesc.13000628" and "itemdesc.13000629" are expected in this list.
p1_check.select("product_id").dropDuplicates().show(truncate=False)

+-----------------+
|product_id       |
+-----------------+
|itemdesc.13000629|
|itemdesc.13000628|
+-----------------+



################################

In [13]:
# Per-user summary for P-1. Each P-1 account has exactly one row, so the row is
# projected directly instead of being aggregated.
p1_by_user = p1_gcoin.select(
    "account_id",
    "pass_gcoin",
    "paid_use",
    (col("paid_use") + col("free_use")).alias("total_use"),
    "level_up",
)

### P-3

P-3 = pass users who also bought at least one McLaren loot cache pack: "itemdesc.13000637", "itemdesc.13000638", "itemdesc.13000639", "itemdesc.13000640"

In [205]:
# P-3: pass users outside P-1 who bought at least one of the four listed products.
p3_user = (
    pass_user_gcoin
    .join(p1_user, "account_id", "left_anti")
    .filter(
        col("product_id").isin(
            ["itemdesc.13000637", "itemdesc.13000638", "itemdesc.13000639", "itemdesc.13000640"]
        )
    )
    .select("account_id")
    .distinct()
)

In [206]:
# Number of P-3 accounts.
p3_user.agg(count("*").alias("p3_user_cnt")).show(truncate=False)

+-----------+
|p3_user_cnt|
+-----------+
|277093     |
+-----------+



In [207]:
# Event rows of the P-3 accounts (inner join on account_id).
p3_gcoin = pass_user_gcoin.join(p3_user, on="account_id")

################################

In [208]:
# Accounts with at least one product row AND at least one pass row;
# the result should equal the P-3 user count.
(
    p3_gcoin
    .groupBy("account_id")
    .agg(sum("is_product").alias("is_product"), sum("is_pass").alias("is_pass"))
    .filter((col("is_product") > 0) & (col("is_pass") > 0))
    .select(count("*"))
    .show()
)

+--------+
|count(1)|
+--------+
|  277093|
+--------+



In [209]:
# Number of P-3 accounts with a positive level_up total. level_up is the qty of
# battlepasslevelup.202209 rows and also a flat 30 for itemdesc.13000628 rows,
# so either kind of purchase makes an account count here.
(
    p3_gcoin
    .groupBy("account_id")
    .agg(sum("level_up").alias("level_up_cnt"))
    .filter(col("level_up_cnt") > 0)
    .select(count("*"))
    .show()
)

+--------+
|count(1)|
+--------+
|  198625|
+--------+



################################

In [210]:
# Per-user totals for P-3: pass / product / level-up spend, levels gained,
# paid usage and paid + free usage.
p3_by_user = p3_gcoin.groupBy("account_id").agg(
    sum("pass_gcoin").alias("pass_gcoin"),
    sum("product_gcoin").alias("product_gcoin"),
    sum("level_up").alias("level_up"),
    sum("level_up_gcoin").alias("level_up_gcoin"),
    sum("paid_use").alias("paid_use"),
    sum(col("paid_use") + col("free_use")).alias("total_use"),
)

### P-2

In [211]:
# P-2: the pass users that are in neither P-1 nor P-3.
p2_user = (
    pass_user
    .join(p1_user, "account_id", "leftanti")
    .join(p3_user, "account_id", "leftanti")
)

In [212]:
# Number of P-2 accounts.
p2_user.agg(count("*").alias("p2_user_count")).show()

+-------------+
|p2_user_count|
+-------------+
|        56730|
+-------------+



In [213]:
# Event rows of the P-2 accounts (inner join on account_id).
p2_gcoin = gcoin.join(p2_user, on="account_id")

################################

In [97]:
# total pass user - p1 user - p3 user; should equal the P-2 user count.
# Computed from the DataFrames rather than from numbers copied out of earlier
# cell outputs, so the check stays correct when the notebook is re-run on new data.
pass_user.count() - p1_user.count() - p3_user.count()

56730

In [214]:
# P-2 accounts must not have any product rows, so this count is expected to be 0.
(
    p2_gcoin
    .groupBy("account_id")
    .agg(sum("is_product").alias("is_product"))
    .filter(col("is_product") > 0)
    .select(count("*"))
    .show()
)

+--------+
|count(1)|
+--------+
|       0|
+--------+



################################

In [215]:
# Per-user totals for P-2: pass and level-up spend, levels gained,
# paid usage and paid + free usage.
p2_by_user = p2_gcoin.groupBy("account_id").agg(
    sum("pass_gcoin").alias("pass_gcoin"),
    sum("level_up").alias("level_up"),
    sum("level_up_gcoin").alias("level_up_gcoin"),
    sum("paid_use").alias("paid_use"),
    sum(col("paid_use") + col("free_use")).alias("total_use"),
)

In [103]:
# Show the column types of the P-2 summary before it is unioned with the other groups.
p2_by_user.printSchema()

root
 |-- account_id: string (nullable = true)
 |-- pass_gcoin: long (nullable = true)
 |-- level_up: long (nullable = true)
 |-- level_up_gcoin: long (nullable = true)
 |-- paid_use: long (nullable = true)
 |-- total_use: long (nullable = true)



## Total

In [216]:
# Stack the three per-user summaries into one table tagged with a "group" label.
# Columns a group does not have are filled with NULL. The placeholders are cast to
# long explicitly (the type of the matching aggregated columns) so the result schema
# does not depend on implicit NullType widening during the union.
pass_user_total = (
    p1_by_user
    .withColumn("level_up_gcoin", lit(None).cast("long"))
    .withColumn("product_gcoin", lit(None).cast("long"))
    .withColumn("group", lit("P-1"))
    .unionByName(
        p2_by_user
        .withColumn("product_gcoin", lit(None).cast("long"))
        .withColumn("group", lit("P-2"))
    )
    .unionByName(p3_by_user.withColumn("group", lit("P-3")))
)

In [217]:
# Rows and distinct accounts per group; the two counts should match within each group.
(
    pass_user_total
    .groupBy("group")
    .agg(count("*"), countDistinct("account_id"))
    .show(truncate=False)
)

+-----+--------+--------------------------+
|group|count(1)|count(DISTINCT account_id)|
+-----+--------+--------------------------+
|P-1  |147774  |147774                    |
|P-3  |277093  |277093                    |
|P-2  |56730   |56730                     |
+-----+--------+--------------------------+



In [218]:
# Drop labs.pass_user_total before the next cell writes it again.
# NOTE(review): behaviour when the table does not exist is not visible here — confirm.
mysql.drop_table("labs", "pass_user_total")

In [219]:
# Write the combined P-1 / P-2 / P-3 per-user table to labs.pass_user_total.
mysql.insert_table(pass_user_total, "labs", "pass_user_total")

## McLaren (event buyers without a pass)

In [184]:
# Event rows of accounts that never bought a pass (anti-join against pass_user).
mclaren = gcoin.join(pass_user, on="account_id", how="leftanti")

In [161]:
# Row count per product among non-pass accounts.
(
    mclaren
    .groupBy("product_name", "product_id")
    .agg(count("*"))
    .show(truncate=False)
)

+-----------------------------+------------------------+--------+
|product_name                 |product_id              |count(1)|
+-----------------------------+------------------------+--------+
|McLaren Level UP             |battlepasslevelup.202209|142     |
|McLAREN LOOT CACHE PACK (x55)|itemdesc.13000640       |12570   |
|McLAREN LOOT CACHE PACK (x27)|itemdesc.13000639       |10550   |
|McLAREN LOOT CACHE PACK (x11)|itemdesc.13000638       |37683   |
|McLAREN LOOT CACHE PACK (x1) |itemdesc.13000637       |293348  |
+-----------------------------+------------------------+--------+



In [185]:
# Non-pass accounts that nevertheless bought the level-up product, one row per account.
level_up_user = (
    mclaren
    .filter(col("product_id") == "battlepasslevelup.202209")
    .select("account_id")
    .distinct()
)

In [162]:
# For those level-up buyers: how many distinct products each bought,
# then how many accounts fall into each distinct-product count.
(
    mclaren
    .join(level_up_user, "account_id")
    .groupBy("account_id")
    .agg(countDistinct("product_id").alias("product_cnt"))
    .groupBy("product_cnt")
    .agg(countDistinct("account_id"))
    .show(truncate=False)
)

+-----------+--------------------------+
|product_cnt|count(DISTINCT account_id)|
+-----------+--------------------------+
|5          |1                         |
|1          |45                        |
|3          |10                        |
|2          |27                        |
|4          |8                         |
+-----------+--------------------------+



In [187]:
# Number of non-pass accounts that bought the level-up product.
level_up_user.count()

91

In [186]:
# Users who own the pass without having purchased it and who bought level-up tickets.
# NOTE(review): unlike labs.pass_user_total, this table is not dropped before the
# insert; whether a re-run appends, fails or replaces depends on mysql.insert_table — confirm.
mysql.insert_table(level_up_user, "labs", "level_up_buyer_without_pass")

In [116]:
# Per-user totals for non-pass accounts, labelled as group "M-2".
mclaren_by_user = (
    mclaren
    .groupBy("account_id")
    .agg(
        sum("paid_use").alias("paid_use"),
        sum(col("free_use") + col("paid_use")).alias("total_use"),
    )
    .withColumn("group", lit("M-2"))
)

In [117]:
# Number of M-2 (non-pass) accounts.
mclaren_by_user.agg(count("*").alias("m2_user_cnt")).show()

+-----------+
|m2_user_cnt|
+-----------+
|     202813|
+-----------+



In [120]:
# Write the M-2 per-user table to labs.m2_user.
# NOTE(review): no drop_table precedes this insert (unlike labs.pass_user_total);
# confirm what mysql.insert_table does when the table already exists.
mysql.insert_table(mclaren_by_user, "labs", "m2_user")

In [118]:
# Distinct accounts across all event rows.
gcoin.agg(countDistinct("account_id")).show(truncate=False)

+--------------------------+
|count(DISTINCT account_id)|
+--------------------------+
|684410                    |
+--------------------------+



In [122]:
# mclaren user + pass user; should equal the distinct account count of gcoin.
# Computed from the DataFrames rather than from numbers copied out of earlier
# cell outputs, so the check stays correct when the notebook is re-run on new data.
mclaren_by_user.count() + pass_user.count()

684410