In [1]:
from pyspark.sql import SQLContext
from pyspark.ml.fpm import FPGrowth
from pyspark.sql import SparkSession
from pyspark.sql import Row
import mlflow
import json

# --- Configuration: inputs and FP-Growth hyperparameters ---------------------
ITEMS_PATH = "items.tbl"        # pipe-delimited item catalogue with a header row
CLICKS_PATH = "clicks1.json"    # click log, one JSON record per line
MIN_SUPPORT = 0.01              # minimum fraction of transactions containing an itemset
MIN_CONFIDENCE = 0.9            # minimum confidence for a generated association rule
NUM_PARTITIONS = 2              # partitions used by the parallel FP-Growth run

# Reuse the active Spark session if one exists, otherwise start a new one.
spark = SparkSession.builder.appName('Project').getOrCreate()

# Item catalogue; column types are inferred from the data.
dataset = spark.read.csv(ITEMS_PATH, inferSchema=True, header=True, sep='|')
# createOrReplaceTempView (rather than createTempView) keeps this cell
# re-runnable: getOrCreate() hands back the same session on a re-run, and
# createTempView raises if a view with that name is already registered.
dataset.createOrReplaceTempView("items")

# Click log. spark.read.json already exposes the JSON fields as columns, so the
# query below can reference wl_customer_id / wl_item_id directly.
clicks_df = spark.read.json(CLICKS_PATH)
clicks_df.createOrReplaceTempView("clicks")

# Build one "transaction" per item category: the distinct set of customers who
# clicked on any item of that category.
#
# The previous version of this query also carried
#   LATERAL VIEW json_tuple('wl_customer_id', 'wl_item_id') l
# which parsed the string literal 'wl_customer_id' (not a column) as JSON. That
# yields exactly one all-NULL row per input row, so it had no effect on the
# result and has been dropped.
#
# NOTE(review): transactions are keyed by category and the items are customer
# ids, so nearly every transaction holds the same handful of customers. Confirm
# this orientation is intended (vs. categories collected per customer).
q = """
    SELECT collect_set(logs.wl_customer_id) AS items,
           i.i_category_id
    FROM (
        SELECT wl_customer_id, wl_item_id
        FROM clicks
        WHERE wl_customer_id IS NOT NULL
    ) logs
    JOIN items i
      ON logs.wl_item_id = i.i_item_id
    WHERE i.i_category_id IS NOT NULL
    GROUP BY i.i_category_id
"""

# `df` (the transaction table) and `model` are consumed by later cells.
df = spark.sql(q)

fpGrowth = FPGrowth(
    itemsCol="items",
    minSupport=MIN_SUPPORT,
    minConfidence=MIN_CONFIDENCE,
    numPartitions=NUM_PARTITIONS,
)
model = fpGrowth.fit(df)

In [2]:
# Preview the transaction table produced by the SQL query: one row per
# i_category_id with its collected `items` array. The arguments spell out
# show()'s defaults (first 20 rows, long cell values truncated).
df.show(n=20, truncate=True)

+------------+-------------+
|       items|i_category_id|
+------------+-------------+
|[0, 1, 2, 3]|           12|
|[0, 1, 2, 3]|            1|
|[0, 1, 2, 3]|           13|
|[0, 1, 2, 3]|           16|
|[0, 1, 2, 3]|            6|
|[0, 1, 2, 3]|            3|
|[0, 1, 2, 3]|            5|
|[0, 1, 2, 3]|           19|
|[0, 1, 2, 3]|           15|
|[0, 1, 2, 3]|            9|
|[0, 1, 2, 3]|           17|
|[0, 1, 2, 3]|            4|
|[0, 1, 2, 3]|            8|
|[0, 1, 2, 3]|            7|
|[0, 1, 2, 3]|           10|
|[0, 1, 2, 3]|           11|
|   [0, 1, 2]|           14|
|[0, 1, 2, 3]|            2|
|[0, 1, 2, 3]|            0|
|[0, 1, 2, 3]|           18|
+------------+-------------+



In [3]:
# 1) Frequent itemsets found by the fitted model, with their occurrence counts.
frequent_itemsets = model.freqItemsets
frequent_itemsets.show()

# 2) Association rules derived from those itemsets
#    (antecedent -> consequent, with confidence and lift).
association_rules = model.associationRules
association_rules.show()

# 3) Apply the fitted model back to the input transactions and display the result.
transformed = model.transform(df)
transformed.show()

+------------+----+
|       items|freq|
+------------+----+
|         [1]|  20|
|      [1, 2]|  20|
|   [1, 2, 0]|  20|
|      [1, 0]|  20|
|         [0]|  20|
|         [2]|  20|
|      [2, 0]|  20|
|         [3]|  19|
|      [3, 1]|  19|
|   [3, 1, 2]|  19|
|[3, 1, 2, 0]|  19|
|   [3, 1, 0]|  19|
|      [3, 2]|  19|
|   [3, 2, 0]|  19|
|      [3, 0]|  19|
+------------+----+

+----------+----------+----------+----+
|antecedent|consequent|confidence|lift|
+----------+----------+----------+----+
|    [3, 2]|       [1]|       1.0| 1.0|
|    [3, 2]|       [0]|       1.0| 1.0|
|    [3, 1]|       [2]|       1.0| 1.0|
|    [3, 1]|       [0]|       1.0| 1.0|
|    [1, 0]|       [2]|       1.0| 1.0|
|    [1, 0]|       [3]|      0.95| 1.0|
| [3, 1, 0]|       [2]|       1.0| 1.0|
|    [1, 2]|       [0]|       1.0| 1.0|
|    [1, 2]|       [3]|      0.95| 1.0|
|       [3]|       [1]|       1.0| 1.0|
|       [3]|       [2]|       1.0| 1.0|
|       [3]|       [0]|       1.0| 1.0|
| [1, 2, 0]|       