In [1]:
import polars as pl

In [6]:
train_data = pl.read_parquet('data/train.parquet')
train_data = train_data.explode("events").unnest("events")

test_data = pl.read_parquet('data/test.parquet')
print(test_data)

shape: (979_617, 3)
┌──────────┬─────────┬───────────────────────────────────┐
│ session  ┆ label   ┆ events                            │
│ ---      ┆ ---     ┆ ---                               │
│ i64      ┆ i64     ┆ list[struct[3]]                   │
╞══════════╪═════════╪═══════════════════════════════════╡
│ 11955459 ┆ 396956  ┆ [{861882,1661411708073,"clicks"}… │
│ 11391486 ┆ 70874   ┆ [{428948,1661213878385,"clicks"}… │
│ 11280097 ┆ 434423  ┆ [{1770344,1661185503611,"clicks"… │
│ 12275789 ┆ 1548608 ┆ [{1609770,1661523415651,"clicks"… │
│ 11748693 ┆ 1767107 ┆ [{650819,1661338091714,"clicks"}… │
│ …        ┆ …       ┆ …                                 │
│ 12706839 ┆ 370477  ┆ [{633484,1661683211298,"clicks"}… │
│ 11197298 ┆ 87613   ┆ [{728827,1661167953810,"clicks"}… │
│ 12317107 ┆ 117087  ┆ [{523335,1661533978099,"clicks"}… │
│ 11574797 ┆ 1428440 ┆ [{1344109,1661273002063,"clicks"… │
│ 12200806 ┆ 1508865 ┆ [{941516,1661501688627,"clicks"}… │
└──────────┴─────────┴──────────────

In [4]:
from polars.datatypes import Struct

class Recommender:
    def fit(self, data: pl.DataFrame) -> None:
        # fit the model to the training data
        pass
        
    def recommend(self, events: list[Struct]) -> list[int]:
        # return a list of k item ids
        pass

In [9]:
class BaselineRecommender(Recommender):
    def __init__(self):
        self.top_k = None
        
    def fit(self, data):
        self.top_k = data.group_by("aid").len().sort("len", descending=True).head(20)["len"].to_list()
        
    def recommend(self, events):
        return self.top_k


bs_model = BaselineRecommender()
bs_model.fit(train_data)
print(bs_model.recommend([1, 2, 3]))

[24455, 23562, 22671, 18888, 18444, 18303, 18039, 17837, 17472, 17205, 15723, 15669, 15632, 15223, 14951, 14785, 14780, 14037, 13701, 13114]


In [15]:
def evaluation(model, test_data):
    # evaluate the model on the test data
    right_predictions = 0
    for sequence in test_data.iter_rows():
        recommendations = model.recommend(sequence[2])
        label = sequence[1]
        if label in recommendations:
            right_predictions += 1
        
    print(f"Accuracy: {right_predictions / len(test_data) * 100} %")

evaluation(bs_model, test_data)

Accuracy: 0.0014291299558909247 %
