Skip to content

Commit

Permalink
Merge pull request #723 from tandav/master
Browse files (browse the repository at this point in the history)
more readable collect_set in _get_top_k_items
  • Loading branch information
miguelgfierro committed Apr 9, 2019
2 parents 1a3d846 + b8ea92c commit 8bc76fd
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions reco_utils/evaluation/spark_evaluation.py
Expand Up @@ -371,9 +371,8 @@ def _get_top_k_items(
row_number().over(window_spec).alias("rank")
)
.where(col("rank") <= k)
- .withColumn(col_prediction, F.collect_list(col_item).over(Window.partitionBy(col_user)))
- .select(col_user, col_prediction)
- .dropDuplicates([col_user, col_prediction])
+ .groupby(col_user)
+ .agg(F.collect_list(col_item).alias(col_prediction))
)

return items_for_user
Expand Down

0 comments on commit 8bc76fd

Please sign in to comment.