# Train and Validate a Collaborative Filtering Rec Sys on the Books Reviews Dataset


In [1]:
from datetime import datetime
!pip install -q tensorflow-recommenders
!pip install -q plotnine

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/96.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Google Colab Specific
# Set before TensorFlow is imported in the next cell.
# NOTE(review): presumably this switches TF/TFRS to the legacy Keras 2
# implementation (tf_keras) -- confirm against the TensorFlow documentation.
import os
os.environ['TF_USE_LEGACY_KERAS'] = '1'

In [3]:
import io
import datetime
import json

from typing import List, Union, Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs

import plotnine
import gdown

In [4]:
# Google Colab Specific
# Mount Google Drive; the cached splits, TensorBoard logs and saved models
# used later in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Import Books Metadata
# The download URL is built from the file id so the two cannot drift apart.
book_meta_id = "1mUUnGp-kgcYeZVa9jxK0RmVItbBdIgUZ"
book_meta_url = f"https://drive.google.com/uc?id={book_meta_id}"
book_meta_output = "books_data.csv"
gdown.download(book_meta_url, book_meta_output, quiet=False)

# Read the downloaded CSV and preview the first rows
books_df = pd.read_csv(book_meta_output)
books_df.head()

Downloading...
From (original): https://drive.google.com/uc?id=1mUUnGp-kgcYeZVa9jxK0RmVItbBdIgUZ
From (redirected): https://drive.google.com/uc?id=1mUUnGp-kgcYeZVa9jxK0RmVItbBdIgUZ&confirm=t&uuid=afec238b-022b-40b0-9843-73642d8533ed
To: /content/books_data.csv
100%|██████████| 181M/181M [00:04<00:00, 36.5MB/s]


Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount
0,Its Only Art If Its Well Hung!,,['Julie Strain'],http://books.google.com/books/content?id=DykPA...,http://books.google.nl/books?id=DykPAAAACAAJ&d...,,1996,http://books.google.nl/books?id=DykPAAAACAAJ&d...,['Comics & Graphic Novels'],
1,Dr. Seuss: American Icon,Philip Nel takes a fascinating look into the k...,['Philip Nel'],http://books.google.com/books/content?id=IjvHQ...,http://books.google.nl/books?id=IjvHQsCn_pgC&p...,A&C Black,2005-01-01,http://books.google.nl/books?id=IjvHQsCn_pgC&d...,['Biography & Autobiography'],
2,Wonderful Worship in Smaller Churches,This resource includes twelve principles in un...,['David R. Ray'],http://books.google.com/books/content?id=2tsDA...,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,,2000,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,['Religion'],
3,Whispers of the Wicked Saints,Julia Thomas finds her life spinning out of co...,['Veronica Haddon'],http://books.google.com/books/content?id=aRSIg...,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,iUniverse,2005-02,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,['Fiction'],
4,"Nation Dance: Religion, Identity and Cultural ...",,['Edward Long'],,http://books.google.nl/books?id=399SPgAACAAJ&d...,,2003-03-01,http://books.google.nl/books?id=399SPgAACAAJ&d...,,


In [6]:
# (rows, columns) of the books metadata frame as loaded from the CSV
books_df.shape

(212404, 10)

In [7]:
# Import Books Reviews
# The download URL is built from the file id so the two cannot drift apart.
book_rev_id = "1vaUum8JMyzsub74ih1rqJVaVidV0pXLP"
book_rev_url = f"https://drive.google.com/uc?id={book_rev_id}"
book_rev_output = "books_rating.csv"
gdown.download(book_rev_url, book_rev_output, quiet=False)

# Read the downloaded CSV and preview the first rows
ratings_df = pd.read_csv(book_rev_output)
ratings_df.head()

Downloading...
From (original): https://drive.google.com/uc?id=1vaUum8JMyzsub74ih1rqJVaVidV0pXLP
From (redirected): https://drive.google.com/uc?id=1vaUum8JMyzsub74ih1rqJVaVidV0pXLP&confirm=t&uuid=5c72512a-7113-4cc1-8ee6-6e4feec8ccb8
To: /content/books_rating.csv
100%|██████████| 2.86G/2.86G [00:59<00:00, 47.7MB/s]


Unnamed: 0,Id,Title,Price,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text
0,1882931173,Its Only Art If Its Well Hung!,,AVCGYZL8FQQTD,"Jim of Oz ""jim-of-oz""",7/7,4.0,940636800,Nice collection of Julie Strain images,This is only for Julie Strain fans. It's a col...
1,826414346,Dr. Seuss: American Icon,,A30TK6U7DNS82R,Kevin Killian,10/10,5.0,1095724800,Really Enjoyed It,I don't care much for Dr. Seuss but after read...
2,826414346,Dr. Seuss: American Icon,,A3UH4UZ4RSVO82,John Granger,10/11,5.0,1078790400,Essential for every personal and Public Library,"If people become the books they read and if ""t..."
3,826414346,Dr. Seuss: American Icon,,A2MVUWT453QH61,"Roy E. Perry ""amateur philosopher""",7/7,4.0,1090713600,Phlip Nel gives silly Seuss a serious treatment,"Theodore Seuss Geisel (1904-1991), aka &quot;D..."
4,826414346,Dr. Seuss: American Icon,,A22X4XUPKF66MR,"D. H. Richards ""ninthwavestore""",3/3,4.0,1107993600,Good academic overview,Philip Nel - Dr. Seuss: American IconThis is b...


In [8]:
# (rows, columns) of the raw reviews frame
ratings_df.shape

(3000000, 10)

## Data Cleansing

In [9]:
# Number of distinct titles in the metadata (nunique skips NaN by default)
books_df['Title'].nunique()

212403

In [10]:
# Missing-value count per column of the books metadata
books_df.isnull().sum()

Unnamed: 0,0
Title,1
description,68442
authors,31413
image,52075
previewLink,23836
publisher,75886
publishedDate,25305
infoLink,23836
categories,41199
ratingsCount,162652


In [11]:
# Drop the row without a title first, keep one row per title, and rebuild the
# index last so the final frame is numbered 0..n-1 with no gap.
# (Previously the index was reset before the NaN-title row was removed.)
books_df = (
    books_df
    .dropna(subset=['Title'])
    .drop_duplicates(subset=['Title'])
    .reset_index(drop=True)
)
books_df.shape

(212403, 10)

In [12]:
# Number of distinct values in the reviews' Id column
ratings_df['Id'].nunique()

221998

In [13]:
# Missing-value count per column of the reviews
ratings_df.isnull().sum()

Unnamed: 0,0
Id,0
Title,208
Price,2518829
User_id,561787
profileName,561905
review/helpfulness,0
review/score,0
review/time,0
review/summary,407
review/text,8


In [14]:
# Keep only reviews that carry both a title and a user id, then renumber rows
ratings_df = ratings_df[
    ratings_df[['Title', 'User_id']].notna().all(axis=1)
].reset_index(drop=True)
ratings_df.shape

(2438018, 10)

In [15]:
# Swap the epoch-seconds column for a datetime column: pop() removes
# 'review/time' and the converted values are appended as 'review_date'.
ratings_df['review_date'] = pd.to_datetime(ratings_df.pop('review/time'), unit='s')
ratings_df.head()

Unnamed: 0,Id,Title,Price,User_id,profileName,review/helpfulness,review/score,review/summary,review/text,review_date
0,1882931173,Its Only Art If Its Well Hung!,,AVCGYZL8FQQTD,"Jim of Oz ""jim-of-oz""",7/7,4.0,Nice collection of Julie Strain images,This is only for Julie Strain fans. It's a col...,1999-10-23
1,826414346,Dr. Seuss: American Icon,,A30TK6U7DNS82R,Kevin Killian,10/10,5.0,Really Enjoyed It,I don't care much for Dr. Seuss but after read...,2004-09-21
2,826414346,Dr. Seuss: American Icon,,A3UH4UZ4RSVO82,John Granger,10/11,5.0,Essential for every personal and Public Library,"If people become the books they read and if ""t...",2004-03-09
3,826414346,Dr. Seuss: American Icon,,A2MVUWT453QH61,"Roy E. Perry ""amateur philosopher""",7/7,4.0,Phlip Nel gives silly Seuss a serious treatment,"Theodore Seuss Geisel (1904-1991), aka &quot;D...",2004-07-25
4,826414346,Dr. Seuss: American Icon,,A22X4XUPKF66MR,"D. H. Richards ""ninthwavestore""",3/3,4.0,Good academic overview,Philip Nel - Dr. Seuss: American IconThis is b...,2005-02-10


In [16]:
# Convert all column titles to lower case
books_df.columns = books_df.columns.map(str.lower)
print(f'Books DF columns: {books_df.columns}')

ratings_df.columns = ratings_df.columns.map(str.lower)
print(f'Ratings DF columns: {ratings_df.columns}')

Books DF columns: Index(['title', 'description', 'authors', 'image', 'previewlink', 'publisher',
       'publisheddate', 'infolink', 'categories', 'ratingscount'],
      dtype='object')
Ratings DF columns: Index(['id', 'title', 'price', 'user_id', 'profilename', 'review/helpfulness',
       'review/score', 'review/summary', 'review/text', 'review_date'],
      dtype='object')


In [17]:
# Format column titles: swap '/' for '_' (e.g. review/score -> review_score)
ratings_df.columns = ratings_df.columns.map(lambda name: name.replace('/', '_'))
print(f'Ratings DF columns: {ratings_df.columns}')

Ratings DF columns: Index(['id', 'title', 'price', 'user_id', 'profilename', 'review_helpfulness',
       'review_score', 'review_summary', 'review_text', 'review_date'],
      dtype='object')


## Split Dataset for Validation: Hold Out Each User's Most Recent Review

In [18]:
# Midnight of the day 365 days before the most recent review.
# NOTE(review): no later cell visible in this notebook reads this value.
validation_start_date = (
    ratings_df['review_date'].max() - pd.Timedelta(days=365)
).normalize()
validation_start_date

Timestamp('2012-03-04 00:00:00')

In [19]:
# Most recent review date in the dataset
ratings_df['review_date'].max()

Timestamp('2013-03-04 00:00:00')

In [20]:
#### RAM Killer ####
# Define file paths (on Google Drive or local path)
train_file_path = "/content/drive/MyDrive/Capstone - Spring 2025 Personal/modeling/train_df.csv"
test_file_path = "/content/drive/MyDrive/Capstone - Spring 2025 Personal/modeling/test_df.csv"

# Rebuild the split only when either cached file is missing
if not os.path.exists(train_file_path) or not os.path.exists(test_file_path):
    print("Train/test split files do not exist. Creating them now...")

    # Sort so that each user's reviews are in chronological order
    ratings_df = ratings_df.sort_values(by=['user_id', 'review_date'])

    # Leave-last-out split: the final (most recent) row of every user goes to
    # the test set, all earlier rows go to the training set. A boolean mask
    # replaces groupby().progress_apply(), which requires tqdm.pandas() to be
    # registered (it never is in this notebook) and builds one frame per user.
    # Users with a single review therefore appear only in the test set.
    is_last_review = ~ratings_df.duplicated(subset='user_id', keep='last')
    train_df = ratings_df[~is_last_review].reset_index(drop=True)
    test_df = ratings_df[is_last_review].reset_index(drop=True)

    # Save the splits to CSV files on Google Drive
    train_df.to_csv(train_file_path, index=False)
    test_df.to_csv(test_file_path, index=False)

    print(f"Training set saved at: {train_file_path}")
    print(f"Test set saved at: {test_file_path}")
else:
    print("Train/test split files already exist. Loading them...")

    # Load the saved splits; parse review_date so both branches of this cell
    # produce the same dtypes (CSV stores the dates as text).
    train_df = pd.read_csv(train_file_path, parse_dates=['review_date'])
    test_df = pd.read_csv(test_file_path, parse_dates=['review_date'])

# Check the sizes of the datasets
print(f"Training set: {train_df.shape}")
print(f"Test set: {test_df.shape}")

Train/test split files already exist. Loading them...
Training set: (1429057, 10)
Test set: (1008961, 10)


## Train on Each User's Reviews Except Their Most Recent One

In [22]:
# Convert datasets into tensor datasets
# Every element is a dict with one review's user id, title and score.
train_ds = tf.data.Dataset.from_tensor_slices(
    {col: train_df[col] for col in ('user_id', 'title', 'review_score')}
)

# Peek at the first five training examples
for x in train_ds.take(5).as_numpy_iterator():
    print(x)

print('\n')

test_ds = tf.data.Dataset.from_tensor_slices(
    {col: test_df[col] for col in ('user_id', 'title', 'review_score')}
)

# Peek at the first five test examples
for x in test_ds.take(5).as_numpy_iterator():
    print(x)

{'user_id': b'A0015610VMNR0JC9XVL1', 'title': b'The Richest Man in Babylon', 'review_score': 5.0}
{'user_id': b'A0015610VMNR0JC9XVL1', 'title': b'The richest man in Babylon (Babylonian parables dealing with the principles of finance, etc)', 'review_score': 5.0}
{'user_id': b'A0015610VMNR0JC9XVL1', 'title': b'The richest man in Babylon', 'review_score': 5.0}
{'user_id': b'A0015610VMNR0JC9XVL1', 'title': b'ATTITUDE 101', 'review_score': 3.0}
{'user_id': b'A0015610VMNR0JC9XVL1', 'title': b'The richest man in Babylon', 'review_score': 5.0}


{'user_id': b'A00109803PZJ91RLT7DPN', 'title': b"This Calder Range (Calder Saga's)", 'review_score': 5.0}
{'user_id': b'A00117421L76WVWG4UX95', 'title': b'The Queen of Harlem: A Novel', 'review_score': 5.0}
{'user_id': b'A0015610VMNR0JC9XVL1', 'title': b'The richest man in Babylon', 'review_score': 5.0}
{'user_id': b'A002258237PFYJV336T05', 'title': b'Swan Place', 'review_score': 5.0}
{'user_id': b'A00264602WCXBHHFPLTQ4', 'title': b'The Berenstain Bear

In [23]:
# Create Feature Vocabularies (taken from the training split only)
unique_user_ids = pd.unique(train_df['user_id'])
unique_titles = pd.unique(train_df['title'])
unique_review_scores = pd.unique(train_df['review_score'])

# Candidates for retrieval Task: one entry per distinct training title
candidate_ds = tf.data.Dataset.from_tensor_slices(
    {'title': train_df['title'].drop_duplicates()}
)

# Peek at the first five candidates
for x in candidate_ds.take(5).as_numpy_iterator():
    print(x)

{'title': b'The Richest Man in Babylon'}
{'title': b'The richest man in Babylon (Babylonian parables dealing with the principles of finance, etc)'}
{'title': b'The richest man in Babylon'}
{'title': b'ATTITUDE 101'}
{'title': b'How to Win Friends & Influence People (Cardinal Editions, C 303)'}


In [24]:
# Shuffle, batch and cache the training dataset.
# Only train_ds is cached here; candidate_ds is left as it is.
# The shuffle buffer is as large as the training set, and batches hold 4096 rows.
# NOTE(review): cache() comes after shuffle()/batch(), so presumably the batches
# from the first pass are replayed unchanged in later epochs -- confirm intended.
train_size = train_df.shape[0]
cached_train = train_ds.shuffle(train_size).batch(4096).cache()

## Build Two Tower Models

In [25]:
# User/Query Model
class UserModel(tf.keras.Model):
    '''
    Query tower: maps raw user ids to dense embedding vectors.
    '''

    def __init__(self,
                 unique_user_ids: np.ndarray,
                 feature_user_id_name: str,
                 embedding_dimensions: int):
        '''
        Params
        :param unique_user_ids: vocabulary of known user ids
        :param feature_user_id_name: key of the user-id entry in the input dict
        :param embedding_dimensions: width of each user embedding vector
        '''
        super().__init__()
        self.feature_user_id_name = feature_user_id_name

        # String id -> integer index
        id_lookup = tf.keras.layers.StringLookup(
            vocabulary=unique_user_ids,
            mask_token=None,
            name='user_id_vocab',
        )
        # Integer index -> embedding vector. The table has one row more than
        # the vocabulary (presumably for the out-of-vocabulary index).
        id_embedding = tf.keras.layers.Embedding(
            input_dim=len(unique_user_ids) + 1,
            output_dim=embedding_dimensions,
            name='user_id_embedding',
        )

        self.user_embedding_layers = tf.keras.Sequential([id_lookup, id_embedding])

    def call(self, inputs: Dict[Text, tf.Tensor]) -> tf.Tensor:
        '''Look up the embedding of every user id found in ``inputs``.'''
        user_ids = inputs[self.feature_user_id_name]
        return self.user_embedding_layers(user_ids)

In [26]:
class BookModel(tf.keras.Model):
    '''
    Book tower: maps raw book titles to dense embedding vectors.
    '''

    def __init__(self,
                 unique_titles: np.ndarray,
                 feature_book_title_name: str,
                 embedding_dimensions: int,
                 text_vectorization_max_tokens: int):
        '''
        Params
        :param unique_titles: vocabulary of known book titles
        :param feature_book_title_name: key of the title entry in the input dict
        :param embedding_dimensions: width of each title embedding vector
        :param text_vectorization_max_tokens: accepted but not used by this class
        '''
        super().__init__()
        self.feature_book_title_name = feature_book_title_name

        # Title string -> integer index
        title_lookup = tf.keras.layers.StringLookup(
            vocabulary=unique_titles,
            mask_token=None,
            name='book_id_vocab',
        )
        # Integer index -> embedding vector. The table has one row more than
        # the vocabulary (presumably for the out-of-vocabulary index).
        title_embedding = tf.keras.layers.Embedding(
            input_dim=len(unique_titles) + 1,
            output_dim=embedding_dimensions,
            name='book_id_embedding',
        )

        # Book Title embedding
        self.book_embedding_layers = tf.keras.Sequential(
            [title_lookup, title_embedding],
            name='book_id_embedding',
        )

    def call(self, inputs: Dict[Text, tf.Tensor]) -> tf.Tensor:
        '''Embed every title found in ``inputs``.'''
        title_vectors = self.book_embedding_layers(inputs[self.feature_book_title_name])
        # Only one feature today; add more embedding outputs to this list as needed.
        return tf.concat([title_vectors], axis=1)

In [27]:
class BooksTwoTowersModel(tfrs.Model):
    '''
    Two-Towers books recommender model

    A user (query) tower and a book (candidate) tower are trained together
    through a TFRS retrieval task.
    '''
    def __init__(self,
                 unique_user_ids: np.ndarray,
                 unique_titles: np.ndarray,
                 unique_review_scores: np.ndarray,
                 candidate_ds: tf.data.Dataset,
                 feature_user_id_name: str = 'user_id',
                 feature_book_title_name: str = 'title',
                 feature_review_score_name: str = 'review_score',
                 embedding_dimensions: int = 64):
        '''
        Instantiate query tower, candidate tower, and retrieval task.

        Params
        :param unique_user_ids: vocabulary passed to the user tower
        :param unique_titles: vocabulary passed to the book tower
        :param unique_review_scores: only its length is used, to size
            text_vectorization_max_tokens
        :param candidate_ds: dataset of feature dicts; it is batched and mapped
            through the book tower to form the candidates of the top-k metrics
        :param feature_user_id_name: key of the user id in a feature dict
        :param feature_book_title_name: key of the book title in a feature dict
        :param feature_review_score_name: key of the review score in a feature dict
        :param embedding_dimensions: output width of both towers
        '''
        super().__init__()
        self.feature_user_id_name = feature_user_id_name
        self.feature_book_title_name = feature_book_title_name
        self.feature_review_score_name = feature_review_score_name

        # Query Tower
        self.user_model = UserModel(
            unique_user_ids=unique_user_ids,
            feature_user_id_name=feature_user_id_name,
            embedding_dimensions=embedding_dimensions,
        )

        # Candidate Tower
        text_vectorization_max_tokens = len(unique_titles) + len(unique_review_scores)

        book_model_raw = BookModel(
            unique_titles=unique_titles,
            feature_book_title_name=feature_book_title_name,
            embedding_dimensions=embedding_dimensions,
            text_vectorization_max_tokens=text_vectorization_max_tokens,
        )

        # Dense projection layer to equate final tower output dims
        self.book_model = tf.keras.Sequential(
            [
                book_model_raw,
                tf.keras.layers.Dense(
                    units=embedding_dimensions,
                    name='book_dense_projection',
                ),
            ],
            name='book_sequential',
        )

        # Retrieval Task: top-k metrics are computed against the embedded candidates
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=candidate_ds.batch(128).map(self.book_model),
                ks=(10, 20, 50)
            )
        )

    def compute_loss(self,
                     features: Dict[Text, tf.Tensor],
                     training=False) -> tf.Tensor:
        '''
        Get embeddings for users and books.
        Compute dot product and retrieve candidates.

        Params
        :param features: batch dict holding the user id, title and review score
        :param training: when True the top-k metrics are skipped
        :return: the loss returned by the retrieval task
        '''
        user_embeddings = self.user_model({
            self.feature_user_id_name: features[self.feature_user_id_name],
        })

        book_embeddings = self.book_model({
            self.feature_book_title_name: features[self.feature_book_title_name],
        })

        # Sample weight logic: reviews scored 4 or higher get weight 1, all
        # others weight 0, so low-rated pairs are not learned as positives.
        review_scores = tf.cast(features[self.feature_review_score_name], tf.float32)
        sample_weight = tf.where(review_scores >= 4, 1.0, 0.0)

        # The weights were previously computed but never handed to the task.
        return self.task(
            user_embeddings,
            book_embeddings,
            sample_weight=sample_weight,
            compute_metrics=not training,
        )

## Compile and Train Model

In [28]:
# Setup log dir for tensorboard
LOG_DIR = "/content/drive/MyDrive/Capstone - Spring 2025 Personal/modeling/logs"

# Create the directory (and any missing parents); does nothing if it already exists
os.makedirs(LOG_DIR, exist_ok=True)

# Callback that writes training logs into LOG_DIR
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR)

In [29]:
# Compile Model
# Build the two-tower model from the training vocabularies and the
# de-duplicated candidate titles; both towers output 64-dimensional vectors.
model = BooksTwoTowersModel(
    unique_user_ids=unique_user_ids,
    unique_titles=unique_titles,
    unique_review_scores=unique_review_scores,
    candidate_ds=candidate_ds,
    embedding_dimensions=64,
)

# Adagrad optimizer with a learning rate of 0.1
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [30]:
# Train
# Ten epochs over the shuffled, batched and cached training data; the
# TensorBoard callback writes its logs to LOG_DIR.
model.fit(
    cached_train,
    epochs=10,
    callbacks=[tensorboard_callback],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tf_keras.src.callbacks.History at 0x7f91feb1a2d0>

In [31]:
# Timestamp (minute resolution) that versions the saved-model directory.
# `datetime` is the module here (see `import datetime` in the imports cell),
# hence datetime.datetime.now().
TRAIN_DATE = datetime.datetime.now().strftime('%Y-%m-%d_%H%M')
MODEL_PATH_BASE = f'/content/drive/MyDrive/Capstone - Spring 2025 Personal/modeling/models/train-date={TRAIN_DATE}'
MODEL_PATH_BASE

'/content/drive/MyDrive/Capstone - Spring 2025 Personal/modeling/models/train-date=2025-02-09_1519'

In [32]:
# Save each tower to its own sub-directory of MODEL_PATH_BASE (save_format='tf')
model.user_model.save(MODEL_PATH_BASE + '/user-tower', save_format='tf')
model.book_model.save(MODEL_PATH_BASE + '/book-tower', save_format='tf')



In [None]:
# Load existing model
# NOTE(review): this rebinds MODEL_PATH_BASE to a fixed, older directory
# (train-date=2025-02-08_1519); the save cell's recorded output shows
# 2025-02-09_1519 -- confirm which run is meant.
MODEL_PATH_BASE = '/content/drive/MyDrive/Capstone - Spring 2025 Personal/modeling/models/train-date=2025-02-08_1519'

# NOTE(review): neither loaded tower is referenced by any later cell visible
# here; the evaluation below uses the in-memory `model`. `article_model` holds
# the book tower.
user_model = tf.keras.models.load_model(MODEL_PATH_BASE + '/user-tower')
article_model = tf.keras.models.load_model(MODEL_PATH_BASE + '/book-tower')

## Validate Recommendations on the Held-out Test Set (each user's most recent review)

### Set up utility functions

In [36]:
# Rebuild test_ds from the test split, this time in batches of 256 rows.
# This replaces the unbatched test_ds created in an earlier cell.
test_ds = tf.data.Dataset.from_tensor_slices({
    "user_id": test_df["user_id"].values,  # User IDs
    "title": test_df["title"].values,      # Book Titles
    "review_score": test_df["review_score"].values,  # Review Scores
}).batch(256)

In [37]:
# Display the batched test dataset (its repr includes the element spec)
test_ds

<_BatchDataset element_spec={'user_id': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'title': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'review_score': TensorSpec(shape=(None,), dtype=tf.float64, name=None)}>

In [38]:
#### RAM Killer - takes about 1hr to run ####
# Evaluate Model and save results
metrics_file = "/content/drive/MyDrive/Capstone - Spring 2025 Personal/modeling/models/model_metrics.json"

# Evaluate only when no saved metrics exist; otherwise reuse the saved file.
# NOTE(review): the file name is not tied to a training run, so saved metrics
# may come from a different model than the one in memory -- confirm.
if not os.path.exists(metrics_file):
    print("Metrics file not found. Running model evaluation...")

    # Run model evaluation
    metrics = model.evaluate(test_ds, return_dict=True)

    # Save the metrics as JSON
    with open(metrics_file, "w") as file:
        json.dump(metrics, file, indent=4)
    print("Metrics saved as JSON!")
else:
    print("Metrics file already exists. Loading saved results...")

    # Load the saved results from the JSON file
    with open(metrics_file, "r") as file:
        metrics = json.load(file)



In [39]:
# Print Test results for TwoTower model
# The values are FactorizedTopK top-k categorical accuracies, i.e. how often
# the held-out title is among the top k retrieved candidates. That is a hit
# rate, not precision@k (hits divided by k), so the labels say so.
print(f"Top-10 accuracy (hit rate): {metrics['factorized_top_k/top_10_categorical_accuracy']}")
print(f"Top-20 accuracy (hit rate): {metrics['factorized_top_k/top_20_categorical_accuracy']}")
print(f"Top-50 accuracy (hit rate): {metrics['factorized_top_k/top_50_categorical_accuracy']}")

Precision@10: 0.1457519233226776
Precision@20: 0.15561552345752716
Precision@50: 0.16378730535507202


In [48]:
# Create function to generate random recs
def random_recs(users, candidates, k=10, seed=None):
  '''
  Generate random recommendations for users

  Every user gets k distinct candidates drawn uniformly at random.

  Params
  :param users: iterable of user ids
  :param candidates: sequence of book titles to draw from
  :param k: number of recommendations to generate per user
  :param seed: optional seed for reproducible draws (None = fresh entropy)
  :return: DataFrame with columns 'user_id' and 'title' (a list of k titles)
  :raises ValueError: if k exceeds the number of candidates
  '''
  rng = np.random.default_rng(seed)

  # Convert once up front; np.random.choice re-converted the list and
  # permuted the whole pool for every single user.
  candidate_pool = np.asarray(candidates)

  recommendations = [
      {
          'user_id': user,
          'title': list(rng.choice(candidate_pool, size=k, replace=False)),
      }
      for user in users
  ]

  # Explicit columns keep the schema stable even when `users` is empty
  return pd.DataFrame(recommendations, columns=['user_id', 'title'])

In [51]:
# Create function to calculate precision @ K
def precision_at_k(recommendations, test_df, k=10):
  '''
  Compute Precision@K for a given set of recommendations and test data.

  For each recommended user, precision is the number of their top-k
  recommended titles that appear in their test titles, divided by k.
  Users absent from test_df score 0.

  Params
  :param recommendations: DataFrame with 'user_id' and 'title' (list of titles)
  :param test_df: DataFrame of test data with 'user_id' and 'title' columns
  :param k: Number of recommendations to consider
  :return: mean precision over all rows of recommendations (0.0 if there are none)
  :raises ValueError: if k is not positive
  '''
  if k <= 0:
    raise ValueError('k must be a positive integer')
  if len(recommendations) == 0:
    return 0.0

  # Map every test user to the set of titles they actually reviewed
  test_dict = test_df.groupby('user_id')['title'].apply(set).to_dict()

  # Hits among the top-k titles, divided by k, one value per recommendation row
  precisions = [
      len(set(titles[:k]) & test_dict.get(user_id, set())) / k
      for user_id, titles in zip(recommendations['user_id'], recommendations['title'])
  ]

  # Return the average precision
  return sum(precisions) / len(precisions)

In [None]:
# Convert candidate_ds into a list (or array) of books
# as_numpy_iterator() yields plain bytes, so no per-element .numpy() call is needed.
candidate_books = [item['title'].decode() for item in candidate_ds.as_numpy_iterator()]

# Define k_values for which you want to evaluate precision
k_values = [10, 20, 50]

# The set of test users does not depend on k, so compute it once
test_user_ids = test_df['user_id'].unique()

# Create a dictionary to store precision results for each k
precisions_random = {}

# Loop through k_values, generate random recommendations, and calculate precision
for k in k_values:
    # Generate random recommendations for current k
    random_recs_df = random_recs(test_user_ids, candidate_books, k=k)

    # Calculate precision for the current k value
    precision = precision_at_k(random_recs_df, test_df, k)

    # Store precision result
    precisions_random[k] = precision

# Print the results for each k
for k, precision in precisions_random.items():
    print(f"Precision at {k}: {precision}")