In [4]:
import os

# Opt in to the legacy Keras implementation behind tf.keras.
# NOTE(review): this presumably has to run before `import tensorflow` to take
# effect - confirm, and keep this cell first on a fresh kernel.
os.environ.update({"TF_USE_LEGACY_KERAS": "1"})

In [3]:
import os
from ast import literal_eval
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_recommenders as tfrs

In [3]:
# Smoke test: open a throwaway MLflow run and log one dummy parameter and one
# dummy metric against the tracking server.

import mlflow

mlflow.set_tracking_uri(
    "http://ec2-13-61-4-77.eu-north-1.compute.amazonaws.com:5000/"
)

with mlflow.start_run():
    mlflow.log_param(key="param1", value=15)
    mlflow.log_metric(key="metric1", value=0.89)

üèÉ View run thoughtful-fowl-656 at: http://ec2-13-61-4-77.eu-north-1.compute.amazonaws.com:5000/#/experiments/0/runs/8524b3e063b14341b535c791acfc08ff
üß™ View experiment at: http://ec2-13-61-4-77.eu-north-1.compute.amazonaws.com:5000/#/experiments/0


In [5]:
# Remote CSV locations of the two tables used in this notebook.
BOOKS_CSV_URL = "https://raw.githubusercontent.com/malcolmosh/goodbooks-10k/master/books_enriched.csv"
RATINGS_CSV_URL = "https://raw.githubusercontent.com/malcolmosh/goodbooks-10k/master/ratings.csv"

# "genres" and "authors" are run through literal_eval while parsing, so the
# cells hold Python objects instead of their string representation.
literal_columns = {column: literal_eval for column in ("genres", "authors")}

books = pd.read_csv(BOOKS_CSV_URL, index_col=[0], converters=literal_columns)
ratings = pd.read_csv(RATINGS_CSV_URL)


In [6]:
# Number of distinct user ids present in the ratings table.
user_count = ratings["user_id"].nunique()
print("number of users: ", user_count)

number of users:  53424


In [None]:
# Show the column names of the books table.
books.columns

Index(['index', 'authors', 'average_rating', 'best_book_id', 'book_id',
       'books_count', 'description', 'genres', 'goodreads_book_id',
       'image_url', 'isbn', 'isbn13', 'language_code',
       'original_publication_year', 'original_title', 'pages', 'publishDate',
       'ratings_1', 'ratings_2', 'ratings_3', 'ratings_4', 'ratings_5',
       'ratings_count', 'small_image_url', 'title', 'work_id',
       'work_ratings_count', 'work_text_reviews_count', 'authors_2'],
      dtype='object')

In [7]:
# Narrow the books table to the columns listed here; .copy() makes the result
# an independent frame rather than a slice of the original.
kept_columns = [
    "book_id",
    "title",
    "description",
    "authors",
    "genres",
    "average_rating",
    "ratings_count",
]
books = books.loc[:, kept_columns].copy()

In [None]:
# Preview the first rows of the books table.
books.head()

Unnamed: 0,book_id,title,description,authors,genres,average_rating,ratings_count
0,1,"The Hunger Games (The Hunger Games, #1)",WINNING MEANS FAME AND FORTUNE.LOSING MEANS CE...,[Suzanne Collins],"[young-adult, fiction, fantasy, science-fictio...",4.34,4780653
1,2,Harry Potter and the Sorcerer's Stone (Harry P...,Harry Potter's life is miserable. His parents ...,"[J.K. Rowling, Mary GrandPr√©]","[fantasy, fiction, young-adult, classics]",4.44,4602479
2,3,"Twilight (Twilight, #1)",About three things I was absolutely positive.\...,[Stephenie Meyer],"[young-adult, fantasy, romance, fiction, paran...",3.57,3866839
3,4,To Kill a Mockingbird,The unforgettable novel of a childhood in a sl...,[Harper Lee],"[classics, fiction, historical-fiction, young-...",4.25,3198671
4,5,The Great Gatsby,Alternate Cover Edition ISBN: 0743273567 (ISBN...,[F. Scott Fitzgerald],"[classics, fiction, historical-fiction, romance]",3.89,2683664


In [None]:
# Preview the first rows of the ratings table.
ratings.head()

Unnamed: 0,user_id,book_id,rating
0,1,258,5
1,2,4081,4
2,2,260,5
3,2,9296,5
4,2,2318,3


In [4]:
# Turn the ratings frame into a tf.data pipeline: every element is a dict with
# one scalar tensor per column.
ratings_tf = tf.data.Dataset.from_tensor_slices(ratings.to_dict(orient="list"))

# Print the first three elements as a sanity check.
for sample in ratings_tf.take(3):
    print(sample)

{'user_id': <tf.Tensor: shape=(), dtype=int32, numpy=1>, 'book_id': <tf.Tensor: shape=(), dtype=int32, numpy=258>, 'rating': <tf.Tensor: shape=(), dtype=int32, numpy=5>}
{'user_id': <tf.Tensor: shape=(), dtype=int32, numpy=2>, 'book_id': <tf.Tensor: shape=(), dtype=int32, numpy=4081>, 'rating': <tf.Tensor: shape=(), dtype=int32, numpy=4>}
{'user_id': <tf.Tensor: shape=(), dtype=int32, numpy=2>, 'book_id': <tf.Tensor: shape=(), dtype=int32, numpy=260>, 'rating': <tf.Tensor: shape=(), dtype=int32, numpy=5>}


2025-09-08 13:57:36.854256: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


**Query Tower**

User model representing a user and their reading history.

In [8]:

class UserTower(tf.keras.Model):
    """Query tower: turns a user's rated-book history into one embedding.

    Each book in the history is mapped to a frozen, pre-computed embedding,
    the user's mean-centred rating for that book is appended as one extra
    feature, the sequence goes through self-attention, is averaged over the
    sequence axis and projected to `user_emb_dim`.

    Args:
        unique_book_ids: vocabulary passed to the `IntegerLookup` layer.
        num_tokens: number of rows of the embedding table (`input_dim`).
        embedding_matrix: 2-D array used as constant, non-trainable
            initial value of the embedding table.
        user_emb_dim: size of the returned user embedding.
        dropout_rate: dropout rate used in the final projection.
        pad_id: book id that marks padded history positions (the training
            pipeline pads histories with 0). Padded positions are excluded
            from the rating mean, the attention keys and the pooling.
            Pass `None` to disable masking.
    """

    def __init__(self, unique_book_ids, num_tokens, embedding_matrix,
                 user_emb_dim=32, dropout_rate=0.2, pad_id=0):
        super().__init__()
        self.pad_id = pad_id

        # NOTE(review): `oov_token` of IntegerLookup is the *value* treated as
        # out-of-vocabulary, not an output index - confirm that
        # `num_tokens - 1` is the intended sentinel and that the resulting
        # indices line up with the rows of `embedding_matrix`.
        self.book_embedding = tf.keras.Sequential([
            tf.keras.layers.IntegerLookup(
                vocabulary=unique_book_ids,
                oov_token=num_tokens - 1,
                mask_token=None
            ),
            tf.keras.layers.Embedding(
                input_dim=num_tokens,
                output_dim=embedding_matrix.shape[1],
                embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
                trainable=False,  # keep the pre-computed vectors frozen
            )
        ])

        # NOTE(review): `key_dim` is the size of *each* attention head, so this
        # allocates 8 heads of (embedding width + 1) dimensions - confirm that
        # this is intended rather than the total width.
        self.attention = tf.keras.layers.MultiHeadAttention(
            num_heads=8,
            key_dim=embedding_matrix.shape[1] + 1,
        )
        self.pooling = tf.keras.layers.GlobalAveragePooling1D()

        self.final_layers = tf.keras.Sequential([
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dropout(dropout_rate),
            tf.keras.layers.Dense(user_emb_dim)
        ])

    def call(self, inputs):
        """Encode a batch of histories.

        Args:
            inputs: dict with "user_history" (batch, seq_len) book ids and
                "history_ratings" (batch, seq_len) ratings.

        Returns:
            Tensor of shape (batch, user_emb_dim).
        """
        user_history = inputs["user_history"]
        history_ratings = inputs["history_ratings"]

        # True for real history entries, False for padding.
        if self.pad_id is None:
            valid = tf.ones_like(user_history, dtype=tf.bool)
        else:
            valid = tf.not_equal(user_history, self.pad_id)
        valid_f = tf.cast(valid, history_ratings.dtype)  # (batch, seq_len)

        history_emb = self.book_embedding(user_history)  # (batch, seq_len, emb)

        # Mean-centre the ratings using only the real entries; the count is
        # clamped to 1 so an all-padding row cannot divide by zero here.
        n_valid = tf.maximum(tf.reduce_sum(valid_f, axis=1, keepdims=True), 1.0)
        mean_rating = tf.reduce_sum(history_ratings * valid_f, axis=1, keepdims=True) / n_valid
        norm_ratings = (history_ratings - mean_rating) * valid_f  # padding stays 0
        ratings_expanded = tf.expand_dims(norm_ratings, -1)  # (batch, seq_len, 1)
        concat_input = tf.concat([history_emb, ratings_expanded], axis=-1)

        # (batch, 1, seq_len): every query position may only attend to real keys.
        attn_out = self.attention(
            concat_input,
            concat_input,
            attention_mask=valid[:, tf.newaxis, :],
        )

        # Average over real positions only.
        pooled = self.pooling(attn_out, mask=valid)

        return self.final_layers(pooled)


**Testing user tower**

In [12]:
if __name__ == "__main__":
    import numpy as np
    import tensorflow as tf

    # A single toy user: three book ids and the rating given to each of them.
    data = {
        "user_history": [1, 2, 3],
        "history_ratings": [4, 5, 3],
    }

    # Pre-computed book embeddings; one row per token of the embedding table.
    embedding_matrix = np.load("../src/data/embeddings_matrix.npy")
    num_books = embedding_matrix.shape[0]
    embedding_dim = embedding_matrix.shape[1]
    unique_book_ids = np.arange(num_books - 1)

    user_tower = UserTower(
        unique_book_ids=unique_book_ids,
        num_tokens=num_books,
        embedding_matrix=embedding_matrix,
    )

    # Add a leading batch axis so the tower sees a batch of one user.
    history_ids = tf.constant(data["user_history"], dtype=tf.int32)
    history_scores = tf.constant(data["history_ratings"], dtype=tf.float32)
    example = {
        "user_history": tf.expand_dims(history_ids, axis=0),
        "history_ratings": tf.expand_dims(history_scores, axis=0),
    }

    user_embeddings = user_tower(example)

    print("\nUser embeddings shape:", user_embeddings.shape)
    print("User embeddings:", user_embeddings.numpy())



User embeddings shape: (1, 32)
User embeddings: [[-9.9997129e-04 -1.7348172e-04  3.2667897e-04 -2.4285991e-04
  -4.4389372e-04  7.6807063e-04  1.0735486e-03  1.1163257e-04
  -9.2693226e-04 -1.8707567e-04  5.2680302e-04  4.9088581e-04
  -3.3586338e-04  1.5380569e-03 -2.1850027e-04  1.9180549e-03
   1.6392160e-03 -1.0483534e-03  2.8725556e-04  1.5963594e-04
  -7.3438301e-04  7.1609538e-05 -7.0786203e-04  7.1581890e-05
  -1.0429180e-03  7.2478200e-04  3.6984694e-04 -5.5364775e-04
  -6.6615554e-05 -1.5791173e-03 -1.5890929e-04  1.2531534e-03]]


**Candidate Tower**

Book model representing a book and its features.

In [None]:
class ItemTower(tf.keras.Model):
    """Candidate tower: turns a book id and its average rating into one embedding.

    The book id is mapped to a frozen, pre-computed embedding, the average
    rating is appended as one extra feature and the result is projected to
    `user_emb_dim`.

    Args:
        unique_book_ids: vocabulary passed to the `IntegerLookup` layer.
        num_tokens: number of rows of the embedding table (`input_dim`).
        embedding_matrix: 2-D array used as constant, non-trainable
            initial value of the embedding table.
        user_emb_dim: size of the returned book embedding (named after the
            user embedding size it has to match for the dot product).
        dropout_rate: dropout rate used in the final projection.
    """

    def __init__(self, unique_book_ids, num_tokens, embedding_matrix, user_emb_dim=32, dropout_rate=0.2):
        super().__init__()
        self.vocabulary = unique_book_ids
        self.embedding_matrix = embedding_matrix

        # Use the constructor argument here: the previous `num_books - 1` read
        # a notebook global and only worked when a cell defining it had
        # already been run (and happened to hold the same value).
        # NOTE(review): `oov_token` of IntegerLookup is the *value* treated as
        # out-of-vocabulary, not an output index - confirm the intent.
        self.book_embedding = tf.keras.Sequential([
            tf.keras.layers.IntegerLookup(
                vocabulary=unique_book_ids,
                oov_token=num_tokens - 1,
                mask_token=None
            ),
            tf.keras.layers.Embedding(
                input_dim=num_tokens,
                output_dim=embedding_matrix.shape[1],
                embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
                trainable=False,  # keep the pre-computed vectors frozen
            )
        ])

        self.final_layers = tf.keras.Sequential([
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dropout(dropout_rate),
            tf.keras.layers.Dense(user_emb_dim)
        ])

    def call(self, inputs):
        """Encode a batch of books.

        Args:
            inputs: dict with "book_id" (batch,) ids and "avg_rating"
                (batch,) average ratings.

        Returns:
            Tensor of shape (batch, user_emb_dim).
        """
        item_embedding = self.book_embedding(inputs["book_id"])
        # (batch,) -> (batch, 1) so it can be concatenated as one feature.
        rating = tf.expand_dims(inputs["avg_rating"], -1)
        concat_input = tf.concat([item_embedding, rating], axis=-1)
        item_emb = self.final_layers(concat_input)
        return item_emb

**Testing candidate tower**

In [None]:
if __name__ == "__main__":
    # A single toy book: its id and an average rating.
    data = {
        "book_id": [1],
        "avg_rating": [4],
    }
    # Pre-computed book embeddings; one row per token of the embedding table.
    embedding_matrix = np.load("../src/data/embeddings_matrix.npy")
    num_books, embedding_dim = embedding_matrix.shape
    unique_book_ids = np.arange(num_books - 1)

    book_tower = ItemTower(
        unique_book_ids=unique_book_ids,
        num_tokens=num_books,
        embedding_matrix=embedding_matrix,
    )

    example = {
        "book_id": tf.constant(data["book_id"], dtype=tf.int32),
        "avg_rating": tf.constant(data["avg_rating"], dtype=tf.float32),
    }

    book_embeddings = book_tower(example)

    # "\B" is not a valid escape sequence and printed a literal backslash;
    # a newline was intended, matching the user tower test.
    print("\nBook embeddings shape:", book_embeddings.shape)
    print("Book embeddings:", book_embeddings.numpy())


\Book embeddings shape: (1, 32)
Book embeddings: [[ 0.09775109 -0.07769852  0.05263288 -0.03646014 -0.05195387  0.01323555
   0.11356245 -0.00553065 -0.06013871 -0.12366802 -0.13854872 -0.00403975
   0.02192115  0.24883085 -0.12797149  0.09379776  0.03131978 -0.03375912
  -0.01381343  0.14757746 -0.02357512  0.05439571 -0.1077126  -0.0903956
   0.11584894 -0.01130453 -0.06477856 -0.09171694  0.10833809 -0.02485837
   0.09170313 -0.1164398 ]]


In [None]:
# Lookup table from book id to title, used only for printing.
id_to_title = {
    book_id: title for book_id, title in zip(books['book_id'], books['title'])
}

# Book to find neighbours for.
# NOTE(review): the same integer is used as a book id (title lookup) and as a
# row index into `embedding_matrix` - confirm that rows are aligned with ids.
query_id = 5
query_title = id_to_title[query_id]
query_emb = embedding_matrix[query_id]

# Scale every row and the query to unit length so that a dot product is the
# cosine similarity.
row_norms = np.linalg.norm(embedding_matrix, axis=1, keepdims=True)
emb_norm = embedding_matrix / row_norms
query_emb_norm = query_emb / np.linalg.norm(query_emb)

cos_sim = emb_norm @ query_emb_norm

# Rows ordered from most to least similar, with the query row removed.
top_k = 5
ranked_rows = np.argsort(-cos_sim)
top_k_idx = ranked_rows[ranked_rows != query_id][:top_k]

print(f"Query book: {query_title}\n")
print("Top similar books:")
for row in top_k_idx:
    print(f"- {id_to_title[row]} (cosine similarity: {cos_sim[row]:.3f})")


Query book: The Great Gatsby

Top similar books:
- Babbitt (cosine similarity: 0.652)
- Jaws (cosine similarity: 0.635)
- David and Goliath: Underdogs, Misfits, and the Art of Battling Giants (cosine similarity: 0.548)
- The House of Thunder (cosine similarity: 0.543)
- Gone with the Wind (cosine similarity: 0.531)


**Two Tower Model**

In [28]:
class BookRetrievalModel(tfrs.models.Model):
    """Two-tower retrieval model combining a user tower and an item tower.

    Args:
        user_tower: model mapping {"user_history", "history_ratings"} to
            user embeddings.
        item_tower: model mapping {"book_id", "avg_rating"} to book
            embeddings of the same size.
        candidates_ds: batched dataset of item-tower inputs; it is mapped
            through `item_tower` to provide the candidates of the
            `FactorizedTopK` metric.
    """

    def __init__(self, user_tower, item_tower, candidates_ds):
        super().__init__()
        self.user_tower = user_tower
        self.item_tower = item_tower

        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=candidates_ds.map(item_tower)
            )
        )

    def call(self, features, training=None):
        """Return `(user_embeddings, book_embeddings)` for one batch.

        Args:
            features: dict with "user_history", "history_ratings",
                "book_id" and "avg_rating".
            training: forwarded to both towers so that their Dropout
                layers know whether they run in training mode.
        """
        user_embeddings = self.user_tower({
            "user_history": features["user_history"],
            "history_ratings": features["history_ratings"]
        }, training=training)

        book_embeddings = self.item_tower({
            "book_id": features["book_id"],
            "avg_rating": features["avg_rating"]
        }, training=training)

        return user_embeddings, book_embeddings

    def compute_loss(self, features, training=False):
        """Compute the retrieval loss of `self.task` for one batch.

        The `training` flag is passed on to the forward pass; previously it
        was dropped, which left the towers' Dropout layers in inference mode
        during `fit`.
        """
        user_embeddings, book_embeddings = self(features, training=training)
        return self.task(user_embeddings, book_embeddings)


Training

In [11]:
# Attach each book's average rating to its rating rows.
# Dropping a pre-existing "average_rating" column first keeps this cell
# re-runnable: without it a second execution merges into a frame that already
# has the column, producing "average_rating_x"/"average_rating_y" and a
# KeyError a few lines below.
ratings = ratings.drop(columns=["average_rating"], errors="ignore").merge(
    books[["book_id", "average_rating"]],
    on="book_id",
    how="left"
)

# One dataset element per rating row, with explicit dtypes.
ratings_tf = tf.data.Dataset.from_tensor_slices({
    "user_id": ratings["user_id"].values.astype("int32"),
    "book_id": ratings["book_id"].values.astype("int32"),
    "rating":  ratings["rating"].values.astype("float32"),
    "avg_rating": ratings["average_rating"].values.astype("float32"),
})

# Split by user: the first 80% of the distinct user ids (in order of first
# appearance) go to training, the remainder to validation.
users = ratings["user_id"].unique()
train_cut = int(0.8 * len(users))
train_users = tf.constant(users[:train_cut], dtype=tf.int32)
val_users   = tf.constant(users[train_cut:], dtype=tf.int32)

# Membership test used with Dataset.filter: the element's scalar user_id is
# compared against every id of the split.
train_filter = lambda x: tf.reduce_any(tf.equal(x["user_id"], train_users))
val_filter   = lambda x: tf.reduce_any(tf.equal(x["user_id"], val_users))

# book_id -> average rating lookup table (0.0 for unknown ids).
# NOTE(review): not referenced by the other cells visible here - confirm
# whether it is still needed.
book_avg = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        keys=books["book_id"].values.astype("int32"),
        values=books["average_rating"].values.astype("float32"),
    ),
    default_value=0.0,
)

def group_by_user(ds, window_size=1000):
    """Group the rating elements of `ds` into per-user batches.

    Uses `Dataset.group_by_window`, the replacement named by the deprecation
    notice of `tf.data.experimental.group_by_window`.

    Args:
        ds: dataset of dicts that contain a scalar "user_id".
        window_size: maximum number of elements with the same user id that
            are combined into one batch (default 1000, as before).

    Returns:
        Dataset whose elements are dicts of batched tensors, one batch per
        window of a single user.
    """
    return ds.group_by_window(
        # group_by_window requires an int64 key.
        key_func=lambda x: tf.cast(x["user_id"], tf.int64),
        reduce_func=lambda _, window: window.batch(window_size),
        window_size=window_size,
    )

def make_example(batch):
    """Turn one user's batched ratings into at most one training example.

    The last book of the batch becomes the target ("book_id" with its
    "avg_rating"); all earlier books and their ratings become the history.
    Batches with fewer than two ratings yield an empty dataset.

    Args:
        batch: dict of batched tensors with "book_id", "rating" and
            "avg_rating".

    Returns:
        A dataset holding zero or one example dict.
    """
    user_books = batch["book_id"]
    user_ratings = batch["rating"]
    num_rated = tf.shape(user_books)[0]

    def build_example():
        # Everything but the last entry is history; the last entry is the target.
        features = {
            "user_history": user_books[:-1],
            "history_ratings": user_ratings[:-1],
            "book_id": user_books[-1],
            "avg_rating": batch["avg_rating"][-1],
        }
        return tf.data.Dataset.from_tensors(features)

    def skip_user():
        # Same structure and dtypes as a real example so both tf.cond branches
        # agree; take(0) then leaves the dataset empty.
        placeholder = {
            "user_history": tf.constant([], dtype=tf.int32),
            "history_ratings": tf.constant([], dtype=tf.float32),
            "book_id": tf.constant(0, dtype=tf.int32),
            "avg_rating": tf.constant(0.0, dtype=tf.float32),
        }
        return tf.data.Dataset.from_tensors(placeholder).take(0)

    return tf.cond(num_rated < 2, true_fn=skip_user, false_fn=build_example)


# Variable-length histories are padded per batch; the target fields are scalars.
EXAMPLE_PADDED_SHAPES = {
    "user_history": [None],
    "history_ratings": [None],
    "book_id": [],
    "avg_rating": [],
}


def _build_example_batches(user_filter):
    """Filter ratings to one user split and emit padded batches of 128 examples."""
    per_user = group_by_user(ratings_tf.filter(user_filter))
    examples = per_user.flat_map(make_example)
    return examples.padded_batch(128, padded_shapes=EXAMPLE_PADDED_SHAPES)


train_ds = _build_example_batches(train_filter)
val_ds = _build_example_batches(val_filter)

# Every book with its average rating, batched as input for the item tower.
candidate_features = {
    "book_id": books["book_id"].values.astype("int32"),
    "avg_rating": books["average_rating"].values.astype("float32"),
}
candidates_ds = tf.data.Dataset.from_tensor_slices(candidate_features).batch(128)

# Print the tensor shapes of the first training batch as a sanity check.
for batch in train_ds.take(1):
    print({k: v.shape for k, v in batch.items()})


Instructions for updating:
Use `tf.data.Dataset.group_by_window(...)`.
{'user_history': TensorShape([128, 175]), 'history_ratings': TensorShape([128, 175]), 'book_id': TensorShape([128]), 'avg_rating': TensorShape([128])}


2025-09-08 21:06:32.345613: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [None]:
# Inspect the first example of the first training batch.
for batch in train_ds.take(1):
    print("Batch keys:", batch.keys())

    first_history = batch['user_history'][0]
    print("\nUser history (first example):")
    print(first_history.numpy())
    print("Shape:", first_history.shape)

    first_ratings = batch['history_ratings'][0]
    print("\nHistory ratings (first example):")
    print(first_ratings.numpy())
    print("Shape:", first_ratings.shape)

    print("\nTarget book ID (first example):")
    print(batch['book_id'][0].numpy())

    print("\nTarget book avg rating (first example):")
    print(batch['avg_rating'][0].numpy())


Batch keys: dict_keys(['user_history', 'history_ratings', 'book_id', 'avg_rating'])

User history (first example):
[ 258  268 5556 3638 1796  867   47 2738 4691  238 2063  916 4614  111
   11 1644 3889  136 6665  150   35   33   60  148   10   94    4  492
   57 1521   70   42  103   36  138  119   32   13   66 3406 2002   43
  287 1041   45   38   67   46   22  115   31   16  132   40  407  256
  273  378  329   98  216 1176  140  869 2679 1310  414   54   85  219
  177  109  131  102   95  225   76  100  171  179  255  485  325  498
  323  162   72  233  496  306  354 1030 1055 2770  198 1761 1942  128
   81 5191 1187 2535 3294 4893 1180 6285 2133 1011  262  437  421  143
  142  642  901  212    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0   

In [12]:
import mlflow
# Point the MLflow client at the remote tracking server.
mlflow.set_tracking_uri("http://ec2-13-61-4-77.eu-north-1.compute.amazonaws.com:5000/")

In [13]:
# Make "TTM Baseline" the active experiment for the runs started below.
mlflow.set_experiment("TTM Baseline")

<Experiment: artifact_location='s3://mlflow-artifacts-123456789012-us-east-1/1', creation_time=1757249478539, experiment_id='1', last_update_time=1757249478539, lifecycle_stage='active', name='TTM Baseline', tags={}>

In [16]:
!pip install boto3



In [17]:
!pip install awscli



In [18]:
import dotenv
# Load the variables defined in ../.env into the process environment.
# NOTE(review): presumably this is where the AWS credentials for the MLflow
# artifact store come from - confirm.
dotenv.load_dotenv("../.env")

True

In [14]:
# NOTE(review): `aws configure` prompts for input; the recorded output shows
# the prompt being interrupted (^C), so this cell does not complete on its own.
!aws configure

AWS Access Key ID [None]: ^C


In [None]:
import numpy as np
import mlflow
import mlflow.tensorflow
import tensorflow as tf
import matplotlib.pyplot as plt

# Pre-computed book embeddings; one row per token of the embedding table.
embedding_matrix = np.load("../src/data/embeddings_matrix.npy")
num_books, embedding_dim = embedding_matrix.shape
unique_book_ids = np.arange(num_books - 1)

# Both towers are built from the same vocabulary and embedding table.
shared_tower_args = {
    "unique_book_ids": unique_book_ids,
    "num_tokens": num_books,
    "embedding_matrix": embedding_matrix,
}
user_tower = UserTower(**shared_tower_args)
item_tower = ItemTower(**shared_tower_args)
# Bounds of the rating scale used for min-max scaling.
min_rating = 1.0
max_rating = 5.0


def normalize_rating(batch):
    """Min-max scale `batch["avg_rating"]` using `min_rating` / `max_rating`.

    The value is overwritten in `batch`, and the same dict is returned.
    """
    rating_span = max_rating - min_rating
    shifted = batch["avg_rating"] - min_rating
    batch["avg_rating"] = shifted / rating_span
    return batch

# Scale avg_rating in all three datasets.
# NOTE(review): the datasets are rebound to their normalised versions, so
# running this cell a second time applies the scaling twice.
train_ds, val_ds, candidates_ds = (
    dataset.map(normalize_rating)
    for dataset in (train_ds, val_ds, candidates_ds)
)

model = BookRetrievalModel(user_tower, item_tower, candidates_ds)

# NOTE(review): BookRetrievalModel.compute_loss takes its loss from
# `self.task`; confirm whether the `loss` and `metrics` given to compile()
# below have any effect for this model.
compile_metrics = [
    tf.keras.metrics.Precision(name="precision"),
    tf.keras.metrics.Recall(name="recall"),
    tf.keras.metrics.Accuracy(name="accuracy"),
]
model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
    loss="sparse_categorical_crossentropy",
    metrics=compile_metrics,
)


def _log_epoch_metrics(epoch, logs):
    """Send every entry of the epoch's `logs` to MLflow, using the epoch as step."""
    for name, value in logs.items():
        mlflow.log_metric(name, value, step=epoch)


mlflow_callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=_log_epoch_metrics)

def compute_recall_at_k(model, test_users, true_items, k=10):
    """Fraction of users whose held-out book is among their top-k candidates.

    Candidate embeddings are computed from the notebook-level `candidates_ds`
    with `model.item_tower`; users are scored by dot product.

    Args:
        model: object exposing `user_tower` and `item_tower`.
        test_users: list of user-feature batches, each a dict with
            "user_history" and "history_ratings"; a single dict is treated
            as one batch.
        true_items: held-out book id of every user, concatenated in the same
            order as the users appear in `test_users`.
        k: number of top-scoring candidates considered per user.

    Returns:
        Recall@k as a float in [0, 1]; 0.0 when there are no users.
    """
    if isinstance(test_users, dict):
        test_users = [test_users]
    true_items = np.asarray(true_items).reshape(-1)

    # Embed every candidate once and remember which book id each row belongs to.
    candidate_ids = []
    candidate_embs = []
    for candidate_batch in candidates_ds:
        candidate_ids.append(candidate_batch["book_id"].numpy())
        candidate_embs.append(model.item_tower(candidate_batch))
    candidate_ids = np.concatenate(candidate_ids)
    item_embs = tf.concat(candidate_embs, axis=0)  # (n_items, emb_dim)

    hits = 0
    n_users = 0
    for user_batch in test_users:
        user_embs = model.user_tower(user_batch)  # (batch, emb_dim)
        scores = tf.linalg.matmul(user_embs, item_embs, transpose_b=True)
        top_rows = tf.math.top_k(scores, k=k).indices.numpy()  # rows of item_embs
        top_ids = candidate_ids[top_rows]  # translate rows to book ids, (batch, k)

        batch_true = true_items[n_users:n_users + len(top_ids)]
        hits += int(np.any(top_ids == batch_true[:, None], axis=1).sum())
        n_users += len(top_ids)

    return hits / n_users if n_users else 0.0


with mlflow.start_run(run_name="two_tower_model_experiment"):

    epochs = 5

    mlflow.log_param("optimizer", "Adagrad")
    mlflow.log_param("learning_rate", 0.1)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("loss", "sparse_categorical_crossentropy")
    mlflow.log_param("embedding_dim", embedding_dim)
    mlflow.log_param("num_books", num_books)

    model.fit(train_ds, epochs=epochs, callbacks=[mlflow_callback], validation_data=val_ds)

    mlflow.tensorflow.log_model(model, artifact_path="two_tower_model")

    # val_ds yields feature dicts; keep the user features batch by batch
    # (history length differs between batches) and flatten the target ids.
    test_users = []
    test_true_items = []
    for val_batch in val_ds:
        test_users.append({
            "user_history": val_batch["user_history"],
            "history_ratings": val_batch["history_ratings"],
        })
        test_true_items.append(val_batch["book_id"].numpy())

    if test_true_items:
        test_true_items = np.concatenate(test_true_items)
    else:
        test_true_items = np.array([], dtype="int32")

    ks = [5, 10, 20]
    recalls = []
    for k in ks:
        recall = compute_recall_at_k(model, test_users, test_true_items, k)
        recalls.append(recall)
        mlflow.log_metric(f"recall@{k}", recall)

    fig, ax = plt.subplots()
    ax.plot(ks, recalls, marker="o")
    ax.set_title("Recall@K")
    ax.set_xlabel("K")
    ax.set_ylabel("Recall")
    fig.savefig("recall_at_k.png")
    plt.close(fig)
    mlflow.log_artifact("recall_at_k.png")

    print("MLflow run completed. Run ID:", mlflow.active_run().info.run_id)


1. The `call()` method of your layer may be crashing. Try to `__call__()` the layer eagerly on some test input first to see if it works. E.g. `x = np.random.random((3, 4)); y = layer(x)`
2. If the `call()` method is correct, then you may need to implement the `def build(self, input_shape)` method on your layer. It should create all variables used by the layer (e.g. by calling `layer.build()` on all its children layers).
Exception encountered: ''module 'keras' has no attribute 'KerasTensor'''


AttributeError: Exception encountered when calling ItemTower.call().

[1mmodule 'keras' has no attribute 'KerasTensor'[0m

Arguments received by ItemTower.call():
  ‚Ä¢ inputs={'book_id': 'tf.Tensor(shape=(None,), dtype=int32)', 'avg_rating': 'tf.Tensor(shape=(None,), dtype=float32)'}

Inference