In [1]:
import gzip
import json
import math
from collections import defaultdict
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Multiply
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder




In [2]:
path = 'goodreads_interactions_poetry.json.gz'

dataset = []

with gzip.open(path, 'rt', encoding='utf8') as file:
    # Process the file line by line
    for line in file:
        # Decode each line from JSON format
        json_content = json.loads(line)
        dataset.append(json_content)


In [29]:
usersPerItem = defaultdict(set)
itemsPerUser = defaultdict(set)
items = []
users = []
ratings = []

In [30]:
for d in dataset:
    user,item, rating = d['user_id'], d['book_id'], d['rating']
    usersPerItem[item].add(user)
    itemsPerUser[user].add(item)
    items.append(item)
    users.append(user)
    ratings.append(rating)

In [31]:
uniqueItems = list(set(items))
uniqueUsers = list(set(users))

In [34]:
# Create user and item indices
user_index = {user: i for i, user in enumerate(uniqueUsers)}
item_index = {item: i for i, item in enumerate(uniqueItems)}

# Convert reviews to numerical data
user_ids = [user_index[d['user_id']] for d in dataset]
item_ids = [item_index[d['book_id']] for d in dataset]

In [10]:
# ncf_model_with_gmf = tf.keras.models.load_model('ncf_with_gmf_model.h5')





In [19]:
def create_ncf_model_with_gmf(num_users, num_items, embedding_size=64, hidden_units=(64, 32)):
    user_input = Input(shape=(1,), name='user_input')
    item_input = Input(shape=(1,), name='item_input')

    user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
    item_embedding = Embedding(input_dim=num_items, output_dim=embedding_size)(item_input)

    # GMF
    gmf_layer = Multiply()([user_embedding, item_embedding])

    user_flat = Flatten()(user_embedding)
    item_flat = Flatten()(item_embedding)
    gmf_flat = Flatten()(gmf_layer)

    # MLP
    mlp_output = Concatenate()([user_flat, item_flat])
    for units in hidden_units:
        mlp_output = Dense(units, activation='relu')(mlp_output)

    # CONCAT
    concat = Concatenate()([mlp_output, gmf_flat])

    output = Dense(1, activation='relu')(concat)

    model = Model(inputs=[user_input, item_input], outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

    return model



In [35]:
# TRAIN AND EVAL
user_ids = np.array(user_ids)
item_ids = np.array(item_ids)
ratings = np.array(ratings)

ncf_model_with_gmf = create_ncf_model_with_gmf(len(user_ids), len(item_ids))

In [38]:
ncf_model_with_gmf.fit([user_ids, item_ids], ratings, epochs=10, batch_size=000, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1f79e9b0210>

In [39]:
ncf_model_with_gmf.save("neumf.keras")

In [94]:
def display_recommendations(user, top_n=5):

    recs = []
    knownItems = itemsPerUser[user]
    print(knownItems)
    user_id = user_index[user]
    itemsToPredict = []
    usersToPredict = []

    for i in range(len(uniqueItems)):
        if uniqueItems[i] in knownItems:
            continue
        itemsToPredict.append(i)

    for i in itemsToPredict:
        usersToPredict.append(user_id)

    itemsToPredict = np.array(itemsToPredict)
    usersToPredict = np.array(usersToPredict)
    prediction = ncf_model_with_gmf.predict([usersToPredict, itemsToPredict])
    
    for i in range(len(prediction)):
        recs.append((uniqueItems[itemsToPredict[i]], prediction[i]))
    recs.sort(key=lambda x: x[1], reverse=True)

    print(f"Top {top_n} Recommendations for User {user}:")
    for i, (recommended_item, predicted_rating) in enumerate(recs[:top_n]):
        print(f"{i + 1}. Item: {recommended_item}, Predicted Rating: {predicted_rating}")

In [95]:
display_recommendations('8842281e1d1347389f2ab93d60773d4d')

{'1376', '30119', '1384'}
Top 5 Recommendations for User 8842281e1d1347389f2ab93d60773d4d:
1. Item: 10164837, Predicted Rating: [6.9882984]
2. Item: 17553758, Predicted Rating: [6.7774725]
3. Item: 12973539, Predicted Rating: [6.5570173]
4. Item: 352003, Predicted Rating: [6.543495]
5. Item: 13605561, Predicted Rating: [6.5263596]


In [96]:
print(usersPerItem['10164837'])

{'435291afef0d8ef5533c251f1dcef038', '446a78a97c434dca98e7ea176ee9fa1f'}
