In [1]:
import gzip
import json
import math
from collections import defaultdict
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Multiply
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder




In [2]:
path = 'goodreads_interactions_poetry.json.gz'

dataset = []

with gzip.open(path, 'rt', encoding='utf8') as file:
    # Process the file line by line
    for line in file:
        # Decode each line from JSON format
        json_content = json.loads(line)
        dataset.append(json_content)


In [97]:
path = 'goodreads_books_poetry.json.gz'

info = []

with gzip.open(path, 'rt', encoding='utf8') as file:
    # Process the file line by line
    for line in file:
        # Decode each line from JSON format
        json_content = json.loads(line)
        info.append(json_content)


In [100]:
bookInfo = {}

In [101]:
for i in info:
    book_id, title = i['book_id'], i['title']
    bookInfo[book_id] = title

In [125]:
usersPerItem = defaultdict(set)
itemsPerUser = defaultdict(set)
items = []
users = []
ratings = []
ratingDict = {}

In [127]:
for d in dataset:
    user,item, rating = d['user_id'], d['book_id'], d['rating']
    usersPerItem[item].add(user)
    itemsPerUser[user].add(item)
    ratingDict[(user, item)] = rating
    items.append(item)
    users.append(user)
    ratings.append(rating)

In [31]:
uniqueItems = list(set(items))
uniqueUsers = list(set(users))

In [34]:
# Create user and item indices
user_index = {user: i for i, user in enumerate(uniqueUsers)}
item_index = {item: i for i, item in enumerate(uniqueItems)}

# Convert reviews to numerical data
user_ids = [user_index[d['user_id']] for d in dataset]
item_ids = [item_index[d['book_id']] for d in dataset]

In [116]:
def create_ncf_model_with_gmf(num_users, num_items, embedding_size=64, hidden_units=(64, 32)):
    user_input = Input(shape=(1,), name='user_input')
    item_input = Input(shape=(1,), name='item_input')

    user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
    item_embedding = Embedding(input_dim=num_items, output_dim=embedding_size)(item_input)

    # GMF
    gmf_layer = Multiply()([user_embedding, item_embedding])

    user_flat = Flatten()(user_embedding)
    item_flat = Flatten()(item_embedding)
    gmf_flat = Flatten()(gmf_layer)

    # MLP
    mlp_output = Concatenate()([user_flat, item_flat])
    for units in hidden_units:
        mlp_output = Dense(units, activation='relu')(mlp_output)

    # CONCAT
    concat = Concatenate()([mlp_output, gmf_flat])

    output = Dense(1, activation='relu')(concat)

    model = Model(inputs=[user_input, item_input], outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

    return model



In [117]:
# TRAIN AND EVAL
user_ids = np.array(user_ids)
item_ids = np.array(item_ids)
ratings = np.array(ratings)

ncf_model_with_gmf = create_ncf_model_with_gmf(len(user_ids), len(item_ids))

In [118]:
ncf_model_with_gmf.fit([user_ids, item_ids], ratings, epochs=5, batch_size=10000, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1f8db848b50>

In [119]:
ncf_model_with_gmf.save("leaky relu neumf.keras")

In [120]:
ncf_model_with_gmf = tf.keras.models.load_model('neumf.keras')

In [206]:
def cold_start(top_n = 10):
    recs = []

    itemsToPredict = []
    usersToPredict = []

    for i in range(len(uniqueItems)):
        itemsToPredict.append(i)

    for i in itemsToPredict:
        usersToPredict.append(500000)

    itemsToPredict = np.array(itemsToPredict)
    usersToPredict = np.array(usersToPredict)
    prediction = ncf_model_with_gmf.predict([usersToPredict, itemsToPredict])
    
    for i in range(len(prediction)):
        recs.append((uniqueItems[itemsToPredict[i]], prediction[i]))
    recs.sort(key=lambda x: x[1], reverse=True)

    print(f"Top {top_n} Recommendations for User")
    for i, (recommended_item, predicted_rating) in enumerate(recs[:top_n]):
        print(f"{i + 1}. Book: {bookInfo[recommended_item]}")

In [197]:
def display_recommendations(user, top_n=10):

    recs = []
    knownItems = itemsPerUser[user]
    
    user_id = user_index[user]
    itemsToPredict = []
    usersToPredict = []

    for i in range(len(uniqueItems)):
        if uniqueItems[i] not in knownItems:
            itemsToPredict.append(i)

    for i in itemsToPredict:
        usersToPredict.append(user_id)

    itemsToPredict = np.array(itemsToPredict)
    usersToPredict = np.array(usersToPredict)
    prediction = ncf_model_with_gmf.predict([usersToPredict, itemsToPredict])
    
    for i in range(len(prediction)):
        recs.append((uniqueItems[itemsToPredict[i]], prediction[i]))
    recs.sort(key=lambda x: x[1], reverse=True)

    print("User's has", len(knownItems),"ratings.")

    if (len(knownItems) > 5):
        print("Here are five of them:")
        sampleItems = list(knownItems)[0:5]
        for i in sampleItems:
            print(f"{bookInfo[i]}: {ratingDict[(user, i)]}")
    else:
        for i in knownItems:
            print(f"Book: {bookInfo[i]}, Rating: {ratingDict[(user, i)]}")

    print("\n")
    print(f"Top {top_n} Recommendations for User")
    for i, (recommended_item, predicted_rating) in enumerate(recs[:top_n]):
        print(f"{i + 1}. Book: {bookInfo[recommended_item]}")

In [208]:
display_recommendations('8842281e1d1347389f2ab93d60773d4d')

User's has 3 ratings.
Book: The Iliad, Rating: 4
Book: Where the Sidewalk Ends, Rating: 5
Book: The Odyssey, Rating: 4


Top 10 Recommendations for User
1. Book: The Ashgate Research Companion to Thomas Hardy
2. Book: Catmas Carols, revised edition
3. Book: Een vijver vol inkt
4. Book: The Short Fiction of Edgar Allan Poe
5. Book: Sonety / The Sonnets
6. Book: La Vida Es Sueño
7. Book: The Man From Snowy River
8. Book: My Planet of Kites
9. Book: No te des por vencido
10. Book: خذني إلى المسجد الأقصى


In [209]:
cold_start()

Top 10 Recommendations for User
1. Book: Stripped: A Collection of Inspired Writings for the Evolving Woman
2. Book: Haiku for the Single Girl
3. Book: The Canti: With a Selection of His Prose
4. Book: The Ashgate Research Companion to Thomas Hardy
5. Book: KarnaKavita
6. Book: Black Movie
7. Book: My Life By Water: Collected Poems, 1936 1968
8. Book: Money Poems
9. Book: Acorn
10. Book: The Poetical Works of Thomas Lovell Beddoes


In [210]:
display_recommendations('0d9674945bb29a45b5473e67c5b7208c')

User's has 20 ratings.
Here are five of them:
Sand and Foam: 5
The Book of Questions: 5
Odes to Common Things: 3
Leaves of Grass: 4
The Selected Poems: 5


Top 10 Recommendations for User
1. Book: The Ashgate Research Companion to Thomas Hardy
2. Book: The Collected Poetry, 1968-1998
3. Book: Orchards: A Sequence of French Poems
4. Book: المختار من شعر أبو القاسم الشابي
5. Book: Just Around the Corner: Poems
6. Book: Of Snails and Skylarks
7. Book: Poesia de Ricardo Reis Obra Essencial de Fernando Pessoa, #5)
8. Book: La Légende des Siècles
9. Book: El hacedor
10. Book: ملحمة كلكامش


In [140]:
leaky_rl = tf.keras.models.load_model('leaky relu neumf.keras')

In [143]:
def leaky_recommendations(user, top_n=5):

    recs = []
    knownItems = itemsPerUser[user]
    
    user_id = user_index[user]
    itemsToPredict = []
    usersToPredict = []

    for i in range(len(uniqueItems)):
        if uniqueItems[i] in knownItems:
            continue
        itemsToPredict.append(i)

    for i in itemsToPredict:
        usersToPredict.append(user_id)

    itemsToPredict = np.array(itemsToPredict)
    usersToPredict = np.array(usersToPredict)
    prediction = leaky_rl.predict([usersToPredict, itemsToPredict])
    
    for i in range(len(prediction)):
        recs.append((uniqueItems[itemsToPredict[i]], prediction[i]))
    recs.sort(key=lambda x: x[1], reverse=True)

    print("User's current ratings:")

    for i in knownItems:
        print(f"Book: {bookInfo[i]}, Rating: {ratingDict[(user, i)]}")
        
    print(f"Top {top_n} Recommendations for User")
    for i, (recommended_item, predicted_rating) in enumerate(recs[:top_n]):
        print(f"{i + 1}. Book: {bookInfo[recommended_item]}")

In [144]:
leaky_recommendations('8842281e1d1347389f2ab93d60773d4d')

User's current ratings:
Book: The Iliad, Rating: 4
Book: Where the Sidewalk Ends, Rating: 5
Book: The Odyssey, Rating: 4
Top 5 Recommendations for User
1. Book: Free Verse
2. Book: Pole Dancing to Gospel Hymns
3. Book: Uitzicht met zandkorrel
4. Book: Þungi eyjunnar
5. Book: ප්‍රබුද්ධ
