In [1]:
# Install required libraries if not already installed
!pip install tensorflow keras




In [10]:
!pip install h5py



In [28]:
import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import Embedding, Input, Dense, Flatten, Concatenate, Multiply
from keras.optimizers import Adam
from keras.regularizers import l2
import os
import time
from evaluate import evaluate_model  # Ensure evaluate_model.py is uploaded in the Colab environment
from Dataset import Dataset  # Ensure Dataset.py is uploaded in the Colab environment

#################### Arguments ####################
class Args:
    path = './Data/'
    dataset = 'ml-1m'
    epochs = 100
    batch_size = 256
    num_factors = 8
    layers = '[64,32,16,8]'
    reg_mf = 0
    reg_layers = '[0,0,0,0]'
    num_neg = 4
    lr = 0.001
    verbose = 1
    out = 1
    mf_pretrain = 'ml-1m_GMF_model.h5'
    mlp_pretrain = 'ml-1m_MLP_model.h5'

args = Args()

#################### Model Definition ####################
def get_model(num_users, num_items, mf_dim=8, layers=[64, 32, 16, 8], reg_layers=[0, 0, 0, 0], reg_mf=0):
    # Input layers
    user_input = Input(shape=(1,), dtype='int32', name='user_input')
    item_input = Input(shape=(1,), dtype='int32', name='item_input')

    # Embedding layers
    MF_Embedding_User = Embedding(input_dim=num_users, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf), input_length=1)
    MF_Embedding_Item = Embedding(input_dim=num_items, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf), input_length=1)

    MLP_Embedding_User = Embedding(input_dim=num_users, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]), input_length=1)
    MLP_Embedding_Item = Embedding(input_dim=num_items, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]), input_length=1)

    # MF part
    mf_user_latent = Flatten()(MF_Embedding_User(user_input))
    mf_item_latent = Flatten()(MF_Embedding_Item(item_input))
    mf_vector = Multiply()([mf_user_latent, mf_item_latent])

    # MLP part
    mlp_user_latent = Flatten()(MLP_Embedding_User(user_input))
    mlp_item_latent = Flatten()(MLP_Embedding_Item(item_input))
    mlp_vector = Concatenate()([mlp_user_latent, mlp_item_latent])
    for idx in range(1, len(layers)):
        mlp_vector = Dense(layers[idx], activation='relu', kernel_regularizer=l2(reg_layers[idx]))(mlp_vector)

    # Concatenate MF and MLP parts
    predict_vector = Concatenate()([mf_vector, mlp_vector])

    # Prediction layer
    prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name="prediction")(predict_vector)

    model = Model(inputs=[user_input, item_input], outputs=prediction)
    return model

#################### Training & Evaluation ####################
if __name__ == '__main__':
    # Load Dataset
    dataset = Dataset(args.path + args.dataset)
    train, testRatings, testNegatives = dataset.trainMatrix, dataset.testRatings, dataset.testNegatives
    num_users, num_items = train.shape

    # Load pre-trained models if available
    gmf_model = tf.keras.models.load_model(args.mf_pretrain) if os.path.exists(args.mf_pretrain) else None
    mlp_model = tf.keras.models.load_model(args.mlp_pretrain) if os.path.exists(args.mlp_pretrain) else None

    # Initialize the NeuMF model
    model = get_model(num_users, num_items, args.num_factors, eval(args.layers), eval(args.reg_layers), args.reg_mf)

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=args.lr), loss='binary_crossentropy')

    # Initial performance evaluation
    print("Evaluating initial model...")
    hits, ndcgs = evaluate_model(model, testRatings, testNegatives, topK=10, evaluation_threads=1)
    hr, ndcg = np.mean(hits), np.mean(ndcgs)
    print(f'Init: HR = {hr:.4f}, NDCG = {ndcg:.4f}')
    best_hr, best_ndcg, best_iter = hr, ndcg, -1

    # Training loop
    for epoch in range(args.epochs):
        t1 = time.time()
        user_input, item_input, labels = dataset.get_train_instances(train, args.num_neg)

        hist = model.fit([np.array(user_input), np.array(item_input)], np.array(labels),
                         batch_size=args.batch_size, epochs=1, verbose=0, shuffle=True)
        t2 = time.time()

        # Evaluation
        if epoch % args.verbose == 0:
            hits, ndcgs = evaluate_model(model, testRatings, testNegatives, topK=10, evaluation_threads=1)
            hr, ndcg = np.mean(hits), np.mean(ndcgs)
            print(f'Epoch {epoch}: HR = {hr:.4f}, NDCG = {ndcg:.4f}, loss = {hist.history["loss"][0]:.4f}')
            if hr > best_hr:
                best_hr, best_ndcg, best_iter = hr, ndcg, epoch
                if args.out:
                    model.save(f'NeuMF_model_{epoch}.h5')

    # Final output
    print(f"End. Best Iteration {best_iter}: HR = {best_hr:.4f}, NDCG = {best_ndcg:.4f}.")


Evaluating initial model...




TypeError: evaluate_model() got an unexpected keyword argument 'topK'

In [29]:
# prompt: TypeError: evaluate_model() got an unexpected keyword argument 'topK'
# correct me this code

import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import Embedding, Input, Dense, Flatten, Concatenate, Multiply
from keras.optimizers import Adam
from keras.regularizers import l2
import os
import time
from evaluate import evaluate_model  # Ensure evaluate_model.py is uploaded in the Colab environment
from Dataset import Dataset  # Ensure Dataset.py is uploaded in the Colab environment

# Install required libraries if not already installed
# !pip install tensorflow keras
# !pip install h5py


#################### Arguments ####################
class Args:
    path = './Data/'
    dataset = 'ml-1m'
    epochs = 100
    batch_size = 256
    num_factors = 8
    layers = '[64,32,16,8]'
    reg_mf = 0
    reg_layers = '[0,0,0,0]'
    num_neg = 4
    lr = 0.001
    verbose = 1
    out = 1
    mf_pretrain = 'ml-1m_GMF_model.h5'
    mlp_pretrain = 'ml-1m_MLP_model.h5'

args = Args()

#################### Model Definition ####################
def get_model(num_users, num_items, mf_dim=8, layers=[64, 32, 16, 8], reg_layers=[0, 0, 0, 0], reg_mf=0):
    # Input layers
    user_input = Input(shape=(1,), dtype='int32', name='user_input')
    item_input = Input(shape=(1,), dtype='int32', name='item_input')

    # Embedding layers
    MF_Embedding_User = Embedding(input_dim=num_users, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf), input_length=1)
    MF_Embedding_Item = Embedding(input_dim=num_items, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf), input_length=1)

    MLP_Embedding_User = Embedding(input_dim=num_users, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]), input_length=1)
    MLP_Embedding_Item = Embedding(input_dim=num_items, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]), input_length=1)

    # MF part
    mf_user_latent = Flatten()(MF_Embedding_User(user_input))
    mf_item_latent = Flatten()(MF_Embedding_Item(item_input))
    mf_vector = Multiply()([mf_user_latent, mf_item_latent])

    # MLP part
    mlp_user_latent = Flatten()(MLP_Embedding_User(user_input))
    mlp_item_latent = Flatten()(MLP_Embedding_Item(item_input))
    mlp_vector = Concatenate()([mlp_user_latent, mlp_item_latent])
    for idx in range(1, len(layers)):
        mlp_vector = Dense(layers[idx], activation='relu', kernel_regularizer=l2(reg_layers[idx]))(mlp_vector)

    # Concatenate MF and MLP parts
    predict_vector = Concatenate()([mf_vector, mlp_vector])

    # Prediction layer
    prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name="prediction")(predict_vector)

    model = Model(inputs=[user_input, item_input], outputs=prediction)
    return model

#################### Training & Evaluation ####################
if __name__ == '__main__':
    # Load Dataset
    dataset = Dataset(args.path + args.dataset)
    train, testRatings, testNegatives = dataset.trainMatrix, dataset.testRatings, dataset.testNegatives
    num_users, num_items = train.shape

    # Load pre-trained models if available
    gmf_model = tf.keras.models.load_model(args.mf_pretrain) if os.path.exists(args.mf_pretrain) else None
    mlp_model = tf.keras.models.load_model(args.mlp_pretrain) if os.path.exists(args.mlp_pretrain) else None

    # Initialize the NeuMF model
    model = get_model(num_users, num_items, args.num_factors, eval(args.layers), eval(args.reg_layers), args.reg_mf)

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=args.lr), loss='binary_crossentropy')

    # Initial performance evaluation
    print("Evaluating initial model...")
    hits, ndcgs = evaluate_model(model, testRatings, testNegatives, 10, 1) #Removed topK
    hr, ndcg = np.mean(hits), np.mean(ndcgs)
    print(f'Init: HR = {hr:.4f}, NDCG = {ndcg:.4f}')
    best_hr, best_ndcg, best_iter = hr, ndcg, -1

    # Training loop
    for epoch in range(args.epochs):
        t1 = time.time()
        user_input, item_input, labels = dataset.get_train_instances(train, args.num_neg)

        hist = model.fit([np.array(user_input), np.array(item_input)], np.array(labels),
                         batch_size=args.batch_size, epochs=1, verbose=0, shuffle=True)
        t2 = time.time()

        # Evaluation
        if epoch % args.verbose == 0:
            hits, ndcgs = evaluate_model(model, testRatings, testNegatives, 10, 1) #Removed topK
            hr, ndcg = np.mean(hits), np.mean(ndcgs)
            print(f'Epoch {epoch}: HR = {hr:.4f}, NDCG = {ndcg:.4f}, loss = {hist.history["loss"][0]:.4f}')
            if hr > best_hr:
                best_hr, best_ndcg, best_iter = hr, ndcg, epoch
                if args.out:
                    model.save(f'NeuMF_model_{epoch}.h5')

    # Final output
    print(f"End. Best Iteration {best_iter}: HR = {best_hr:.4f}, NDCG = {best_ndcg:.4f}.")



Evaluating initial model...
Init: HR = 0.1070, NDCG = 0.0487


AttributeError: 'Dataset' object has no attribute 'get_train_instances'

In [31]:
#NeuMF (with pre-training)_Output_Session1
import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import Embedding, Input, Dense, Flatten, Concatenate, Multiply
from keras.optimizers import Adam
from keras.regularizers import l2
import os
import time
from evaluate import evaluate_model  # Ensure evaluate_model.py is uploaded in the Colab environment
from Dataset import Dataset  # Ensure Dataset.py is uploaded in the Colab environment

#################### Arguments ####################
class Args:
    path = './Data/'
    dataset = 'ml-1m'
    epochs = 20
    batch_size = 256
    num_factors = 8
    layers = '[64,32,16,8]'
    reg_mf = 0
    reg_layers = '[0,0,0,0]'
    num_neg = 4
    lr = 0.001
    verbose = 1
    out = 1
    mf_pretrain = 'ml-1m_GMF_model.h5'
    mlp_pretrain = 'ml-1m_MLP_model.h5'

args = Args()

#################### Utility Function ####################
def get_train_instances(train, num_negatives):
    """
    Generate user, item, and label data for training.
    :param train: The training matrix (users x items).
    :param num_negatives: Number of negative samples per positive interaction.
    :return: user_input, item_input, labels
    """
    user_input, item_input, labels = [], [], []
    num_users, num_items = train.shape
    for (u, i) in train.keys():
        # Positive instance
        user_input.append(u)
        item_input.append(i)
        labels.append(1)
        # Negative instances
        for _ in range(num_negatives):
            j = np.random.randint(num_items)
            while (u, j) in train:
                j = np.random.randint(num_items)
            user_input.append(u)
            item_input.append(j)
            labels.append(0)
    return user_input, item_input, labels

#################### Model Definition ####################
def get_model(num_users, num_items, mf_dim=8, layers=[64, 32, 16, 8], reg_layers=[0, 0, 0, 0], reg_mf=0):
    # Input layers
    user_input = Input(shape=(1,), dtype='int32', name='user_input')
    item_input = Input(shape=(1,), dtype='int32', name='item_input')

    # Embedding layers
    MF_Embedding_User = Embedding(input_dim=num_users, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf), input_length=1)
    MF_Embedding_Item = Embedding(input_dim=num_items, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf), input_length=1)

    MLP_Embedding_User = Embedding(input_dim=num_users, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]), input_length=1)
    MLP_Embedding_Item = Embedding(input_dim=num_items, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]), input_length=1)

    # MF part
    mf_user_latent = Flatten()(MF_Embedding_User(user_input))
    mf_item_latent = Flatten()(MF_Embedding_Item(item_input))
    mf_vector = Multiply()([mf_user_latent, mf_item_latent])

    # MLP part
    mlp_user_latent = Flatten()(MLP_Embedding_User(user_input))
    mlp_item_latent = Flatten()(MLP_Embedding_Item(item_input))
    mlp_vector = Concatenate()([mlp_user_latent, mlp_item_latent])
    for idx in range(1, len(layers)):
        mlp_vector = Dense(layers[idx], activation='relu', kernel_regularizer=l2(reg_layers[idx]))(mlp_vector)

    # Concatenate MF and MLP parts
    predict_vector = Concatenate()([mf_vector, mlp_vector])

    # Prediction layer
    prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name="prediction")(predict_vector)

    model = Model(inputs=[user_input, item_input], outputs=prediction)
    return model

#################### Training & Evaluation ####################
if __name__ == '__main__':
    # Load Dataset
    dataset = Dataset(args.path + args.dataset)
    train, testRatings, testNegatives = dataset.trainMatrix, dataset.testRatings, dataset.testNegatives
    num_users, num_items = train.shape

    # Load pre-trained models if available
    gmf_model = tf.keras.models.load_model(args.mf_pretrain) if os.path.exists(args.mf_pretrain) else None
    mlp_model = tf.keras.models.load_model(args.mlp_pretrain) if os.path.exists(args.mlp_pretrain) else None

    # Initialize the NeuMF model
    model = get_model(num_users, num_items, args.num_factors, eval(args.layers), eval(args.reg_layers), args.reg_mf)

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=args.lr), loss='binary_crossentropy')

    # Initial performance evaluation
    print("Evaluating initial model...")
    hits, ndcgs = evaluate_model(model, testRatings, testNegatives, 10, 1)  # Removed topK
    hr, ndcg = np.mean(hits), np.mean(ndcgs)
    print(f'Init: HR = {hr:.4f}, NDCG = {ndcg:.4f}')
    best_hr, best_ndcg, best_iter = hr, ndcg, -1

    # Training loop
    for epoch in range(args.epochs):
        t1 = time.time()
        user_input, item_input, labels = get_train_instances(train, args.num_neg)

        hist = model.fit([np.array(user_input), np.array(item_input)], np.array(labels),
                         batch_size=args.batch_size, epochs=1, verbose=0, shuffle=True)
        t2 = time.time()

        # Evaluation
        if epoch % args.verbose == 0:
            hits, ndcgs = evaluate_model(model, testRatings, testNegatives, 10, 1)  # Removed topK
            hr, ndcg = np.mean(hits), np.mean(ndcgs)
            print(f'Epoch {epoch}: HR = {hr:.4f}, NDCG = {ndcg:.4f}, loss = {hist.history["loss"][0]:.4f}')
            if hr > best_hr:
                best_hr, best_ndcg, best_iter = hr, ndcg, epoch
                if args.out:
                    model.save(f'NeuMF_model_{epoch}.h5')

    # Final output
    print(f"End. Best Iteration {best_iter}: HR = {best_hr:.4f}, NDCG = {best_ndcg:.4f}.")


KeyboardInterrupt: 

In [32]:
#NeuMF (without pre-training)_Output_Session1
import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import Embedding, Input, Dense, Flatten, Concatenate, Multiply
from keras.optimizers import Adam
from keras.regularizers import l2
import os
import time
from evaluate import evaluate_model  # Ensure evaluate_model.py is uploaded in the Colab environment
from Dataset import Dataset  # Ensure Dataset.py is uploaded in the Colab environment

#################### Arguments ####################
class Args:
    path = './Data/'
    dataset = 'ml-1m'
    epochs = 100
    batch_size = 256
    num_factors = 8
    layers = '[64,32,16,8]'
    reg_mf = 0
    reg_layers = '[0,0,0,0]'
    num_neg = 4
    lr = 0.001
    verbose = 1
    out = 1
    # Removed pre-trained model paths since we are not using pre-training
    mf_pretrain = None
    mlp_pretrain = None

args = Args()

#################### Utility Function ####################
def get_train_instances(train, num_negatives):
    """
    Generate user, item, and label data for training.
    :param train: The training matrix (users x items).
    :param num_negatives: Number of negative samples per positive interaction.
    :return: user_input, item_input, labels
    """
    user_input, item_input, labels = [], [], []
    num_users, num_items = train.shape
    for (u, i) in train.keys():
        # Positive instance
        user_input.append(u)
        item_input.append(i)
        labels.append(1)
        # Negative instances
        for _ in range(num_negatives):
            j = np.random.randint(num_items)
            while (u, j) in train:
                j = np.random.randint(num_items)
            user_input.append(u)
            item_input.append(j)
            labels.append(0)
    return user_input, item_input, labels

#################### Model Definition ####################
def get_model(num_users, num_items, mf_dim=8, layers=[64, 32, 16, 8], reg_layers=[0, 0, 0, 0], reg_mf=0):
    # Input layers
    user_input = Input(shape=(1,), dtype='int32', name='user_input')
    item_input = Input(shape=(1,), dtype='int32', name='item_input')

    # Embedding layers
    MF_Embedding_User = Embedding(input_dim=num_users, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf), input_length=1)
    MF_Embedding_Item = Embedding(input_dim=num_items, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf), input_length=1)

    MLP_Embedding_User = Embedding(input_dim=num_users, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]), input_length=1)
    MLP_Embedding_Item = Embedding(input_dim=num_items, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]), input_length=1)

    # MF part
    mf_user_latent = Flatten()(MF_Embedding_User(user_input))
    mf_item_latent = Flatten()(MF_Embedding_Item(item_input))
    mf_vector = Multiply()([mf_user_latent, mf_item_latent])

    # MLP part
    mlp_user_latent = Flatten()(MLP_Embedding_User(user_input))
    mlp_item_latent = Flatten()(MLP_Embedding_Item(item_input))
    mlp_vector = Concatenate()([mlp_user_latent, mlp_item_latent])
    for idx in range(1, len(layers)):
        mlp_vector = Dense(layers[idx], activation='relu', kernel_regularizer=l2(reg_layers[idx]))(mlp_vector)

    # Concatenate MF and MLP parts
    predict_vector = Concatenate()([mf_vector, mlp_vector])

    # Prediction layer
    prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name="prediction")(predict_vector)

    model = Model(inputs=[user_input, item_input], outputs=prediction)
    return model

#################### Training & Evaluation ####################
if __name__ == '__main__':
    # Load Dataset
    dataset = Dataset(args.path + args.dataset)
    train, testRatings, testNegatives = dataset.trainMatrix, dataset.testRatings, dataset.testNegatives
    num_users, num_items = train.shape

    # Initialize the NeuMF model (without pre-training)
    model = get_model(num_users, num_items, args.num_factors, eval(args.layers), eval(args.reg_layers), args.reg_mf)

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=args.lr), loss='binary_crossentropy')

    # Initial performance evaluation
    print("Evaluating initial model...")
    hits, ndcgs = evaluate_model(model, testRatings, testNegatives, 10, 1)  # Removed topK
    hr, ndcg = np.mean(hits), np.mean(ndcgs)
    print(f'Init: HR = {hr:.4f}, NDCG = {ndcg:.4f}')
    best_hr, best_ndcg, best_iter = hr, ndcg, -1

    # Training loop
    for epoch in range(args.epochs):
        t1 = time.time()
        user_input, item_input, labels = get_train_instances(train, args.num_neg)

        hist = model.fit([np.array(user_input), np.array(item_input)], np.array(labels),
                         batch_size=args.batch_size, epochs=1, verbose=0, shuffle=True)
        t2 = time.time()

        # Evaluation
        if epoch % args.verbose == 0:
            hits, ndcgs = evaluate_model(model, testRatings, testNegatives, 10, 1)  # Removed topK
            hr, ndcg = np.mean(hits), np.mean(ndcgs)
            print(f'Epoch {epoch}: HR = {hr:.4f}, NDCG = {ndcg:.4f}, loss = {hist.history["loss"][0]:.4f}')
            if hr > best_hr:
                best_hr, best_ndcg, best_iter = hr, ndcg, epoch
                if args.out:
                    model.save(f'NeuMF_model_{epoch}.h5')

    # Final output
    print(f"End. Best Iteration {best_iter}: HR = {best_hr:.4f}, NDCG = {best_ndcg:.4f}.")

Evaluating initial model...


KeyboardInterrupt: 