<a href="https://colab.research.google.com/github/naphatsiri/DADS6003-Final-Project-Collaborative-filtering/blob/main/NeuMF_Improved(withpretraining)_Output_Session2_09012568_8.30pm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#NeuMF_Improving_(with_pre_training)_Output_Session2__Save-best-model_09/01/2568_16.30
#1) Added Dropout Layers: Introduced dropout in the MLP part to reduce overfitting.
#2) Activation Functions: Used relu for the intermediate layers and sigmoid for the output layer.
#3) Model Compilation: Added accuracy as an additional metric for better monitoring during training.

import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import Embedding, Input, Dense, Flatten, Concatenate, Multiply, Dropout
from keras.optimizers import Adam
from keras.regularizers import l2
import os
import time
from evaluate import evaluate_model  # Ensure evaluate.py (which defines evaluate_model) is uploaded in the environment
from Dataset import Dataset  # Ensure Dataset.py is uploaded in the environment

#################### Arguments ####################
class Args:
    """Hyper-parameters and file locations for this run, held as class attributes."""

    # --- data location -------------------------------------------------
    path = './Data/'                  # prefix; concatenated with `dataset` when loading
    dataset = 'ml-1m'

    # --- network shape and regularisation ------------------------------
    num_factors = 8                   # passed to get_model as mf_dim
    layers = '[64,32,16,8]'           # list literal kept as a string; parsed before use
    reg_layers = '[0,0,0,0]'          # list literal kept as a string; parsed before use
    reg_mf = 0

    # --- optimisation ---------------------------------------------------
    lr = 0.001
    batch_size = 256
    epochs = 20
    num_neg = 4                       # sampled negatives per observed interaction

    # --- reporting and checkpoints --------------------------------------
    verbose = 1                       # evaluate every `verbose` epochs
    out = 1                           # truthy -> save the model whenever HR improves
    mf_pretrain = 'ml-1m_GMF_model.h5'
    mlp_pretrain = 'ml-1m_MLP_model.h5'


args = Args()

#################### Utility Function ####################
def get_train_instances(train, num_negatives):
    """
    Build the parallel (user, item, label) lists used for one training pass.

    Each pair yielded by ``train.keys()`` is emitted once with label 1 and is
    immediately followed by ``num_negatives`` entries for the same user with
    label 0. A negative item is drawn uniformly from ``range(num_items)`` and
    re-drawn until the (user, item) pair is not contained in ``train``.

    :param train: Training matrix exposing ``shape``, ``keys()`` and ``in``
                  (users x items).
    :param num_negatives: Number of negative samples per positive interaction.
    :return: user_input, item_input, labels (three lists of equal length)
    """
    users, items, targets = [], [], []
    _, item_count = train.shape

    def draw_unseen(user):
        # Rejection sampling: keep drawing until the pair is not in `train`.
        while True:
            candidate = np.random.randint(item_count)
            if (user, candidate) not in train:
                return candidate

    for user, positive in train.keys():
        negatives = [draw_unseen(user) for _ in range(num_negatives)]

        # The positive comes first, then its negatives, all for the same user.
        users.extend([user] * (1 + len(negatives)))
        items.append(positive)
        items.extend(negatives)
        targets.append(1)
        targets.extend([0] * len(negatives))

    return users, items, targets

#################### Model Definition ####################
def get_model(num_users, num_items, mf_dim=8, layers=(64, 32, 16, 8), reg_layers=(0, 0, 0, 0), reg_mf=0):
    """
    Build the (uncompiled) NeuMF network.

    The model has two branches that share the same user/item id inputs:

    * MF branch: element-wise product of a user and an item embedding of
      size ``mf_dim``.
    * MLP branch: user and item embeddings of size ``layers[0] // 2`` are
      concatenated and passed through one ReLU ``Dense`` layer per entry in
      ``layers[1:]``, each followed by ``Dropout(0.2)``.

    Both branch outputs are concatenated and fed to a single sigmoid unit.

    :param num_users: Number of rows of the user embedding tables.
    :param num_items: Number of rows of the item embedding tables.
    :param mf_dim: Embedding size of the MF branch.
    :param layers: ``layers[0]`` is the width of the concatenated MLP
                   embeddings; ``layers[1:]`` are the Dense layer widths.
    :param reg_layers: L2 factors; ``reg_layers[0]`` applies to the MLP
                       embeddings, ``reg_layers[idx]`` to the kernel of the
                       Dense layer built from ``layers[idx]``.
    :param reg_mf: L2 factor for the MF embeddings.
    :return: A Keras ``Model`` taking ``[user_input, item_input]`` and
             producing one sigmoid output named ``"prediction"``.
    """
    # Defaults are tuples rather than lists so they cannot be mutated across
    # calls; they are only indexed and measured with len() below.

    # Input layers
    user_input = Input(shape=(1,), dtype='int32', name='user_input')
    item_input = Input(shape=(1,), dtype='int32', name='item_input')

    # Embedding layers.
    # `input_length` is intentionally not passed: the sequence length is
    # already fixed by Input(shape=(1,)) and the argument is deprecated in
    # recent Keras releases.
    MF_Embedding_User = Embedding(input_dim=num_users, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf))
    MF_Embedding_Item = Embedding(input_dim=num_items, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf))

    MLP_Embedding_User = Embedding(input_dim=num_users, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]))
    MLP_Embedding_Item = Embedding(input_dim=num_items, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]))

    # MF part: element-wise product of the two latent vectors
    mf_user_latent = Flatten()(MF_Embedding_User(user_input))
    mf_item_latent = Flatten()(MF_Embedding_Item(item_input))
    mf_vector = Multiply()([mf_user_latent, mf_item_latent])

    # MLP part: concatenated latent vectors through the Dense tower
    mlp_user_latent = Flatten()(MLP_Embedding_User(user_input))
    mlp_item_latent = Flatten()(MLP_Embedding_Item(item_input))
    mlp_vector = Concatenate()([mlp_user_latent, mlp_item_latent])
    for idx in range(1, len(layers)):
        mlp_vector = Dense(layers[idx], activation='relu', kernel_regularizer=l2(reg_layers[idx]))(mlp_vector)
        mlp_vector = Dropout(0.2)(mlp_vector)  # Add dropout to reduce overfitting

    # Concatenate MF and MLP parts
    predict_vector = Concatenate()([mf_vector, mlp_vector])

    # Prediction layer
    prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name="prediction")(predict_vector)

    model = Model(inputs=[user_input, item_input], outputs=prediction)
    return model

#################### Training & Evaluation ####################
if __name__ == '__main__':
    # Train NeuMF with Adam, evaluate after the configured epochs, and keep
    # the checkpoint with the highest HR.
    import ast  # local import: only this section parses the list-valued settings

    # Load Dataset
    dataset = Dataset(args.path + args.dataset)
    train, testRatings, testNegatives = dataset.trainMatrix, dataset.testRatings, dataset.testNegatives
    num_users, num_items = train.shape

    # Load pre-trained models if available.
    # NOTE(review): gmf_model / mlp_model are never referenced again in this
    # section, so their weights are NOT transferred into NeuMF and training
    # starts from the random initialisation. Add an explicit weight-transfer
    # step if pre-training is really intended.
    gmf_model = tf.keras.models.load_model(args.mf_pretrain) if os.path.exists(args.mf_pretrain) else None
    mlp_model = tf.keras.models.load_model(args.mlp_pretrain) if os.path.exists(args.mlp_pretrain) else None

    # Initialize the NeuMF model.
    # literal_eval only accepts Python literals, so the string-encoded lists
    # are parsed without executing arbitrary code.
    model = get_model(num_users, num_items, args.num_factors,
                      ast.literal_eval(args.layers), ast.literal_eval(args.reg_layers), args.reg_mf)

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=args.lr), loss='binary_crossentropy', metrics=['accuracy'])

    # Initial performance evaluation
    # (the trailing 10, 1 are presumably top-K and thread count -- confirm in evaluate.py)
    print("Evaluating initial model...")
    hits, ndcgs = evaluate_model(model, testRatings, testNegatives, 10, 1)
    hr, ndcg = np.mean(hits), np.mean(ndcgs)
    print(f'Init: HR = {hr:.4f}, NDCG = {ndcg:.4f}')
    best_hr, best_ndcg, best_iter = hr, ndcg, -1

    # Training loop
    best_model_path = 'NeuMF_best_model.h5'
    for epoch in range(args.epochs):
        # Negatives are re-sampled every epoch
        user_input, item_input, labels = get_train_instances(train, args.num_neg)

        hist = model.fit([np.array(user_input), np.array(item_input)], np.array(labels),
                         batch_size=args.batch_size, epochs=1, verbose=0, shuffle=True)

        # Evaluation every `verbose` epochs; verbose <= 0 disables it instead
        # of raising ZeroDivisionError on the modulo.
        if args.verbose > 0 and epoch % args.verbose == 0:
            hits, ndcgs = evaluate_model(model, testRatings, testNegatives, 10, 1)
            hr, ndcg = np.mean(hits), np.mean(ndcgs)
            print(f'Epoch {epoch}: HR = {hr:.4f}, NDCG = {ndcg:.4f}, loss = {hist.history["loss"][0]:.4f}')

            # Save the best model (selected on HR only)
            if hr > best_hr:
                best_hr, best_ndcg, best_iter = hr, ndcg, epoch
                if args.out:
                    model.save(best_model_path)

    # Final output
    print(f"End. Best Iteration {best_iter}: HR = {best_hr:.4f}, NDCG = {best_ndcg:.4f}.")



Evaluating initial model...
Init: HR = 0.1022, NDCG = 0.0467




Epoch 0: HR = 0.5957, NDCG = 0.3371, loss = 0.3198




Epoch 1: HR = 0.6377, NDCG = 0.3665, loss = 0.2744




Epoch 2: HR = 0.6517, NDCG = 0.3790, loss = 0.2636




Epoch 3: HR = 0.6596, NDCG = 0.3856, loss = 0.2583




Epoch 4: HR = 0.6651, NDCG = 0.3913, loss = 0.2542




Epoch 5: HR = 0.6755, NDCG = 0.3997, loss = 0.2509
Epoch 6: HR = 0.6712, NDCG = 0.3982, loss = 0.2480
Epoch 7: HR = 0.6743, NDCG = 0.4003, loss = 0.2456
Epoch 8: HR = 0.6702, NDCG = 0.3992, loss = 0.2432




Epoch 9: HR = 0.6786, NDCG = 0.4063, loss = 0.2414




Epoch 10: HR = 0.6843, NDCG = 0.4088, loss = 0.2397
Epoch 11: HR = 0.6695, NDCG = 0.4006, loss = 0.2379
Epoch 12: HR = 0.6762, NDCG = 0.4027, loss = 0.2362
Epoch 13: HR = 0.6752, NDCG = 0.4031, loss = 0.2352
Epoch 14: HR = 0.6772, NDCG = 0.4053, loss = 0.2336
Epoch 15: HR = 0.6715, NDCG = 0.4012, loss = 0.2323
Epoch 16: HR = 0.6737, NDCG = 0.4026, loss = 0.2314
Epoch 17: HR = 0.6748, NDCG = 0.4028, loss = 0.2305
Epoch 18: HR = 0.6740, NDCG = 0.4048, loss = 0.2296
Epoch 19: HR = 0.6811, NDCG = 0.4059, loss = 0.2284
End. Best Iteration 10: HR = 0.6843, NDCG = 0.4088.


In [1]:
#Improvement NeuMF_Improving_(with_pre_training)_Output_Session2_09012568_17.25
#Improving with these features
#1.LeakyReLU may improve performance by avoiding dead neurons.
#2.AdamW helps balance learning rate adaptation and regularization, often leading to better generalization.
#3.Added Dropout Layers: Introduced dropout in the MLP part to reduce overfitting.
#4.Model Compilation: Added accuracy as an additional metric for better monitoring during training.

import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import Embedding, Input, Dense, Flatten, Concatenate, Multiply, Dropout, LeakyReLU
from keras.optimizers import AdamW
from keras.regularizers import l2
import os
import time
from evaluate import evaluate_model  # Ensure evaluate.py (which defines evaluate_model) is uploaded in the environment
from Dataset import Dataset  # Ensure Dataset.py is uploaded in the environment

#################### Arguments ####################
class Args:
    """Hyper-parameters and file locations for the AdamW run, held as class attributes."""

    # --- data location -------------------------------------------------
    path = './Data/'                  # prefix; concatenated with `dataset` when loading
    dataset = 'ml-1m'

    # --- network shape and regularisation ------------------------------
    num_factors = 8                   # passed to get_model as mf_dim
    layers = '[64,32,16,8]'           # list literal kept as a string; parsed before use
    reg_layers = '[0,0,0,0]'          # list literal kept as a string; parsed before use
    reg_mf = 0

    # --- optimisation ---------------------------------------------------
    lr = 0.001
    weight_decay = 1e-5               # Weight decay for AdamW
    batch_size = 256
    epochs = 20
    num_neg = 4                       # sampled negatives per observed interaction

    # --- reporting and checkpoints --------------------------------------
    verbose = 1                       # evaluate every `verbose` epochs
    out = 1                           # truthy -> save the model whenever HR improves
    mf_pretrain = 'ml-1m_GMF_model.h5'
    mlp_pretrain = 'ml-1m_MLP_model.h5'


args = Args()

#################### Utility Function ####################
def get_train_instances(train, num_negatives):
    """
    Build the parallel (user, item, label) lists used for one training pass.

    Each pair yielded by ``train.keys()`` is emitted once with label 1 and is
    immediately followed by ``num_negatives`` entries for the same user with
    label 0. A negative item is drawn uniformly from ``range(num_items)`` and
    re-drawn until the (user, item) pair is not contained in ``train``.

    :param train: Training matrix exposing ``shape``, ``keys()`` and ``in``
                  (users x items).
    :param num_negatives: Number of negative samples per positive interaction.
    :return: user_input, item_input, labels (three lists of equal length)
    """
    users, items, targets = [], [], []
    _, item_count = train.shape

    def draw_unseen(user):
        # Rejection sampling: keep drawing until the pair is not in `train`.
        while True:
            candidate = np.random.randint(item_count)
            if (user, candidate) not in train:
                return candidate

    for user, positive in train.keys():
        negatives = [draw_unseen(user) for _ in range(num_negatives)]

        # The positive comes first, then its negatives, all for the same user.
        users.extend([user] * (1 + len(negatives)))
        items.append(positive)
        items.extend(negatives)
        targets.append(1)
        targets.extend([0] * len(negatives))

    return users, items, targets

#################### Model Definition ####################
def get_model(num_users, num_items, mf_dim=8, layers=(64, 32, 16, 8), reg_layers=(0, 0, 0, 0), reg_mf=0):
    """
    Build the (uncompiled) NeuMF network with LeakyReLU activations.

    The model has two branches that share the same user/item id inputs:

    * MF branch: element-wise product of a user and an item embedding of
      size ``mf_dim``.
    * MLP branch: user and item embeddings of size ``layers[0] // 2`` are
      concatenated and passed through one linear ``Dense`` layer per entry
      in ``layers[1:]``, each followed by ``LeakyReLU`` (slope 0.01) and
      ``Dropout(0.2)``.

    Both branch outputs are concatenated and fed to a single sigmoid unit.

    :param num_users: Number of rows of the user embedding tables.
    :param num_items: Number of rows of the item embedding tables.
    :param mf_dim: Embedding size of the MF branch.
    :param layers: ``layers[0]`` is the width of the concatenated MLP
                   embeddings; ``layers[1:]`` are the Dense layer widths.
    :param reg_layers: L2 factors; ``reg_layers[0]`` applies to the MLP
                       embeddings, ``reg_layers[idx]`` to the kernel of the
                       Dense layer built from ``layers[idx]``.
    :param reg_mf: L2 factor for the MF embeddings.
    :return: A Keras ``Model`` taking ``[user_input, item_input]`` and
             producing one sigmoid output named ``"prediction"``.
    """
    # Defaults are tuples rather than lists so they cannot be mutated across
    # calls; they are only indexed and measured with len() below.

    # Input layers
    user_input = Input(shape=(1,), dtype='int32', name='user_input')
    item_input = Input(shape=(1,), dtype='int32', name='item_input')

    # Embedding layers.
    # `input_length` is intentionally not passed: the sequence length is
    # already fixed by Input(shape=(1,)) and the argument is deprecated in
    # recent Keras releases.
    MF_Embedding_User = Embedding(input_dim=num_users, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf))
    MF_Embedding_Item = Embedding(input_dim=num_items, output_dim=mf_dim, embeddings_regularizer=l2(reg_mf))

    MLP_Embedding_User = Embedding(input_dim=num_users, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]))
    MLP_Embedding_Item = Embedding(input_dim=num_items, output_dim=layers[0] // 2, embeddings_regularizer=l2(reg_layers[0]))

    # MF part: element-wise product of the two latent vectors
    mf_user_latent = Flatten()(MF_Embedding_User(user_input))
    mf_item_latent = Flatten()(MF_Embedding_Item(item_input))
    mf_vector = Multiply()([mf_user_latent, mf_item_latent])

    # MLP part: concatenated latent vectors through the Dense tower
    mlp_user_latent = Flatten()(MLP_Embedding_User(user_input))
    mlp_item_latent = Flatten()(MLP_Embedding_Item(item_input))
    mlp_vector = Concatenate()([mlp_user_latent, mlp_item_latent])
    for idx in range(1, len(layers)):
        # Dense is left linear; the non-linearity is the separate LeakyReLU layer.
        mlp_vector = Dense(layers[idx], kernel_regularizer=l2(reg_layers[idx]))(mlp_vector)
        # NOTE(review): newer Keras releases appear to rename `alpha` to
        # `negative_slope`; `alpha` is kept so older versions still work --
        # confirm against the installed Keras version.
        mlp_vector = LeakyReLU(alpha=0.01)(mlp_vector)  # Use LeakyReLU with a slope of 0.01 for negative values
        mlp_vector = Dropout(0.2)(mlp_vector)  # Add dropout to reduce overfitting

    # Concatenate MF and MLP parts
    predict_vector = Concatenate()([mf_vector, mlp_vector])

    # Prediction layer
    prediction = Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name="prediction")(predict_vector)

    model = Model(inputs=[user_input, item_input], outputs=prediction)
    return model

#################### Training & Evaluation ####################
if __name__ == '__main__':
    # Train NeuMF with AdamW, evaluate after the configured epochs, and keep
    # the checkpoint with the highest HR.
    import ast  # local import: only this section parses the list-valued settings

    # Load Dataset
    dataset = Dataset(args.path + args.dataset)
    train, testRatings, testNegatives = dataset.trainMatrix, dataset.testRatings, dataset.testNegatives
    num_users, num_items = train.shape

    # Load pre-trained models if available.
    # NOTE(review): gmf_model / mlp_model are never referenced again in this
    # section, so their weights are NOT transferred into NeuMF and training
    # starts from the random initialisation. Add an explicit weight-transfer
    # step if pre-training is really intended.
    gmf_model = tf.keras.models.load_model(args.mf_pretrain) if os.path.exists(args.mf_pretrain) else None
    mlp_model = tf.keras.models.load_model(args.mlp_pretrain) if os.path.exists(args.mlp_pretrain) else None

    # Initialize the NeuMF model.
    # literal_eval only accepts Python literals, so the string-encoded lists
    # are parsed without executing arbitrary code.
    model = get_model(num_users, num_items, args.num_factors,
                      ast.literal_eval(args.layers), ast.literal_eval(args.reg_layers), args.reg_mf)

    # Compile the model with AdamW optimizer
    optimizer = AdamW(learning_rate=args.lr, weight_decay=args.weight_decay)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Initial performance evaluation
    # (the trailing 10, 1 are presumably top-K and thread count -- confirm in evaluate.py)
    print("Evaluating initial model...")
    hits, ndcgs = evaluate_model(model, testRatings, testNegatives, 10, 1)
    hr, ndcg = np.mean(hits), np.mean(ndcgs)
    print(f'Init: HR = {hr:.4f}, NDCG = {ndcg:.4f}')
    best_hr, best_ndcg, best_iter = hr, ndcg, -1

    # Training loop
    # NOTE(review): this is the same filename the Adam run earlier in this
    # notebook saves to, so running both overwrites that checkpoint --
    # confirm this is intended.
    best_model_path = 'NeuMF_best_model.h5'
    for epoch in range(args.epochs):
        # Negatives are re-sampled every epoch
        user_input, item_input, labels = get_train_instances(train, args.num_neg)

        hist = model.fit([np.array(user_input), np.array(item_input)], np.array(labels),
                         batch_size=args.batch_size, epochs=1, verbose=0, shuffle=True)

        # Evaluation every `verbose` epochs; verbose <= 0 disables it instead
        # of raising ZeroDivisionError on the modulo.
        if args.verbose > 0 and epoch % args.verbose == 0:
            hits, ndcgs = evaluate_model(model, testRatings, testNegatives, 10, 1)
            hr, ndcg = np.mean(hits), np.mean(ndcgs)
            print(f'Epoch {epoch}: HR = {hr:.4f}, NDCG = {ndcg:.4f}, loss = {hist.history["loss"][0]:.4f}')

            # Save the best model (selected on HR only)
            if hr > best_hr:
                best_hr, best_ndcg, best_iter = hr, ndcg, epoch
                if args.out:
                    model.save(best_model_path)

    # Final output
    print(f"End. Best Iteration {best_iter}: HR = {best_hr:.4f}, NDCG = {best_ndcg:.4f}.")



Evaluating initial model...
Init: HR = 0.0998, NDCG = 0.0467




Epoch 0: HR = 0.5859, NDCG = 0.3326, loss = 0.3296




Epoch 1: HR = 0.6187, NDCG = 0.3550, loss = 0.2839




Epoch 2: HR = 0.6369, NDCG = 0.3681, loss = 0.2743




Epoch 3: HR = 0.6469, NDCG = 0.3718, loss = 0.2702
Epoch 4: HR = 0.6450, NDCG = 0.3745, loss = 0.2672




Epoch 5: HR = 0.6523, NDCG = 0.3791, loss = 0.2645




Epoch 6: HR = 0.6571, NDCG = 0.3843, loss = 0.2624
Epoch 7: HR = 0.6563, NDCG = 0.3855, loss = 0.2606




Epoch 8: HR = 0.6639, NDCG = 0.3910, loss = 0.2593
Epoch 9: HR = 0.6613, NDCG = 0.3878, loss = 0.2579




Epoch 10: HR = 0.6694, NDCG = 0.3934, loss = 0.2567
Epoch 11: HR = 0.6659, NDCG = 0.3944, loss = 0.2562
Epoch 12: HR = 0.6684, NDCG = 0.3927, loss = 0.2555




Epoch 13: HR = 0.6732, NDCG = 0.3993, loss = 0.2544
Epoch 14: HR = 0.6727, NDCG = 0.3970, loss = 0.2539




Epoch 15: HR = 0.6753, NDCG = 0.3980, loss = 0.2529
Epoch 16: HR = 0.6750, NDCG = 0.3982, loss = 0.2528




Epoch 17: HR = 0.6757, NDCG = 0.3993, loss = 0.2521
Epoch 18: HR = 0.6755, NDCG = 0.3996, loss = 0.2516




Epoch 19: HR = 0.6765, NDCG = 0.3997, loss = 0.2508
End. Best Iteration 19: HR = 0.6765, NDCG = 0.3997.
