In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Load the raw transactions from the 'Online Retail' sheet of the workbook.
file_path = r"C:\Users\user\Desktop\CW4\online+retail\Online Retail.xlsx"
df = pd.read_excel(file_path, sheet_name='Online Retail')

# Cleaning: keep rows that have both a customer and a description, and whose
# quantity is strictly positive.
df_cleaned = df.dropna(subset=['CustomerID', 'Description'])
df_cleaned = df_cleaned[df_cleaned['Quantity'] > 0]

# Encoding: order each customer's rows by invoice date, then map every
# description to an integer item id.
df_sorted = df_cleaned.sort_values(by=['CustomerID', 'InvoiceDate'])
item_encoder = LabelEncoder()
# LabelEncoder numbers classes from 0, but pad_sequences below also fills with 0,
# so the first item used to be indistinguishable from padding. Shifting by one
# reserves id 0 for padding only; subtract 1 before item_encoder.inverse_transform.
df_sorted['ItemID'] = item_encoder.fit_transform(df_sorted['Description']) + 1

# Grouping by Customer: one chronologically ordered list of item ids per customer.
sequential_data = df_sorted.groupby('CustomerID')['ItemID'].apply(list).reset_index(name='ItemSequence')

# Drop customers whose history is shorter than the minimum sequence length.
min_sequence_length = 3
sequential_data = sequential_data[sequential_data['ItemSequence'].apply(len) >= min_sequence_length]

# Pad (or truncate) every history to a fixed length; padding goes in front.
item_sequences = sequential_data['ItemSequence'].tolist()
sequence_length = 20
padded_sequences = pad_sequences(item_sequences, maxlen=sequence_length, padding='pre')

def create_sequences(sequences, seq_length=20):
    """Turn item sequences into (input window, next item) training pairs.

    Args:
        sequences: iterable of 1-D integer sequences (lists or array rows).
        seq_length: number of items in every input window.

    Returns:
        (X, y) as NumPy arrays. Every row of X has exactly ``seq_length``
        entries and ``y[j]`` is the item that follows ``X[j]``.

    The target is never part of its own input window. The previous version
    clamped the target index to ``len(seq) - 1``, so for the last window (and
    for every sequence no longer than ``seq_length``) the label was simply the
    final element of the input.
    """
    X, y = [], []
    for seq in sequences:
        seq = np.asarray(seq)
        if seq.size == 0:
            # Nothing to predict from an empty history.
            continue

        if len(seq) <= seq_length:
            # Too short for a full window plus a target: hold out the last
            # item as the target and left-pad the remaining history with 0,
            # matching the 'pre' padding used for the sequences themselves.
            history = seq[:-1]
            padding = np.zeros(seq_length - len(history), dtype=seq.dtype)
            X.append(np.concatenate([padding, history]))
            y.append(seq[-1])
            continue

        # Sliding windows; stop one short so seq[i + seq_length] always exists.
        for i in range(len(seq) - seq_length):
            X.append(seq[i:i + seq_length])
            y.append(seq[i + seq_length])
    return np.array(X), np.array(y)

X, y = create_sequences(padded_sequences, seq_length=sequence_length)

# Dataset splitting: first 80% of the samples for training, the rest for testing.
split_ratio = 0.8
split_index = int(len(X) * split_ratio)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Vocabulary: later cells size the Embedding and the softmax layer with
# len(unique_items), so it must cover every index up to the largest item id.
# Counting the distinct ids that happen to occur gives a smaller number, and the
# np.clip "sanity check" that used to follow rewrote every larger id to the same
# value. unique_items is therefore the full index range 0..max id and no
# clipping is applied.
highest_item_id = int(max(X.max(), y.max())) if len(X) else -1
unique_items = np.arange(highest_item_id + 1)

print(f"Updated Vocabulary Size: {len(unique_items)}")
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")
print(f"Vocabulary size (unique items): {len(unique_items)}")

# Save preprocessed data so the model cells can reload it with np.load.
np.save("X_train.npy", X_train)
np.save("X_test.npy", X_test)
np.save("y_train.npy", y_train)
np.save("y_test.npy", y_test)
np.save("unique_items.npy", unique_items)


Updated Vocabulary Size: 3323
X_train shape: (3371, 20)
X_test shape: (843, 20)
y_train shape: (3371,)
y_test shape: (843,)
Vocabulary size (unique items): 3323


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import time
import os
import pandas as pd

# CPU Optimization Settings
tf.config.threading.set_intra_op_parallelism_threads(16)
tf.config.threading.set_inter_op_parallelism_threads(8)

# Hyperparameters
embedding_dim = 100  # Increased embedding dimension
hidden_units = 200  # More GRU units
sequence_length = 20  # Increased sequence length
batch_size = 32  # Adjust batch size
epochs = 30  # Train for more epochs
# model.compile() below reads learning_rate, but this cell never assigned it, so
# on a fresh kernel the cell failed with NameError.
# NOTE(review): 0.0005 mirrors the "Model 1 (Baseline)" cell, which shares every
# other hyperparameter with this one - confirm the intended value.
learning_rate = 0.0005

# Prepare Data with tf.data
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Build and Compile Model: embedding -> single GRU -> softmax over all item ids.
model = Sequential([
    Embedding(input_dim=len(unique_items), output_dim=embedding_dim, input_length=sequence_length),
    GRU(units=hidden_units, return_sequences=False, dropout=0.2),
    Dense(units=len(unique_items), activation='softmax')
])
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.build(input_shape=(None, sequence_length))
model.summary()

# Training with Step and Countdown Display
# Ceiling division: .batch() also yields the final, smaller batch, so floor
# division undercounted by one step (progress read "106/105", the ETA went
# negative and the epoch averages were divided by the wrong step count).
steps_per_epoch = max(1, -(-len(X_train) // batch_size))

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    start_time = time.time()
    training_loss, training_accuracy = 0, 0

    for step, (x_batch, y_batch) in enumerate(train_dataset, start=1):
        # metrics[0] is the loss and metrics[1] the accuracy, as compiled above.
        metrics = model.train_on_batch(x_batch, y_batch)
        training_loss += metrics[0]
        training_accuracy += metrics[1]

        # Estimate remaining time from the mean duration of the steps so far.
        elapsed_time = time.time() - start_time
        steps_remaining = steps_per_epoch - step
        time_per_step = elapsed_time / max(1, step)
        estimated_time_remaining = steps_remaining * time_per_step

        # Dynamic updates: "\r" rewrites the same console line on every step.
        print(
            f"\r{step}/{steps_per_epoch} ━━━━━━━━━━━━━ {int(estimated_time_remaining)}s remaining - "
            f"accuracy: {training_accuracy / step:.4f} - loss: {training_loss / step:.4f}", end=""
        )

    epoch_time = time.time() - start_time
    print(
        f"\n{steps_per_epoch}/{steps_per_epoch} ━━━━━━━━━━━━━ "
        f"{int(epoch_time)}s total - accuracy: {training_accuracy / steps_per_epoch:.4f} - "
        f"loss: {training_loss / steps_per_epoch:.4f}"
    )

# Evaluate the Model
def evaluate_model(model, test_data, k=10):
    """Compute top-k ranking metrics for a next-item model.

    Args:
        model: object whose ``predict(x, verbose=0)`` returns one row of scores
            per sample.
        test_data: iterable of ``(x, y_true)`` batches, ``y_true`` holding one
            target index per sample.
        k: size of the recommendation list.

    Returns:
        ``(precision, recall, hit_rate, mrr)`` averaged over all samples, or
        four zeros when ``test_data`` contains no samples.
    """
    total_precision, total_recall, total_hits, total_mrr = 0, 0, 0, 0
    total_users = 0

    for x, y_true in test_data:
        y_pred = model.predict(x, verbose=0)
        # argsort sorts ascending, so the last k columns are the top k with the
        # best item LAST. Reverse them so column 0 is rank 1; without this the
        # reciprocal rank rewarded the weakest of the top-k predictions.
        top_k_indices = np.argsort(y_pred, axis=1)[:, -k:][:, ::-1]

        for idx, target in enumerate(y_true):
            predictions = top_k_indices[idx]
            target = int(target)
            positions = np.where(predictions == target)[0]
            hit = int(positions.size > 0)
            if hit:
                total_hits += 1
                total_mrr += 1 / (positions[0] + 1)

            # Exactly one relevant item per sample: precision is hit/k, recall is hit/1.
            total_precision += hit / k
            total_recall += hit / 1

        total_users += len(y_true)

    if total_users == 0:
        # No samples: avoid dividing by zero.
        return 0.0, 0.0, 0.0, 0.0

    precision = total_precision / total_users
    recall = total_recall / total_users
    hit_rate = total_hits / total_users
    mrr = total_mrr / total_users

    return precision, recall, hit_rate, mrr

# Score the trained model on the test batches with a top-10 cutoff and print
# the four metrics with four decimals each.
results = evaluate_model(model, test_dataset, k=10)
print("\nFinal Results (Precision@10, Recall@10, Hit Rate, MRR):")
print("(" + ", ".join(f"{results[i]:.4f}" for i in range(4)) + ")")

# Save Results to CSV
def save_results(results, base_path, file_name):
    """Write the evaluation metrics as a one-row CSV at base_path/file_name.

    ``results`` is read by position: 0 -> Precision@10, 1 -> Recall@10,
    2 -> Hit Rate, 3 -> MRR. The CSV has a header row and no index column.
    """
    column_names = ("Precision@10", "Recall@10", "Hit Rate", "MRR")
    row = {column: results[position] for position, column in enumerate(column_names)}
    destination = os.path.join(base_path, file_name)
    pd.DataFrame([row]).to_csv(destination, index=False)

# Destination folder and file name for this run's metrics.
base_path = r"C:\Users\user\Desktop\CW4\online+retail"
results_file = "evaluation_results.csv"
save_results(results, base_path, results_file)
print(f"Results saved to {os.path.join(base_path, results_file)}")





Epoch 1/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1687 - loss: 8.0772
105/105 ━━━━━━━━━━━━━ 32s total - accuracy: 0.1703 - loss: 8.1541

Epoch 2/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0510 - loss: 7.4732
105/105 ━━━━━━━━━━━━━ 22s total - accuracy: 0.0515 - loss: 7.5444

Epoch 3/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0485 - loss: 7.1065
105/105 ━━━━━━━━━━━━━ 23s total - accuracy: 0.0490 - loss: 7.1742

Epoch 4/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0546 - loss: 6.8676
105/105 ━━━━━━━━━━━━━ 23s total - accuracy: 0.0552 - loss: 6.9330

Epoch 5/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0587 - loss: 6.6950
105/105 ━━━━━━━━━━━━━ 24s total - accuracy: 0.0592 - loss: 6.7588

Epoch 6/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0622 - loss: 6.5526
105/105 ━━━━━━━━━━━━━ 24s total - accuracy: 0.0628 - loss: 6.6150

Epoch 7/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0660 - loss: 6.4223
105/105 ━━━━━━━━━━━━━ 25s total - acc

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import time
import os
import pandas as pd

# CPU Optimization Settings
tf.config.threading.set_intra_op_parallelism_threads(16)
tf.config.threading.set_inter_op_parallelism_threads(8)

# Hyperparameters for Model 1 (Baseline)
embedding_dim = 100  # Embedding dimension
hidden_units = 200  # GRU units
sequence_length = 20  # Sequence length
batch_size = 32  # Batch size
epochs = 30  # Number of epochs
learning_rate = 0.0005  # Learning rate

# Prepare Data with tf.data
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Build and Compile Model: embedding -> single GRU -> softmax over all item ids.
model = Sequential([
    Embedding(input_dim=len(unique_items), output_dim=embedding_dim, input_length=sequence_length),
    GRU(units=hidden_units, return_sequences=False, dropout=0.2),
    Dense(units=len(unique_items), activation='softmax')
])
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.build(input_shape=(None, sequence_length))
model.summary()

# Training with Step and Countdown Display
# Ceiling division: .batch() also yields the final, smaller batch, so floor
# division undercounted by one step (progress read "106/105", the ETA went
# negative and the epoch averages were divided by the wrong step count).
steps_per_epoch = max(1, -(-len(X_train) // batch_size))

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    start_time = time.time()
    training_loss, training_accuracy = 0, 0

    for step, (x_batch, y_batch) in enumerate(train_dataset, start=1):
        # metrics[0] is the loss and metrics[1] the accuracy, as compiled above.
        metrics = model.train_on_batch(x_batch, y_batch)
        training_loss += metrics[0]
        training_accuracy += metrics[1]

        # Estimate remaining time from the mean duration of the steps so far.
        elapsed_time = time.time() - start_time
        steps_remaining = steps_per_epoch - step
        time_per_step = elapsed_time / max(1, step)
        estimated_time_remaining = steps_remaining * time_per_step

        # Dynamic updates: "\r" rewrites the same console line on every step.
        print(
            f"\r{step}/{steps_per_epoch} ━━━━━━━━━━━━━ {int(estimated_time_remaining)}s remaining - "
            f"accuracy: {training_accuracy / step:.4f} - loss: {training_loss / step:.4f}", end=""
        )

    epoch_time = time.time() - start_time
    print(
        f"\n{steps_per_epoch}/{steps_per_epoch} ━━━━━━━━━━━━━ "
        f"{int(epoch_time)}s total - accuracy: {training_accuracy / steps_per_epoch:.4f} - "
        f"loss: {training_loss / steps_per_epoch:.4f}"
    )

# Evaluate the Model
def evaluate_model(model, test_data, k=10):
    """Compute top-k ranking metrics for a next-item model.

    Args:
        model: object whose ``predict(x, verbose=0)`` returns one row of scores
            per sample.
        test_data: iterable of ``(x, y_true)`` batches, ``y_true`` holding one
            target index per sample.
        k: size of the recommendation list.

    Returns:
        ``(precision, recall, hit_rate, mrr)`` averaged over all samples, or
        four zeros when ``test_data`` contains no samples.
    """
    total_precision, total_recall, total_hits, total_mrr = 0, 0, 0, 0
    total_users = 0

    for x, y_true in test_data:
        y_pred = model.predict(x, verbose=0)
        # argsort sorts ascending, so the last k columns are the top k with the
        # best item LAST. Reverse them so column 0 is rank 1; without this the
        # reciprocal rank rewarded the weakest of the top-k predictions.
        top_k_indices = np.argsort(y_pred, axis=1)[:, -k:][:, ::-1]

        for idx, target in enumerate(y_true):
            predictions = top_k_indices[idx]
            target = int(target)
            positions = np.where(predictions == target)[0]
            hit = int(positions.size > 0)
            if hit:
                total_hits += 1
                total_mrr += 1 / (positions[0] + 1)

            # Exactly one relevant item per sample: precision is hit/k, recall is hit/1.
            total_precision += hit / k
            total_recall += hit / 1

        total_users += len(y_true)

    if total_users == 0:
        # No samples: avoid dividing by zero.
        return 0.0, 0.0, 0.0, 0.0

    precision = total_precision / total_users
    recall = total_recall / total_users
    hit_rate = total_hits / total_users
    mrr = total_mrr / total_users

    return precision, recall, hit_rate, mrr

# Score the trained model on the test batches with a top-10 cutoff and print
# the four metrics with four decimals each.
results = evaluate_model(model, test_dataset, k=10)
print("\nFinal Results (Precision@10, Recall@10, Hit Rate, MRR):")
print("(" + ", ".join(f"{results[i]:.4f}" for i in range(4)) + ")")

# Save Results to CSV
def save_results(results, base_path, file_name):
    """Write the evaluation metrics as a one-row CSV at base_path/file_name.

    ``results`` is read by position: 0 -> Precision@10, 1 -> Recall@10,
    2 -> Hit Rate, 3 -> MRR. The CSV has a header row and no index column.
    """
    column_names = ("Precision@10", "Recall@10", "Hit Rate", "MRR")
    row = {column: results[position] for position, column in enumerate(column_names)}
    destination = os.path.join(base_path, file_name)
    pd.DataFrame([row]).to_csv(destination, index=False)

# Destination folder and file name for the Model 1 metrics.
base_path = r"C:\Users\user\Desktop\CW4\online+retail"
results_file = "evaluation_results_model1.csv"
save_results(results, base_path, results_file)
print(f"Results saved to {os.path.join(base_path, results_file)}")



Epoch 1/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1642 - loss: 8.0836
105/105 ━━━━━━━━━━━━━ 22s total - accuracy: 0.1658 - loss: 8.1606

Epoch 2/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0494 - loss: 7.4824
105/105 ━━━━━━━━━━━━━ 22s total - accuracy: 0.0498 - loss: 7.5537

Epoch 3/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0536 - loss: 7.1354
105/105 ━━━━━━━━━━━━━ 22s total - accuracy: 0.0541 - loss: 7.2033

Epoch 4/30
106/105 ━━━━━━━━━━━━━ -2s remaining - accuracy: 0.0556 - loss: 6.9033
105/105 ━━━━━━━━━━━━━ 216s total - accuracy: 0.0561 - loss: 6.9690

Epoch 5/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0590 - loss: 6.7352
105/105 ━━━━━━━━━━━━━ 27s total - accuracy: 0.0596 - loss: 6.7993

Epoch 6/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0622 - loss: 6.5980
105/105 ━━━━━━━━━━━━━ 26s total - accuracy: 0.0628 - loss: 6.6608

Epoch 7/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0656 - loss: 6.4741
105/105 ━━━━━━━━━━━━━ 30s total - a

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import time
import os
import pandas as pd

# Load preprocessed data
X_train = np.load("X_train.npy")
X_test = np.load("X_test.npy")
y_train = np.load("y_train.npy")
y_test = np.load("y_test.npy")

# Calculate vocabulary size dynamically: largest index seen anywhere, plus one.
vocab_size = max(np.max(X_train), np.max(X_test), np.max(y_train), np.max(y_test)) + 1
print(f"Updated Vocabulary Size: {vocab_size}")

# CPU Optimization Settings
tf.config.threading.set_intra_op_parallelism_threads(16)
tf.config.threading.set_inter_op_parallelism_threads(8)

# Hyperparameters for Model 4
embedding_dim = 200  # Embedding dimension
hidden_units = 400  # GRU units
sequence_length = 20  # Sequence length
batch_size = 32  # Batch size
epochs = 30  # Number of epochs
learning_rate = 0.0007  # Learning rate

# Prepare Data with tf.data
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Build and Compile Model: embedding -> single GRU -> softmax over vocab_size ids.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=sequence_length),
    GRU(units=hidden_units, return_sequences=False, dropout=0.2),
    Dense(units=vocab_size, activation='softmax')  # Match vocabulary size
])
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.build(input_shape=(None, sequence_length))
model.summary()

# Training with Step and Countdown Display
# Ceiling division: .batch() also yields the final, smaller batch, so floor
# division undercounted by one step (progress read "106/105", the ETA went
# negative and the epoch averages were divided by the wrong step count).
steps_per_epoch = max(1, -(-len(X_train) // batch_size))

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    start_time = time.time()
    training_loss, training_accuracy = 0, 0

    for step, (x_batch, y_batch) in enumerate(train_dataset, start=1):
        # metrics[0] is the loss and metrics[1] the accuracy, as compiled above.
        metrics = model.train_on_batch(x_batch, y_batch)
        training_loss += metrics[0]
        training_accuracy += metrics[1]

        # Estimate remaining time from the mean duration of the steps so far.
        elapsed_time = time.time() - start_time
        steps_remaining = steps_per_epoch - step
        time_per_step = elapsed_time / max(1, step)
        estimated_time_remaining = steps_remaining * time_per_step

        # "\r" rewrites the same console line on every step.
        print(
            f"\r{step}/{steps_per_epoch} ━━━━━━━━━━━━━ {int(estimated_time_remaining)}s remaining - "
            f"accuracy: {training_accuracy / step:.4f} - loss: {training_loss / step:.4f}", end=""
        )

    epoch_time = time.time() - start_time
    print(
        f"\n{steps_per_epoch}/{steps_per_epoch} ━━━━━━━━━━━━━ "
        f"{int(epoch_time)}s total - accuracy: {training_accuracy / steps_per_epoch:.4f} - "
        f"loss: {training_loss / steps_per_epoch:.4f}"
    )

# Evaluate the Model
def evaluate_model(model, test_data, k=10):
    """Compute top-k ranking metrics for a next-item model.

    Args:
        model: object whose ``predict(x, verbose=0)`` returns one row of scores
            per sample.
        test_data: iterable of ``(x, y_true)`` batches, ``y_true`` holding one
            target index per sample.
        k: size of the recommendation list.

    Returns:
        ``(precision, recall, hit_rate, mrr)`` averaged over all samples, or
        four zeros when ``test_data`` contains no samples.
    """
    total_precision, total_recall, total_hits, total_mrr = 0, 0, 0, 0
    total_users = 0

    for x, y_true in test_data:
        y_pred = model.predict(x, verbose=0)
        # argsort sorts ascending, so the last k columns are the top k with the
        # best item LAST. Reverse them so column 0 is rank 1; without this the
        # reciprocal rank rewarded the weakest of the top-k predictions.
        top_k_indices = np.argsort(y_pred, axis=1)[:, -k:][:, ::-1]

        for idx, target in enumerate(y_true):
            predictions = top_k_indices[idx]
            target = int(target)
            positions = np.where(predictions == target)[0]
            hit = int(positions.size > 0)
            if hit:
                total_hits += 1
                total_mrr += 1 / (positions[0] + 1)

            # Exactly one relevant item per sample: precision is hit/k, recall is hit/1.
            total_precision += hit / k
            total_recall += hit / 1

        total_users += len(y_true)

    if total_users == 0:
        # No samples: avoid dividing by zero.
        return 0.0, 0.0, 0.0, 0.0

    precision = total_precision / total_users
    recall = total_recall / total_users
    hit_rate = total_hits / total_users
    mrr = total_mrr / total_users

    return precision, recall, hit_rate, mrr

# Score the trained model on the test batches with a top-10 cutoff and print
# the four metrics with four decimals each.
results = evaluate_model(model, test_dataset, k=10)
print("\nFinal Results (Precision@10, Recall@10, Hit Rate, MRR):")
print("(" + ", ".join(f"{results[i]:.4f}" for i in range(4)) + ")")

# Save Results to CSV
def save_results(results, base_path, file_name):
    """Write the evaluation metrics as a one-row CSV at base_path/file_name.

    ``results`` is read by position: 0 -> Precision@10, 1 -> Recall@10,
    2 -> Hit Rate, 3 -> MRR. The CSV has a header row and no index column.
    """
    column_names = ("Precision@10", "Recall@10", "Hit Rate", "MRR")
    row = {column: results[position] for position, column in enumerate(column_names)}
    destination = os.path.join(base_path, file_name)
    pd.DataFrame([row]).to_csv(destination, index=False)

# Destination folder and file name for the Model 4 metrics.
base_path = r"C:\Users\user\Desktop\CW4\online+retail"
results_file = "evaluation_results_model4.csv"
save_results(results, base_path, results_file)
print(f"Results saved to {os.path.join(base_path, results_file)}")


Updated Vocabulary Size: 3877



Epoch 1/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1842 - loss: 7.9842
105/105 ━━━━━━━━━━━━━ 23s total - accuracy: 0.1860 - loss: 8.0602

Epoch 2/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0703 - loss: 7.2537
105/105 ━━━━━━━━━━━━━ 36s total - accuracy: 0.0710 - loss: 7.3228

Epoch 3/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0726 - loss: 6.8093
105/105 ━━━━━━━━━━━━━ 40s total - accuracy: 0.0733 - loss: 6.8741

Epoch 4/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0786 - loss: 6.5019
105/105 ━━━━━━━━━━━━━ 42s total - accuracy: 0.0794 - loss: 6.5638

Epoch 5/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.0885 - loss: 6.2512
105/105 ━━━━━━━━━━━━━ 43s total - accuracy: 0.0893 - loss: 6.3108

Epoch 6/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1044 - loss: 6.0188
105/105 ━━━━━━━━━━━━━ 45s total - accuracy: 0.1054 - loss: 6.0761

Epoch 7/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1258 - loss: 5.7835
105/105 ━━━━━━━━━━━━━ 47s total - acc

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import pandas as pd
import os
import time

# Load preprocessed data
X_train = np.load("X_train.npy")
X_test = np.load("X_test.npy")
y_train = np.load("y_train.npy")
y_test = np.load("y_test.npy")
unique_items = np.load("unique_items.npy")

# Hyperparameters for Experiment 1
embedding_dim = 100  # Modify for each experiment
hidden_units = 200  # Modify for each experiment
sequence_length = 20  # Modify for each experiment
batch_size = 32
epochs = 30
learning_rate = 0.001  # Modify for each experiment
dropout_rate = 0.3  # Modify for each experiment

# Prepare Data with tf.data
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Build and Compile Model: embedding -> single GRU -> softmax over all item ids.
model = Sequential([
    Embedding(input_dim=len(unique_items), output_dim=embedding_dim, input_length=sequence_length),
    GRU(units=hidden_units, return_sequences=False, dropout=dropout_rate),
    Dense(units=len(unique_items), activation='softmax')
])
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.build(input_shape=(None, sequence_length))
model.summary()

# Training with Step and Countdown Display
# Ceiling division: .batch() also yields the final, smaller batch, so floor
# division undercounted by one step (progress read "106/105", the ETA went
# negative and the epoch averages were divided by the wrong step count).
steps_per_epoch = max(1, -(-len(X_train) // batch_size))

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    start_time = time.time()
    training_loss, training_accuracy = 0, 0

    for step, (x_batch, y_batch) in enumerate(train_dataset, start=1):
        # metrics[0] is the loss and metrics[1] the accuracy, as compiled above.
        metrics = model.train_on_batch(x_batch, y_batch)
        training_loss += metrics[0]
        training_accuracy += metrics[1]

        # Estimate remaining time from the mean duration of the steps so far.
        elapsed_time = time.time() - start_time
        steps_remaining = steps_per_epoch - step
        time_per_step = elapsed_time / max(1, step)
        estimated_time_remaining = steps_remaining * time_per_step

        # Dynamic updates: "\r" rewrites the same console line on every step.
        print(
            f"\r{step}/{steps_per_epoch} ━━━━━━━━━━━━━ {int(estimated_time_remaining)}s remaining - "
            f"accuracy: {training_accuracy / step:.4f} - loss: {training_loss / step:.4f}", end=""
        )

    epoch_time = time.time() - start_time
    print(
        f"\n{steps_per_epoch}/{steps_per_epoch} ━━━━━━━━━━━━━ "
        f"{int(epoch_time)}s total - accuracy: {training_accuracy / steps_per_epoch:.4f} - "
        f"loss: {training_loss / steps_per_epoch:.4f}"
    )

# Evaluate the Model
def evaluate_model(model, test_data, k=10):
    """Compute top-k ranking metrics for a next-item model.

    Args:
        model: object whose ``predict(x, verbose=0)`` returns one row of scores
            per sample.
        test_data: iterable of ``(x, y_true)`` batches, ``y_true`` holding one
            target index per sample.
        k: size of the recommendation list.

    Returns:
        ``(precision, recall, hit_rate, mrr)`` averaged over all samples, or
        four zeros when ``test_data`` contains no samples.
    """
    total_precision, total_recall, total_hits, total_mrr = 0, 0, 0, 0
    total_users = 0

    for x, y_true in test_data:
        y_pred = model.predict(x, verbose=0)
        # argsort sorts ascending, so the last k columns are the top k with the
        # best item LAST. Reverse them so column 0 is rank 1; without this the
        # reciprocal rank rewarded the weakest of the top-k predictions.
        top_k_indices = np.argsort(y_pred, axis=1)[:, -k:][:, ::-1]

        for idx, target in enumerate(y_true):
            predictions = top_k_indices[idx]
            target = int(target)
            positions = np.where(predictions == target)[0]
            hit = int(positions.size > 0)
            if hit:
                total_hits += 1
                total_mrr += 1 / (positions[0] + 1)

            # Exactly one relevant item per sample: precision is hit/k, recall is hit/1.
            total_precision += hit / k
            total_recall += hit / 1

        total_users += len(y_true)

    if total_users == 0:
        # No samples: avoid dividing by zero.
        return 0.0, 0.0, 0.0, 0.0

    precision = total_precision / total_users
    recall = total_recall / total_users
    hit_rate = total_hits / total_users
    mrr = total_mrr / total_users

    return precision, recall, hit_rate, mrr

# Score the trained model on the test batches with a top-10 cutoff and print
# the four metrics with four decimals each.
results = evaluate_model(model, test_dataset, k=10)
print("\nFinal Results (Precision@10, Recall@10, Hit Rate, MRR):")
print("(" + ", ".join(f"{results[i]:.4f}" for i in range(4)) + ")")

# Save Results to CSV
def save_results(results, base_path, file_name):
    """Write the evaluation metrics as a one-row CSV at base_path/file_name.

    ``results`` is read by position: 0 -> Precision@10, 1 -> Recall@10,
    2 -> Hit Rate, 3 -> MRR. The CSV has a header row and no index column.
    """
    column_names = ("Precision@10", "Recall@10", "Hit Rate", "MRR")
    row = {column: results[position] for position, column in enumerate(column_names)}
    destination = os.path.join(base_path, file_name)
    pd.DataFrame([row]).to_csv(destination, index=False)

# Destination folder and file name for the Experiment 1 metrics.
base_path = r"C:\Users\user\Desktop\CW4\online+retail"
results_file = "evaluation_results_experiment1.csv"
save_results(results, base_path, results_file)
print(f"Results saved to {os.path.join(base_path, results_file)}")



Epoch 1/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2408 - loss: 7.3538
105/105 ━━━━━━━━━━━━━ 21s total - accuracy: 0.2431 - loss: 7.4239

Epoch 2/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1668 - loss: 6.3357
105/105 ━━━━━━━━━━━━━ 21s total - accuracy: 0.1684 - loss: 6.3961

Epoch 3/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1722 - loss: 5.8715
105/105 ━━━━━━━━━━━━━ 20s total - accuracy: 0.1739 - loss: 5.9274

Epoch 4/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1893 - loss: 5.5651
105/105 ━━━━━━━━━━━━━ 22s total - accuracy: 0.1911 - loss: 5.6181

Epoch 5/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2021 - loss: 5.3197
105/105 ━━━━━━━━━━━━━ 22s total - accuracy: 0.2041 - loss: 5.3703

Epoch 6/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2148 - loss: 5.0968
105/105 ━━━━━━━━━━━━━ 22s total - accuracy: 0.2168 - loss: 5.1453

Epoch 7/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2306 - loss: 4.8810
105/105 ━━━━━━━━━━━━━ 24s total - acc

In [None]:
import tensorflow as tf
# CPU threading configuration: 8 threads for running independent ops side by
# side, 16 threads for parallelism inside a single op. The two setters are
# independent of each other.
tf.config.threading.set_inter_op_parallelism_threads(8)
tf.config.threading.set_intra_op_parallelism_threads(16)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import time
import os
import pandas as pd


# Hyperparameters (Update for Each Experiment)
embedding_dim = 300  # Adjust embedding dimension
hidden_units = 500  # Adjust GRU units
sequence_length = 20 # Sequence length
batch_size = 32      # Adjust batch size
epochs = 30          # Increase number of epochs
learning_rate = 0.0002  # Learning rate

# Load Preprocessed Data
X_train = np.load("X_train.npy")
X_test = np.load("X_test.npy")
y_train = np.load("y_train.npy")
y_test = np.load("y_test.npy")
unique_items = np.load("unique_items.npy")

# Prepare Data with tf.data
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Build and Compile Model: embedding -> single GRU -> softmax over all item ids.
model = Sequential([
    Embedding(input_dim=len(unique_items), output_dim=embedding_dim, input_length=sequence_length),
    GRU(units=hidden_units, return_sequences=False, dropout=0.2),
    Dense(units=len(unique_items), activation='softmax')
])
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.build(input_shape=(None, sequence_length))
model.summary()

# Training with Step and Countdown Display
# Ceiling division: .batch() also yields the final, smaller batch, so floor
# division undercounted by one step (progress read "106/105", the ETA went
# negative and the epoch averages were divided by the wrong step count).
steps_per_epoch = max(1, -(-len(X_train) // batch_size))

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    start_time = time.time()
    training_loss, training_accuracy = 0, 0

    for step, (x_batch, y_batch) in enumerate(train_dataset, start=1):
        # metrics[0] is the loss and metrics[1] the accuracy, as compiled above.
        metrics = model.train_on_batch(x_batch, y_batch)
        training_loss += metrics[0]
        training_accuracy += metrics[1]

        # Estimate remaining time from the mean duration of the steps so far.
        elapsed_time = time.time() - start_time
        steps_remaining = steps_per_epoch - step
        time_per_step = elapsed_time / max(1, step)
        estimated_time_remaining = steps_remaining * time_per_step

        # Dynamic updates: "\r" rewrites the same console line on every step.
        print(
            f"\r{step}/{steps_per_epoch} ━━━━━━━━━━━━━ {int(estimated_time_remaining)}s remaining - "
            f"accuracy: {training_accuracy / step:.4f} - loss: {training_loss / step:.4f}", end=""
        )

    epoch_time = time.time() - start_time
    print(
        f"\n{steps_per_epoch}/{steps_per_epoch} ━━━━━━━━━━━━━ "
        f"{int(epoch_time)}s total - accuracy: {training_accuracy / steps_per_epoch:.4f} - "
        f"loss: {training_loss / steps_per_epoch:.4f}"
    )

# Evaluate the Model
def evaluate_model(model, test_data, k=10):
    """Compute top-k ranking metrics for a next-item model.

    Args:
        model: object whose ``predict(x, verbose=0)`` returns one row of scores
            per sample.
        test_data: iterable of ``(x, y_true)`` batches, ``y_true`` holding one
            target index per sample.
        k: size of the recommendation list.

    Returns:
        ``(precision, recall, hit_rate, mrr)`` averaged over all samples, or
        four zeros when ``test_data`` contains no samples.
    """
    total_precision, total_recall, total_hits, total_mrr = 0, 0, 0, 0
    total_users = 0

    for x, y_true in test_data:
        y_pred = model.predict(x, verbose=0)
        # argsort sorts ascending, so the last k columns are the top k with the
        # best item LAST. Reverse them so column 0 is rank 1; without this the
        # reciprocal rank rewarded the weakest of the top-k predictions.
        top_k_indices = np.argsort(y_pred, axis=1)[:, -k:][:, ::-1]

        for idx, target in enumerate(y_true):
            predictions = top_k_indices[idx]
            target = int(target)
            positions = np.where(predictions == target)[0]
            hit = int(positions.size > 0)
            if hit:
                total_hits += 1
                total_mrr += 1 / (positions[0] + 1)

            # Exactly one relevant item per sample: precision is hit/k, recall is hit/1.
            total_precision += hit / k
            total_recall += hit / 1

        total_users += len(y_true)

    if total_users == 0:
        # No samples: avoid dividing by zero.
        return 0.0, 0.0, 0.0, 0.0

    precision = total_precision / total_users
    recall = total_recall / total_users
    hit_rate = total_hits / total_users
    mrr = total_mrr / total_users

    return precision, recall, hit_rate, mrr

# Score the trained model on the test batches with a top-10 cutoff and print
# the four metrics with four decimals each.
results = evaluate_model(model, test_dataset, k=10)
print("\nFinal Results (Precision@10, Recall@10, Hit Rate, MRR):")
print("(" + ", ".join(f"{results[i]:.4f}" for i in range(4)) + ")")

# Save Results to CSV
def save_results(results, base_path, file_name):
    """Write the evaluation metrics as a one-row CSV at base_path/file_name.

    ``results`` is read by position: 0 -> Precision@10, 1 -> Recall@10,
    2 -> Hit Rate, 3 -> MRR. The CSV has a header row and no index column.
    """
    column_names = ("Precision@10", "Recall@10", "Hit Rate", "MRR")
    row = {column: results[position] for position, column in enumerate(column_names)}
    destination = os.path.join(base_path, file_name)
    pd.DataFrame([row]).to_csv(destination, index=False)

# Destination folder; the file name encodes the embedding size, GRU width and
# epoch count of this run, and is built once and reused for saving and logging.
base_path = r"C:\Users\user\Desktop\CW4\online+retail"
results_file = f"evaluation_results_experiment_{embedding_dim}_{hidden_units}_{epochs}.csv"
save_results(results, base_path, results_file)
print(f"Results saved to {os.path.join(base_path, results_file)}")





Epoch 1/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2120 - loss: 7.7289
105/105 ━━━━━━━━━━━━━ 22s total - accuracy: 0.2140 - loss: 7.8025

Epoch 2/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1615 - loss: 6.7314
105/105 ━━━━━━━━━━━━━ 22s total - accuracy: 0.1630 - loss: 6.7956

Epoch 3/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1827 - loss: 6.2540
105/105 ━━━━━━━━━━━━━ 22s total - accuracy: 0.1845 - loss: 6.3136

Epoch 4/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1962 - loss: 5.9750
105/105 ━━━━━━━━━━━━━ 23s total - accuracy: 0.1981 - loss: 6.0319

Epoch 5/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2054 - loss: 5.7815
105/105 ━━━━━━━━━━━━━ 23s total - accuracy: 0.2074 - loss: 5.8365

Epoch 6/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2115 - loss: 5.6287
105/105 ━━━━━━━━━━━━━ 24s total - accuracy: 0.2135 - loss: 5.6823

Epoch 7/30
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2182 - loss: 5.4942
105/105 ━━━━━━━━━━━━━ 25s total - acc

In [None]:
import tensorflow as tf
# CPU threading configuration: 8 threads for running independent ops side by
# side, 16 threads for parallelism inside a single op. The two setters are
# independent of each other.
tf.config.threading.set_inter_op_parallelism_threads(8)
tf.config.threading.set_intra_op_parallelism_threads(16)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import time
import os
import pandas as pd

# Hyperparameters (Update for Each Experiment)
embedding_dim = 250      # output size of the Embedding layer
hidden_units = 350       # number of GRU units
sequence_length = 20     # time steps per input sequence
batch_size = 32          # examples per batch
epochs = 35              # passes over the training dataset
learning_rate = 0.0002   # Adam learning rate

# Load Preprocessed Data (relative paths: resolved against the working directory)
X_train, X_test, y_train, y_test, unique_items = map(
    np.load,
    ("X_train.npy", "X_test.npy", "y_train.npy", "y_test.npy", "unique_items.npy"),
)


# Prepare Data with tf.data
def _to_batches(features, labels):
    """Pair features with labels, then batch and prefetch them."""
    pairs = tf.data.Dataset.from_tensor_slices((features, labels))
    return pairs.batch(batch_size).prefetch(tf.data.AUTOTUNE)


# NOTE(review): the training batches are not shuffled - confirm this is intended.
train_dataset = _to_batches(X_train, y_train)
test_dataset = _to_batches(X_test, y_test)

# Build and Compile Model: Embedding -> GRU -> softmax over every item index
num_items = len(unique_items)
model = Sequential()
model.add(Embedding(input_dim=num_items, output_dim=embedding_dim, input_length=sequence_length))
model.add(GRU(units=hidden_units, return_sequences=False, dropout=0.2))
model.add(Dense(units=num_items, activation='softmax'))
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.build(input_shape=(None, sequence_length))
model.summary()

# Training with Step and Countdown Display
# train_dataset keeps the final partial batch, so an epoch has
# ceil(len(X_train) / batch_size) steps. Floor division under-counted by one,
# which produced "106/105" progress lines, negative time estimates and epoch
# averages divided by too few steps.
steps_per_epoch = max(1, -(-len(X_train) // batch_size))

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    start_time = time.time()
    training_loss, training_accuracy = 0, 0
    steps_done = 0  # batches actually processed in this epoch

    for step, (x_batch, y_batch) in enumerate(train_dataset, start=1):
        # metrics[0] is the loss, metrics[1] the accuracy (see model.compile).
        # NOTE(review): some Keras versions return running-average metrics from
        # train_on_batch instead of per-batch values - confirm for the installed version.
        metrics = model.train_on_batch(x_batch, y_batch)
        training_loss += metrics[0]
        training_accuracy += metrics[1]
        steps_done = step

        # Estimate remaining time from the mean step duration so far
        elapsed_time = time.time() - start_time
        steps_remaining = max(0, steps_per_epoch - step)
        time_per_step = elapsed_time / step
        estimated_time_remaining = steps_remaining * time_per_step

        # Dynamic updates ("\r" rewrites the same console line)
        print(
            f"\r{step}/{steps_per_epoch} ━━━━━━━━━━━━━ {int(estimated_time_remaining)}s remaining - "
            f"accuracy: {training_accuracy / step:.4f} - loss: {training_loss / step:.4f}", end=""
        )

    epoch_time = time.time() - start_time
    # Average over the steps that really ran so the summary matches the last progress line.
    averaged_over = max(1, steps_done)
    print(
        f"\n{steps_per_epoch}/{steps_per_epoch} ━━━━━━━━━━━━━ "
        f"{int(epoch_time)}s total - accuracy: {training_accuracy / averaged_over:.4f} - "
        f"loss: {training_loss / averaged_over:.4f}"
    )

# Evaluate the Model
def evaluate_model(model, test_data, k=10):
    """Score next-item predictions with top-k ranking metrics.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that returns one row
            of item scores per input row.
        test_data: Iterable of ``(x, y_true)`` batches, where ``y_true`` holds
            one target item index per row (tensor or array).
        k: Number of highest-scoring items kept per row; must be >= 1.

    Returns:
        Tuple ``(precision, recall, hit_rate, mrr)`` averaged over all rows.
        Because every row has exactly one target, recall equals the hit rate
        and precision is the hit rate divided by ``k``. All four values are
        0.0 when ``test_data`` yields no rows.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    total_hits, total_mrr, total_users = 0, 0.0, 0

    for x, y_true in test_data:
        y_pred = model.predict(x, verbose=0)
        # argsort sorts ascending, so the k best items are the last k columns;
        # reverse them so column 0 is the top prediction and rank 1 means "best".
        top_k_indices = np.argsort(y_pred, axis=1)[:, -k:][:, ::-1]
        targets = np.asarray(y_true).reshape(-1)

        for predictions, target in zip(top_k_indices, targets):
            positions = np.flatnonzero(predictions == target)
            if positions.size:
                total_hits += 1
                total_mrr += 1.0 / (int(positions[0]) + 1)

        total_users += len(targets)

    if total_users == 0:
        # Nothing to evaluate: avoid dividing by zero.
        return 0.0, 0.0, 0.0, 0.0

    hit_rate = total_hits / total_users
    precision = hit_rate / k
    recall = hit_rate
    mrr = total_mrr / total_users

    return precision, recall, hit_rate, mrr

# Evaluate on the held-out batches and report the four metrics to 4 decimals
results = evaluate_model(model, test_dataset, k=10)
print("\nFinal Results (Precision@10, Recall@10, Hit Rate, MRR):")
summary_line = "({:.4f}, {:.4f}, {:.4f}, {:.4f})".format(
    results[0], results[1], results[2], results[3]
)
print(summary_line)

# Save Results to CSV
def save_results(results, base_path, file_name):
    """Write one evaluation run to a single-row CSV file.

    Args:
        results: Indexable whose entries 0-3 are, in order, precision,
            recall, hit rate and MRR.
        base_path: Directory that receives the file. It is created when it
            does not exist yet.
        file_name: Name of the CSV file inside ``base_path``.
    """
    result_df = pd.DataFrame([{
        "Precision@10": results[0],
        "Recall@10": results[1],
        "Hit Rate": results[2],
        "MRR": results[3],
    }])
    # DataFrame.to_csv does not create missing parent folders, so make sure
    # the target directory exists first (an empty base_path means "cwd").
    if base_path:
        os.makedirs(base_path, exist_ok=True)
    file_path = os.path.join(base_path, file_name)
    result_df.to_csv(file_path, index=False)

# NOTE(review): absolute, machine-specific output folder; consider making it configurable.
base_path = r"C:\Users\user\Desktop\CW4\online+retail"
# Build the experiment-specific file name once and reuse it for saving and reporting.
results_file_name = f"evaluation_results_experiment_{embedding_dim}_{hidden_units}_{epochs}.csv"
save_results(results, base_path, results_file_name)
print(f"Results saved to {os.path.join(base_path, results_file_name)}")



Epoch 1/35
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2014 - loss: 7.8500
105/105 ━━━━━━━━━━━━━ 37s total - accuracy: 0.2033 - loss: 7.9247

Epoch 2/35
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1650 - loss: 6.8745
105/105 ━━━━━━━━━━━━━ 36s total - accuracy: 0.1665 - loss: 6.9400

Epoch 3/35
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1850 - loss: 6.3547
105/105 ━━━━━━━━━━━━━ 38s total - accuracy: 0.1867 - loss: 6.4153

Epoch 4/35
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1960 - loss: 6.0716
105/105 ━━━━━━━━━━━━━ 38s total - accuracy: 0.1979 - loss: 6.1295

Epoch 5/35
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2042 - loss: 5.8732
105/105 ━━━━━━━━━━━━━ 38s total - accuracy: 0.2062 - loss: 5.9291

Epoch 6/35
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2103 - loss: 5.7187
105/105 ━━━━━━━━━━━━━ 38s total - accuracy: 0.2124 - loss: 5.7732

Epoch 7/35
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2147 - loss: 5.5883
105/105 ━━━━━━━━━━━━━ 39s total - acc

In [None]:
import tensorflow as tf
# CPU Optimization Settings
tf.config.threading.set_intra_op_parallelism_threads(16)
tf.config.threading.set_inter_op_parallelism_threads(8)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import time
import os
import pandas as pd


# Hyperparameters (Update for Each Experiment)
embedding_dim = 250      # output size of the Embedding layer
hidden_units = 400       # number of GRU units
sequence_length = 20     # time steps per input sequence
batch_size = 32          # examples per batch
epochs = 40              # passes over the training dataset
learning_rate = 0.0003   # Adam learning rate

# Load Preprocessed Data (relative paths: resolved against the working directory)
X_train, X_test, y_train, y_test, unique_items = map(
    np.load,
    ("X_train.npy", "X_test.npy", "y_train.npy", "y_test.npy", "unique_items.npy"),
)


# Prepare Data with tf.data
def _to_batches(features, labels):
    """Pair features with labels, then batch and prefetch them."""
    pairs = tf.data.Dataset.from_tensor_slices((features, labels))
    return pairs.batch(batch_size).prefetch(tf.data.AUTOTUNE)


# NOTE(review): the training batches are not shuffled - confirm this is intended.
train_dataset = _to_batches(X_train, y_train)
test_dataset = _to_batches(X_test, y_test)

# Build and Compile Model: Embedding -> GRU -> softmax over every item index
num_items = len(unique_items)
model = Sequential()
model.add(Embedding(input_dim=num_items, output_dim=embedding_dim, input_length=sequence_length))
model.add(GRU(units=hidden_units, return_sequences=False, dropout=0.2))
model.add(Dense(units=num_items, activation='softmax'))
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.build(input_shape=(None, sequence_length))
model.summary()

# Training with Step and Countdown Display
# train_dataset keeps the final partial batch, so an epoch has
# ceil(len(X_train) / batch_size) steps. Floor division under-counted by one,
# which produced "106/105" progress lines, negative time estimates and epoch
# averages divided by too few steps.
steps_per_epoch = max(1, -(-len(X_train) // batch_size))

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    start_time = time.time()
    training_loss, training_accuracy = 0, 0
    steps_done = 0  # batches actually processed in this epoch

    for step, (x_batch, y_batch) in enumerate(train_dataset, start=1):
        # metrics[0] is the loss, metrics[1] the accuracy (see model.compile).
        # NOTE(review): some Keras versions return running-average metrics from
        # train_on_batch instead of per-batch values - confirm for the installed version.
        metrics = model.train_on_batch(x_batch, y_batch)
        training_loss += metrics[0]
        training_accuracy += metrics[1]
        steps_done = step

        # Estimate remaining time from the mean step duration so far
        elapsed_time = time.time() - start_time
        steps_remaining = max(0, steps_per_epoch - step)
        time_per_step = elapsed_time / step
        estimated_time_remaining = steps_remaining * time_per_step

        # Dynamic updates ("\r" rewrites the same console line)
        print(
            f"\r{step}/{steps_per_epoch} ━━━━━━━━━━━━━ {int(estimated_time_remaining)}s remaining - "
            f"accuracy: {training_accuracy / step:.4f} - loss: {training_loss / step:.4f}", end=""
        )

    epoch_time = time.time() - start_time
    # Average over the steps that really ran so the summary matches the last progress line.
    averaged_over = max(1, steps_done)
    print(
        f"\n{steps_per_epoch}/{steps_per_epoch} ━━━━━━━━━━━━━ "
        f"{int(epoch_time)}s total - accuracy: {training_accuracy / averaged_over:.4f} - "
        f"loss: {training_loss / averaged_over:.4f}"
    )

# Evaluate the Model
def evaluate_model(model, test_data, k=10):
    """Score next-item predictions with top-k ranking metrics.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that returns one row
            of item scores per input row.
        test_data: Iterable of ``(x, y_true)`` batches, where ``y_true`` holds
            one target item index per row (tensor or array).
        k: Number of highest-scoring items kept per row; must be >= 1.

    Returns:
        Tuple ``(precision, recall, hit_rate, mrr)`` averaged over all rows.
        Because every row has exactly one target, recall equals the hit rate
        and precision is the hit rate divided by ``k``. All four values are
        0.0 when ``test_data`` yields no rows.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    total_hits, total_mrr, total_users = 0, 0.0, 0

    for x, y_true in test_data:
        y_pred = model.predict(x, verbose=0)
        # argsort sorts ascending, so the k best items are the last k columns;
        # reverse them so column 0 is the top prediction and rank 1 means "best".
        top_k_indices = np.argsort(y_pred, axis=1)[:, -k:][:, ::-1]
        targets = np.asarray(y_true).reshape(-1)

        for predictions, target in zip(top_k_indices, targets):
            positions = np.flatnonzero(predictions == target)
            if positions.size:
                total_hits += 1
                total_mrr += 1.0 / (int(positions[0]) + 1)

        total_users += len(targets)

    if total_users == 0:
        # Nothing to evaluate: avoid dividing by zero.
        return 0.0, 0.0, 0.0, 0.0

    hit_rate = total_hits / total_users
    precision = hit_rate / k
    recall = hit_rate
    mrr = total_mrr / total_users

    return precision, recall, hit_rate, mrr

# Evaluate on the held-out batches and report the four metrics to 4 decimals
results = evaluate_model(model, test_dataset, k=10)
print("\nFinal Results (Precision@10, Recall@10, Hit Rate, MRR):")
summary_line = "({:.4f}, {:.4f}, {:.4f}, {:.4f})".format(
    results[0], results[1], results[2], results[3]
)
print(summary_line)

# Save Results to CSV
def save_results(results, base_path, file_name):
    """Write one evaluation run to a single-row CSV file.

    Args:
        results: Indexable whose entries 0-3 are, in order, precision,
            recall, hit rate and MRR.
        base_path: Directory that receives the file. It is created when it
            does not exist yet.
        file_name: Name of the CSV file inside ``base_path``.
    """
    result_df = pd.DataFrame([{
        "Precision@10": results[0],
        "Recall@10": results[1],
        "Hit Rate": results[2],
        "MRR": results[3],
    }])
    # DataFrame.to_csv does not create missing parent folders, so make sure
    # the target directory exists first (an empty base_path means "cwd").
    if base_path:
        os.makedirs(base_path, exist_ok=True)
    file_path = os.path.join(base_path, file_name)
    result_df.to_csv(file_path, index=False)

# NOTE(review): absolute, machine-specific output folder; consider making it configurable.
base_path = r"C:\Users\user\Desktop\CW4\online+retail"
# Build the experiment-specific file name once and reuse it for saving and reporting.
results_file_name = f"evaluation_results_experiment_{embedding_dim}_{hidden_units}_{epochs}.csv"
save_results(results, base_path, results_file_name)
print(f"Results saved to {os.path.join(base_path, results_file_name)}")



Epoch 1/40
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2113 - loss: 7.5912
105/105 ━━━━━━━━━━━━━ 57s total - accuracy: 0.2133 - loss: 7.6635

Epoch 2/40
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1582 - loss: 6.5884
105/105 ━━━━━━━━━━━━━ 64s total - accuracy: 0.1597 - loss: 6.6511

Epoch 3/40
106/105 ━━━━━━━━━━━━━ -3s remaining - accuracy: 0.1810 - loss: 6.1191
105/105 ━━━━━━━━━━━━━ 396s total - accuracy: 0.1827 - loss: 6.1774

Epoch 4/40
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1949 - loss: 5.8362
105/105 ━━━━━━━━━━━━━ 46s total - accuracy: 0.1968 - loss: 5.8918

Epoch 5/40
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2046 - loss: 5.6328
105/105 ━━━━━━━━━━━━━ 42s total - accuracy: 0.2065 - loss: 5.6864

Epoch 6/40
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2127 - loss: 5.4645
105/105 ━━━━━━━━━━━━━ 46s total - accuracy: 0.2147 - loss: 5.5166

Epoch 7/40
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2213 - loss: 5.3102
105/105 ━━━━━━━━━━━━━ 47s total - a

In [None]:
import tensorflow as tf
# CPU Optimization Settings
tf.config.threading.set_intra_op_parallelism_threads(16)
tf.config.threading.set_inter_op_parallelism_threads(8)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import time
import os
import pandas as pd


# Hyperparameters (Update for Each Experiment)
embedding_dim = 150      # output size of the Embedding layer
hidden_units = 300       # number of GRU units
sequence_length = 20     # time steps per input sequence
batch_size = 32          # examples per batch
epochs = 50              # passes over the training dataset
learning_rate = 0.0005   # Adam learning rate

# Load Preprocessed Data (relative paths: resolved against the working directory)
X_train, X_test, y_train, y_test, unique_items = map(
    np.load,
    ("X_train.npy", "X_test.npy", "y_train.npy", "y_test.npy", "unique_items.npy"),
)


# Prepare Data with tf.data
def _to_batches(features, labels):
    """Pair features with labels, then batch and prefetch them."""
    pairs = tf.data.Dataset.from_tensor_slices((features, labels))
    return pairs.batch(batch_size).prefetch(tf.data.AUTOTUNE)


# NOTE(review): the training batches are not shuffled - confirm this is intended.
train_dataset = _to_batches(X_train, y_train)
test_dataset = _to_batches(X_test, y_test)

# Build and Compile Model: Embedding -> GRU -> softmax over every item index
num_items = len(unique_items)
model = Sequential()
model.add(Embedding(input_dim=num_items, output_dim=embedding_dim, input_length=sequence_length))
model.add(GRU(units=hidden_units, return_sequences=False, dropout=0.2))
model.add(Dense(units=num_items, activation='softmax'))
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.build(input_shape=(None, sequence_length))
model.summary()

# Training with Step and Countdown Display
# train_dataset keeps the final partial batch, so an epoch has
# ceil(len(X_train) / batch_size) steps. Floor division under-counted by one,
# which produced "106/105" progress lines, negative time estimates and epoch
# averages divided by too few steps.
steps_per_epoch = max(1, -(-len(X_train) // batch_size))

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    start_time = time.time()
    training_loss, training_accuracy = 0, 0
    steps_done = 0  # batches actually processed in this epoch

    for step, (x_batch, y_batch) in enumerate(train_dataset, start=1):
        # metrics[0] is the loss, metrics[1] the accuracy (see model.compile).
        # NOTE(review): some Keras versions return running-average metrics from
        # train_on_batch instead of per-batch values - confirm for the installed version.
        metrics = model.train_on_batch(x_batch, y_batch)
        training_loss += metrics[0]
        training_accuracy += metrics[1]
        steps_done = step

        # Estimate remaining time from the mean step duration so far
        elapsed_time = time.time() - start_time
        steps_remaining = max(0, steps_per_epoch - step)
        time_per_step = elapsed_time / step
        estimated_time_remaining = steps_remaining * time_per_step

        # Dynamic updates ("\r" rewrites the same console line)
        print(
            f"\r{step}/{steps_per_epoch} ━━━━━━━━━━━━━ {int(estimated_time_remaining)}s remaining - "
            f"accuracy: {training_accuracy / step:.4f} - loss: {training_loss / step:.4f}", end=""
        )

    epoch_time = time.time() - start_time
    # Average over the steps that really ran so the summary matches the last progress line.
    averaged_over = max(1, steps_done)
    print(
        f"\n{steps_per_epoch}/{steps_per_epoch} ━━━━━━━━━━━━━ "
        f"{int(epoch_time)}s total - accuracy: {training_accuracy / averaged_over:.4f} - "
        f"loss: {training_loss / averaged_over:.4f}"
    )

# Evaluate the Model
def evaluate_model(model, test_data, k=10):
    """Score next-item predictions with top-k ranking metrics.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that returns one row
            of item scores per input row.
        test_data: Iterable of ``(x, y_true)`` batches, where ``y_true`` holds
            one target item index per row (tensor or array).
        k: Number of highest-scoring items kept per row; must be >= 1.

    Returns:
        Tuple ``(precision, recall, hit_rate, mrr)`` averaged over all rows.
        Because every row has exactly one target, recall equals the hit rate
        and precision is the hit rate divided by ``k``. All four values are
        0.0 when ``test_data`` yields no rows.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    total_hits, total_mrr, total_users = 0, 0.0, 0

    for x, y_true in test_data:
        y_pred = model.predict(x, verbose=0)
        # argsort sorts ascending, so the k best items are the last k columns;
        # reverse them so column 0 is the top prediction and rank 1 means "best".
        top_k_indices = np.argsort(y_pred, axis=1)[:, -k:][:, ::-1]
        targets = np.asarray(y_true).reshape(-1)

        for predictions, target in zip(top_k_indices, targets):
            positions = np.flatnonzero(predictions == target)
            if positions.size:
                total_hits += 1
                total_mrr += 1.0 / (int(positions[0]) + 1)

        total_users += len(targets)

    if total_users == 0:
        # Nothing to evaluate: avoid dividing by zero.
        return 0.0, 0.0, 0.0, 0.0

    hit_rate = total_hits / total_users
    precision = hit_rate / k
    recall = hit_rate
    mrr = total_mrr / total_users

    return precision, recall, hit_rate, mrr

# Evaluate on the held-out batches and report the four metrics to 4 decimals
results = evaluate_model(model, test_dataset, k=10)
print("\nFinal Results (Precision@10, Recall@10, Hit Rate, MRR):")
summary_line = "({:.4f}, {:.4f}, {:.4f}, {:.4f})".format(
    results[0], results[1], results[2], results[3]
)
print(summary_line)

# Save Results to CSV
def save_results(results, base_path, file_name):
    """Write one evaluation run to a single-row CSV file.

    Args:
        results: Indexable whose entries 0-3 are, in order, precision,
            recall, hit rate and MRR.
        base_path: Directory that receives the file. It is created when it
            does not exist yet.
        file_name: Name of the CSV file inside ``base_path``.
    """
    result_df = pd.DataFrame([{
        "Precision@10": results[0],
        "Recall@10": results[1],
        "Hit Rate": results[2],
        "MRR": results[3],
    }])
    # DataFrame.to_csv does not create missing parent folders, so make sure
    # the target directory exists first (an empty base_path means "cwd").
    if base_path:
        os.makedirs(base_path, exist_ok=True)
    file_path = os.path.join(base_path, file_name)
    result_df.to_csv(file_path, index=False)

# NOTE(review): absolute, machine-specific output folder; consider making it configurable.
base_path = r"C:\Users\user\Desktop\CW4\online+retail"
# Build the experiment-specific file name once and reuse it for saving and reporting.
results_file_name = f"evaluation_results_experiment_{embedding_dim}_{hidden_units}_{epochs}.csv"
save_results(results, base_path, results_file_name)
print(f"Results saved to {os.path.join(base_path, results_file_name)}")



Epoch 1/50
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2233 - loss: 7.5001
105/105 ━━━━━━━━━━━━━ 54s total - accuracy: 0.2255 - loss: 7.5715

Epoch 2/50
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1617 - loss: 6.4951
105/105 ━━━━━━━━━━━━━ 51s total - accuracy: 0.1632 - loss: 6.5570

Epoch 3/50
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1733 - loss: 6.0162
105/105 ━━━━━━━━━━━━━ 50s total - accuracy: 0.1750 - loss: 6.0735

Epoch 4/50
106/105 ━━━━━━━━━━━━━ -5s remaining - accuracy: 0.1888 - loss: 5.7242
105/105 ━━━━━━━━━━━━━ 534s total - accuracy: 0.1906 - loss: 5.7787

Epoch 5/50
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1999 - loss: 5.5027
105/105 ━━━━━━━━━━━━━ 26s total - accuracy: 0.2018 - loss: 5.5551

Epoch 6/50
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2095 - loss: 5.3053
105/105 ━━━━━━━━━━━━━ 26s total - accuracy: 0.2115 - loss: 5.3559

Epoch 7/50
106/105 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.2203 - loss: 5.1145
105/105 ━━━━━━━━━━━━━ 30s total - a

In [None]:
import tensorflow as tf
# CPU Optimization Settings
tf.config.threading.set_intra_op_parallelism_threads(16)
tf.config.threading.set_inter_op_parallelism_threads(8)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import time
import os
import pandas as pd


# Hyperparameters (Update for Each Experiment)
embedding_dim = 250       # output size of the Embedding layer
hidden_units = 450        # number of GRU units
sequence_length = 20      # time steps per input sequence
batch_size = 64           # examples per batch
epochs = 50               # passes over the training dataset
learning_rate = 0.00025   # Adam learning rate

# Load Preprocessed Data (relative paths: resolved against the working directory)
X_train, X_test, y_train, y_test, unique_items = map(
    np.load,
    ("X_train.npy", "X_test.npy", "y_train.npy", "y_test.npy", "unique_items.npy"),
)


# Prepare Data with tf.data
def _to_batches(features, labels):
    """Pair features with labels, then batch and prefetch them."""
    pairs = tf.data.Dataset.from_tensor_slices((features, labels))
    return pairs.batch(batch_size).prefetch(tf.data.AUTOTUNE)


# NOTE(review): the training batches are not shuffled - confirm this is intended.
train_dataset = _to_batches(X_train, y_train)
test_dataset = _to_batches(X_test, y_test)

# Build and Compile Model: Embedding -> GRU -> softmax over every item index
num_items = len(unique_items)
model = Sequential()
model.add(Embedding(input_dim=num_items, output_dim=embedding_dim, input_length=sequence_length))
model.add(GRU(units=hidden_units, return_sequences=False, dropout=0.2))
model.add(Dense(units=num_items, activation='softmax'))
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.build(input_shape=(None, sequence_length))
model.summary()

# Training with Step and Countdown Display
# train_dataset keeps the final partial batch, so an epoch has
# ceil(len(X_train) / batch_size) steps. Floor division under-counted by one,
# which produced "53/52" progress lines, negative time estimates and epoch
# averages divided by too few steps.
steps_per_epoch = max(1, -(-len(X_train) // batch_size))

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    start_time = time.time()
    training_loss, training_accuracy = 0, 0
    steps_done = 0  # batches actually processed in this epoch

    for step, (x_batch, y_batch) in enumerate(train_dataset, start=1):
        # metrics[0] is the loss, metrics[1] the accuracy (see model.compile).
        # NOTE(review): some Keras versions return running-average metrics from
        # train_on_batch instead of per-batch values - confirm for the installed version.
        metrics = model.train_on_batch(x_batch, y_batch)
        training_loss += metrics[0]
        training_accuracy += metrics[1]
        steps_done = step

        # Estimate remaining time from the mean step duration so far
        elapsed_time = time.time() - start_time
        steps_remaining = max(0, steps_per_epoch - step)
        time_per_step = elapsed_time / step
        estimated_time_remaining = steps_remaining * time_per_step

        # Dynamic updates ("\r" rewrites the same console line)
        print(
            f"\r{step}/{steps_per_epoch} ━━━━━━━━━━━━━ {int(estimated_time_remaining)}s remaining - "
            f"accuracy: {training_accuracy / step:.4f} - loss: {training_loss / step:.4f}", end=""
        )

    epoch_time = time.time() - start_time
    # Average over the steps that really ran so the summary matches the last progress line.
    averaged_over = max(1, steps_done)
    print(
        f"\n{steps_per_epoch}/{steps_per_epoch} ━━━━━━━━━━━━━ "
        f"{int(epoch_time)}s total - accuracy: {training_accuracy / averaged_over:.4f} - "
        f"loss: {training_loss / averaged_over:.4f}"
    )

# Evaluate the Model
def evaluate_model(model, test_data, k=10):
    """Score next-item predictions with top-k ranking metrics.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that returns one row
            of item scores per input row.
        test_data: Iterable of ``(x, y_true)`` batches, where ``y_true`` holds
            one target item index per row (tensor or array).
        k: Number of highest-scoring items kept per row; must be >= 1.

    Returns:
        Tuple ``(precision, recall, hit_rate, mrr)`` averaged over all rows.
        Because every row has exactly one target, recall equals the hit rate
        and precision is the hit rate divided by ``k``. All four values are
        0.0 when ``test_data`` yields no rows.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    total_hits, total_mrr, total_users = 0, 0.0, 0

    for x, y_true in test_data:
        y_pred = model.predict(x, verbose=0)
        # argsort sorts ascending, so the k best items are the last k columns;
        # reverse them so column 0 is the top prediction and rank 1 means "best".
        top_k_indices = np.argsort(y_pred, axis=1)[:, -k:][:, ::-1]
        targets = np.asarray(y_true).reshape(-1)

        for predictions, target in zip(top_k_indices, targets):
            positions = np.flatnonzero(predictions == target)
            if positions.size:
                total_hits += 1
                total_mrr += 1.0 / (int(positions[0]) + 1)

        total_users += len(targets)

    if total_users == 0:
        # Nothing to evaluate: avoid dividing by zero.
        return 0.0, 0.0, 0.0, 0.0

    hit_rate = total_hits / total_users
    precision = hit_rate / k
    recall = hit_rate
    mrr = total_mrr / total_users

    return precision, recall, hit_rate, mrr

# Evaluate on the held-out batches and report the four metrics to 4 decimals
results = evaluate_model(model, test_dataset, k=10)
print("\nFinal Results (Precision@10, Recall@10, Hit Rate, MRR):")
summary_line = "({:.4f}, {:.4f}, {:.4f}, {:.4f})".format(
    results[0], results[1], results[2], results[3]
)
print(summary_line)

# Save Results to CSV
def save_results(results, base_path, file_name):
    """Write one evaluation run to a single-row CSV file.

    Args:
        results: Indexable whose entries 0-3 are, in order, precision,
            recall, hit rate and MRR.
        base_path: Directory that receives the file. It is created when it
            does not exist yet.
        file_name: Name of the CSV file inside ``base_path``.
    """
    result_df = pd.DataFrame([{
        "Precision@10": results[0],
        "Recall@10": results[1],
        "Hit Rate": results[2],
        "MRR": results[3],
    }])
    # DataFrame.to_csv does not create missing parent folders, so make sure
    # the target directory exists first (an empty base_path means "cwd").
    if base_path:
        os.makedirs(base_path, exist_ok=True)
    file_path = os.path.join(base_path, file_name)
    result_df.to_csv(file_path, index=False)

# NOTE(review): absolute, machine-specific output folder; consider making it configurable.
base_path = r"C:\Users\user\Desktop\CW4\online+retail"
# Build the experiment-specific file name once and reuse it for saving and reporting.
results_file_name = f"evaluation_results_experiment_{embedding_dim}_{hidden_units}_{epochs}.csv"
save_results(results, base_path, results_file_name)
print(f"Results saved to {os.path.join(base_path, results_file_name)}")



Epoch 1/50
53/52 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1994 - loss: 7.9539
52/52 ━━━━━━━━━━━━━ 23s total - accuracy: 0.2032 - loss: 8.1069

Epoch 2/50
53/52 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1620 - loss: 7.0527
52/52 ━━━━━━━━━━━━━ 22s total - accuracy: 0.1651 - loss: 7.1883

Epoch 3/50
53/52 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1596 - loss: 6.5329
52/52 ━━━━━━━━━━━━━ 23s total - accuracy: 0.1627 - loss: 6.6585

Epoch 4/50
53/52 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1756 - loss: 6.2261
52/52 ━━━━━━━━━━━━━ 24s total - accuracy: 0.1790 - loss: 6.3459

Epoch 5/50
53/52 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1851 - loss: 6.0193
52/52 ━━━━━━━━━━━━━ 24s total - accuracy: 0.1887 - loss: 6.1351

Epoch 6/50
53/52 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1924 - loss: 5.8628
52/52 ━━━━━━━━━━━━━ 25s total - accuracy: 0.1961 - loss: 5.9756

Epoch 7/50
53/52 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1994 - loss: 5.7370
52/52 ━━━━━━━━━━━━━ 25s total - accuracy: 0.2032 - loss: 5.8474

In [None]:
import tensorflow as tf
# CPU Optimization Settings
tf.config.threading.set_intra_op_parallelism_threads(16)
tf.config.threading.set_inter_op_parallelism_threads(8)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import time
import os
import pandas as pd


# Hyperparameters (Update for Each Experiment)
embedding_dim = 250      # output size of the Embedding layer
hidden_units = 450       # number of GRU units
sequence_length = 20     # time steps per input sequence
batch_size = 64          # examples per batch
epochs = 50              # passes over the training dataset
learning_rate = 0.0001   # Adam learning rate

# Load Preprocessed Data (relative paths: resolved against the working directory)
X_train, X_test, y_train, y_test, unique_items = map(
    np.load,
    ("X_train.npy", "X_test.npy", "y_train.npy", "y_test.npy", "unique_items.npy"),
)


# Prepare Data with tf.data
def _to_batches(features, labels):
    """Pair features with labels, then batch and prefetch them."""
    pairs = tf.data.Dataset.from_tensor_slices((features, labels))
    return pairs.batch(batch_size).prefetch(tf.data.AUTOTUNE)


# NOTE(review): the training batches are not shuffled - confirm this is intended.
train_dataset = _to_batches(X_train, y_train)
test_dataset = _to_batches(X_test, y_test)

# Build and Compile Model: Embedding -> GRU -> softmax over every item index
num_items = len(unique_items)
model = Sequential()
model.add(Embedding(input_dim=num_items, output_dim=embedding_dim, input_length=sequence_length))
model.add(GRU(units=hidden_units, return_sequences=False, dropout=0.3))
model.add(Dense(units=num_items, activation='softmax'))
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.build(input_shape=(None, sequence_length))
model.summary()

# Training with Step and Countdown Display
# train_dataset keeps the final partial batch, so an epoch has
# ceil(len(X_train) / batch_size) steps. Floor division under-counted by one,
# which produced "53/52" progress lines, negative time estimates and epoch
# averages divided by too few steps.
steps_per_epoch = max(1, -(-len(X_train) // batch_size))

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    start_time = time.time()
    training_loss, training_accuracy = 0, 0
    steps_done = 0  # batches actually processed in this epoch

    for step, (x_batch, y_batch) in enumerate(train_dataset, start=1):
        # metrics[0] is the loss, metrics[1] the accuracy (see model.compile).
        # NOTE(review): some Keras versions return running-average metrics from
        # train_on_batch instead of per-batch values - confirm for the installed version.
        metrics = model.train_on_batch(x_batch, y_batch)
        training_loss += metrics[0]
        training_accuracy += metrics[1]
        steps_done = step

        # Estimate remaining time from the mean step duration so far
        elapsed_time = time.time() - start_time
        steps_remaining = max(0, steps_per_epoch - step)
        time_per_step = elapsed_time / step
        estimated_time_remaining = steps_remaining * time_per_step

        # Dynamic updates ("\r" rewrites the same console line)
        print(
            f"\r{step}/{steps_per_epoch} ━━━━━━━━━━━━━ {int(estimated_time_remaining)}s remaining - "
            f"accuracy: {training_accuracy / step:.4f} - loss: {training_loss / step:.4f}", end=""
        )

    epoch_time = time.time() - start_time
    # Average over the steps that really ran so the summary matches the last progress line.
    averaged_over = max(1, steps_done)
    print(
        f"\n{steps_per_epoch}/{steps_per_epoch} ━━━━━━━━━━━━━ "
        f"{int(epoch_time)}s total - accuracy: {training_accuracy / averaged_over:.4f} - "
        f"loss: {training_loss / averaged_over:.4f}"
    )

# Evaluate the Model
def evaluate_model(model, test_data, k=10):
    """Score next-item predictions with top-k ranking metrics.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that returns one row
            of item scores per input row.
        test_data: Iterable of ``(x, y_true)`` batches, where ``y_true`` holds
            one target item index per row (tensor or array).
        k: Number of highest-scoring items kept per row; must be >= 1.

    Returns:
        Tuple ``(precision, recall, hit_rate, mrr)`` averaged over all rows.
        Because every row has exactly one target, recall equals the hit rate
        and precision is the hit rate divided by ``k``. All four values are
        0.0 when ``test_data`` yields no rows.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    total_hits, total_mrr, total_users = 0, 0.0, 0

    for x, y_true in test_data:
        y_pred = model.predict(x, verbose=0)
        # argsort sorts ascending, so the k best items are the last k columns;
        # reverse them so column 0 is the top prediction and rank 1 means "best".
        top_k_indices = np.argsort(y_pred, axis=1)[:, -k:][:, ::-1]
        targets = np.asarray(y_true).reshape(-1)

        for predictions, target in zip(top_k_indices, targets):
            positions = np.flatnonzero(predictions == target)
            if positions.size:
                total_hits += 1
                total_mrr += 1.0 / (int(positions[0]) + 1)

        total_users += len(targets)

    if total_users == 0:
        # Nothing to evaluate: avoid dividing by zero.
        return 0.0, 0.0, 0.0, 0.0

    hit_rate = total_hits / total_users
    precision = hit_rate / k
    recall = hit_rate
    mrr = total_mrr / total_users

    return precision, recall, hit_rate, mrr

# Evaluate on the held-out batches and report the four metrics to 4 decimals
results = evaluate_model(model, test_dataset, k=10)
print("\nFinal Results (Precision@10, Recall@10, Hit Rate, MRR):")
summary_line = "({:.4f}, {:.4f}, {:.4f}, {:.4f})".format(
    results[0], results[1], results[2], results[3]
)
print(summary_line)

# Save Results to CSV
def save_results(results, base_path, file_name):
    """Write one evaluation run to a single-row CSV file.

    Args:
        results: Indexable whose entries 0-3 are, in order, precision,
            recall, hit rate and MRR.
        base_path: Directory that receives the file. It is created when it
            does not exist yet.
        file_name: Name of the CSV file inside ``base_path``.
    """
    result_df = pd.DataFrame([{
        "Precision@10": results[0],
        "Recall@10": results[1],
        "Hit Rate": results[2],
        "MRR": results[3],
    }])
    # DataFrame.to_csv does not create missing parent folders, so make sure
    # the target directory exists first (an empty base_path means "cwd").
    if base_path:
        os.makedirs(base_path, exist_ok=True)
    file_path = os.path.join(base_path, file_name)
    result_df.to_csv(file_path, index=False)

# NOTE(review): absolute, machine-specific output folder; consider making it configurable.
base_path = r"C:\Users\user\Desktop\CW4\online+retail"
# Build the experiment-specific file name once and reuse it for saving and reporting.
results_file_name = f"evaluation_results_experiment_{embedding_dim}_{hidden_units}_{epochs}.csv"
save_results(results, base_path, results_file_name)
print(f"Results saved to {os.path.join(base_path, results_file_name)}")



Epoch 1/50
53/52 ━━━━━━━━━━━━━ -2s remaining - accuracy: 0.1210 - loss: 8.0974
52/52 ━━━━━━━━━━━━━ 120s total - accuracy: 0.1233 - loss: 8.2532

Epoch 2/50
53/52 ━━━━━━━━━━━━━ -2s remaining - accuracy: 0.1420 - loss: 7.7949
52/52 ━━━━━━━━━━━━━ 119s total - accuracy: 0.1447 - loss: 7.9448

Epoch 3/50
47/52 ━━━━━━━━━━━━━ 11s remaining - accuracy: 0.1447 - loss: 7.1623

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x00000136E5664490>>
Traceback (most recent call last):
  File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


53/52 ━━━━━━━━━━━━━ -8s remaining - accuracy: 0.1450 - loss: 7.1409
52/52 ━━━━━━━━━━━━━ 450s total - accuracy: 0.1478 - loss: 7.2783

Epoch 4/50
53/52 ━━━━━━━━━━━━━ -1s remaining - accuracy: 0.1462 - loss: 6.7719
52/52 ━━━━━━━━━━━━━ 55s total - accuracy: 0.1490 - loss: 6.9021

Epoch 5/50
53/52 ━━━━━━━━━━━━━ -1s remaining - accuracy: 0.1473 - loss: 6.5394
52/52 ━━━━━━━━━━━━━ 60s total - accuracy: 0.1502 - loss: 6.6652

Epoch 6/50
53/52 ━━━━━━━━━━━━━ -1s remaining - accuracy: 0.1574 - loss: 6.3688
52/52 ━━━━━━━━━━━━━ 55s total - accuracy: 0.1604 - loss: 6.4913

Epoch 7/50
53/52 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1667 - loss: 6.2346
52/52 ━━━━━━━━━━━━━ 49s total - accuracy: 0.1699 - loss: 6.3545

Epoch 8/50
53/52 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1736 - loss: 6.1258
52/52 ━━━━━━━━━━━━━ 48s total - accuracy: 0.1769 - loss: 6.2436

Epoch 9/50
53/52 ━━━━━━━━━━━━━ 0s remaining - accuracy: 0.1789 - loss: 6.0347
52/52 ━━━━━━━━━━━━━ 50s total - accuracy: 0.1823 - loss: 6.1508

Epoch