In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model

In [2]:
# Load MovieLens 100K dataset
url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'
data = pd.read_csv(url, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])

In [3]:
# Parameters
latent_dim = 10
num_users = data['user_id'].nunique()  # Number of unique users
num_items = data['item_id'].nunique()  # Number of unique items
batch_size = 64
epochs = 100

In [4]:
# Normalize the ratings to be between 0 and 1
ratings = data['rating'].values
ratings = (ratings - ratings.min()) / (ratings.max() - ratings.min())

# Generate user and item embeddings from the data
user_embeddings = np.random.normal(size=(num_users, latent_dim))
item_embeddings = np.random.normal(size=(num_items, latent_dim))

In [5]:
# Create the interaction matrix (user-item interactions)
interaction_matrix = np.zeros((num_users, num_items))
for i, row in data.iterrows():
    user_idx = row['user_id'] - 1  # User ID starts at 1, so subtract 1 for indexing
    item_idx = row['item_id'] - 1  # Item ID starts at 1, so subtract 1 for indexing
    interaction_matrix[user_idx, item_idx] = ratings[i]

In [6]:
# Generator model
def build_generator(latent_dim, num_items):
    user_input = layers.Input(shape=(latent_dim,))
    noise_input = layers.Input(shape=(latent_dim,))
    merged = layers.Concatenate()([user_input, noise_input])
    x = layers.Dense(128, activation='relu')(merged)
    x = layers.Dense(256, activation='relu')(x)
    generated_items = layers.Dense(num_items, activation='sigmoid')(x)
    return Model([user_input, noise_input], generated_items)

In [7]:
# Discriminator model
def build_discriminator(latent_dim, num_items):
    user_input = layers.Input(shape=(latent_dim,))
    item_input = layers.Input(shape=(num_items,))
    merged = layers.Concatenate()([user_input, item_input])
    x = layers.Dense(256, activation='relu')(merged)
    x = layers.Dense(128, activation='relu')(x)
    validity = layers.Dense(1, activation='sigmoid')(x)
    return Model([user_input, item_input], validity)

In [8]:
# Build models
generator = build_generator(latent_dim, num_items)
discriminator = build_discriminator(latent_dim, num_items)

# Optimizers
optimizer = tf.keras.optimizers.Adam(0.0002, 0.5)

# Compile discriminator
discriminator.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [9]:
# Build and compile the combined model
discriminator.trainable = False
user_input = layers.Input(shape=(latent_dim,))
noise_input = layers.Input(shape=(latent_dim,))
generated_items = generator([user_input, noise_input])
validity = discriminator([user_input, generated_items])
combined = Model([user_input, noise_input], validity)
combined.compile(loss='binary_crossentropy', optimizer=optimizer)

In [10]:
# Training loop
for epoch in range(epochs):
    # Select a random batch of users
    idx = np.random.randint(0, num_users, batch_size)
    users = user_embeddings[idx]

    # Generate random noise
    noise = np.random.normal(0, 1, (batch_size, latent_dim))

    # Generate a batch of fake item interactions
    fake_items = generator.predict([users, noise])

    # Get a batch of real item interactions
    real_items = interaction_matrix[idx]

    # Labels for real and fake data
    real = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    # Train discriminator
    d_loss_real = discriminator.train_on_batch([users, real_items], real)
    d_loss_fake = discriminator.train_on_batch([users, fake_items], fake)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train generator
    g_loss = combined.train_on_batch([users, noise], real)

    # Print progress
    print(f"Epoch {epoch + 1}/{epochs} [D loss: {d_loss[0]} | D accuracy: {d_loss[1]}] [G loss: {g_loss}]")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 




Epoch 1/100 [D loss: 0.7145473957061768 | D accuracy: 0.28515625] [G loss: [array(0.62289923, dtype=float32), array(0.62289923, dtype=float32), array(0.5234375, dtype=float32)]]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Epoch 2/100 [D loss: 0.6572242975234985 | D accuracy: 0.439453125] [G loss: [array(0.62880003, dtype=float32), array(0.62880003, dtype=float32), array(0.51953125, dtype=float32)]]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Epoch 3/100 [D loss: 0.6500600576400757 | D accuracy: 0.47864586114883423] [G loss: [array(0.6351319, dtype=float32), array(0.6351319, dtype=float32), array(0.5260417, dtype=float32)]]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Epoch 4/100 [D loss: 0.651644229888916 | D accuracy: 0.4914899468421936] [G loss: [array(0.6423391, dtype=float32), array(0.6423391, dtype=float32), array(0.5253906, dtype=float32)]]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s