In [3]:
import numpy as np
import pandas as pd
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, Flatten, Input, Reshape, Concatenate
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Create a synthetic rating matrix for example (user-item ratings matrix)
# Replace this with your actual rating matrix
num_users = 100
num_items = 50
rating_matrix = pd.DataFrame(np.random.rand(num_users, num_items) * 5, columns=[f"Item{i}" for i in range(num_items)])

# Normalize the ratings between 0 and 1
normalized_ratings = (rating_matrix - rating_matrix.min()) / (rating_matrix.max() - rating_matrix.min())

# Create user-item pairs for training
user_item_pairs = np.array([(i, j) for i in range(num_users) for j in range(num_items)])

# Mask for missing values (for simplicity, we set 10% of ratings as missing)
missing_mask = np.random.rand(num_users, num_items) < 0.1
rating_matrix[missing_mask] = np.nan

# Build Generator Model
def build_generator():
    user_input = Input(shape=(1,))
    item_input = Input(shape=(1,))
    
    user_embedding = Embedding(input_dim=num_users, output_dim=10)(user_input)
    item_embedding = Embedding(input_dim=num_items, output_dim=10)(item_input)
    
    user_vec = Flatten()(user_embedding)
    item_vec = Flatten()(item_embedding)
    
    concatenated = Concatenate()([user_vec, item_vec])
    hidden = Dense(64, activation='relu')(concatenated)
    output = Dense(1, activation='sigmoid')(hidden)
    
    generator = Model(inputs=[user_input, item_input], outputs=output)
    return generator

# Build Discriminator Model
def build_discriminator():
    user_input = Input(shape=(1,))
    item_input = Input(shape=(1,))
    
    user_embedding = Embedding(input_dim=num_users, output_dim=10)(user_input)
    item_embedding = Embedding(input_dim=num_items, output_dim=10)(item_input)
    
    user_vec = Flatten()(user_embedding)
    item_vec = Flatten()(item_embedding)
    
    concatenated = Concatenate()([user_vec, item_vec])
    hidden = Dense(64, activation='relu')(concatenated)
    output = Dense(1, activation='sigmoid')(hidden)
    
    discriminator = Model(inputs=[user_input, item_input], outputs=output)
    discriminator.compile(optimizer=Adam(learning_rate=0.0002), loss='binary_crossentropy', metrics=['accuracy'])
    return discriminator

# Build GAN (Generator + Discriminator)
def build_gan(generator, discriminator):
    discriminator.trainable = False
    gan_input = [Input(shape=(1,)), Input(shape=(1,))]  # GAN takes two inputs: user and item
    generated_rating = generator(gan_input)  # Output of generator
    gan_output = discriminator([gan_input[0], gan_input[1]])  # Discriminator takes user and item inputs
    gan = Model(inputs=gan_input, outputs=gan_output)
    gan.compile(optimizer=Adam(learning_rate=0.0002), loss='binary_crossentropy')
    return gan

# Training function for GAN
def train_gan(generator, discriminator, gan, user_item_pairs, ratings, epochs=100, batch_size=64):
    for epoch in range(epochs):
        # Train discriminator
        idx = np.random.randint(0, user_item_pairs.shape[0], batch_size)
        real_user_item = user_item_pairs[idx]
        real_ratings = ratings[real_user_item[:, 0], real_user_item[:, 1]]
        
        fake_user_item = user_item_pairs[np.random.randint(0, user_item_pairs.shape[0], batch_size)]
        fake_ratings = generator.predict([fake_user_item[:, 0], fake_user_item[:, 1]])
        
        real_labels = np.ones(batch_size)
        fake_labels = np.zeros(batch_size)
        
        d_loss_real = discriminator.train_on_batch([real_user_item[:, 0], real_user_item[:, 1]], real_labels)
        d_loss_fake = discriminator.train_on_batch([fake_user_item[:, 0], fake_user_item[:, 1]], fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
        
        # Train generator
        idx = np.random.randint(0, user_item_pairs.shape[0], batch_size)
        g_loss = gan.train_on_batch([user_item_pairs[idx][:, 0], user_item_pairs[idx][:, 1]], np.ones(batch_size))
        
        if epoch % 10 == 0:
            print(f"Epoch {epoch}/{epochs} | D Loss: {d_loss[0]} | G Loss: {g_loss}")

# Evaluate the model
def evaluate_model(generator, user_item_pairs, actual_ratings, missing_mask, test_mask):
    # Generate predictions for missing test ratings
    predicted_test_ratings = generator.predict([user_item_pairs[:, 0], user_item_pairs[:, 1]])
    
    # Filter out the predicted ratings where the original ratings are available (non-missing)
    actual_test_ratings = actual_ratings.flatten()[test_mask]
    predicted_test_ratings = predicted_test_ratings.flatten()[test_mask]
    
    # Calculate evaluation metrics - MAE and RMSE
    mae = mean_absolute_error(actual_test_ratings, predicted_test_ratings)
    rmse = np.sqrt(mean_squared_error(actual_test_ratings, predicted_test_ratings))
    
    return mae, rmse

# Split data into training and testing sets (80% training, 20% testing)
train_mask, test_mask = train_test_split(np.array(range(rating_matrix.size)), test_size=0.2, random_state=42)

# Create training and testing ratings matrix
train_matrix = rating_matrix.values.flatten()[train_mask]
test_matrix = rating_matrix.values.flatten()[test_mask]

# Create user-item pairs for training and testing
user_item_pairs = np.array([(i, j) for i in range(num_users) for j in range(num_items)])
train_user_item_pairs = user_item_pairs[train_mask]
test_user_item_pairs = user_item_pairs[test_mask]

# Build the models
generator = build_generator()
discriminator = build_discriminator()
gan = build_gan(generator, discriminator)

# Train the model
train_gan(generator, discriminator, gan, train_user_item_pairs, normalized_ratings.values, epochs=100, batch_size=64)

# Evaluate the model
mae, rmse = evaluate_model(generator, test_user_item_pairs, normalized_ratings.values, missing_mask, test_mask)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step  




Epoch 0/100 | D Loss: 0.6936526298522949 | G Loss: [array(0.6923928, dtype=float32), array(0.6923928, dtype=float32), array(0.5546875, dtype=float32)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1000us/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Epoch 10/100 | D Loss: 0.693328320980072 | G Loss: [array(0.6932366, dtype=float32), array(0.6932366, dtype=float32), array(0.5

IndexError: index 1501 is out of bounds for axis 0 with size 1000