In [28]:
import numpy as np
import pandas as pd
from surprise import Dataset
import tensorflow as tf
from keras import Sequential, Model
from keras.layers import Layer, Input, Dense, Dropout, Flatten, Reshape, Embedding, Concatenate, Dot, BatchNormalization, LeakyReLU
from keras.optimizers import Adam
from keras.losses import MeanSquaredError, BinaryCrossentropy
from sklearn.model_selection import train_test_split
import neptune
from neptune.integrations.tensorflow_keras import NeptuneCallback


In [2]:
# Load the tab-separated ratings file. Only the first three columns are read
# and they are labelled user_id / item_id / rating.
df = pd.read_csv(
    'ml-100k/u.data',
    sep='\t',
    names=['user_id', 'item_id', 'rating'],
    usecols=[0, 1, 2],
)

In [3]:
# Display the loaded ratings frame (bare last expression -> rich repr).
df

Unnamed: 0,user_id,item_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1
...,...,...,...
99995,880,476,3
99996,716,204,5
99997,276,1090,1
99998,13,225,2


In [4]:
# Raw ids are fed straight into the Embedding layers as row indices, so the
# tables must be sized from the LARGEST id, not from the number of distinct
# ids: with any gap in the id space, nunique() would be smaller than the
# biggest id and the lookup would go out of range. For contiguous 1..N ids
# both expressions give the same value.
num_users, num_items = int(df['user_id'].max()), int(df['item_id'].max())

In [5]:
# Hold out 20% of the rows. Features are the (user_id, item_id) pairs, the
# target is the rating; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    df[['user_id', 'item_id']].values,
    df['rating'].values,
    test_size=0.2,
    random_state=40,
)

In [6]:
# Width of each user / item embedding vector; also read by GAN.__init__.
latent_dim = 5

In [20]:
# Item tower: one integer item id -> a latent_dim-wide vector.
# The table has num_items + 1 rows so that index num_items is still valid.
item_input = Input(shape=(1,), name='item_input')
item_embedding = Embedding(
    input_dim=num_items + 1,
    output_dim=latent_dim,
    name='item_embedding',
)(item_input)
# Drop the length-1 sequence axis: (batch, 1, latent_dim) -> (batch, latent_dim).
item_flatten = Flatten(name='item_flatten')(item_embedding)

In [21]:
# User tower: one integer user id -> a latent_dim-wide vector.
# The table has num_users + 1 rows so that index num_users is still valid.
user_input = Input(shape=(1,), name='user_input')
user_embedding = Embedding(
    input_dim=num_users + 1,
    output_dim=latent_dim,
    name='user_embedding',
)(user_input)
# Drop the length-1 sequence axis: (batch, 1, latent_dim) -> (batch, latent_dim).
user_flatten = Flatten(name='user_flatten')(user_embedding)

In [22]:
# Model output: inner product of the flattened item and user embeddings.
rating_vec = Dot(axes=1, name='rating_dot')([item_flatten, user_flatten])

In [23]:
# Functional model; input order is (user id, item id) and must match the
# order of the arrays passed to fit().
deepmf = Model(
    inputs=[user_input, item_input],
    outputs=rating_vec,
)

In [24]:
# Regress the rating with MSE; MAE is tracked as a readable side metric.
deepmf.compile(
    optimizer=Adam(),
    loss=MeanSquaredError(),
    metrics=['mae'],
)

In [25]:
# Print the layer-by-layer summary of the matrix-factorisation model.
deepmf.summary()

In [26]:
# Start a Neptune run for experiment tracking.
# SECURITY: the API token must never be stored in the notebook. With no
# `api_token` argument, neptune reads it from the NEPTUNE_API_TOKEN
# environment variable, so export that before starting the kernel.
# The token that was previously committed here should be treated as leaked
# and revoked in the Neptune UI.
run = neptune.init_run(
    project="sagnik-main/GANRS",
)

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/sagnik-main/GANRS/e/GAN-2


In [27]:
# Train the matrix-factorisation model. Column 0 of the feature arrays is
# user_id and column 1 is item_id, matching the model's input order.
# `callbacks` is documented as a list of Callback instances, so the Neptune
# callback is wrapped in one. The History object is kept in a variable so the
# per-epoch metrics can be inspected later and its repr is not left as the
# cell output.
history = deepmf.fit(
    [x_train[:, 0], x_train[:, 1]],
    y_train,
    validation_data=([x_test[:, 0], x_test[:, 1]], y_test),
    epochs=10,
    callbacks=[NeptuneCallback(run=run, base_namespace='training')],
)

Epoch 1/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 694us/step - loss: 13.0105 - mae: 3.4182 - val_loss: 5.2803 - val_mae: 2.0057
Epoch 2/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 588us/step - loss: 3.8035 - mae: 1.6258 - val_loss: 1.8339 - val_mae: 1.0604
Epoch 3/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 758us/step - loss: 1.5856 - mae: 0.9855 - val_loss: 1.2414 - val_mae: 0.8637
Epoch 4/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 742us/step - loss: 1.1309 - mae: 0.8300 - val_loss: 1.0527 - val_mae: 0.7972
Epoch 5/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 706us/step - loss: 0.9799 - mae: 0.7749 - val_loss: 0.9793 - val_mae: 0.7701
Epoch 6/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 649us/step - loss: 0.9286 - mae: 0.7572 - val_loss: 0.9472 - val_mae: 0.7586
Epoch 7/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x773c2c75b990>

In [30]:
class GAN():
    
    def __init__(self):
        """Build and compile the discriminator, the generator and the stacked model.

        Attributes set here:
            latent_dim: copied from the notebook-level ``latent_dim``.
            noise_dim: length of the generator's noise input (100).
            optimizer: Adam instance used by the discriminator.
            combined_optimizer: separate Adam instance used by ``combined``.
            discriminator: compiled model scoring a sample as real or fake.
            generator: uncompiled model mapping noise to a sample.
            combined: generator followed by the frozen discriminator,
                compiled so that training it updates the generator only.
        """
        self.latent_dim = latent_dim
        self.noise_dim = 100

        # One optimizer per compiled model. An optimizer is bound to the
        # variables it is first built with; reusing a single instance for the
        # discriminator and then for the generator (via `combined`) makes
        # Keras 3 fail with an "unknown variable" error on the second model.
        self.optimizer = Adam(learning_rate=0.0002, beta_1=0.5)
        self.combined_optimizer = Adam(learning_rate=0.0002, beta_1=0.5)

        self.discriminator = self.build_discriminator()
        self.discriminator.compile(
            loss=BinaryCrossentropy(),
            optimizer=self.optimizer,
            metrics=['accuracy'],
        )

        self.generator = self.build_generator()

        # Stacked model: noise -> generator -> discriminator -> validity.
        z = Input(shape=(self.noise_dim,))
        fake_sample = self.generator(z)

        # Freeze the discriminator inside the stacked model so that only the
        # generator's weights move when `combined` is trained.
        # NOTE(review): this relies on the discriminator's own compile()
        # having captured it as trainable. Confirm on the installed Keras
        # version that discriminator.train_on_batch still updates weights
        # after this flag is flipped.
        self.discriminator.trainable = False

        validity = self.discriminator(fake_sample)

        self.combined = Model(z, validity)
        # Same loss object style as the discriminator, for consistency.
        self.combined.compile(
            loss=BinaryCrossentropy(),
            optimizer=self.combined_optimizer,
        )
    
    def build_generator(self):
        """Create the generator network.

        Maps a noise vector of length ``self.noise_dim`` to a vector of
        length ``self.latent_dim * 2 + 1`` through two small hidden layers.
        (Presumably the output is a user embedding, an item embedding and a
        rating; confirm against the dataset passed to ``train``.)

        Prints the layer summary as a side effect.

        Returns:
            A functional ``Model`` taking noise and returning the sample.
        """
        model = Sequential([
            # Explicit Input instead of the deprecated `input_dim` kwarg.
            Input(shape=(self.noise_dim,)),
            Dense(10),
            LeakyReLU(alpha=0.2),
            BatchNormalization(momentum=0.8),
            # The original passed input_dim=self.noise_dim here as well; this
            # layer's real input is the 10-unit layer above, so the argument
            # was wrong and ignored. It is dropped.
            Dense(20),
            LeakyReLU(alpha=0.2),
            Dropout(rate=0.2),
            # Linear output: samples are unbounded real-valued vectors.
            Dense(self.latent_dim * 2 + 1, activation='linear'),
        ])

        model.summary()

        noise = Input(shape=(self.noise_dim, ))
        sample = model(noise)

        return Model(noise, sample)
    
    def build_discriminator(self):
        """Create the discriminator network.

        Takes a vector of length ``self.latent_dim * 2 + 1`` (the same width
        the generator emits) and outputs one sigmoid value in [0, 1].

        Prints the layer summary as a side effect.

        Returns:
            A functional ``Model`` taking a sample and returning its validity.
        """
        # Single definition of the sample width, used by both Input layers.
        sample_dim = self.latent_dim * 2 + 1

        model = Sequential([
            # Explicit Input instead of the deprecated `input_dim` kwarg,
            # matching how the generator is declared.
            Input(shape=(sample_dim,)),
            Dense(4),
            LeakyReLU(alpha=0.2),
            Dense(1, activation='sigmoid'),
        ])

        model.summary()

        sample = Input(shape=(sample_dim, ))
        validity = model(sample)

        return Model(sample, validity)
    
    def train(self, dataset, epochs, batch_size=128, sample_interval=50, num_training_samples=100000):
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))
        
        for epoch in range(epochs):
            
            idx = np.random.randint(0, num_training_samples, batch_size)
            real_samples = dataset[idx]
            
            noise = np.random.normal(0, 1, (batch_size, self.noise_dim))

            fake_samples = self.generator.predict(noise)

            d_loss_real = self.discriminator.train_on_batch(real_samples, valid)
            d_loss_fake = self.discriminator.train_on_batch(fake_samples, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            noise = np.random.normal(0, 1, (batch_size, self.noise_dim))

            g_loss = self.combined.train_on_batch(noise, valid)

            if epoch % sample_interval == 0:
                print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
                noise = np.random.normal(0, 1, (1, self.noise_dim))