# **Collaborative Filtering**



Import Libraries

In [20]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix
from sklearn.model_selection import KFold

# **Load Dataset**

In [4]:
# Step 1: Load dataset
# The cleaned cats CSV is read straight from the project's GitHub repository;
# later cells use its user_id, cat_id and rating columns.
DATASET_URL = 'https://raw.githubusercontent.com/kucingku-capstone/machine-learning/main/dataset/clean_cats_dataset.csv'
df = pd.read_csv(DATASET_URL)


# **Data Preprocessing**

In [5]:
# Step 2: Data preprocessing
# Map the raw user / cat identifiers to contiguous integer indices, which is
# the form the Embedding layers need for their lookups.
user_encoder = LabelEncoder()
cat_encoder = LabelEncoder()

# Keep an untouched string copy of the raw identifiers the first time this
# cell runs. Encoding always starts from that copy, so re-running the cell
# yields the same mapping instead of re-encoding already-encoded integers
# (which would change `classes_` and break `inverse_transform` later on).
if 'user_id_raw' not in df.columns:
    df['user_id_raw'] = df['user_id'].astype(str)
if 'cat_id_raw' not in df.columns:
    df['cat_id_raw'] = df['cat_id'].astype(str)

# Overwrite the id columns with their encoded indices (same columns the
# rest of the notebook reads).
df['user_id'] = user_encoder.fit_transform(df['user_id_raw'])
df['cat_id'] = cat_encoder.fit_transform(df['cat_id_raw'])


# **Split Dataset into Training and Testing Sets**

In [22]:
# Step 3: Split the dataset into training and testing sets
# A fixed fraction of the rows is held out for testing; the fixed seed keeps
# the split identical from run to run.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

train_df, test_df = train_test_split(
    df,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)


# **Create Collaborative Filtering Model using TensorFlow**

In [23]:
# Step 4: Create the collaborative filtering model using TensorFlow
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling can be repeated from a fresh kernel (same value as the seed used
# for the train/test split).
tf.keras.utils.set_random_seed(42)

# Vocabulary sizes come from the fitted encoders, so every encoded index is
# a valid row of its embedding table.
num_users = len(user_encoder.classes_)
num_cats = len(cat_encoder.classes_)
embedding_size = 50

# User branch: one integer index in, one learned vector out.
# `input_length` is omitted: it is deprecated and the Input shape already
# fixes the sequence length to 1.
user_input = Input(shape=(1,), name='user_input')
user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
user_embedding = Flatten()(user_embedding)

# Cat branch: mirrors the user branch
cat_input = Input(shape=(1,), name='cat_input')
cat_embedding = Embedding(input_dim=num_cats, output_dim=embedding_size)(cat_input)
cat_embedding = Flatten()(cat_embedding)

# Concatenate user and cat embeddings into one feature vector
concatenated = Concatenate()([user_embedding, cat_embedding])

# Fully connected layers that learn the user/cat interaction
dense_layer_1 = Dense(128, activation='relu')(concatenated)
dense_layer_2 = Dense(64, activation='relu')(dense_layer_1)

# Single linear unit that outputs the predicted rating. This is a learned
# regression head on top of the dense layers, not a dot product of the two
# embeddings.
rating_output = Dense(1, activation='linear')(dense_layer_2)

# Combine all layers into a model
model = Model(inputs=[user_input, cat_input], outputs=rating_output)

# Compile the model: Adam optimiser, mean squared error on the rating
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')


# Train the Model

In [10]:
# Train the model
# Inputs are listed in the same order as the model's inputs: user index
# first, then cat index. The target is the rating column.
train_inputs = [train_df['user_id'], train_df['cat_id']]
train_targets = train_df['rating']

# NOTE(review): the saved output under this cell logs 15 epochs while the
# code requests 10 -- confirm which value is intended and re-run the cell.
history = model.fit(
    x=train_inputs,
    y=train_targets,
    epochs=10,
    batch_size=64,
    validation_split=0.2,  # fraction of the training rows held out for validation
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


# **Evaluate the Model on the Test Set**

In [11]:
# Step 5: Evaluate the model on the test set
# The value printed is the model's compiled loss computed on the held-out rows.
test_inputs = [test_df['user_id'], test_df['cat_id']]
test_targets = test_df['rating']

test_loss = model.evaluate(x=test_inputs, y=test_targets)
print(f'Test Loss: {test_loss}')


Test Loss: 0.923750102519989


# **Make Predictions**

In [12]:
# Step 6: Make predictions for a specific user
# Score every cat for one encoded user index and print the five best cat ids.
user_index_example = 0

# One cat index per prediction row, covering every encoded cat
cat_indices = np.arange(num_cats)

# Pair each cat with the same user: the user index repeated once per cat
user_input_example = np.full(num_cats, user_index_example)

predictions = model.predict([user_input_example, cat_indices])

# Rank cat indices from highest to lowest predicted rating, keep the first five
ranked_cat_indices = np.argsort(predictions.flatten())[::-1]
top_cat_indices = ranked_cat_indices[:5]

# Translate the encoded indices back to the original cat ids
top_cat_ids = cat_encoder.inverse_transform(top_cat_indices)

print(top_cat_ids)

['46118240' '31958270' '34313667' '46209900' '40550873']


# **Save Model To H5**

In [13]:
# Save model to h5
# Write the trained model to an HDF5 file in the working directory; later
# cells reload it from this same path.
MODEL_H5_PATH = "collaborative_model.h5"
model.save(MODEL_H5_PATH)

  saving_api.save_model(


# **Test Loss of the Saved Model**

In [15]:
# Load the model from the h5 file
saved_model_path = "collaborative_model.h5"
loaded_model = tf.keras.models.load_model(saved_model_path)

# Evaluate the loaded model on the test set; the printed loss can be compared
# with the one reported for the in-memory model to check the save/load round trip.
reload_inputs = [test_df['user_id'], test_df['cat_id']]
reload_targets = test_df['rating']
test_loss = loaded_model.evaluate(x=reload_inputs, y=reload_targets)
print(f'Test Loss: {test_loss}')

Test Loss: 0.923750102519989


# **Convert to TFLite**

In [17]:
import tensorflow as tf

# Source (Keras HDF5) and destination (TensorFlow Lite) files
KERAS_MODEL_PATH = "collaborative_model.h5"
TFLITE_MODEL_PATH = "collaborative_model.tflite"

# Reload the saved Keras model from disk
loaded_model = tf.keras.models.load_model(KERAS_MODEL_PATH)

# Build a converter from the Keras model and run the conversion
converter = tf.lite.TFLiteConverter.from_keras_model(loaded_model)
tflite_model = converter.convert()

# Write the converted model to disk in binary mode
with open(TFLITE_MODEL_PATH, "wb") as tflite_file:
    tflite_file.write(tflite_model)
