In [1]:
pip install tensorflow python-dotenv pandas numpy scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


## Import Necessary Libraries

In [1]:
import os
import requests
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from dotenv import load_dotenv




## Load Data

In [2]:
load_dotenv()  # Load environment variables from .env file
jwt_token = os.getenv('JWT_TOKEN')

headers = {'Authorization': f'Bearer {jwt_token}'}

api_urls = {
    'interactions': 'http://161.97.109.65:3000/api/interactions',
    'users': 'http://161.97.109.65:3000/api/users',
    'products': 'http://161.97.109.65:3000/api/products'
}

def fetch_data(url, headers):
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raises an HTTPError for bad requests
        data = pd.DataFrame(response.json())
        print(f"Data successfully fetched from {url}")
        print(data.head())  # Display the first few rows of the DataFrame
        return data
    except requests.RequestException as e:
        print(f'Failed to fetch data from {url}: {str(e)}')
        return pd.DataFrame()

# Fetch data from APIs
users = fetch_data(api_urls['users'], headers)
products = fetch_data(api_urls['products'], headers)
interactions = fetch_data(api_urls['interactions'], headers)

# Check if data was fetched successfully
if not users.empty and not products.empty and not interactions.empty:
    print("All data fetched successfully.")
else:
    print("Data fetching failed, check errors and retry.")
    # Optionally, add logic to halt further processing if data is crucial


Data successfully fetched from http://161.97.109.65:3000/api/users
                        _id       firstname       lastname         username  \
0  6665e9847aa0dfec0ad43b26         Machine       Learning  machinelearning   
1  6665eab57aa0dfec0ad43b2a  DummyFirstname  DummyLastname       dummydata1   
2  6665eac87aa0dfec0ad43b2d  DummyFirstname  DummyLastname       dummydata2   
3  6665eacc7aa0dfec0ad43b30  DummyFirstname  DummyLastname       dummydata3   
4  6665eacf7aa0dfec0ad43b33  DummyFirstname  DummyLastname       dummydata4   

                  email         phone  \
0          ml@admin.com    7777777777   
1  dummydata1@admin.com  777777770001   
2  dummydata2@admin.com  777777770002   
3  dummydata3@admin.com  777777770003   
4  dummydata4@admin.com  777777770004   

                                            password           address  \
0  $2a$10$oNSoSQcmxvHAefk5dKx0UuJw8oSdGeCumA.ZqIN...  Bangkit Capstone   
1  $2a$10$ihldsbescWBR9v94/sRhReBpX8mZMGrRpwkUohU...   Dummy Ad

## Data Preprocessing

In [4]:
# Let's assume 'interactions' is a DataFrame with a column containing dictionaries
# First, ensure that the 'interactions' column is appropriately normalized
if 'interactions' in interactions.columns:
    interactions_expanded = pd.json_normalize(interactions['interactions'])
else:
    interactions_expanded = pd.json_normalize(interactions.iloc[:, 0])  # If 'interactions' is the name of DataFrame and not a column

# Assuming the JSON data has keys 'userId', 'productId', and 'interactionValue'
interactions_expanded['user_id'] = interactions_expanded['userId']
interactions_expanded['product_id'] = interactions_expanded['productId']
interactions_expanded['interaction_value'] = interactions_expanded['interactionValue']

# Encode user_id and product_id
user_ids = interactions_expanded['user_id'].unique().tolist()
product_ids = interactions_expanded['product_id'].unique().tolist()

user2user_encoded = {x: i for i, x in enumerate(user_ids)}
product2product_encoded = {x: i for i, x in enumerate(product_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}
productencoded2product = {i: x for i, x in enumerate(product_ids)}

interactions_expanded['user'] = interactions_expanded['user_id'].map(user2user_encoded)
interactions_expanded['product'] = interactions_expanded['product_id'].map(product2product_encoded)

# Split the data
train, test = train_test_split(interactions_expanded, test_size=0.2, random_state=42)

# Convert data into required format
x_train = [train['user'].values, train['product'].values]
y_train = train['interaction_value'].values
x_test = [test['user'].values, test['product'].values]
y_test = test['interaction_value'].values

## Define Model

In [5]:
user_input = Input(shape=(1,))
user_embedding = Embedding(len(user2user_encoded), 50)(user_input)  # Removed input_length
user_vec = Flatten()(user_embedding)

product_input = Input(shape=(1,))
product_embedding = Embedding(len(product2product_encoded), 50)(product_input)  # Removed input_length
product_vec = Flatten()(product_embedding)

dot_product = Dot(axes=1)([user_vec, product_vec])
model = Model(inputs=[user_input, product_input], outputs=dot_product)
model.compile(optimizer=Adam(), loss='mean_squared_error')





## Train Model

In [6]:
# Assuming 'train' and 'test' datasets are already split and preprocessed
x_train = [np.array(train['user']), np.array(train['product'])]
y_train = np.array(train['interaction_value'])

# Fit the model
history = model.fit(
    x_train, 
    y_train, 
    epochs=20, 
    verbose=1, 
    validation_split=0.2
)

# Optionally, you can add callbacks, for example to save the best model or early stopping
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define the path to the model directory
model_dir = os.path.join('..', 'model', 'collaborative_model.keras')

# Setup callbacks
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True),
    ModelCheckpoint(model_dir, monitor='val_loss', save_best_only=True, verbose=1)
]

# Fit the model with callbacks
history = model.fit(
    x_train, 
    y_train, 
    epochs=20, 
    verbose=1, 
    validation_split=0.2,
    callbacks=callbacks
)

Epoch 1/20

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 1: val_loss improved from inf to 4.04671, saving model to ..\model\collaborative_model.keras
Epoch 2/20
Epoch 2: val_loss improved from 4.04671 to 4.04362, saving model to ..\model\collaborative_model.keras
Epoch 3/20
Epoch 3: val_loss improved from 4.04362 to 4.04158, saving model to ..\model\collaborative_model.keras
Epoch 4/20
Epoch 4: val_loss improved from 4.04158 to 4.04039, saving model to ..\model\collaborative_model.keras
Epoch 5/20
Epoch 5: val_loss improved from 4.04039 to 4.03951, saving model to ..\model\collaborative_model.keras
Epoch 6/20
Epoch 6: val_loss improved from 4.03951 to 4.03834, saving model to ..\model\collaborative_model.keras
Epoch 7/20
Epoch 7: val_loss improved from 4.03834 to 4.03790, saving model to ..\model\

## Try Model

In [8]:
# Function to get recommendations for a specific user
def recommend_products(user_id, model, interactions, user2user_encoded, product2product_encoded, productencoded2product, products, top_n=30):
    # Check if user_id is in the encoding map
    if user_id not in user2user_encoded:
        print(f"User ID {user_id} not found.")
        return pd.DataFrame()

    user_encoded = user2user_encoded[user_id]
    # Get all encoded product IDs as a list of integers
    product_ids = list(product2product_encoded.values())

    # Create user-product array for prediction
    # Ensure all entries are integers for the model input
    user_product_array = np.array([[user_encoded] * len(product_ids), product_ids]).T.astype(int)

    # Predict interaction values using the model
    predictions = model.predict([user_product_array[:, 0], user_product_array[:, 1]])
    predictions = predictions.flatten()

    # Get top N product indices
    top_indices = predictions.argsort()[-top_n:][::-1]
    # Decode the top indices to product IDs
    recommended_product_ids = [productencoded2product[x] for x in top_indices]
 
    # Filter the products DataFrame to get recommended products using the correct column name
    recommended_products = products[products['_id'].isin(recommended_product_ids)]
    return recommended_products

# Try the model with the specified user ID
user_id = '6665eab57aa0dfec0ad43b2a'
recommended_products = recommend_products(user_id, model, interactions, user2user_encoded, product2product_encoded, productencoded2product, products)
print(f"Recommended products for user {user_id}:")
print(recommended_products)

Recommended products for user 6665eab57aa0dfec0ad43b2a:
                           _id     category     price  \
102   6667ef73b3e75416b2fa7e99         Meja   62000.0   
104   6667ef73b3e75416b2fa7e9b         Meja   67900.0   
135   6667ef73b3e75416b2fa7eba         Meja  144250.0   
140   6667ef73b3e75416b2fa7ebf         Meja  195000.0   
145   6667ef73b3e75416b2fa7ec4         Meja  174982.0   
174   6667ef73b3e75416b2fa7ee1         Meja  120000.0   
198   6667ef73b3e75416b2fa7ef9         Meja   88000.0   
243   6667ef73b3e75416b2fa7f26         Meja  239000.0   
300   6667ef73b3e75416b2fa7f5f        Kasur   85400.0   
366   6667ef73b3e75416b2fa7fa1        Kasur  139900.0   
400   6667ef73b3e75416b2fa7fc3        Kasur  239000.0   
409   6667ef73b3e75416b2fa7fcc        Kasur  313900.0   
433   6667ef73b3e75416b2fa7fe4  Kipas Angin  199000.0   
491   6667ef73b3e75416b2fa801e  Kipas Angin   31500.0   
515   6667ef73b3e75416b2fa8036  Kipas Angin  175000.0   
542   6667ef73b3e75416b2fa8051  