In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

In [2]:
def create_category_embeddings(categories):
    """One-hot encode a sequence of category labels.

    Args:
        categories: Sized, iterable collection of hashable, mutually
            sortable labels (one entry per item).

    Returns:
        A ``(one_hot, vocabulary)`` tuple where ``vocabulary`` is the sorted
        list of distinct labels and ``one_hot`` is a float array of shape
        ``(len(categories), len(vocabulary))`` with a single 1 per row in the
        column of that row's label.
    """
    vocabulary = list(set(categories))
    vocabulary.sort()
    column_of = dict(zip(vocabulary, range(len(vocabulary))))

    n_rows = len(categories)
    # Column position of every row's label, in input order.
    columns = np.fromiter(
        (column_of[label] for label in categories), dtype=np.intp, count=n_rows
    )

    one_hot = np.zeros((n_rows, len(vocabulary)))
    one_hot[np.arange(n_rows), columns] = 1

    return one_hot, vocabulary

def cosine_similarity_matrix(vectors):
    """Return the pairwise cosine similarity between the rows of ``vectors``.

    Each row is scaled to unit L2 norm (along axis 1), so the product of the
    normalised matrix with its own transpose holds the cosine of every row
    pair.
    """
    unit_rows = tf.nn.l2_normalize(vectors, axis=1)
    return tf.linalg.matmul(unit_rows, unit_rows, transpose_b=True)

In [3]:
class TFRecommender:
    """Content-based recommender built on one-hot category vectors.

    ``fit`` one-hot encodes the ``category`` column and precomputes the full
    item-by-item cosine similarity matrix. ``get_nearest_neighbors`` ranks
    the other items by their similarity to a given row position.
    """

    def __init__(self):
        # Everything below is populated by fit(); None means "not fitted".
        self.category_matrix = None
        self.vocabulary = None
        self.data = None
        self.similarity_matrix = None

    def fit(self, data):
        """Encode ``data['category']`` and cache the similarity matrix.

        Args:
            data: DataFrame with a ``category`` column. Row *positions* (not
                index labels) are what ``get_nearest_neighbors`` refers to.
        """
        self.data = data
        categories = data['category'].values

        self.category_matrix, self.vocabulary = create_category_embeddings(categories)
        self.similarity_matrix = cosine_similarity_matrix(
            tf.constant(self.category_matrix, dtype=tf.float32)
        )

    def get_nearest_neighbors(self, idx, k=10):
        """Return the ``k`` items most similar to the item at position ``idx``.

        The query item itself is never part of the result. If fewer than
        ``k`` other items exist, all of them are returned.

        Args:
            idx: Row position of the query item in the fitted data.
            k: Maximum number of neighbours to return.

        Returns:
            ``(similarities, indices)`` as NumPy arrays, ordered by
            descending similarity.
        """
        similarities = self.similarity_matrix[idx]
        num_items = int(similarities.shape[0])

        # top_k raises when asked for more entries than exist, so clamp k to
        # the number of *other* items.
        k = max(0, min(int(k), num_items - 1))

        # Ask for one extra entry so the query can be removed and still leave
        # k neighbours.
        values, indices = tf.math.top_k(similarities, k=min(k + 1, num_items))
        values, indices = values.numpy(), indices.numpy()

        # The query is not guaranteed to be ranked first: every item sharing
        # its category ties at the same similarity and top_k breaks ties by
        # lowest index. Filter it out by position instead of dropping slot 0.
        is_other_item = indices != idx
        return values[is_other_item][:k], indices[is_other_item][:k]

def recommend_low_sugar_tf(recommender, data, product_id, n=5, sugar_threshold=20):
    """Recommend up to ``n`` low-sugar products similar to ``product_id``.

    Args:
        recommender: Fitted object exposing
            ``get_nearest_neighbors(position, k)`` that returns aligned
            ``(similarities, indices)`` sequences of row positions in ``data``.
        data: DataFrame with ``product_id``, ``product_name``, ``category``
            and ``sugar_intake`` columns.
        product_id: Identifier of the query product.
        n: Maximum number of recommendations to return.
        sugar_threshold: A neighbour qualifies only if its ``sugar_intake``
            is not missing and strictly below this value.

    Returns:
        A DataFrame with columns ``product_id``, ``product_name``,
        ``category``, ``sugar_intake`` and ``similarity``; or a message
        string when the product is unknown or no neighbour qualifies.
    """
    matching_positions = data['product_id'].eq(product_id).to_numpy().nonzero()[0]
    if matching_positions.size == 0:
        return f"Produk dengan ID '{product_id}' tidak ditemukan dalam dataset."

    # Use the row *position*, not the index label: both the recommender and
    # the iloc lookups below are positional, so a label would point at the
    # wrong row whenever the frame does not carry a default 0..n-1 index.
    product_position = int(matching_positions[0])

    # Fetch twice as many candidates as requested to leave room for the
    # sugar filter.
    similarities, indices = recommender.get_nearest_neighbors(product_position, k=n*2)

    recommended_lowsugar = []
    # Walk similarity and index together so each recommendation reports the
    # similarity of that exact neighbour.
    for similarity, idx in zip(similarities, indices):
        if len(recommended_lowsugar) >= n:
            break

        # Never recommend the query product back to the caller.
        if idx == product_position:
            continue

        candidate = data.iloc[idx]
        sugar_content = candidate['sugar_intake']

        if pd.notna(sugar_content) and sugar_content < sugar_threshold:
            recommended_lowsugar.append({
                'product_id': candidate['product_id'],
                'product_name': candidate['product_name'],
                'category': candidate['category'],
                'sugar_intake': sugar_content,
                'similarity': similarity
            })

    if not recommended_lowsugar:
        return f"Tidak ada produk rendah gula yang ditemukan di sekitar produk ID '{product_id}'."

    recommended_lowsugar_data = pd.DataFrame(recommended_lowsugar)
    return recommended_lowsugar_data[['product_id', 'product_name', 'category', 'sugar_intake', 'similarity']]

def evaluate_recommendation_system_tf(recommender, data, test_products, n=5, sugar_threshold=20):
    """Average simple quality metrics of the recommender over ``test_products``.

    For every test product that exists in ``data`` and yields a
    recommendation table, four per-product scores are collected:

    * precision - share of recommended rows with ``sugar_intake`` below
      ``sugar_threshold``;
    * recall - number of such rows divided by the number of rows in ``data``
      that share the query's category and are below the threshold (0 when
      there are none);
    * sugar difference - mean of ``query sugar - recommended sugar``;
    * diversity - number of distinct categories among the recommendations.

    Products that are unknown, or for which the recommender returns a message
    string, are reported with ``print`` and skipped.

    Returns:
        dict mapping ``'Average Precision'``, ``'Average Recall'``,
        ``'Average Sugar Difference'`` and ``'Average Diversity'`` to the mean
        of the collected scores (0 when nothing was collected).
    """
    collected = {
        'Average Precision': [],
        'Average Recall': [],
        'Average Sugar Difference': [],
        'Average Diversity': [],
    }
    known_ids = data['product_id'].values

    for product_id in test_products:
        if product_id not in known_ids:
            print(f"Produk dengan ID '{product_id}' tidak ditemukan dalam dataset.")
            continue

        recommended = recommend_low_sugar_tf(recommender, data, product_id, n=n, sugar_threshold=sugar_threshold)

        # A string result is a "nothing to recommend" message, not a table.
        if isinstance(recommended, str):
            print(recommended)
            continue

        below_threshold = recommended['sugar_intake'] < sugar_threshold
        hit_count = below_threshold.sum()
        collected['Average Precision'].append(hit_count / len(recommended))

        # First row matching the query id supplies its category and sugar.
        query_rows = data.loc[data['product_id'] == product_id]
        input_category = query_rows['category'].values[0]
        input_sugar = query_rows['sugar_intake'].values[0]

        same_category = data['category'] == input_category
        low_sugar = data['sugar_intake'] < sugar_threshold
        pool_size = len(data[same_category & low_sugar])
        if pool_size > 0:
            recall = hit_count / pool_size
        else:
            recall = 0
        collected['Average Recall'].append(recall)

        sugar_gap = (input_sugar - recommended['sugar_intake']).mean()
        collected['Average Sugar Difference'].append(sugar_gap)

        collected['Average Diversity'].append(recommended['category'].nunique())

    return {
        label: (np.mean(scores) if scores else 0)
        for label, scores in collected.items()
    }

In [4]:
# --- Configuration -----------------------------------------------------
DATA_PATH = "/content/products_fixed.csv"
SEED = 42  # fixes the evaluation sample so the metrics are reproducible

# --- Load data -----------------------------------------------------------
data = pd.read_csv(DATA_PATH)
# sugar_intake is read as text with a decimal comma; swap it for a decimal
# point (literal replacement, not a regex) before casting to float.
data['sugar_intake'] = data['sugar_intake'].str.replace(',', '.', regex=False).astype(float)

# --- Fit -----------------------------------------------------------------
recommender = TFRecommender()
recommender.fit(data)

# --- Single-product recommendation ---------------------------------------
product_id = 8998888121943
n_recommendations = 10
sugar_threshold = 20

result = recommend_low_sugar_tf(recommender, data, product_id, n_recommendations, sugar_threshold)
print("\nRecommendations:")
print(result)

# --- Evaluation ----------------------------------------------------------
# Seeded sample; capped at the dataset size so small files do not raise.
test_products = data['product_id'].sample(n=min(10, len(data)), random_state=SEED).tolist()
metrics = evaluate_recommendation_system_tf(recommender, data, test_products, n=5, sugar_threshold=20)
print("\nEvaluation Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.2f}")


Recommendations:
      product_id                                       product_name  \
0  8992696527447  Nescafe Minuman Kopi Susu Rasa Coconut Latte C...   
1  8998866202893            Golda Minuman Kopi Cappucino Pet 200 ml   
2  8998866201841          Golda Minuman Kopi Dolce Latte Pet 200 ml   
3  9556001288592                   Nescafe Latte Kopi Kaleng 220 ml   
4  8991002122017                       ABC Minuman Kopi Susu 200 ml   
5  8994171102101             Luwak White Koffie Original Pet 220 ml   
6  8991002122000              ABC Minuman Kopi Chocomalt Pet 200 ml   
7  9556001295248               Nescafe Minuman Es Kopi Hitam 220 ml   
8  9556001288547              Nescafe Cappuccino Kopi Kaleng 220 ml   
9  9556001288561       Nescafe Caramel Macchiato Kopi Kaleng 220 ml   

          category  sugar_intake  similarity  
0  Kopi Siap Minum          14.0         1.0  
1  Kopi Siap Minum          19.0         1.0  
2  Kopi Siap Minum          15.0         1.0  
3  Kopi Siap