In [40]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from typing import Dict, List, Tuple
from sklearn.model_selection import train_test_split

In [41]:
def create_category_embeddings(categories):
    """One-hot encode a sequence of category labels.

    Args:
        categories: Sized iterable of hashable, mutually sortable labels,
            one per item.

    Returns:
        A ``(one_hot, vocabulary)`` tuple. ``vocabulary`` is the sorted list
        of distinct labels. ``one_hot`` is a float64 array of shape
        ``(len(categories), len(vocabulary))`` in which row ``i`` holds a
        single 1 in the column given by the position of ``categories[i]``
        inside ``vocabulary``.
    """
    vocabulary = sorted(set(categories))
    column_of = dict(zip(vocabulary, range(len(vocabulary))))

    n_rows = len(categories)
    # Resolve every label to its column first, then write all the ones with a
    # single fancy-indexed assignment rather than one row at a time.
    columns = np.fromiter(
        (column_of[label] for label in categories), dtype=np.intp, count=n_rows
    )
    one_hot = np.zeros((n_rows, len(vocabulary)))
    one_hot[np.arange(n_rows), columns] = 1

    return one_hot, vocabulary

def cosine_similarity_matrix(vectors):
    """Return the pairwise cosine similarity between the rows of ``vectors``.

    Every row is first scaled to unit L2 norm (``axis=1``); multiplying the
    scaled matrix by its own transpose then gives, at position ``[i, j]``,
    the cosine of the angle between row ``i`` and row ``j``.
    """
    unit_rows = tf.nn.l2_normalize(vectors, axis=1)
    return tf.matmul(unit_rows, unit_rows, transpose_b=True)

In [42]:
class TFRecommender:
    """Category-based recommender backed by a pairwise cosine-similarity matrix.

    ``fit`` one-hot encodes the ``category`` column of the catalogue and stores
    the cosine similarity between every pair of rows. Neighbour look-ups are
    then a ranking of a single row of that matrix.
    """

    def __init__(self):
        # All attributes stay None until fit() is called.
        self.category_matrix = None
        self.vocabulary = None
        self.data = None
        self.similarity_matrix = None

    def fit(self, data):
        """Build the category encoding and the similarity matrix from ``data``.

        Args:
            data: DataFrame with a ``category`` column. Row positions in
                ``data`` are the indices used by ``get_nearest_neighbors``.
        """
        self.data = data
        categories = data['category'].values

        self.category_matrix, self.vocabulary = create_category_embeddings(categories)
        self.similarity_matrix = cosine_similarity_matrix(
            tf.constant(self.category_matrix, dtype=tf.float32)
        )

    def get_nearest_neighbors(self, idx, k=10):
        """Return the ``k`` rows most similar to row ``idx``, excluding ``idx``.

        The query row is removed explicitly. With one-hot category vectors
        every item of the same category ties with the query at similarity 1.0,
        so "drop the first hit" does not reliably drop the query itself.

        Args:
            idx: Row position of the query item.
            k: Maximum number of neighbours to return. Fewer are returned when
                the catalogue holds fewer than ``k + 1`` items.

        Returns:
            ``(values, indices)`` NumPy arrays ordered by descending
            similarity; ties keep ascending row order.

        Raises:
            RuntimeError: If ``fit`` has not been called yet.
        """
        if self.similarity_matrix is None:
            raise RuntimeError("TFRecommender.fit() must be called before get_nearest_neighbors().")

        row = np.asarray(self.similarity_matrix[idx])
        # Normalise a negative position so the self-exclusion below still matches.
        self_pos = idx if idx >= 0 else idx + row.shape[0]

        # Stable sort on the negated scores: descending similarity, and equal
        # scores stay in ascending row order, so results are deterministic.
        ranking = np.argsort(-row, kind='stable')
        ranking = ranking[ranking != self_pos][:max(k, 0)]
        return row[ranking], ranking

def recommend_low_sugar_tf(recommender, data, product_id, n=5, sugar_threshold=20):
    """Recommend up to ``n`` similar products whose sugar is below a threshold.

    Args:
        recommender: Object exposing ``get_nearest_neighbors(idx, k)`` that
            returns ``(similarities, indices)`` for the row position ``idx``.
        data: DataFrame with ``product_id``, ``product_name``, ``category``
            and ``sugar_intake`` columns, in the row order the recommender
            was fitted on.
        product_id: Identifier of the query product.
        n: Maximum number of recommendations to return.
        sugar_threshold: Only products with ``sugar_intake`` strictly below
            this value are recommended; rows with missing sugar are skipped.

    Returns:
        A DataFrame with columns ``product_id``, ``product_name``,
        ``category``, ``sugar_intake`` and ``similarity``; or a message string
        when the product is unknown or no low-sugar neighbour was found.
    """
    matches = np.flatnonzero((data['product_id'] == product_id).to_numpy())
    if matches.size == 0:
        return f"Produk dengan ID '{product_id}' tidak ditemukan dalam dataset."

    # Row *position* (not index label): the similarity matrix and .iloc below
    # are both positional, so a non-default DataFrame index must not leak in.
    product_index = int(matches[0])

    # Look at twice as many neighbours as requested to leave room for the
    # sugar filter, but never ask for more neighbours than other rows exist.
    k = max(min(n * 2, len(data) - 1), 0)
    similarities, indices = recommender.get_nearest_neighbors(product_index, k=k)

    recommended_lowsugar = []
    # Walk scores and indices together so each recommendation reports the
    # similarity of its own neighbour, even when earlier ones were filtered out.
    for similarity, idx in zip(similarities, indices):
        if len(recommended_lowsugar) >= n:
            break
        if idx == product_index:
            # Never recommend the query product to itself.
            continue

        row = data.iloc[int(idx)]
        sugar_content = row['sugar_intake']

        if pd.notna(sugar_content) and sugar_content < sugar_threshold:
            recommended_lowsugar.append({
                'product_id': row['product_id'],
                'product_name': row['product_name'],
                'category': row['category'],
                'sugar_intake': sugar_content,
                'similarity': float(similarity)
            })

    if not recommended_lowsugar:
        return f"Tidak ada produk rendah gula yang ditemukan di sekitar produk ID '{product_id}'."

    recommended_lowsugar_data = pd.DataFrame(recommended_lowsugar)
    return recommended_lowsugar_data[['product_id', 'product_name', 'category', 'sugar_intake', 'similarity']]

def evaluate_recommendation_system_tf(recommender, data, test_products, n=5, sugar_threshold=20):
    """Average simple quality metrics of the low-sugar recommender.

    For every product in ``test_products`` the recommender is queried through
    ``recommend_low_sugar_tf`` and four per-product scores are collected:

    * precision: share of recommendations below ``sugar_threshold``.
      NOTE(review): ``recommend_low_sugar_tf`` already filters on the same
      threshold, so this is 1.0 whenever any recommendation is returned.
    * recall: low-sugar recommendations divided by the number of low-sugar
      products in the query product's category (0 when there are none).
    * sugar difference: mean of (query sugar - recommended sugar).
    * diversity: number of distinct categories among the recommendations.

    Products that are unknown, or for which no recommendation is found, are
    reported with ``print`` and left out of the averages.

    Args:
        recommender: Fitted recommender passed through to ``recommend_low_sugar_tf``.
        data: Product DataFrame with ``product_id``, ``category`` and
            ``sugar_intake`` columns.
        test_products: Iterable of product IDs to evaluate.
        n: Number of recommendations requested per product.
        sugar_threshold: Sugar limit used for filtering and scoring.

    Returns:
        Dict with keys ``'Average Precision'``, ``'Average Recall'``,
        ``'Average Sugar Difference'`` and ``'Average Diversity'``; a metric
        is 0 when no product contributed to it.
    """
    precision_scores = []
    recall_scores = []
    sugar_differences = []
    diversity_scores = []

    for product_id in test_products:
        # Single lookup of the query row, reused for category and sugar below.
        product_rows = data.loc[data['product_id'] == product_id]
        if product_rows.empty:
            print(f"Produk dengan ID '{product_id}' tidak ditemukan dalam dataset.")
            continue

        recommended = recommend_low_sugar_tf(recommender, data, product_id, n=n, sugar_threshold=sugar_threshold)

        # A string result is the recommender's "nothing found" message.
        if isinstance(recommended, str):
            print(recommended)
            continue

        correct_recommendations = recommended['sugar_intake'] < sugar_threshold
        n_correct = correct_recommendations.sum()
        precision_scores.append(n_correct / len(recommended))

        input_category = product_rows['category'].values[0]
        possible_low_sugar = data[(data['category'] == input_category) & (data['sugar_intake'] < sugar_threshold)]
        recall = n_correct / len(possible_low_sugar) if len(possible_low_sugar) > 0 else 0
        recall_scores.append(recall)

        input_sugar = product_rows['sugar_intake'].values[0]
        avg_difference = (input_sugar - recommended['sugar_intake']).mean()
        # A query product with missing sugar yields NaN here; skip it so one
        # missing value does not turn the whole average into NaN.
        if pd.notna(avg_difference):
            sugar_differences.append(avg_difference)

        diversity_scores.append(recommended['category'].nunique())

    metrics = {
        'Average Precision': np.mean(precision_scores) if precision_scores else 0,
        'Average Recall': np.mean(recall_scores) if recall_scores else 0,
        'Average Sugar Difference': np.mean(sugar_differences) if sugar_differences else 0,
        'Average Diversity': np.mean(diversity_scores) if diversity_scores else 0
    }

    return metrics

In [54]:
# Load the catalogue. 'sugar_intake' is parsed as text and uses ',' as the
# decimal separator, so swap it for '.' (literal replace) before casting.
data = pd.read_csv("products_fixed.csv")
data['sugar_intake'] = data['sugar_intake'].str.replace(',', '.', regex=False).astype(float)

recommender = TFRecommender()
recommender.fit(data)

product_id = 8997009510116
n_recommendations = 10
sugar_threshold = 20

result = recommend_low_sugar_tf(recommender, data, product_id, n_recommendations, sugar_threshold)
print("\nRecommendations:")
print(result)

# Fixed seed so the evaluation sample (and therefore the metrics) is the same
# on every run; the sample size is capped so a small catalogue does not raise.
test_products = data['product_id'].sample(min(10, len(data)), random_state=42).tolist()
metrics = evaluate_recommendation_system_tf(recommender, data, test_products, n=5, sugar_threshold=20)
print("\nEvaluation Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.2f}")


Recommendations:
      product_id                                       product_name  \
0  8997035601482                             Fibe Mini Botol 100 ml   
1   749921021106  Tropicana Slim Collagen Shot Minuman Tinggi Se...   
2   792649436547       100PLUS Active Minuman Isotonik Botol 350 ml   
3  8991102028158   Panjang Jiwo Larutan Penyegar Melon Botol 350 ml   
4  8992994110112                     Yakult Minuman Probiotik 5 pcs   
5  8997009510055           You C1000 Minuman Jeruk Vitamin C 140 ml   
6  8997009510017           You C1000 Minuman Vitamin C Lemon 140 ml   
7  8991102800020                  Kiranti Sehat Datang Bulan 150 ml   
8  8995227500995  Cap Kaki Tiga Anak Larutan Penyegar Leci Kalen...   
9  8992994110143     Yakult Light Minuman Susu Fermentasi 5 x 65 ml   

            category  sugar_intake  similarity  
0  Minuman Kesehatan          9.00         1.0  
1  Minuman Kesehatan          6.00         1.0  
2  Minuman Kesehatan         19.00         1.0  
3  M

In [56]:
class RecommenderModel:
    """Builds, exports and queries a TFLite category-similarity recommender.

    Workflow: ``preprocess_data`` -> ``create_tflite_model`` -> ``save_model``.
    ``get_recommendations`` is a static method that works purely from the two
    saved files (TFLite model + pickled metadata).
    """

    def __init__(self):
        # Populated by preprocess_data()
        self.category_matrix = None
        self.vocabulary = None
        self.product_data = None
        self.product_id_to_index = None
        # Populated by create_tflite_model()
        self.tflite_model = None

    def preprocess_data(self, data: pd.DataFrame) -> None:
        """Store the catalogue and one-hot encode its ``category`` column.

        Args:
            data: DataFrame with at least ``product_id`` and ``category``
                columns. A copy is kept; the caller's frame is not modified.
        """
        self.product_data = data.copy()
        # Map product ID -> row position. If an ID occurs more than once the
        # last occurrence wins (dict semantics).
        self.product_id_to_index = {pid: idx for idx, pid in enumerate(data['product_id'])}

        # category embeddings: one column per distinct category, sorted
        categories = data['category'].values
        vocabulary = sorted(set(categories))
        cat_to_idx = {cat: idx for idx, cat in enumerate(vocabulary)}

        n_rows = len(categories)
        columns = np.fromiter((cat_to_idx[cat] for cat in categories), dtype=np.intp, count=n_rows)
        one_hot = np.zeros((n_rows, len(vocabulary)))
        one_hot[np.arange(n_rows), columns] = 1

        self.category_matrix = one_hot
        self.vocabulary = vocabulary

    def create_tflite_model(self) -> None:
        """Convert the similarity computation to a TFLite flatbuffer.

        The converted function takes a scalar int32 row position and returns
        the cosine similarity of that row against every row of the category
        matrix. The result is stored in ``self.tflite_model``.

        Raises:
            RuntimeError: If ``preprocess_data`` has not been called yet.
        """
        if self.category_matrix is None:
            raise RuntimeError("preprocess_data() must be called before create_tflite_model().")

        class SimilarityModel(tf.Module):
            def __init__(self, category_matrix):
                super().__init__()
                self.category_matrix = tf.Variable(category_matrix, dtype=tf.float32)

            @tf.function(input_signature=[tf.TensorSpec(shape=(), dtype=tf.int32)])
            def compute_similarities(self, product_idx):
                product_vector = tf.gather(self.category_matrix, product_idx)
                product_vector = tf.expand_dims(product_vector, 0)

                product_norm = tf.nn.l2_normalize(product_vector, axis=1)
                matrix_norm = tf.nn.l2_normalize(self.category_matrix, axis=1)

                # similarity scores, shape (1, n_products)
                similarities = tf.matmul(product_norm, matrix_norm, transpose_b=True)
                # Drop only the leading singleton axis so a one-product
                # catalogue still yields a rank-1 result rather than a scalar.
                return tf.squeeze(similarities, axis=0)

        # create and convert model
        model = SimilarityModel(self.category_matrix)
        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [model.compute_similarities.get_concrete_function()])

        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS,
            tf.lite.OpsSet.SELECT_TF_OPS
        ]

        self.tflite_model = converter.convert()

    def save_model(self, model_path: str, metadata_path: str) -> None:
        """Write the TFLite model and the pickled metadata to disk.

        Args:
            model_path: Destination of the ``.tflite`` file.
            metadata_path: Destination of the pickle holding
                ``product_id_to_index`` and the product records.

        Raises:
            RuntimeError: If the model has not been built yet. Raised before
                any file is opened, so no empty file is left behind.
        """
        if self.tflite_model is None or self.product_data is None:
            raise RuntimeError(
                "preprocess_data() and create_tflite_model() must be called before save_model()."
            )

        # save TFLite model
        with open(model_path, 'wb') as f:
            f.write(self.tflite_model)

        # save metadata (product data and mappings)
        metadata = {
            'product_id_to_index': self.product_id_to_index,
            'products': self.product_data.to_dict('records')
        }
        pd.to_pickle(metadata, metadata_path)

    @staticmethod
    def get_recommendations(
        product_id: int,
        tflite_model_path: str,
        metadata_path: str,
        n_recommendations: int = 5,
        sugar_threshold: float = 20.0
    ) -> List[Dict]:
        """Recommend low-sugar products similar to ``product_id``.

        Args:
            product_id: Identifier of the query product.
            tflite_model_path: Path of the saved TFLite model.
            metadata_path: Path of the saved metadata pickle.
            n_recommendations: Maximum number of recommendations.
            sugar_threshold: Only products with ``sugar_intake`` strictly
                below this value are returned; missing values are skipped.

        Returns:
            List of dicts with ``product_id``, ``product_name``, ``category``,
            ``sugar_intake`` and ``similarity``, most similar first.

        Raises:
            ValueError: If ``product_id`` is not present in the metadata.
        """
        # load metadata
        # SECURITY: unpickling executes arbitrary code from the file; only
        # load metadata files produced by save_model() from a trusted source.
        metadata = pd.read_pickle(metadata_path)
        product_id_to_index = metadata['product_id_to_index']
        products = pd.DataFrame(metadata['products'])

        if product_id not in product_id_to_index:
            raise ValueError(f"Product ID {product_id} not found in the dataset")

        interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
        interpreter.allocate_tensors()

        # get input and output details
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()

        product_idx = product_id_to_index[product_id]
        interpreter.set_tensor(input_details[0]['index'], np.array(product_idx, dtype=np.int32))

        interpreter.invoke()
        similarities = interpreter.get_tensor(output_details[0]['index'])

        # get top similar products with low sugar
        similar_indices = np.argsort(similarities)[::-1]
        recommendations = []

        for idx in similar_indices:
            # Checked first so n_recommendations <= 0 returns an empty list.
            if len(recommendations) >= n_recommendations:
                break

            if idx == product_idx:
                continue

            product = products.iloc[idx]
            sugar_content = product['sugar_intake']

            if pd.notna(sugar_content) and sugar_content < sugar_threshold:
                recommendations.append({
                    'product_id': product['product_id'],
                    'product_name': product['product_name'],
                    'category': product['category'],
                    'sugar_intake': sugar_content,
                    'similarity': similarities[idx]
                })

        return recommendations

In [64]:
def main():
    """Build the TFLite recommender, save it, and print sample recommendations."""
    model_file = 'recommender.tflite'
    metadata_file = 'recommender_metadata.pkl'

    # Read the catalogue and turn the comma-decimal sugar column into floats.
    products = pd.read_csv("products_fixed.csv")
    sugar_text = products['sugar_intake'].str.replace(',', '.')
    products['sugar_intake'] = sugar_text.astype(float)

    # Build the model and write both artefacts to disk.
    builder = RecommenderModel()
    builder.preprocess_data(products)
    builder.create_tflite_model()
    builder.save_model(model_file, metadata_file)

    # Query the saved artefacts for one known product.
    recommendations = RecommenderModel.get_recommendations(
        product_id=8992772586030,
        tflite_model_path=model_file,
        metadata_path=metadata_file,
        n_recommendations=5,
        sugar_threshold=20.0
    )

    print("\nRecommendations:")
    for rec in recommendations:
        report_lines = (
            f"Product: {rec['product_name']}",
            f"Category: {rec['category']}",
            f"Total Sugar: {rec['sugar_intake']}g",
            f"Similarity: {rec['similarity']:.2f}",
        )
        print(*report_lines, sep="\n")
        print()

if __name__ == "__main__":
    main()




Recommendations:
Product: X'Tragin Minuman Serbuk Es Kunyit Asam + Madu 5 x 23 g
Category: Minuman Kesehatan
Total Sugar: 18.0g
Similarity: 1.00

Product: hemaviton C1000 Lemon Kaleng 330 ml
Category: Minuman Kesehatan
Total Sugar: 0.0g
Similarity: 1.00

Product: Lasegar Twist Minuman Penyegar Leci Lemon 320 ml
Category: Minuman Kesehatan
Total Sugar: 11.0g
Similarity: 1.00

Product: Kuas Minuman Kunyit Asam Original 250 ml
Category: Minuman Kesehatan
Total Sugar: 18.0g
Similarity: 1.00

Product: Sinde Larutan Penyegar Cap BADAK 350 ml
Category: Minuman Kesehatan
Total Sugar: 12.0g
Similarity: 1.00



In [63]:
from google.colab import files
import os
import zipfile

def save_and_download_tflite_model(recommender, base_filename='recommender'):
    """Save the model artefacts, bundle them into a zip and download it (Colab).

    Writes ``<base_filename>.tflite``, ``<base_filename>_metadata.pkl`` and a
    ``README.txt`` into ``model_export/``, packs the three files into
    ``<base_filename>_export.zip`` in the working directory, and hands the
    archive to ``google.colab.files.download``.

    Args:
        recommender: Object exposing ``save_model(model_path, metadata_path)``.
        base_filename: Stem used for the exported file names and the archive.
    """
    export_dir = 'model_export'
    os.makedirs(export_dir, exist_ok=True)

    # save model and metadata
    tflite_path = f'{export_dir}/{base_filename}.tflite'
    metadata_path = f'{export_dir}/{base_filename}_metadata.pkl'
    readme_path = f'{export_dir}/README.txt'

    recommender.save_model(tflite_path, metadata_path)

    # README with instructions; the listed file names follow base_filename so
    # the text matches what is actually inside the archive.
    readme_content = f"""
Low Sugar Product Recommender Based On Category - TFLite Model

Files included:
1. {base_filename}.tflite - TensorFlow Lite model file
2. {base_filename}_metadata.pkl - Product metadata and mappings

Instructions:
1. Place both files in your mobile app's assets folder
2. Use TensorFlow Lite interpreter to load the model
3. Load the metadata file using pickle in Python
4. Input : product_id (barcode)
5.  We will leave the rest to MD team -goodluck!"""

    # Explicit encoding so the README bytes do not depend on the host locale.
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(readme_content)

    # create a zip file (deflated, so the download is smaller than the raw files)
    zip_path = f'{base_filename}_export.zip'
    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        zipf.write(tflite_path, os.path.basename(tflite_path))
        zipf.write(metadata_path, os.path.basename(metadata_path))
        zipf.write(readme_path, 'README.txt')

    # Download the zip file
    files.download(zip_path)

# Example usage in Colab:
def export_model():
    """Rebuild the recommender from the CSV and trigger the Colab download."""
    # Read the catalogue; the sugar column uses a decimal comma in the file.
    catalogue = pd.read_csv("products_fixed.csv")
    sugar_text = catalogue['sugar_intake'].str.replace(',', '.')
    catalogue['sugar_intake'] = sugar_text.astype(float)

    # Encode the categories and convert the similarity function to TFLite.
    model_builder = RecommenderModel()
    model_builder.preprocess_data(catalogue)
    model_builder.create_tflite_model()

    # Write the artefacts, zip them and start the browser download.
    save_and_download_tflite_model(model_builder)

    print("Model export completed! Check your downloads folder for the zip file.")

# Kick off the export when the cell runs
export_model()



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Model export completed! Check your downloads folder for the zip file.


In [59]:
import tensorflow as tf
import numpy as np
import pandas as pd
from typing import Dict, List

def test_tflite_recommender(
    tflite_path: str,
    metadata_path: str,
    test_product_ids: List[int],
    n_recommendations: int = 5,
    sugar_threshold: float = 20.0
):
    """Run the saved TFLite recommender on sample products and print a report.

    For every ID in ``test_product_ids`` the function prints the input
    product, its low-sugar recommendations and three summary checks (category
    match rate, whether all recommendations are under the threshold, average
    similarity). An unknown ID, or any error raised while processing one
    product, is reported and the loop continues with the next ID.

    Args:
        tflite_path: Path of the saved TFLite model.
        metadata_path: Path of the pickled metadata (``products`` records and
            ``product_id_to_index`` mapping).
        test_product_ids: Product IDs to query.
        n_recommendations: Maximum recommendations per product.
        sugar_threshold: Only products with ``sugar_intake`` strictly below
            this value are recommended.
    """

    # load metadata
    # SECURITY: unpickling executes arbitrary code from the file; only load
    # metadata produced by a trusted export.
    metadata = pd.read_pickle(metadata_path)
    products_df = pd.DataFrame(metadata['products'])
    product_id_to_index = metadata['product_id_to_index']

    # load TFLite model
    interpreter = tf.lite.Interpreter(model_path=tflite_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    print("Testing TFLite Recommender Model\n")
    print("Model Details:")
    print(f"Input Shape: {input_details[0]['shape']}")
    print(f"Input Type: {input_details[0]['dtype']}")
    print(f"Output Shape: {output_details[0]['shape']}")
    print(f"Output Type: {output_details[0]['dtype']}\n")

    for test_id in test_product_ids:
        print(f"\nTesting Product ID: {test_id}")

        # Guard the lookup: an unknown ID must not abort the remaining tests.
        matching_rows = products_df[products_df['product_id'] == test_id]
        if matching_rows.empty or test_id not in product_id_to_index:
            print(f"Error processing product {test_id}: product ID not found in metadata")
            print("\n" + "="*70 + "\n")
            continue

        # Get original product details
        original_product = matching_rows.iloc[0]
        print("\nInput Product:")
        print(f"Name: {original_product['product_name']}")
        print(f"Category: {original_product['category']}")
        print(f"Sugar Content: {original_product['sugar_intake']}g")

        try:
            # get product index
            product_idx = product_id_to_index[test_id]

            # set input tensor
            interpreter.set_tensor(input_details[0]['index'],
                                np.array(product_idx, dtype=np.int32))

            interpreter.invoke()
            similarities = interpreter.get_tensor(output_details[0]['index'])
            similar_indices = np.argsort(similarities)[::-1]
            recommendations = []

            print("\nRecommendations:")
            print("-" * 50)

            for idx in similar_indices:
                # Checked first so n_recommendations <= 0 yields no results.
                if len(recommendations) >= n_recommendations:
                    break

                if idx == product_idx:
                    continue

                product = products_df.iloc[idx]
                sugar_content = product['sugar_intake']

                if pd.notna(sugar_content) and sugar_content < sugar_threshold:
                    recommendations.append({
                        'product_id': product['product_id'],
                        'product_name': product['product_name'],
                        'category': product['category'],
                        'sugar_intake': sugar_content,
                        'similarity': similarities[idx]
                    })

                    print(f"\nProduct: {product['product_name']}")
                    print(f"Category: {product['category']}")
                    print(f"Sugar Content: {sugar_content}g")
                    print(f"Similarity Score: {similarities[idx]:.4f}")

            # verify recommendations
            if recommendations:
                print("\nVerification Results:")
                print("-" * 50)

                # check category matching
                same_category = sum(1 for r in recommendations
                                  if r['category'] == original_product['category'])
                print(f"Category Match Rate: {same_category/len(recommendations)*100:.1f}%")

                # check sugar content
                all_low_sugar = all(r['sugar_intake'] < sugar_threshold
                                  for r in recommendations)
                print(f"All Products Below Sugar Threshold: {'Yes' if all_low_sugar else 'No'}")

                # average similarity score
                avg_similarity = np.mean([r['similarity'] for r in recommendations])
                print(f"Average Similarity Score: {avg_similarity:.4f}")

            else:
                print("\nNo recommendations found meeting the criteria.")

        except Exception as e:
            # Deliberately broad: this is a reporting harness, so a failure on
            # one product is printed and the next product is still tested.
            print(f"Error processing product {test_id}: {str(e)}")

        print("\n" + "="*70 + "\n")

In [62]:
def main():
    """Run the TFLite recommender report for a fixed list of product IDs."""
    product_ids_under_test = [8997204306590]  # product id

    report_options = dict(n_recommendations=5, sugar_threshold=20.0)
    test_tflite_recommender(
        tflite_path='recommender.tflite',
        metadata_path='recommender_metadata.pkl',
        test_product_ids=product_ids_under_test,
        **report_options
    )

if __name__ == "__main__":
    main()

Testing TFLite Recommender Model

Model Details:
Input Shape: []
Input Type: <class 'numpy.int32'>
Output Shape: [369]
Output Type: <class 'numpy.float32'>


Testing Product ID: 8997204306590

Input Product:
Name: Bonteh Teh Manis 330 ml
Category: Teh Siap Minum
Sugar Content: 27.0g

Recommendations:
--------------------------------------------------

Product: Cap Panda Minuman Liang Teh Kaleng 310 ml
Category: Teh Siap Minum
Sugar Content: 8.0g
Similarity Score: 1.0000

Product: Tebs Sparkling Lemon Lime Botol 300 ml
Category: Teh Siap Minum
Sugar Content: 10.0g
Similarity Score: 1.0000

Product: Tehbotol Sosro Minuman Teh Original 350 ml
Category: Teh Siap Minum
Sugar Content: 18.0g
Similarity Score: 1.0000

Product: Fruit Tea Minuman Teh Blackcurrant 250 ml
Category: Teh Siap Minum
Sugar Content: 18.0g
Similarity Score: 1.0000

Product: Garantea Teh Rasa Apel 350 ml
Category: Teh Siap Minum
Sugar Content: 0.0g
Similarity Score: 1.0000

Verification Results:
-------------------------