In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.neighbors import NearestNeighbors
import pandas as pd

In [None]:
# Read the product catalogue from disk and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="products_fixed.csv")
print(data.iloc[:5])

      product_id                                       product_name  \
0  9556156046108     Yeo's Minuman Jeli Sarang Burung Kaleng 300 ml   
1  8997035601482                             Fibe Mini Botol 100 ml   
2  8998009020186             Buavita Minuman Sari Buah Jambu 245 mL   
3  8994947000273  ITO EN Oi Ocha Minuman Teh Hijau Tanpa Gula Ho...   
4  8998888150455                SUNQUICK Jus Sari Buah Lemon 330 ml   

            category sugar_intake  
0  Minuman Kesehatan           25  
1  Minuman Kesehatan            9  
2    Jus & Sari Buah           23  
3     Teh Siap Minum            0  
4    Jus & Sari Buah           40  


In [None]:
# Convert sugar_intake to float, accepting decimal commas ("12,5" -> 12.5).
# Casting to str first keeps this cell idempotent: re-running it after the
# column is already numeric (or when the CSV was parsed as numeric to begin
# with) no longer fails on the `.str` accessor. Missing values round-trip
# through the text "nan" and come back as NaN.
data['sugar_intake'] = (
    data['sugar_intake']
    .astype(str)
    .str.replace(',', '.', regex=False)
    .astype(float)
)

In [None]:
# Preview the first rows, restricted to the four columns used downstream.
print(data.head().loc[:, ['product_id', 'product_name', 'category', 'sugar_intake']])

      product_id                                       product_name  \
0  9556156046108     Yeo's Minuman Jeli Sarang Burung Kaleng 300 ml   
1  8997035601482                             Fibe Mini Botol 100 ml   
2  8998009020186             Buavita Minuman Sari Buah Jambu 245 mL   
3  8994947000273  ITO EN Oi Ocha Minuman Teh Hijau Tanpa Gula Ho...   
4  8998888150455                SUNQUICK Jus Sari Buah Lemon 330 ml   

            category  sugar_intake  
0  Minuman Kesehatan          25.0  
1  Minuman Kesehatan           9.0  
2    Jus & Sari Buah          23.0  
3     Teh Siap Minum           0.0  
4    Jus & Sari Buah          40.0  


In [None]:
# One-hot encode the category column: one row per product, one dense 0/1
# column per distinct category.
encoder = OneHotEncoder(sparse_output=False)
categories = data['category'].values[:, None]  # column vector, shape (n_products, 1)
category_matrix = encoder.fit(categories).transform(categories)

In [None]:
# Build a nearest-neighbour index over the one-hot category rows using cosine
# distance. n_neighbors=10 is the number of neighbours `kneighbors` returns
# when the caller does not ask for a specific count.
knn = NearestNeighbors(n_neighbors=10, metric='cosine')
knn.fit(category_matrix)

In [None]:
def recommend_low_sugar(product_id, n=5, sugar_threshold=20):
    """Recommend up to ``n`` low-sugar products nearest to the given product.

    Uses the notebook-level ``data`` frame, the fitted ``knn`` model and the
    one-hot ``category_matrix``; rows of ``category_matrix`` are addressed by
    the positional row number of ``data``.

    Parameters
    ----------
    product_id
        Value looked up in ``data['product_id']``.
    n : int, default 5
        Maximum number of recommendations to return.
    sugar_threshold : number, default 20
        Strict upper bound: only products with ``sugar_intake`` below this
        value (and not missing) are recommended.

    Returns
    -------
    pandas.DataFrame or str
        A frame with columns ``product_id``, ``product_name``, ``category``
        and ``sugar_intake``, ordered from nearest to farthest neighbour, or
        a message string when the product is unknown or nothing qualifies.
    """
    # Validate that the product exists; the mask is reused below to keep the
    # query product out of its own recommendations.
    is_query = (data['product_id'] == product_id).to_numpy()
    if not is_query.any():
        return f"Produk dengan ID '{product_id}' tidak ditemukan dalam dataset."

    # Positional row number, so it agrees with both category_matrix and
    # data.iloc even if the frame's index is not 0..N-1.
    query_pos = int(is_query.nonzero()[0][0])

    # Ask for every fitted row, ordered by distance. The model's default of
    # 10 neighbours is too few once candidates are filtered by sugar content,
    # and could never satisfy n >= 10.
    _, indices = knn.kneighbors(
        [category_matrix[query_pos]], n_neighbors=len(category_matrix)
    )

    recommended_lowsugar = []
    for neighbor_pos in indices[0]:
        if len(recommended_lowsugar) >= n:
            break
        # All products of one category share the same one-hot vector and tie
        # at distance 0, so the query is not guaranteed to come back first;
        # skip it explicitly instead of dropping the first neighbour.
        if is_query[neighbor_pos]:
            continue

        row = data.iloc[neighbor_pos]
        sugar_content = row['sugar_intake']
        if pd.notna(sugar_content) and sugar_content < sugar_threshold:
            recommended_lowsugar.append({
                'product_id': row['product_id'],
                'product_name': row['product_name'],
                'category': row['category'],
                'sugar_intake': sugar_content
            })

    if not recommended_lowsugar:
        return f"Tidak ada produk rendah gula yang ditemukan di sekitar produk ID '{product_id}'."

    return pd.DataFrame(
        recommended_lowsugar,
        columns=['product_id', 'product_name', 'category', 'sugar_intake'],
    )

In [None]:
# Example query: low-sugar alternatives for a single scanned product.
product_id = 8998888121943  # barcode of the product to look up
n_recommendations = 10  # maximum number of recommendations requested
sugar_threshold = 20  # strict upper bound on sugar_intake

result = recommend_low_sugar(
    product_id,
    n=n_recommendations,
    sugar_threshold=sugar_threshold,
)
print(result)

      product_id                                       product_name  \
0  9556001288547              Nescafe Cappuccino Kopi Kaleng 220 ml   
1  8991002121096        Good Day Minuman Kopi Groovy Cookies 250 ml   
2  8996001600597           Kopiko Lucky Day Minuman Kopi Pet 180 ml   
3  8991749000012  Kopi Kenangan Hanya Untukmu Minuman Kopi Black...   
4  8991998117585  Caffino Delizio Milky Espresso Minuman Kopi Bo...   
5  9556001288561       Nescafe Caramel Macchiato Kopi Kaleng 220 ml   

          category  sugar_intake  
0  Kopi Siap Minum          14.0  
1  Kopi Siap Minum          12.0  
2  Kopi Siap Minum          10.0  
3  Kopi Siap Minum           8.0  
4  Kopi Siap Minum          12.0  
5  Kopi Siap Minum          16.0  


In [None]:
def evaluate_recommendation_system(data, knn, category_matrix, test_products, n=5, sugar_threshold=20):
    """Score ``recommend_low_sugar`` over a list of query products.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``product_id``, ``category`` and ``sugar_intake`` columns,
        used for the existence check and for the reference values.
    knn, category_matrix
        Accepted for interface compatibility but not used here: the
        recommendations come from ``recommend_low_sugar``, which is called
        with only the product id, ``n`` and ``sugar_threshold``.
    test_products : iterable
        Product ids to evaluate. Unknown ids, and ids for which
        ``recommend_low_sugar`` returns a message string, are printed and
        left out of every average.
    n : int, default 5
        Number of recommendations requested per product.
    sugar_threshold : number, default 20
        Threshold passed to the recommender and used for the metrics.

    Returns
    -------
    dict
        ``'Average Precision'``, ``'Average Recall'``,
        ``'Average Sugar Difference'`` and ``'Average Diversity'``; each is 0
        when no product contributed a value.
    """
    def _mean_or_zero(values):
        # Plain arithmetic mean; 0 when nothing was collected.
        return sum(values) / len(values) if values else 0

    precision_scores = []
    recall_scores = []
    sugar_differences = []
    diversity_scores = []

    # Hoisted: the id array does not change between iterations.
    known_ids = data['product_id'].values

    for product_id in test_products:
        if product_id not in known_ids:
            print(f"Produk dengan ID '{product_id}' tidak ditemukan dalam dataset.")
            continue

        # Get the recommendations
        recommended = recommend_low_sugar(product_id, n=n, sugar_threshold=sugar_threshold)

        if isinstance(recommended, str):  # No recommendations
            print(recommended)
            continue

        is_input = data['product_id'] == product_id
        input_category = data.loc[is_input, 'category'].values[0]
        input_sugar = data.loc[is_input, 'sugar_intake'].values[0]

        # Precision: correct recommendations / total recommendations.
        # NOTE: the recommender is called with the same threshold it is
        # judged by here, so this is 1.0 whenever it already filters on it.
        correct_recommendations = recommended['sugar_intake'] < sugar_threshold
        n_correct = correct_recommendations.sum()
        precision_scores.append(n_correct / len(recommended))

        # Recall: correct recommendations / low-sugar products available in
        # the input product's category.
        possible_low_sugar = data[(data['category'] == input_category) & (data['sugar_intake'] < sugar_threshold)]
        recall = n_correct / len(possible_low_sugar) if len(possible_low_sugar) > 0 else 0
        recall_scores.append(recall)

        # Sugar difference: mean of (input sugar - recommended sugar).
        # A missing input value yields NaN; skip it so one such product does
        # not turn the overall average into NaN.
        avg_difference = (input_sugar - recommended['sugar_intake']).mean()
        if pd.notna(avg_difference):
            sugar_differences.append(avg_difference)

        # Diversity: number of unique categories in the recommendations
        diversity_scores.append(recommended['category'].nunique())

    # Aggregate metrics
    metrics = {
        'Average Precision': _mean_or_zero(precision_scores),
        'Average Recall': _mean_or_zero(recall_scores),
        'Average Sugar Difference': _mean_or_zero(sugar_differences),
        'Average Diversity': _mean_or_zero(diversity_scores)
    }

    return metrics


In [None]:
# Draw the evaluation sample with a fixed seed so the reported metrics are
# reproducible across kernel restarts, and cap the sample size at the number
# of rows so small datasets do not raise.
test_products = data['product_id'].sample(min(10, len(data)), random_state=42).tolist()

# Evaluate
metrics = evaluate_recommendation_system(data, knn, category_matrix, test_products, n=5, sugar_threshold=20)
print("Evaluation Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.2f}")


Evaluation Metrics:
Average Precision: 1.00
Average Recall: 0.12
Average Sugar Difference: 17.81
Average Diversity: 1.00
