In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import random
import os

## **Mencari rata-rata nutrisi yang cocok untuk pasien diabetes berdasarkan dataset**

Link Dataset:
https://www.kaggle.com/datasets/nandagopll/food-suitable-for-diabetes-and-blood-pressure/data

In [18]:
# The dataset directory is resolved relative to the kernel's current working
# directory, so the notebook has to be started from the folder that contains
# `flask-server/dataset`.
ROOT_DIR = os.getcwd()
DATA_DIR = os.path.join(ROOT_DIR, 'flask-server', 'dataset')

In [19]:
# Load the raw food table from the dataset directory and report its row count.
raw_food_path = os.path.join(DATA_DIR, "diabet_food_recomendation.csv")
food_df = pd.read_csv(raw_food_path)
print(f"jumlah data : {len(food_df)}")
food_df.head()

jumlah data : 502


Unnamed: 0,Food Name,Glycemic Index,Calories,Carbohydrates,Protein,Fat,Suitable for Diabetes,Suitable for Blood Pressure,Sodium Content,Potassium Content,Magnesium Content,Calcium Content,Fiber Content
0,Apple,39,52,14.0,0.3,0.2,1,1,0,107,9,6,2.4
1,Banana,51,96,23.0,1.1,0.2,1,1,1,358,27,5,2.6
2,Orange,42,43,9.0,0.9,0.1,1,1,0,181,10,40,2.3
3,Strawberries,40,29,7.0,0.7,0.3,1,1,1,153,13,16,2.0
4,Blueberries,53,57,14.0,0.7,0.3,1,1,1,77,9,6,2.4


In [20]:
# Count missing values per column and fully duplicated rows.
food_missing = food_df.isna().sum().to_dict()
food_duplicates = food_df.duplicated().sum()
print(f'jumlah data kosong: {food_missing}')
print(f'jumlah data duplikat: {food_duplicates}')

jumlah data kosong: {'Food Name': 0, 'Glycemic Index': 0, 'Calories': 0, 'Carbohydrates': 0, 'Protein': 0, 'Fat': 0, 'Suitable for Diabetes': 0, 'Suitable for Blood Pressure': 0, 'Sodium Content': 0, 'Potassium Content': 0, 'Magnesium Content': 0, 'Calcium Content': 0, 'Fiber Content': 1}
jumlah data duplikat: 70


In [21]:
# Drop rows with missing values, then drop exact duplicate rows.
food_df = food_df.dropna().drop_duplicates()

# Repeat the quality checks to confirm the frame is clean.
missing_after_clean = food_df.isna().sum().to_dict()
duplicates_after_clean = food_df.duplicated().sum()
print(f'jumlah data kosong: {missing_after_clean}')
print(f'jumlah data duplikat: {duplicates_after_clean}')

print(f'jumlah data clean: {len(food_df)}')

jumlah data kosong: {'Food Name': 0, 'Glycemic Index': 0, 'Calories': 0, 'Carbohydrates': 0, 'Protein': 0, 'Fat': 0, 'Suitable for Diabetes': 0, 'Suitable for Blood Pressure': 0, 'Sodium Content': 0, 'Potassium Content': 0, 'Magnesium Content': 0, 'Calcium Content': 0, 'Fiber Content': 0}
jumlah data duplikat: 0
jumlah data clean: 431


In [22]:
# List the column labels of the cleaned frame.
food_df.columns

Index(['Food Name', 'Glycemic Index', 'Calories', 'Carbohydrates', 'Protein',
       'Fat', 'Suitable for Diabetes', 'Suitable for Blood Pressure',
       'Sodium Content', 'Potassium Content', 'Magnesium Content',
       'Calcium Content', 'Fiber Content'],
      dtype='object')

In [23]:
# Show the dtype and non-null count of every column.
food_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 431 entries, 0 to 501
Data columns (total 13 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Food Name                    431 non-null    object 
 1   Glycemic Index               431 non-null    int64  
 2   Calories                     431 non-null    int64  
 3   Carbohydrates                431 non-null    float64
 4   Protein                      431 non-null    float64
 5   Fat                          431 non-null    float64
 6   Suitable for Diabetes        431 non-null    object 
 7   Suitable for Blood Pressure  431 non-null    int64  
 8   Sodium Content               431 non-null    int64  
 9   Potassium Content            431 non-null    int64  
 10  Magnesium Content            431 non-null    int64  
 11  Calcium Content              431 non-null    int64  
 12  Fiber Content                431 non-null    float64
dtypes: float64(4), int64(7), object(2)

In [24]:
# Inspect the distinct labels and dtype of the diabetes-suitability column.
diabetes_flag = food_df['Suitable for Diabetes']
print(diabetes_flag.unique())
print(diabetes_flag.dtype)

['1' 'Yes' '0']
object


In [25]:
# Encode the 'Suitable for Diabetes' labels as integers (1 = suitable, 0 = not).
# 'Suitable for Blood Pressure' is not modified by this cell.
#
# The values are cast to str before mapping so the lookup also matches the
# integers this cell itself produces: re-running the cell no longer turns the
# whole column into NaN.
diabetes_label_map = {'Yes': 1, '1': 1, '0': 0}
encoded_flag = food_df['Suitable for Diabetes'].astype(str).map(diabetes_label_map)

# Series.map yields NaN for any label missing from the mapping; fail loudly
# rather than carrying unencoded rows forward.
if encoded_flag.isna().any():
    unknown_labels = food_df.loc[encoded_flag.isna(), 'Suitable for Diabetes'].unique()
    raise ValueError(f"Unmapped 'Suitable for Diabetes' labels: {unknown_labels}")

food_df['Suitable for Diabetes'] = encoded_flag.astype('int64')

print(food_df['Suitable for Diabetes'].unique())
print(food_df['Suitable for Diabetes'].dtype)

[1 0]
int64


In [26]:
# Keep only the rows flagged as suitable for diabetes, as an independent copy.
is_diabetes_suitable = food_df['Suitable for Diabetes'] == 1
diabet_food = food_df.loc[is_diabetes_suitable].copy()
print(f'jumlah data nutrisi yang cocok untuk penderita diabet: {len(diabet_food)}')

jumlah data nutrisi yang cocok untuk penderita diabet: 393


In [27]:
# Narrow the frame to the four macronutrient columns.
macro_columns = ['Calories', 'Carbohydrates', 'Protein', 'Fat']
diabet_food = diabet_food.loc[:, macro_columns]
diabet_food.head()

Unnamed: 0,Calories,Carbohydrates,Protein,Fat
0,52,14.0,0.3,0.2
1,96,23.0,1.1,0.2
2,43,9.0,0.9,0.1
3,29,7.0,0.7,0.3
4,57,14.0,0.7,0.3


In [28]:
# Summary statistics (count, mean, std, quartiles, min/max) per macronutrient.
diabet_food.describe()

Unnamed: 0,Calories,Carbohydrates,Protein,Fat
count,393.0,393.0,393.0,393.0
mean,184.282443,18.026718,9.531858,8.8257
std,154.561886,17.271423,9.244055,13.696184
min,0.0,0.0,0.0,0.0
25%,54.0,5.0,1.9,0.4
50%,160.0,12.0,5.3,3.6
75%,265.0,26.3,15.7,13.5
max,902.0,77.5,31.0,100.0


In [29]:
# Mean of each macronutrient column in diabet_food.
cal_mean = diabet_food['Calories'].mean()
carb_mean = diabet_food['Carbohydrates'].mean()
protein_mean = diabet_food['Protein'].mean()
fat_mean = diabet_food['Fat'].mean()

# Largest observed value of each macronutrient column in diabet_food.
max_cal = diabet_food['Calories'].max()
max_carb = diabet_food['Carbohydrates'].max()
max_protein = diabet_food['Protein'].max()
max_fat = diabet_food['Fat'].max()

print(f'rata-rata kalori: {cal_mean:.2f}')
print(f'rata-rata karbohidrat: {carb_mean:.2f}')
print(f'rata-rata protein: {protein_mean:.2f}')
print(f'rata-rata lemak: {fat_mean:.2f}')

print('\n')

print(f'max kalori: {max_cal:.2f}')
print(f'max karbohidrat: {max_carb:.2f}')
print(f'max protein: {max_protein:.2f}')
# Report the fat maximum as well so all four computed maxima are visible.
print(f'max lemak: {max_fat:.2f}')

rata-rata kalori: 184.28
rata-rata karbohidrat: 18.03
rata-rata protein: 9.53
rata-rata lemak: 8.83


max kalori: 902.00
max karbohidrat: 77.50
max protein: 31.00


In [35]:
# Count duplicate macronutrient rows, drop them, and confirm none remain.
duplicates_before = diabet_food.duplicated().sum()
print(f"data duplicated: {duplicates_before}")
diabet_food = diabet_food.drop_duplicates()
duplicates_after = diabet_food.duplicated().sum()
print(f"data duplicated: {duplicates_after}")

data duplicated: 74
data duplicated: 0


In [36]:
# Write the cleaned macronutrient table to CSV, omitting the index column.
clean_csv_path = os.path.join(DATA_DIR, 'diabet_food_recomendation_clean.csv')
diabet_food.to_csv(clean_csv_path, index=False)

## **REKOMENDASI MAKANAN INDO**

Link dataset: https://www.kaggle.com/datasets/anasfikrihanif/indonesian-food-and-drink-nutrition-dataset/data

In [29]:
# Load the nutrition table from the dataset directory.
nutrition_csv_path = os.path.join(DATA_DIR, 'nutrition.csv')
df = pd.read_csv(nutrition_csv_path)
df.head()

Unnamed: 0,id,calories,proteins,fat,carbohydrate,name,image
0,1,280.0,9.2,28.4,0.0,Abon,https://img-cdn.medkomtek.com/PbrY9X3ignQ8sVuj...
1,2,513.0,23.7,37.0,21.3,Abon haruwan,https://img-global.cpcdn.com/recipes/cbf330fbd...
2,3,0.0,0.0,0.2,0.0,Agar-agar,https://res.cloudinary.com/dk0z4ums3/image/upl...
3,4,45.0,1.1,0.4,10.8,Akar tonjong segar,https://images.tokopedia.net/img/cache/200-squ...
4,5,37.0,4.4,0.5,3.8,Aletoge segar,https://nilaigizi.com/assets/images/produk/pro...


In [None]:
# Report row count, missing values per column and duplicate rows.
nutrition_rows = len(df)
nutrition_missing = df.isna().sum().to_dict()
nutrition_duplicates = df.duplicated().sum()
print(f'Jumlah data: {nutrition_rows}')
print(f"Jumlah data kosong: {nutrition_missing}")
print(f"Jumlah data duplicate: {nutrition_duplicates}")

# Summary statistics of the numeric columns are shown as the cell output.
print("Informasi Data")
df.describe()

Jumlah data: 1346
Jumlah data kosong: {'id': 0, 'calories': 0, 'proteins': 0, 'fat': 0, 'carbohydrate': 0, 'name': 0, 'image': 0}
Jumlah data duplicate: 0
Informasi Data


Unnamed: 0,id,calories,proteins,fat,carbohydrate
count,1346.0,1346.0,1346.0,1346.0,1346.0
mean,673.5,203.217385,10.001189,7.584027,25.390193
std,388.701042,163.07543,11.84798,13.733063,32.193054
min,1.0,0.0,0.0,0.0,0.0
25%,337.25,75.0,1.8,0.5,4.525
50%,673.5,146.0,5.0,2.0,13.3
75%,1009.75,333.75,15.0,8.275,37.575
max,1346.0,940.0,83.0,100.0,647.0


In [None]:
# Flag rows whose image field starts with "https://". na=False makes a missing
# image count as invalid instead of leaving NaN in the mask, which would break
# the `~` negation and the boolean indexing below.
has_https_image = df['image'].str.startswith("https://", na=False)
invalid_images = df[~has_https_image]

print(f"Data gambar tanpa URL yang valid: {len(invalid_images)}")

# Remove the offending rows by index label.
df = df.drop(invalid_images.index)

# Re-check after the drop to confirm that no invalid row is left.
invalid_images = df[~df['image'].str.startswith("https://", na=False)]
print(f"Data gambar tanpa URL yang valid: {len(invalid_images)}")

print(f"Jumlah data sekarang: {df.shape[0]}")

Data gambar tanpa URL yang valid: 0
Data gambar tanpa URL yang valid: 0
Jumlah data sekarang: 1340


In [None]:
def filter_food(df, max_calories=None, max_carbohydrate=None, max_fat=None, max_protein=None):
    """
    Return the rows of ``df`` that stay within every nutrient limit supplied.

    Each limit is an inclusive upper bound on one column: ``max_calories`` on
    'calories', ``max_carbohydrate`` on 'carbohydrate', ``max_fat`` on 'fat'
    and ``max_protein`` on 'proteins'. A limit left as ``None`` is not applied.
    The filtering works on a copy, so ``df`` itself is not modified.
    """
    limits_by_column = (
        ('calories', max_calories),
        ('carbohydrate', max_carbohydrate),
        ('fat', max_fat),
        ('proteins', max_protein),
    )

    selected = df.copy()
    for column, limit in limits_by_column:
        if limit is None:
            continue
        selected = selected[selected[column] <= limit]

    return selected

def generate_combinations(food_df, num_combinations=5, items_per_combination=5, random_state=None):
    """
    Draw several random food combinations from ``food_df``.

    Parameters
    ----------
    food_df : pandas.DataFrame
        Table to sample rows from.
    num_combinations : int
        How many combinations to produce.
    items_per_combination : int
        Rows per combination; capped at ``len(food_df)`` because rows are
        sampled without replacement.
    random_state : int, numpy.random.RandomState, numpy.random.Generator or None
        Seed or generator for reproducible sampling. ``None`` (the default)
        keeps the previous unseeded behaviour.

    Returns
    -------
    list of pandas.DataFrame
        One sampled frame per combination.
    """
    sample_size = min(len(food_df), items_per_combination)

    # A single generator is shared by every draw: handing the same integer
    # seed to each sample() call would return identical combinations.
    if random_state is None or isinstance(random_state, (np.random.RandomState, np.random.Generator)):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    return [food_df.sample(sample_size, random_state=rng) for _ in range(num_combinations)]

In [None]:
# Example input: keep asking until the user enters a number in [0, 1].
while True:
    try:
        user_diabetes = float(input("Masukkan nilai (0 hingga 1): "))
    except ValueError:
        print("Input tidak valid! Harap masukkan angka.")
        continue

    if not 0 <= user_diabetes <= 1:
        print("Input harus berada antara 0 dan 1.")
        continue

    # Threshold the value at 0.5 into a binary flag (1 = diabetic, 0 = not).
    user_diabetes = int(user_diabetes >= 0.5)
    break

reason = 'kemungkinan user merupakan pasien diabetes' if user_diabetes else 'user tidak mengalami diabetes'
print(f"karena {reason}")

print("berikut beberapa opsi makanan yang mungkin cocok untuk user:")

# Candidate pools: foods within the maxima computed earlier, and the full table.
diabetes_food = filter_food(
    df,
    max_calories=max_cal,
    max_carbohydrate=max_carb,
    max_fat=max_fat,
    max_protein=max_protein,
)
normal_food = df

# Both sets are sampled; only the one matching the flag is printed.
diabetes_combinations = generate_combinations(diabetes_food, num_combinations=5)
normal_combinations = generate_combinations(normal_food, num_combinations=5)

shown_columns = ["name", "calories", "carbohydrate", "fat", "proteins", "image"]
chosen_combinations = diabetes_combinations if user_diabetes else normal_combinations
for idx, combo in enumerate(chosen_combinations, 1):
    print(f"\nKombinasi {idx}:")
    print(combo[shown_columns])

Masukkan nilai (0 hingga 1): 0.5
karena kemungkinan user merupakan pasien diabetes
berikut beberapa opsi makanan yang mungkin cocok untuk user:

Kombinasi 1:
                     name  calories  carbohydrate   fat  proteins  \
951                  Otak     125.0           0.8   8.6      10.4   
315  Daun ubi putih segar      46.0           8.2   0.2       2.8   
230           Daging Kuda     118.0           0.9   4.1      18.1   
16                  Angsa     354.0           0.0  31.5      16.4   
673        Kangkung kukus      30.0           4.7   0.7       3.2   

                                                 image  
951  https://asset.kompas.com/crops/Zb2CWR9vmVrUUr6...  
315  https://asset-a.grid.id/crop/0x0:0x0/700x465/p...  
230  https://awsimages.detik.net.id/community/media...  
16   https://cdn.idntimes.com/content-images/commun...  
673  https://cdn1-production-images-kly.akamaized.n...  

Kombinasi 2:
                        name  calories  carbohydrate   fat  proteins  \

In [None]:
# Print the full image URL of the row with index label 951.
print(df.loc[951, 'image'])

https://asset.kompas.com/crops/Zb2CWR9vmVrUUr6v372o4SJ8gs0=/0x0:1000x667/750x500/data/photo/2021/07/18/60f3913f44be3.jpg


In [None]:
import requests
from PIL import Image
from io import BytesIO

# Index label (not position) of the row whose image is inspected.
image_row = 1099
image_url = df.loc[image_row, 'image']

# A timeout keeps the cell from hanging indefinitely on an unresponsive host.
response = requests.get(image_url, timeout=10)

if response.status_code == 200:
    img = Image.open(BytesIO(response.content))

    width, height = img.size
    # Report the row that was actually fetched instead of a hard-coded number.
    print(f"Ukuran gambar pada URL di baris {image_row}: Lebar = {width} piksel, Tinggi = {height} piksel")
else:
    print("Gambar tidak dapat diunduh. Status code:", response.status_code)

Ukuran gambar pada URL di baris 951: Lebar = 750 piksel, Tinggi = 500 piksel
