In [1]:
pip install pytesseract

Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13


In [4]:
pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.221-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.17-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.221-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.17-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.221 ultralytics-thop-2.0.17


In [5]:
import cv2
import os
import re
import math
import numpy as np
from ultralytics import YOLO
import pytesseract
from PIL import Image

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


# **CONFIG TESTING**

In [6]:
# --- 0. Configuration ---

# Inference settings
CONF_THRESHOLD = 0.5                  # passed to model.predict() as `conf`
output_crop_dir = "cropped_objects"   # directory the cropped detections are written to

# Point this at your own trained model weights
model_path = '/content/drive/MyDrive/CODE/yolo_model.pt'
# Point this at the image you want to test
image_path = '/content/drive/MyDrive/CODE/assets/image.jpg'

# Create the output directory up front (no error if it already exists)
os.makedirs(output_crop_dir, exist_ok=True)

# **DATA LOADING**

In [7]:
# Model loading. task='obb' is passed explicitly instead of relying on
# auto-detection from the weights file.
# Failures are raised rather than handled with exit(): inside a notebook exit()
# shuts the kernel down and discards all state, whereas an exception only stops
# this cell (and a "Run All") with a readable traceback.
try:
    model = YOLO(model_path, task='obb')
except Exception as e:
    raise RuntimeError(f"Gagal memuat model: {e}") from e

# Image loading. cv2.imread signals failure by returning None, not by raising,
# so the result has to be checked explicitly.
original_image = cv2.imread(image_path)
if original_image is None:
    raise OSError(f"Gagal memuat gambar dari: {image_path}")

# Resize to a fixed 640x640 frame (aspect ratio is not preserved). The image stays
# in BGR, OpenCV's default channel order. This resized frame is what is passed to
# predict() and what the crops are later sliced from, so box coordinates and
# pixels refer to the same image.
resized_image = cv2.resize(original_image, (640, 640), interpolation=cv2.INTER_LINEAR)

# **PREDICTION**

In [8]:

# Run inference on the resized frame.
# verbose=True keeps the per-image Ultralytics log lines in the cell output;
# switch it to False to silence them.
results = model.predict(
    source=resized_image,
    save=False,
    verbose=True,
    conf=CONF_THRESHOLD,
)


0: 640x640 1 garam, 1 kalori, 1 lemak, 1 nutrition-fact, 1 protein, 38.4ms
Speed: 8.7ms preprocess, 38.4ms inference, 209.7ms postprocess per image at shape (1, 3, 640, 640)


In [9]:
# Maps each class label to its single highest-confidence detection.
best_detections = {}

for result in results:
    # Prefer result.boxes when it is populated; otherwise fall back to result.obb
    # (when the attribute exists and is set). Either one is iterated the same way below.
    if result.boxes is not None and len(result.boxes) > 0:
        boxes = result.boxes
    elif hasattr(result, 'obb') and result.obb is not None:
        boxes = result.obb
    else:
        boxes = None

    # Nothing detected in this result
    if boxes is None or len(boxes) == 0:
        continue

    # De-duplicate: keep only the best detection per label
    for box in boxes:
        conf = box.conf[0].item()
        cls = box.cls[0].item()
        label = model.names.get(int(cls), f"Unknown_{int(cls)}")

        # The enclosing nutrition-fact panel itself is not needed downstream
        if label == "nutrition-fact":
            continue

        # Key is the label; value holds the box, its confidence and its class id
        current_best = best_detections.get(label)
        if current_best is None or conf > current_best['conf']:
            best_detections[label] = {'box': box, 'conf': conf, 'cls': cls}

# **CROPPING**

In [10]:
print("\n--- Proses Cropping Dimulai ---")
detection_count = 0

# Remove crops left over from a previous run. File names embed the running index
# and the confidence, so a re-run (e.g. on a different image) does not overwrite
# them, and the OCR step reads every image found in this directory.
for stale_name in os.listdir(output_crop_dir):
    if stale_name.endswith(".jpg"):
        os.remove(os.path.join(output_crop_dir, stale_name))

img_h, img_w = resized_image.shape[:2]

# Crop the best detection of every label
for label, data in best_detections.items():

    box = data['box']
    conf = data['conf']

    # Box corners as (x1, y1, x2, y2), moved to the CPU and truncated to ints
    xyxy_tensor = box.xyxy[0].cpu()
    x_min, y_min, x_max, y_max = xyxy_tensor.numpy().astype(int)

    # Clamp to the image. A negative coordinate would otherwise be interpreted by
    # NumPy as an index counted from the end and yield a wrong or empty crop.
    x_min, x_max = np.clip((x_min, x_max), 0, img_w)
    y_min, y_max = np.clip((y_min, y_max), 0, img_h)

    # NumPy slicing order is [rows (y), columns (x)]
    cropped_segment = resized_image[y_min:y_max, x_min:x_max]

    # Only keep crops that are more than 1 pixel in both dimensions
    if cropped_segment.shape[0] > 1 and cropped_segment.shape[1] > 1:
        # File name pattern: <label>_<index>_<confidence in percent>.jpg
        output_filename = os.path.join(output_crop_dir, f"{label}_{detection_count:02d}_{int(conf*100)}.jpg")

        # cv2.imwrite reports failure through its return value, not an exception
        if cv2.imwrite(output_filename, cropped_segment):
            print(f"✅ Berhasil crop '{label}' (Conf: {conf:.2f}). Disimpan di: {output_filename}")
            detection_count += 1
        else:
            print(f"❌ Gagal menyimpan crop '{label}' ke: {output_filename}")
    else:
        print(f"❌ Gagal crop '{label}'. BBox sangat kecil atau tidak valid.")

print(f"\nProses selesai. Total {detection_count} objek unik telah di-crop dan disimpan di direktori '{output_crop_dir}'.")


--- Proses Cropping Dimulai ---
✅ Berhasil crop 'lemak' (Conf: 0.88). Disimpan di: cropped_objects/lemak_00_87.jpg
✅ Berhasil crop 'kalori' (Conf: 0.87). Disimpan di: cropped_objects/kalori_01_87.jpg
✅ Berhasil crop 'protein' (Conf: 0.62). Disimpan di: cropped_objects/protein_02_62.jpg
✅ Berhasil crop 'garam' (Conf: 0.59). Disimpan di: cropped_objects/garam_03_59.jpg

Proses selesai. Total 4 objek unik telah di-crop dan disimpan di direktori 'cropped_objects'.


# **INFERENCE OCR**

In [11]:

# --- Configuration ---
# Directory holding the crops. Reuses the directory configured for the cropping
# step so the two cannot drift apart.
INPUT_CROP_DIR = output_crop_dir
# Enlargement factor applied to every crop before OCR
UPSCALE_FACTOR = 5

# Optional: set the Tesseract binary path if it is not found on the system PATH
# Windows example:
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Tesseract options, identical for every crop:
#   --psm 7: treat the image as a single line of text
#   --oem 3: default OCR engine mode
custom_config = r'--oem 3 --psm 7'

# --- Main OCR pass (upscaling only, no other OpenCV preprocessing) ---
print(f"Memulai proses OCR hanya dengan upscaling {UPSCALE_FACTOR}x pada gambar di '{INPUT_CROP_DIR}'...")

extracted_data = {}

# sorted(): os.listdir returns entries in arbitrary order; sorting makes the
# processing order (and which file wins when two share a label) deterministic.
for filename in sorted(os.listdir(INPUT_CROP_DIR)):
    if not filename.endswith((".jpg", ".jpeg", ".png")):
        continue

    file_path = os.path.join(INPUT_CROP_DIR, filename)

    # Label is the part before the first underscore ('garam_01.jpg' -> 'garam').
    # str.split always returns at least one element, so no fallback is needed.
    object_label = filename.split('_')[0]

    # 1. Load the crop
    img_cv2 = cv2.imread(file_path)

    if img_cv2 is None:
        print(f"Gagal memuat {filename}.")
        continue

    # 2. Upscale with bicubic interpolation; the crops contain small text
    upscaled_img = cv2.resize(
        img_cv2,
        None,
        fx=UPSCALE_FACTOR,
        fy=UPSCALE_FACTOR,
        interpolation=cv2.INTER_CUBIC
    )

    # 3. OpenCV arrays are BGR; convert to RGB before wrapping in a PIL image
    img_pil = Image.fromarray(cv2.cvtColor(upscaled_img, cv2.COLOR_BGR2RGB))

    # 4. Run OCR
    text = pytesseract.image_to_string(img_pil, config=custom_config)

    # Collapse to one line and map '|' to 'l' (a common OCR confusion)
    cleaned_text = text.strip().replace('\n', ' ').replace('|', 'l')

    # A later file with the same label replaces the earlier entry
    extracted_data[object_label] = cleaned_text

    print(f"\n--- {filename} ({object_label}) ---")
    print(f"Teks Terekstrak: {cleaned_text}")

# --- Summary ---
print("\n----------------------------------------------------")
print("✅ Ringkasan Data Terekstrak Akhir:")
for key, value in extracted_data.items():
    print(f"{key:<10}: {value}")
print("----------------------------------------------------")

Memulai proses OCR hanya dengan upscaling 5x pada gambar di 'cropped_objects'...

--- protein_02_62.jpg (protein) ---
Teks Terekstrak: _—— “>.

--- lemak_00_87.jpg (lemak) ---
Teks Terekstrak: - Lemek Jenuh 156

--- garam_03_59.jpg (garam) ---
Teks Terekstrak: _ Garam (Neatriurn) as —

--- kalori_01_87.jpg (kalori) ---
Teks Terekstrak: See Vote ae 110 kke

----------------------------------------------------
✅ Ringkasan Data Terekstrak Akhir:
protein   : _—— “>.
lemak     : - Lemek Jenuh 156
garam     : _ Garam (Neatriurn) as —
kalori    : See Vote ae 110 kke
----------------------------------------------------


# **EXTRACT VALUE**

## base value

In [12]:
def clean_and_transform_data(extracted_data: dict) -> dict:
    """
    Turn raw OCR strings into numeric nutrition values.

    Every required key is present in the result. Keys that are missing from
    `extracted_data`, or whose text contains no number, keep their default:
    0 for nutrients and 1 for 'serving'.

    The value taken from each text is its FIRST number, including an optional
    decimal part written with '.' or ',' ("10.5g" -> 10.5, "1,5 mg" -> 1.5,
    "5 g 8%" -> 5). Previously all digits in the text were concatenated, which
    merged decimals and unrelated numbers ("10.5g" -> 105, "5 g 8%" -> 58).
    As before, a run of more than three digits is cut to its first three
    digits as a guard against OCR noise.

    Args:
        extracted_data: Mapping of label -> OCR text. Labels outside the
            required set are ignored.

    Returns:
        dict: One entry per required key. Values are int, or float when the
        text carried a decimal part.
    """
    REQUIRED_KEYS = [
        "garam",
        "gula",
        "kalori",
        "karbo",
        "lemak",
        "protein",
        "serving",
        "takaran-satuan",
        "serat"
    ]
    MAX_INTEGER_DIGITS = 3

    # Defaults: everything 0 except 'serving', which defaults to 1
    cleaned_data = {
        key: (1 if key == "serving" else 0)
        for key in REQUIRED_KEYS
    }

    # Integer part, optionally followed by '.' or ',' and a fractional part.
    # NOTE(review): a separator is always read as a decimal mark, so "1.000"
    # parses as 1.0 - confirm the labels never use thousands separators.
    number_pattern = re.compile(r'(\d+)(?:[.,](\d+))?')

    for key, value in extracted_data.items():
        if key not in cleaned_data:
            continue

        match = number_pattern.search(str(value))
        if match is None:
            # No digits at all: keep the default
            continue

        integer_part, fraction_part = match.groups()

        if len(integer_part) > MAX_INTEGER_DIGITS:
            # Over-long digit run: keep only the leading digits, drop any fraction
            number = int(integer_part[:MAX_INTEGER_DIGITS])
        elif fraction_part:
            number = float(f"{integer_part}.{fraction_part}")
        else:
            number = int(integer_part)

        # A parsed 0 must not overwrite the default serving count of 1
        if number > 0 or key != "serving":
            cleaned_data[key] = number

    return cleaned_data

In [13]:
# Convert the raw OCR text into numeric values
final_cleaned_data = clean_and_transform_data(extracted_data)

# Show the cleaned values, one nutrient per line
print("\n--- Hasil Akhir Setelah Transformasi ---")
for nutrient_name, amount in final_cleaned_data.items():
    print(f"{nutrient_name:<15}: {amount}")


--- Hasil Akhir Setelah Transformasi ---
garam          : 0
gula           : 0
kalori         : 110
karbo          : 0
lemak          : 156
protein        : 0
serving        : 1
takaran-satuan : 0
serat          : 0


## Total Nutrition

In [14]:
def calculate_total_nutrients(cleaned_data: dict) -> dict:
    """
    Scale per-serving nutrient values to totals for the whole package.

    Args:
        cleaned_data: Per-serving values. 'serving' is the number of servings
            per package; when it is missing or equal to 0 it is treated as 1.
            Missing nutrients count as 0.

    Returns:
        dict: nutrient name -> per-serving value multiplied by the serving
        count, for garam, gula, kalori, karbo, lemak, protein and serat
        (in that order).
    """
    nutrient_names = ("garam", "gula", "kalori", "karbo", "lemak", "protein", "serat")

    # A serving count of 0 would zero every total, so fall back to 1
    raw_servings = cleaned_data.get('serving', 1)
    multiplier = 1 if raw_servings == 0 else raw_servings

    return {
        name: cleaned_data.get(name, 0) * multiplier
        for name in nutrient_names
    }

In [15]:
# Scale the per-serving values up to the whole package
total_nutrients = calculate_total_nutrients(final_cleaned_data)

# Show the totals, one nutrient per line
print("\n--- Total Nutrisi Per Kemasan ---")
for nutrient_name, total in total_nutrients.items():
    print(f"{nutrient_name:<10}: {total}")


--- Total Nutrisi Per Kemasan ---
garam     : 0
gula      : 0
kalori    : 110
karbo     : 0
lemak     : 156
protein   : 0
serat     : 0


## Nutri-Score

In [16]:
def calculate_nutri_score_simplified(cleaned_data: dict) -> str:
    """
    Compute a simplified Nutri-Score letter (A to E) for solid foods.

    Per-serving values are first normalised to 100 g using 'takaran-satuan'.
    Negative points (energy, sugars, saturated fat, sodium) and positive
    points (fibre, protein) are then read from the Nutri-Score threshold
    tables. The fruit/vegetable component is not available here and is
    treated as 0 points, which is the simplification.

    Args:
        cleaned_data (dict): Nutrition data per serving. Recognised keys:
            'kalori' (kcal), 'gula' (g), 'lemak' (saturated fat, g),
            'natrium' (mg), 'protein' (g), 'serat' (g) and
            'takaran-satuan' (serving weight, g). Missing keys count as 0.
            When 'natrium' is absent, 'garam' is used instead.
            A missing or non-positive 'takaran-satuan' is treated as 100 g,
            i.e. the values are taken as already being per 100 g.

    Returns:
        str: Nutri-Score letter ("A", "B", "C", "D" or "E").
    """

    def _points(value, thresholds):
        """Count how many of the ascending thresholds `value` strictly exceeds."""
        return sum(1 for limit in thresholds if value > limit)

    # --- Normalise to per 100 g ---
    serving_weight_g = cleaned_data.get('takaran-satuan', 100)
    if serving_weight_g <= 0:
        serving_weight_g = 100

    factor = 100 / serving_weight_g

    # The rest of this notebook stores the sodium reading under 'garam'
    # (see the key list in clean_and_transform_data), so looking up only
    # 'natrium' always scored sodium as 0.
    # NOTE(review): assumes 'garam' holds sodium in mg - confirm against the labels.
    sodium_per_serving = cleaned_data.get('natrium')
    if sodium_per_serving is None:
        sodium_per_serving = cleaned_data.get('garam', 0)

    energi_kj = cleaned_data.get('kalori', 0) * factor * 4.184  # kcal -> kJ
    gula_g = cleaned_data.get('gula', 0) * factor
    lemak_g = cleaned_data.get('lemak', 0) * factor
    natrium_mg = sodium_per_serving * factor
    serat_g = cleaned_data.get('serat', 0) * factor
    protein_g = cleaned_data.get('protein', 0) * factor

    # --- Negative points (0-10 each); one point per threshold exceeded ---
    ENERGY_KJ = (335, 670, 1005, 1340, 1675, 2010, 2345, 2680, 3015, 3350)
    SUGARS_G = (4.5, 9, 13.5, 18, 22.5, 27, 31, 36, 40, 45)
    SAT_FAT_G = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    SODIUM_MG = (90, 180, 270, 360, 450, 540, 630, 720, 810, 900)

    A_score = (
        _points(energi_kj, ENERGY_KJ)
        + _points(gula_g, SUGARS_G)
        + _points(lemak_g, SAT_FAT_G)
        + _points(natrium_mg, SODIUM_MG)
    )

    # --- Positive points (0-5 each) ---
    # The official tables use strict '>' here as well (a value exactly on a
    # threshold does not earn the point); the previous '>=' was off by one
    # at the boundaries.
    FIBRE_G = (0.9, 1.9, 2.8, 3.7, 4.7)
    PROTEIN_G = (1.6, 3.2, 4.8, 6.4, 8.0)

    fibre_points = _points(serat_g, FIBRE_G)
    protein_points = _points(protein_g, PROTEIN_G)

    # --- Final score ---
    # Official rule: with 11 or more negative points, protein only counts when
    # the fruit/vegetable component scores 5. That component is 0 here, so
    # protein is excluded in that case.
    if A_score >= 11:
        final_score = A_score - fibre_points
    else:
        final_score = A_score - (fibre_points + protein_points)

    # --- Letter classes for solid foods ---
    if final_score <= -1:
        return "A"
    elif final_score <= 2:
        return "B"
    elif final_score <= 10:
        return "C"
    elif final_score <= 18:
        return "D"
    else:
        return "E"


In [17]:


# Score the product from the cleaned per-serving values
nutri_score = calculate_nutri_score_simplified(final_cleaned_data)

score_message = f"\nNutri-Score Hasil Simplifikasi: {nutri_score}"
print(score_message)


Nutri-Score Hasil Simplifikasi: D


# **INISIASI VARIABEL UNTUK DATABASE**

In [18]:
# Display the per-package totals computed by calculate_total_nutrients
total_nutrients

{'garam': 0,
 'gula': 0,
 'kalori': 110,
 'karbo': 0,
 'lemak': 156,
 'protein': 0,
 'serat': 0}

In [19]:
# Variables ready to be written to the database.
# Each value is looked up by its key. Unpacking total_nutrients.values()
# positionally only works while the dict has exactly these seven keys in exactly
# this order; any change there would silently mis-assign or raise.
garam, gula, kalori, karbo, lemak, protein, serat = (
    total_nutrients[name]
    for name in ("garam", "gula", "kalori", "karbo", "lemak", "protein", "serat")
)
nutri_score

'D'

In [20]:
# One "name: value" line per field that will be stored
summary_lines = [
    f'nutri-score: {nutri_score}',
    f'garam: {garam}',
    f'gula: {gula}',
    f'kalori: {kalori}',
    f'karbohidrat: {karbo}',
    f'lemak: {lemak}',
    f'protein: {protein}',
    f'serat: {serat}',
]
print('\n'.join(summary_lines))

nutri-score: D
garam: 0
gula: 0
kalori: 110
karbohidrat: 0
lemak: 156
protein: 0
serat: 0


# **Menyimpan library yang dibutuhkan**

In [21]:
!pip freeze | grep -E 'ultralytics|torch|opencv-python|pandas|scikit-learn|pytesseract|Pillow|numpy|matplotlib|seaborn|tqdm' > requirements.txt