# Exploring whether we can get the "ground truth" ingredients via barcodes

- Install requirements:
  ``` brew install zbar ```


In [None]:
import requests
import cv2
import os
import pyzbar.pyzbar as pyzbar
import json

import yaml

In [None]:
# The data folder sits next to the directory the notebook is run from.
parent_of_cwd = os.path.dirname(os.getcwd())
data_dir = os.path.join(parent_of_cwd, "data")

In [None]:
# Every entry under data/raw whose name starts with "product" is treated as
# one product.
raw_dir = os.path.join(data_dir, "raw")
products = [name for name in os.listdir(raw_dir) if name.startswith("product")]

In [None]:
def barcode_reader(image_path: str):
    """Decode the first barcode found in the image at ``image_path``.

    Prints a warning when the image contains more than one barcode; only the
    first decoded one is returned.

    Args:
        image_path: Path of the image file to scan.

    Returns:
        The barcode payload decoded as UTF-8 text, or ``None`` when the image
        could not be loaded or contains no readable barcode.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None
        # instead of raising, so guard before handing it to pyzbar.
        print(f"warning: could not read image: {image_path}")
        return None

    # Decode once and reuse the result for both the warning and the return.
    barcodes = pyzbar.decode(img)
    if len(barcodes) > 1:
        print(f"warning: found {len(barcodes)} barcodes in picture: {image_path}")

    if not barcodes:
        return None
    return barcodes[0].data.decode("utf-8")

In [None]:
# Look up every product's barcode on Open Food Facts and, if it is unknown
# there, on Open Beauty Facts. The first hit is stored as YAML under
# data/validation/<product>/.
food_api_url = "https://world.openfoodfacts.org/api/v0/product/{barcode}.json"
beauty_api_url = "https://world.openbeautyfacts.org/api/v2/product/{barcode}.json"


def _lookup_product(url_template, barcode):
    """Query one API endpoint; return the parsed response, or None if not found."""
    try:
        # A timeout keeps one stalled request from hanging the whole loop.
        response = requests.get(url_template.format(barcode=barcode), timeout=30)
        payload = json.loads(response.text)
    except (requests.RequestException, ValueError) as err:
        # Network failure or a non-JSON reply: report it and treat as a miss.
        print(f"Request for barcode {barcode} failed: {err}")
        return None
    if not isinstance(payload, dict):
        return None
    # A reply without "status_verbose" is treated as a miss rather than saved.
    if payload.get("status_verbose", "product not found") == "product not found":
        return None
    return payload


found_counter = 0
for product in products:
    product_path = os.path.join(data_dir, "raw", product, "Barcode")

    # We take the first one for now, maybe we need to look at more photos
    barcode_images = os.listdir(product_path)
    if not barcode_images:
        print(f"No barcode image for {product}, skipping")
        continue

    image_path = os.path.join(product_path, barcode_images[0])
    barcode = barcode_reader(image_path)
    if barcode is None:
        # Without this guard the lookup would query ".../product/None.json".
        print(f"Could not read barcode for {product}, skipping")
        continue

    result = _lookup_product(food_api_url, barcode)
    file_name = "openfoodfacts_data.yaml"
    if result is None:
        print(f"Product {barcode}, {product} not found, testing other API")
        result = _lookup_product(beauty_api_url, barcode)
        file_name = "openbeautyfacts_data.yaml"
    if result is None:
        print(f"Product {barcode}, {product} not found")
        continue

    found_counter += 1
    print(f"Product {product} found, barcode: {barcode}")
    save_dir = os.path.join(data_dir, "validation", product)
    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, file_name), "w") as file:
        yaml.dump(result, file)

print(f"Found {found_counter}/{len(products)} products")

# Create a barcode and image dataset to get the ground truth


In [None]:
# Folder that receives the ingredient photos renamed after their barcode.
output_dir = os.path.join(data_dir, "tmp")

# exist_ok=True creates the folder only when missing, without a separate
# existence check that could race with another process.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Write every ingredient photo ("Inhaltsstoffe") of each product to output_dir,
# named after the product's barcode, and collect the barcodes that were read.
save_barcodes = []
for product in products:
    print("Processing product", product)
    product_path = os.path.join(data_dir, "raw", product, "Barcode")

    # We take the first one for now, maybe we need to look at more photos
    barcode_images = os.listdir(product_path)
    if not barcode_images:
        print(f"No barcode image for {product}, skipping")
        continue

    barcode_path = os.path.join(product_path, barcode_images[0])
    barcode = barcode_reader(barcode_path)

    if barcode is None:
        print(f"Could not read barcode for {product}, skipping")
        continue
    # Record the barcode; the list is written to barcodes.csv afterwards.
    save_barcodes.append(barcode)

    # Get all images
    image_dir_path = os.path.join(data_dir, "raw", product, "Inhaltsstoffe")
    images = os.listdir(image_dir_path)
    if not images:
        print(f"No ingredient images for {product}, skipping")
        continue

    # A single photo is saved as "<barcode>.<ext>"; several photos get an
    # index suffix "<barcode>_<i>.<ext>" so they do not overwrite each other.
    numbered = len(images) > 1
    for i, image in enumerate(images):
        print("image: " if numbered else "processing image ", image)
        image_path = os.path.join(image_dir_path, image)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode; passing
            # that on to cv2.imwrite would raise.
            print(f"Could not read image {image_path}, skipping")
            continue
        postfix = image.split(".")[-1]
        if numbered:
            save_name = f"{barcode}_{i}.{postfix}"
        else:
            save_name = f"{barcode}.{postfix}"
        cv2.imwrite(os.path.join(output_dir, save_name), img)

In [None]:
# Persist the collected barcodes, one per line, to barcodes.csv in the current
# working directory; falsy entries (None, empty strings) are left out.
csv_path = os.path.join(os.getcwd(), "barcodes.csv")
csv_lines = [code for code in save_barcodes if code]
with open(csv_path, "w") as text_file:
    text_file.write("\n".join(csv_lines))

In [None]:
# Bare expression: the notebook shows the collected barcodes as cell output.
save_barcodes