In [1]:
# %pip install azure-cognitiveservices-vision-customvision kagglehub matplotlib
import os, time, uuid
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from msrest.authentication import ApiKeyCredentials

In [2]:
import kagglehub
import os

# Download dataset
path = kagglehub.dataset_download("elinteerie/nigeria-food-ai-dataset")
print("Downloaded to:", path)
print("Contents:", os.listdir(path))


  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/elinteerie/nigeria-food-ai-dataset?dataset_version_number=3...


100%|████████████████████████████████████████| 843M/843M [05:02<00:00, 2.92MB/s]

Extracting files...





Downloaded to: /Users/mac/.cache/kagglehub/datasets/elinteerie/nigeria-food-ai-dataset/versions/3
Contents: ['1HZhs21IE2oNn_V7PF_atNDizdhV8z-vP']


The dataset was in a binary format,it has to be extracted to view the content


In [3]:
import zipfile
import shutil
import os

# path from kagglehub
dataset_file = os.path.join(path, "1HZhs21IE2oNn_V7PF_atNDizdhV8z-vP")

# Give it a temporary .zip extension if it doesn't have one
alt_file = dataset_file + ".zip"
shutil.copy(dataset_file, alt_file)

# Extract to a folder
extract_dir = os.path.join(path, "unzipped_dataset")

if zipfile.is_zipfile(alt_file):
    with zipfile.ZipFile(alt_file, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
        print("Unzipped dataset to:", extract_dir)
else:
    print("File was not a zip archive — might be nested differently.")

# Check what we got
print("Extracted contents:", os.listdir(extract_dir))


Unzipped dataset to: /Users/mac/.cache/kagglehub/datasets/elinteerie/nigeria-food-ai-dataset/versions/3/unzipped_dataset
Extracted contents: ['nigfoodai']


In [4]:

inner_path = os.path.join(path, "unzipped_dataset", "nigfoodai")
print("Inspecting inner contents:")
print(os.listdir(inner_path))


Inspecting inner contents:
['jellof', 'bitterleaf', 'okra', 'edikakong', 'moimoi', 'egusi', 'ogbono', 'pufpuf', 'banga', 'nkwobi', 'akarabread', 'garriandgrounut', 'ofeowerri', 'ewedu']


In [5]:
base_images_dir = inner_path
for folder in os.listdir(base_images_dir):
    folder_path = os.path.join(base_images_dir, folder)
    if os.path.isdir(folder_path):
        imgs = [f for f in os.listdir(folder_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
        print(f"{folder}: {len(imgs)} images")


jellof: 455 images
bitterleaf: 49 images
okra: 549 images
edikakong: 107 images
moimoi: 228 images
egusi: 550 images
ogbono: 621 images
pufpuf: 197 images
banga: 49 images
nkwobi: 52 images
akarabread: 190 images
garriandgrounut: 97 images
ofeowerri: 58 images
ewedu: 247 images


In [7]:
from dotenv import load_dotenv
load_dotenv()

True

In [20]:
ENDPOINT = os.getenv("VISION_TRAINING_ENDPOINT")
training_key = os.getenv("VISION_TRAINING_KEY")
PRED_ENDPOINT = os.getenv("VISION_PREDICTION_ENDPOINT")
prediction_key = os.getenv("VISION_PREDICTION_KEY")
prediction_resource_id = os.getenv("VISION_PREDICTION_RESOURCE_ID")

# Authenticate
training_credentials = ApiKeyCredentials(in_headers={"Training-key": training_key})
prediction_credentials = ApiKeyCredentials(in_headers={"Prediction-key": prediction_key})

trainer = CustomVisionTrainingClient(ENDPOINT, training_credentials)
predictor = CustomVisionPredictionClient(PRED_ENDPOINT, prediction_credentials)


In [9]:
print("Creating project...")
project_name = f"FoodImageClassifier-{uuid.uuid4()}"
project = trainer.create_project(project_name)
print("Project created:", project.name)


Creating project...
Project created: FoodImageClassifier-091ba31a-1a64-4605-83d3-13b54cf77a6a


In [10]:
import os, math, json, time
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateBatch, ImageFileCreateEntry

BATCH_SIZE = 64
PROGRESS_FILE = "upload_progress.json"

# load checkpoint if any
if os.path.exists(PROGRESS_FILE):
    with open(PROGRESS_FILE, "r") as f:
        uploaded = json.load(f)
else:
    uploaded = {}

def mark_uploaded(label, filenames):
    uploaded.setdefault(label, [])
    uploaded[label].extend(filenames)
    with open(PROGRESS_FILE, "w") as f:
        json.dump(uploaded, f)

for label in os.listdir(base_images_dir):
    folder = os.path.join(base_images_dir, label)
    if not os.path.isdir(folder):
        continue

    # skip labels already finished
    if label in uploaded and uploaded[label] == "DONE":
        print(f"{label}: already completed.")
        continue

    # find or create tag
    existing_tags = {t.name: t for t in trainer.get_tags(project.id)}
    tag = existing_tags.get(label) or trainer.create_tag(project.id, label)
    print(f"Uploading for label: {label}")

    images = [f for f in os.listdir(folder) if f.lower().endswith(('.jpg','.jpeg','.png'))]
    already = set(uploaded.get(label, []))
    remaining = [img for img in images if img not in already]
    total_batches = math.ceil(len(remaining) / BATCH_SIZE)

    for i in range(total_batches):
        batch_files = remaining[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
        entries = []
        for img_file in batch_files:
            try:
                img_path = os.path.join(folder, img_file)
                with open(img_path, "rb") as f:
                    entries.append(ImageFileCreateEntry(name=img_file, contents=f.read(), tag_ids=[tag.id]))
            except Exception as e:
                print(f"Skipped {img_file}: {e}")

        if not entries:
            continue

        try:
            result = trainer.create_images_from_files(project.id, ImageFileCreateBatch(images=entries))
            if result.is_batch_successful:
                mark_uploaded(label, batch_files)
                print(f"  Batch {i+1}/{total_batches}: uploaded {len(batch_files)}")
            else:
                print(f"  Batch {i+1}/{total_batches}: failed — retry later.")
        except Exception as e:
            print(f"Batch {i+1}/{total_batches} failed: {e}")
            break  # break to avoid hammering Azure API

    # mark label done
    uploaded[label] = "DONE"
    with open(PROGRESS_FILE, "w") as f:
        json.dump(uploaded, f)

    print(f"Completed: {label}\n")
    time.sleep(1)  # small pause to stay under rate limits


Uploading for label: jellof
  Batch 1/8: uploaded 64
  Batch 2/8: failed — retry later.
  Batch 3/8: failed — retry later.
  Batch 4/8: failed — retry later.
  Batch 5/8: failed — retry later.
  Batch 6/8: failed — retry later.
  Batch 7/8: failed — retry later.
  Batch 8/8: uploaded 7
Completed: jellof

Uploading for label: bitterleaf
  Batch 1/1: uploaded 49
Completed: bitterleaf

Uploading for label: okra
  Batch 1/9: uploaded 64
  Batch 2/9: failed — retry later.
  Batch 3/9: failed — retry later.
  Batch 4/9: failed — retry later.
  Batch 5/9: failed — retry later.
  Batch 6/9: failed — retry later.
  Batch 7/9: failed — retry later.
  Batch 8/9: failed — retry later.
  Batch 9/9: failed — retry later.
Completed: okra

Uploading for label: edikakong
  Batch 1/2: uploaded 64
  Batch 2/2: uploaded 43
Completed: edikakong

Uploading for label: moimoi
  Batch 1/4: failed — retry later.
  Batch 2/4: failed — retry later.
  Batch 3/4: failed — retry later.
  Batch 4/4: failed — retry la

In [11]:
tags = trainer.get_tags(project.id)
for tag in tags:
    print(f"{tag.name}: {tag.image_count} images")


garriandgrounut: 94 images
nkwobi: 50 images
moimoi: 219 images
jellof: 428 images
ewedu: 140 images
egusi: 531 images
ofeowerri: 56 images
akarabread: 186 images
edikakong: 107 images
pufpuf: 124 images
okra: 524 images
banga: 49 images
bitterleaf: 49 images
ogbono: 595 images


In [12]:
print("Starting training...")
iteration = trainer.train_project(project.id)

while iteration.status != "Completed":
    time.sleep(10)
    iteration = trainer.get_iteration(project.id, iteration.id)
    print("Training status:", iteration.status)

print("Training completed successfully!")


Starting training...
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: 

In [13]:
publish_iteration_name = "Naija-Food-Classifier-Model"

trainer.publish_iteration(
    project.id,
    iteration.id,
    publish_iteration_name,
    prediction_resource_id
)

print("Published iteration:", publish_iteration_name)


Published iteration: Naija-Food-Classifier-Model


In [26]:
# Testing with a sample image
sample_image_path = "../src/food_classifier/test_images/image.jpg"

with open(sample_image_path, "rb") as image_data:
    results = predictor.classify_image(
        project.id,
        publish_iteration_name,
        image_data.read()
    )

for prediction in results.predictions:
    print(f"{prediction.tag_name}: {prediction.probability * 100:.2f}%")


akarabread: 98.43%
garriandgrounut: 2.23%
pufpuf: 2.07%
moimoi: 1.64%
egusi: 1.35%
okra: 0.46%
ogbono: 0.31%
ewedu: 0.21%
ofeowerri: 0.17%
edikakong: 0.13%
jellof: 0.10%
nkwobi: 0.07%
bitterleaf: 0.05%
banga: 0.02%


In [27]:
#Testing with multiple images
test_path = "../src/food_classifier/test_images"

# If directory, pick one or loop through all
if os.path.isdir(test_path):
    files = [f for f in os.listdir(test_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    if not files:
        raise FileNotFoundError(f"No image files found in directory: {test_path}")
else:
    files = [os.path.basename(test_path)]
    test_path = os.path.dirname(test_path)

# Loop through each image file
for filename in files:
    img_path = os.path.join(test_path, filename)
    with open(img_path, "rb") as image_data:
        image_bytes = image_data.read()
        results = predictor.classify_image(
            project.id,
            publish_iteration_name,
            image_bytes
        )

    print(f"\n{filename}")
    for prediction in results.predictions[:3]:
        print(f"  {prediction.tag_name}: {prediction.probability * 100:.2f}%")



Ogbono-soup-Draw-Soup-IG-1-500x500.jpg
  ogbono: 89.59%
  banga: 32.24%
  ofeowerri: 13.85%

Amala-And-Ewedu.jpeg
  ewedu: 95.19%
  ogbono: 4.62%
  egusi: 4.37%

image.jpg
  akarabread: 98.43%
  garriandgrounut: 2.23%
  pufpuf: 2.07%


## **Documentations**

---

# Azure Custom Vision Food Image Classifier  
**Contributor:** *Robiu Olalere / Algebra101*  

---

## Overview  
This notebook demonstrates how to build a **food image classifier** using **Azure Custom Vision** and a **Kaggle dataset of Nigerian foods**.  
The objective was to automate dataset upload, train a multiclass classifier, and expose prediction endpoints for local inference.

---

## Dataset  
- **Source:** [Nigeria Food AI Dataset (Kaggle)](https://www.kaggle.com/datasets/elinteerie/nigeria-food-ai-dataset)  
- The dataset is stored in a **binary format**, so it must be **extracted** before Azure Custom Vision recognizes the images.  
- There is noticeable **class imbalance**, which slightly reduced recall for underrepresented classes.  
- Example: foods with fewer samples showed recall below 50%, while classes with more samples achieved higher precision and recall.

---

## Data Upload  
A custom uploader script was implemented to handle robust dataset uploading:

- Handles extraction from KaggleHub cache.  
- Uploads images in **batches of 64** (Azure’s upload limit).  
- Supports **automatic resume** after network interruptions using a local progress checkpoint file.  
- Total upload time: **≈ 29 minutes 30 seconds**.  

---

## Model Training  
- **Project type:** Classification (Multiclass)  
- **Training duration:** 12 minutes 32 seconds  

**Performance (Threshold = 50%)**
| Metric | Score |
|:--|:--|
| Precision | 92.2% |
| Recall | 74.7% |
| Average Precision (AP) | 91.2% |

**Observations**
- High-sample classes → stronger precision and recall.  
- Some smaller classes still performed well despite low image counts.

---

## API Keys and Environment  
During experimentation, it was observed that:
- If the initial API key fails or expires, a **manual override** in the code may be required even after updating the `.env` file.  
- The guide includes steps for resetting and re-authenticating both **Training** and **Prediction** clients programmatically.  

---

## Prediction Endpoints  
**Image URL Endpoint:**  


[https://myclassifier-prediction.cognitiveservices.azure.com/customvision/v3.0/Prediction/cbb3b58c-7d5e-4436-9ab3-5af77ee3f197/classify/iterations/FoodClassifierModel/url](https://myclassifier-prediction.cognitiveservices.azure.com/customvision/v3.0/Prediction/cbb3b58c-7d5e-4436-9ab3-5af77ee3f197/classify/iterations/FoodClassifierModel/url)


**Image File Endpoint:**  


[https://myclassifier-prediction.cognitiveservices.azure.com/customvision/v3.0/Prediction/cbb3b58c-7d5e-4436-9ab3-5af77ee3f197/classify/iterations/FoodClassifierModel/image](https://myclassifier-prediction.cognitiveservices.azure.com/customvision/v3.0/Prediction/cbb3b58c-7d5e-4436-9ab3-5af77ee3f197/classify/iterations/FoodClassifierModel/image)



---

## Example Inference (Python)

```python
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from msrest.authentication import ApiKeyCredentials

# Replace with your own values
PREDICTION_KEY = "<your_prediction_key>"
ENDPOINT = "<your_endpoint>"
PROJECT_ID = "<your_project_id>"
ITERATION_NAME = "FoodClassifierModel"

credentials = ApiKeyCredentials(in_headers={"Prediction-key": PREDICTION_KEY})
predictor = CustomVisionPredictionClient(ENDPOINT, credentials)

with open("test_image.jpg", "rb") as image_data:
    results = predictor.classify_image(PROJECT_ID, ITERATION_NAME, image_data.read())

for prediction in results.predictions:
    print(f"{prediction.tag_name}: {prediction.probability * 100:.2f}%")
````

---

## Notes and Recommendations

* The class imbalance could be addressed by **oversampling** or **data augmentation** for smaller categories.
* Future contributors can experiment with:

  * **Advanced Training** mode in Azure Custom Vision
  * **AutoML** for additional optimization
* The notebook and documentation serve as a reproducible baseline for food image classification tasks in Azure Custom Vision.

---



