In [38]:
%pip install azure-cognitiveservices-vision-customvision kagglehub matplotlib
import os, time, uuid
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from msrest.authentication import ApiKeyCredentials



Note: you may need to restart the kernel to use updated packages.


In [8]:
import kagglehub
import os

# Download dataset
path = kagglehub.dataset_download("elinteerie/nigeria-food-ai-dataset")
print("Downloaded to:", path)
print("Contents:", os.listdir(path))


Downloaded to: C:\Users\OLALERE\.cache\kagglehub\datasets\elinteerie\nigeria-food-ai-dataset\versions\3
Contents: ['1HZhs21IE2oNn_V7PF_atNDizdhV8z-vP', '1HZhs21IE2oNn_V7PF_atNDizdhV8z-vP.zip', 'unzipped_dataset']


The dataset was in a binary format,it has to be extracted to view the content


In [9]:
import zipfile
import shutil
import os

# path from kagglehub
dataset_file = os.path.join(path, "1HZhs21IE2oNn_V7PF_atNDizdhV8z-vP")

# Give it a temporary .zip extension if it doesn't have one
alt_file = dataset_file + ".zip"
shutil.copy(dataset_file, alt_file)

# Extract to a folder
extract_dir = os.path.join(path, "unzipped_dataset")

if zipfile.is_zipfile(alt_file):
    with zipfile.ZipFile(alt_file, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
        print("Unzipped dataset to:", extract_dir)
else:
    print("File was not a zip archive — might be nested differently.")

# Check what we got
print("Extracted contents:", os.listdir(extract_dir))


Unzipped dataset to: C:\Users\OLALERE\.cache\kagglehub\datasets\elinteerie\nigeria-food-ai-dataset\versions\3\unzipped_dataset
Extracted contents: ['nigfoodai']


In [None]:

inner_path = os.path.join(path, "unzipped_dataset", "nigfoodai")
print("Inspecting inner contents:")
print(os.listdir(inner_path))


Inspecting inner contents:
['akarabread', 'banga', 'bitterleaf', 'edikakong', 'egusi', 'ewedu', 'garriandgrounut', 'jellof', 'moimoi', 'nkwobi', 'ofeowerri', 'ogbono', 'okra', 'pufpuf']


In [11]:
base_images_dir = inner_path
for folder in os.listdir(base_images_dir):
    folder_path = os.path.join(base_images_dir, folder)
    if os.path.isdir(folder_path):
        imgs = [f for f in os.listdir(folder_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
        print(f"{folder}: {len(imgs)} images")


akarabread: 190 images
banga: 49 images
bitterleaf: 49 images
edikakong: 107 images
egusi: 550 images
ewedu: 247 images
garriandgrounut: 97 images
jellof: 455 images
moimoi: 228 images
nkwobi: 52 images
ofeowerri: 58 images
ogbono: 621 images
okra: 549 images
pufpuf: 197 images


In [12]:
ENDPOINT = os.environ["VISION_TRAINING_ENDPOINT"]
training_key = os.environ["VISION_TRAINING_KEY"]
prediction_key = os.environ["VISION_PREDICTION_KEY"]
prediction_resource_id = os.environ["VISION_PREDICTION_RESOURCE_ID"]

# Authenticate
training_credentials = ApiKeyCredentials(in_headers={"Training-key": training_key})
prediction_credentials = ApiKeyCredentials(in_headers={"Prediction-key": prediction_key})

trainer = CustomVisionTrainingClient(ENDPOINT, training_credentials)
predictor = CustomVisionPredictionClient(ENDPOINT, prediction_credentials)


In [13]:
print("Creating project...")
project_name = f"FoodImageClassifier-{uuid.uuid4()}"
project = trainer.create_project(project_name)
print("Project created:", project.name)


Creating project...
Project created: FoodImageClassifier-bb0f7e9d-c4ba-44cd-a9fb-74d35e963c1f


In [14]:
import os, math, json, time
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateBatch, ImageFileCreateEntry

BATCH_SIZE = 64
PROGRESS_FILE = "upload_progress.json"

# load checkpoint if any
if os.path.exists(PROGRESS_FILE):
    with open(PROGRESS_FILE, "r") as f:
        uploaded = json.load(f)
else:
    uploaded = {}

def mark_uploaded(label, filenames):
    uploaded.setdefault(label, [])
    uploaded[label].extend(filenames)
    with open(PROGRESS_FILE, "w") as f:
        json.dump(uploaded, f)

for label in os.listdir(base_images_dir):
    folder = os.path.join(base_images_dir, label)
    if not os.path.isdir(folder):
        continue

    # skip labels already finished
    if label in uploaded and uploaded[label] == "DONE":
        print(f"{label}: already completed.")
        continue

    # find or create tag
    existing_tags = {t.name: t for t in trainer.get_tags(project.id)}
    tag = existing_tags.get(label) or trainer.create_tag(project.id, label)
    print(f"Uploading for label: {label}")

    images = [f for f in os.listdir(folder) if f.lower().endswith(('.jpg','.jpeg','.png'))]
    already = set(uploaded.get(label, []))
    remaining = [img for img in images if img not in already]
    total_batches = math.ceil(len(remaining) / BATCH_SIZE)

    for i in range(total_batches):
        batch_files = remaining[i*BATCH_SIZE:(i+1)*BATCH_SIZE]
        entries = []
        for img_file in batch_files:
            try:
                img_path = os.path.join(folder, img_file)
                with open(img_path, "rb") as f:
                    entries.append(ImageFileCreateEntry(name=img_file, contents=f.read(), tag_ids=[tag.id]))
            except Exception as e:
                print(f"Skipped {img_file}: {e}")

        if not entries:
            continue

        try:
            result = trainer.create_images_from_files(project.id, ImageFileCreateBatch(images=entries))
            if result.is_batch_successful:
                mark_uploaded(label, batch_files)
                print(f"  Batch {i+1}/{total_batches}: uploaded {len(batch_files)}")
            else:
                print(f"  Batch {i+1}/{total_batches}: failed — retry later.")
        except Exception as e:
            print(f"Batch {i+1}/{total_batches} failed: {e}")
            break  # break to avoid hammering Azure API

    # mark label done
    uploaded[label] = "DONE"
    with open(PROGRESS_FILE, "w") as f:
        json.dump(uploaded, f)

    print(f"Completed: {label}\n")
    time.sleep(1)  # small pause to stay under rate limits


Uploading for label: akarabread
  Batch 1/3: uploaded 64
  Batch 2/3: failed — retry later.
  Batch 3/3: failed — retry later.
Completed: akarabread

Uploading for label: banga
  Batch 1/1: uploaded 49
Completed: banga

Uploading for label: bitterleaf
  Batch 1/1: uploaded 49
Completed: bitterleaf

Uploading for label: edikakong
  Batch 1/2: uploaded 64
  Batch 2/2: failed — retry later.
Completed: edikakong

Uploading for label: egusi
  Batch 1/9: uploaded 64
  Batch 2/9: failed — retry later.
  Batch 3/9: failed — retry later.
  Batch 4/9: failed — retry later.
  Batch 5/9: failed — retry later.
  Batch 6/9: failed — retry later.
  Batch 7/9: failed — retry later.
  Batch 8/9: failed — retry later.
  Batch 9/9: uploaded 38
Completed: egusi

Uploading for label: ewedu
  Batch 1/4: failed — retry later.
  Batch 2/4: failed — retry later.
  Batch 3/4: failed — retry later.
  Batch 4/4: failed — retry later.
Completed: ewedu

Uploading for label: garriandgrounut
  Batch 1/2: failed — ret

In [15]:
tags = trainer.get_tags(project.id)
for tag in tags:
    print(f"{tag.name}: {tag.image_count} images")


okra: 524 images
ogbono: 595 images
moimoi: 219 images
garriandgrounut: 94 images
edikakong: 107 images
akarabread: 186 images
pufpuf: 187 images
ewedu: 228 images
ofeowerri: 56 images
bitterleaf: 49 images
nkwobi: 50 images
jellof: 428 images
egusi: 531 images
banga: 49 images


In [16]:
print("Starting training...")
iteration = trainer.train_project(project.id)

while iteration.status != "Completed":
    time.sleep(10)
    iteration = trainer.get_iteration(project.id, iteration.id)
    print("Training status:", iteration.status)

print("Training completed successfully!")


Starting training...
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: 

In [20]:
publish_iteration_name = "FoodClassifierModel"

trainer.publish_iteration(
    project.id,
    iteration.id,
    publish_iteration_name,
    prediction_resource_id
)

print("Published iteration:", publish_iteration_name)


CustomVisionErrorException: Iteration is already published as: FoodClassifierModel

In [None]:
# Testing with a sample image
sample_image_path = r"C:\Users\OLALERE\Downloads\Testimages\IMG-20251027-WA0008.jpg"

with open(sample_image_path, "rb") as image_data:
    results = predictor.classify_image(
        project.id,
        publish_iteration_name,
        image_data.read()
    )

for prediction in results.predictions:
    print(f"{prediction.tag_name}: {prediction.probability * 100:.2f}%")


ofeowerri: 67.07%
ogbono: 49.80%
bitterleaf: 22.95%
banga: 4.91%
egusi: 4.46%
nkwobi: 0.84%
okra: 0.47%
edikakong: 0.30%
jellof: 0.20%
moimoi: 0.16%
ewedu: 0.15%
akarabread: 0.05%
garriandgrounut: 0.03%
pufpuf: 0.01%


In [None]:
#Testing with multiple images
test_path = r"C:\Users\OLALERE\Downloads\Testimages"

# If directory, pick one or loop through all
if os.path.isdir(test_path):
    files = [f for f in os.listdir(test_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    if not files:
        raise FileNotFoundError(f"No image files found in directory: {test_path}")
else:
    files = [os.path.basename(test_path)]
    test_path = os.path.dirname(test_path)

# Loop through each image file
for filename in files:
    img_path = os.path.join(test_path, filename)
    with open(img_path, "rb") as image_data:
        image_bytes = image_data.read()
        results = predictor.classify_image(
            project.id,
            publish_iteration_name,
            image_bytes
        )

    print(f"\n{filename}")
    for prediction in results.predictions[:3]:
        print(f"  {prediction.tag_name}: {prediction.probability * 100:.2f}%")



IMG-20251027-WA0004.jpg
  moimoi: 98.34%
  ewedu: 31.11%
  akarabread: 5.79%

IMG-20251027-WA0005.jpg
  moimoi: 90.42%
  nkwobi: 35.60%
  egusi: 5.30%

IMG-20251027-WA0006.jpg
  ogbono: 76.98%
  banga: 12.87%
  nkwobi: 5.56%

IMG-20251027-WA0007.jpg
  pufpuf: 98.91%
  ogbono: 2.89%
  bitterleaf: 2.25%

IMG-20251027-WA0008.jpg
  ofeowerri: 67.07%
  ogbono: 49.80%
  bitterleaf: 22.95%

IMG-20251027-WA0009.jpg
  pufpuf: 99.40%
  ogbono: 3.05%
  akarabread: 1.88%

IMG-20251027-WA0010.jpg
  ogbono: 28.39%
  banga: 27.29%
  egusi: 7.29%

IMG-20251027-WA0011.jpg
  ewedu: 96.21%
  edikakong: 9.88%
  egusi: 4.36%

IMG-20251027-WA0012.jpg
  jellof: 99.69%
  egusi: 7.36%
  banga: 2.22%

IMG-20251027-WA0013.jpg
  nkwobi: 95.16%
  okra: 5.30%
  jellof: 3.07%

IMG-20251027-WA0014.jpg
  garriandgrounut: 98.68%
  okra: 2.01%
  akarabread: 1.78%

IMG-20251027-WA0015.jpg
  nkwobi: 92.42%
  banga: 13.16%
  ogbono: 4.98%

IMG-20251027-WA0016.jpg
  nkwobi: 99.30%
  moimoi: 3.36%
  jellof: 3.20%

IMG-20251

## **Documentations**

---

```markdown
# Azure Custom Vision Food Image Classifier  
**Contributor:** *Robiu Olalere / Algebra101*  

---

## Overview  
This notebook demonstrates how to build a **food image classifier** using **Azure Custom Vision** and a **Kaggle dataset of Nigerian foods**.  
The objective was to automate dataset upload, train a multiclass classifier, and expose prediction endpoints for local inference.

---

## Dataset  
- **Source:** [Nigeria Food AI Dataset (Kaggle)](https://www.kaggle.com/datasets/elinteerie/nigeria-food-ai-dataset)  
- The dataset is stored in a **binary format**, so it must be **extracted** before Azure Custom Vision recognizes the images.  
- There is noticeable **class imbalance**, which slightly reduced recall for underrepresented classes.  
- Example: foods with fewer samples showed recall below 50%, while classes with more samples achieved higher precision and recall.

---

## Data Upload  
A custom uploader script was implemented to handle robust dataset uploading:

- Handles extraction from KaggleHub cache.  
- Uploads images in **batches of 64** (Azure’s upload limit).  
- Supports **automatic resume** after network interruptions using a local progress checkpoint file.  
- Total upload time: **≈ 29 minutes 30 seconds**.  

---

## Model Training  
- **Project type:** Classification (Multiclass)  
- **Training duration:** 12 minutes 32 seconds  

**Performance (Threshold = 50%)**
| Metric | Score |
|:--|:--|
| Precision | 92.2% |
| Recall | 74.7% |
| Average Precision (AP) | 91.2% |

**Observations**
- High-sample classes → stronger precision and recall.  
- Some smaller classes still performed well despite low image counts.

---

## API Keys and Environment  
During experimentation, it was observed that:
- If the initial API key fails or expires, a **manual override** in the code may be required even after updating the `.env` file.  
- The guide includes steps for resetting and re-authenticating both **Training** and **Prediction** clients programmatically.  

---

## Prediction Endpoints  
**Image URL Endpoint:**  
```

[https://myclassifier-prediction.cognitiveservices.azure.com/customvision/v3.0/Prediction/cbb3b58c-7d5e-4436-9ab3-5af77ee3f197/classify/iterations/FoodClassifierModel/url]
```

**Image File Endpoint:**  
```

[https://myclassifier-prediction.cognitiveservices.azure.com/customvision/v3.0/Prediction/cbb3b58c-7d5e-4436-9ab3-5af77ee3f197/classify/iterations/FoodClassifierModel/image]

````

---

## Example Inference (Python)
```python
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from msrest.authentication import ApiKeyCredentials

# Replace with your own values
PREDICTION_KEY = "<your_prediction_key>"
ENDPOINT = "<your_endpoint>"
PROJECT_ID = "<your_project_id>"
ITERATION_NAME = "FoodClassifierModel"

credentials = ApiKeyCredentials(in_headers={"Prediction-key": PREDICTION_KEY})
predictor = CustomVisionPredictionClient(ENDPOINT, credentials)

with open("test_image.jpg", "rb") as image_data:
    results = predictor.classify_image(PROJECT_ID, ITERATION_NAME, image_data.read())

for prediction in results.predictions:
    print(f"{prediction.tag_name}: {prediction.probability * 100:.2f}%")
````

---

## Notes and Recommendations

* The class imbalance could be addressed by **oversampling** or **data augmentation** for smaller categories.
* Future contributors can experiment with:

  * **Advanced Training** mode in Azure Custom Vision
  * **AutoML** for additional optimization
* The notebook and documentation serve as a reproducible baseline for food image classification tasks in Azure Custom Vision.

---

```

