# Data augmentation and tensorflow download

In [4]:
!pip install rarfile

Collecting rarfile
  Downloading rarfile-4.2-py3-none-any.whl.metadata (4.4 kB)
Downloading rarfile-4.2-py3-none-any.whl (29 kB)
Installing collected packages: rarfile
Successfully installed rarfile-4.2


In [5]:
import rarfile
import os

rar_path = "/content/drive/MyDrive/Motmaen/food photos.rar"
extract_path = "/content/drive/MyDrive/Motmaen/food_photos"

# Re-running this cell would otherwise rewrite every file in the archive.
# Skip the work when the destination already has content; delete the folder
# to force a fresh extraction.
if os.path.isdir(extract_path) and os.listdir(extract_path):
    print(f"Skipping extraction: {extract_path} already has content.")
else:
    with rarfile.RarFile(rar_path) as rf:
        rf.extractall(path=extract_path)
    print("Extraction complete!")


Extraction complete!


In [7]:
import os
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img, array_to_img
root = "/content/drive/MyDrive/Motmaen/food_photos/food photos"

# Augmentation generator: every call to datagen.random_transform() draws a new
# random combination of the transformations configured here.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

# Minimum number of .jpg images wanted in each split folder. Folders that
# already hold at least this many are left untouched (nothing is deleted).
# NOTE(review): padding valid/test with augmented copies changes what those
# metrics measure -- confirm this is intended.
target_counts = {
    'train': 64,
    'valid': 19,
    'test': 15
}

# Prefix of generated files, used to tell them apart from the originals.
AUG_PREFIX = 'aug'


def top_up_folder(folder, images, target):
    """Write augmented copies into `folder` until it holds `target` images.

    Args:
        folder: Directory that receives the generated .jpg files.
        images: Names of the .jpg files currently in `folder`.
        target: Number of images the folder should end up with.

    Returns:
        The number of images in the folder after augmentation.
    """
    count = len(images)

    # Prefer original photos as sources so a re-run does not augment files
    # that were themselves generated by an earlier run.
    originals = [f for f in images if not f.startswith(AUG_PREFIX + '_')]
    sources = originals or images

    serial = 0
    while count < target:
        # Pick a fresh source for every new file so the additions are spread
        # across the available photos instead of all deriving from one image.
        source_name = random.choice(sources)
        pixels = img_to_array(load_img(os.path.join(folder, source_name)))
        augmented = datagen.random_transform(pixels)

        # Find an unused file name: an overwritten file would make `count`
        # drift away from the real number of files on disk.
        out_path = os.path.join(folder, f"{AUG_PREFIX}_{serial:04d}.jpg")
        while os.path.exists(out_path):
            serial += 1
            out_path = os.path.join(folder, f"{AUG_PREFIX}_{serial:04d}.jpg")

        # array_to_img rescales pixel values to 0-255 by default before saving.
        array_to_img(augmented).save(out_path)
        serial += 1
        count += 1

    return count


for dish in os.listdir(root):
    dish_path = os.path.join(root, dish)
    if not os.path.isdir(dish_path):
        continue

    # Go through the train, valid and test folders of this dish.
    for split, target in target_counts.items():
        folder = os.path.join(dish_path, split)
        if not os.path.isdir(folder):
            continue

        images = [f for f in os.listdir(folder) if f.lower().endswith('.jpg')]
        count = len(images)

        print(f"{dish} - {split}: {count}/{target}")

        # Already enough images: nothing to do.
        if count >= target:
            continue

        # Nothing to augment from; report it instead of failing.
        if not images:
            print(f"No .jpg images in {dish}/{split}, skipping\n")
            continue

        count = top_up_folder(folder, images, target)

        print(f"Finished {dish}/{split} → {count} images\n")


Fattah - train: 53/64
Finished Fattah/train → 64 images

Fattah - valid: 15/19
Finished Fattah/valid → 19 images

Fattah - test: 14/15
Finished Fattah/test → 15 images

Fool - train: 46/64
Finished Fool/train → 64 images

Fool - valid: 15/19
Finished Fool/valid → 19 images

Fool - test: 12/15
Finished Fool/test → 15 images

Hawawshy - train: 12/64
Finished Hawawshy/train → 64 images

Hawawshy - valid: 17/19
Finished Hawawshy/valid → 19 images

Hawawshy - test: 56/15
Koshari - train: 60/64
Finished Koshari/train → 64 images

Koshari - valid: 18/19
Finished Koshari/valid → 19 images

Koshari - test: 15/15
Kunafa - train: 53/64
Finished Kunafa/train → 64 images

Kunafa - valid: 10/19
Finished Kunafa/valid → 19 images

Kunafa - test: 10/15
Finished Kunafa/test → 15 images

Mahshy El Kosa - train: 51/64
Finished Mahshy El Kosa/train → 64 images

Mahshy El Kosa - valid: 15/19
Finished Mahshy El Kosa/valid → 19 images

Mahshy El Kosa - test: 11/15
Finished Mahshy El Kosa/test → 15 images

Pta

In [16]:
import os

# Verification pass: report how many .jpg files each dish/split folder holds.
for dish in os.listdir(root):
    dish_path = os.path.join(root, dish)
    # Skip hidden entries (e.g. .env, .ipynb_checkpoints) and stray files.
    if dish.startswith('.') or not os.path.isdir(dish_path):
        continue
    for split in ['train', 'valid', 'test']:
        folder = os.path.join(dish_path, split)
        # A missing split folder is reported rather than raising.
        if not os.path.isdir(folder):
            print(f"{dish} - {split}: missing")
            continue
        images = [f for f in os.listdir(folder) if f.lower().endswith('.jpg')]
        count = len(images)
        # Label each count so the output identifies the dish and split.
        print(f"{dish} - {split}: {count}")

train
64
valid
19
test
15
train
64
valid
19
test
15
train
64
valid
19
test
56
train
64
valid
19
test
15
train
64
valid
19
test
15
train
64
valid
19
test
15
train
64
valid
19
test
15
train
64
valid
19
test
15
train
64
valid
19
test
15
