# [1] Import Functions

## [1.1] Google Colab

In [None]:
# Colab only: mount Google Drive at /content/drive so the project files stored
# there can be read and written. Skip this cell in a local Jupyter session
# (use section 1.2 instead).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Colab only: execute the helper notebook stored on the mounted Drive.
# NOTE(review): helpers used later (resize_images, augment_data,
# combine_augmented_data, display_label_counts) and Counter are not defined in
# this notebook; presumably they come from ImportFunctions.ipynb — confirm.
# NOTE(review): this path uses 'MyDrive' while the dataset cells use 'My Drive'
# — confirm both spellings resolve on the mounted Drive.
%run /content/drive/MyDrive/FYP/ImportFunctions.ipynb

## [1.2] Jupyter

In [None]:
# Local Jupyter only: execute the helper notebook from the working directory.
# NOTE(review): helpers used later (resize_images, augment_data,
# combine_augmented_data, display_label_counts) and Counter are not defined in
# this notebook; presumably they come from ImportFunctions.ipynb — confirm.
%run ImportFunctions.ipynb

2024-03-22 13:23:40.940696: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-22 13:23:41.121791: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-03-22 13:23:41.121837: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-03-22 13:23:42.390625: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-

# [2] Import Libraries

In [None]:
# !pip install tqdm

In [None]:
import numpy as np
import pandas as pd
import requests
from PIL import Image
from tqdm import tqdm
from io import BytesIO

# [3] Prepare Dataset

In [None]:
def load_and_preprocess_images(df, target_size=(224, 224), timeout=30):
    """Download, resize and scale every image referenced by ``df``.

    Rows are processed in order. A row is skipped (with a printed message)
    when the download fails, the server does not answer with HTTP 200, the
    payload cannot be decoded as an image, or the decoded array does not have
    exactly three channels. One bad URL therefore no longer aborts the whole
    (long-running) download loop.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``image_url`` column (where each image is fetched
        from) and a ``scientific_name`` column (used as the label).
    target_size : tuple of int, optional
        Size passed to ``PIL.Image.resize``, i.e. ``(width, height)``.
        Defaults to ``(224, 224)``.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up on that row.
        Without a timeout a single stalled server would hang the loop.

    Returns
    -------
    images : numpy.ndarray
        The kept images stacked into one array, pixel values divided by 255.0.
    labels : numpy.ndarray
        The ``scientific_name`` of each kept image, aligned with ``images``.
    """
    # Initialize lists to store images and labels
    images = []
    labels = []

    # Use tqdm to show progress bar
    for _, row in tqdm(df.iterrows(), total=len(df)):
        image_url = row['image_url']
        label = row['scientific_name']

        # Network errors (DNS failure, refused connection, timeout, ...) skip
        # the row instead of discarding everything downloaded so far.
        try:
            response = requests.get(image_url, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Skipping {image_url}: download failed ({exc})")
            continue

        if response.status_code != 200:
            print(f"Skipping {image_url}: HTTP status {response.status_code}")
            continue

        # Decode and resize inside a context manager so the PIL handle is
        # released; undecodable payloads (e.g. an HTML error page) are skipped.
        try:
            with Image.open(BytesIO(response.content)) as img:
                img_array = np.array(img.resize(target_size))
        except (OSError, ValueError) as exc:
            print(f"Skipping {image_url}: cannot decode image ({exc})")
            continue

        # Keep only three-channel images so the arrays can be stacked
        if len(img_array.shape) == 3 and img_array.shape[2] == 3:
            # Normalize pixel values
            images.append(img_array / 255.0)
            labels.append(label)
        else:
            print(f"Ignoring image with invalid shape: {img_array.shape}")

    # Convert lists to NumPy arrays
    images = np.array(images)
    labels = np.array(labels)

    return images, labels

In [None]:
# Load the dataset (Google Colab)
# Colab only: reads the CSV from the mounted Drive into `df`.
# load_and_preprocess_images reads its 'image_url' and 'scientific_name' columns.
df = pd.read_csv('/content/drive/My Drive/FYP/dataset/all_removed_9labels.csv')

In [None]:
# Load the dataset (Jupyter)
# Local Jupyter only: reads the CSV from the relative dataset/ folder into `df`
# (the same name the Colab cell binds, so run only one of the two).
# load_and_preprocess_images reads its 'image_url' and 'scientific_name' columns.
df = pd.read_csv('dataset/all_removed_9labels.csv')

In [None]:
# Load images and labels
# Issues one HTTP request per row of df, so this cell is slow
# (the recorded run below took 11:43 for 941 rows).
images, labels = load_and_preprocess_images(df)

100%|██████████| 941/941 [11:43<00:00,  1.34it/s]


In [None]:
# Rebinds `images` to the result of resize_images(images, (299,299)).
# NOTE(review): resize_images is not defined in this notebook; presumably it
# comes from ImportFunctions.ipynb and rescales each image to 299x299 — confirm.
images = resize_images(images, (299,299))

# [4] Save Dataset

## [4.1] Google Colab

In [None]:
# Colab only: write the image and label arrays to the Drive dataset folder as
# .npy files (the same paths section 5.1 reads back).
np.save('/content/drive/My Drive/FYP/dataset/images.npy', images)
np.save('/content/drive/My Drive/FYP/dataset/labels.npy', labels)

## [4.2] Jupyter

In [None]:
# Local Jupyter only: write the image and label arrays to the relative
# dataset/ folder as .npy files (the same paths section 5.2 reads back).
np.save('dataset/images.npy', images)
np.save('dataset/labels.npy', labels)

# [5] Load Dataset

## [5.1] Google Colab

In [None]:
# Load images and labels
# Colab only: reads back the .npy files written in section 4.1 and rebinds
# `images` and `labels`.
images = np.load('/content/drive/My Drive/FYP/dataset/images.npy')
labels = np.load('/content/drive/My Drive/FYP/dataset/labels.npy')

## [5.2] Jupyter

In [None]:
# Local Jupyter only: reads back the .npy files written in section 4.2 and
# rebinds `images` and `labels`.
images = np.load('dataset/images.npy')
labels = np.load('dataset/labels.npy')

# [6] Data Augmentation

In [None]:
# Count the number of occurrences of each label
# NOTE(review): Counter is not imported in this notebook's import cell;
# presumably it is collections.Counter provided by ImportFunctions.ipynb — confirm.
label_counts = Counter(labels)

# Display the counts in a table
# NOTE(review): display_label_counts is not defined in this notebook;
# presumably it comes from ImportFunctions.ipynb — confirm.
display_label_counts(label_counts)

Label	Count
----------------
Abelmoschus esculentus	75
Abelmoschus moschatus	31
Abroma augustum	37
Acacia auriculiformis	153
Acacia mangium	111
Acalypha hispida	151
Acalypha indica	223
Acalypha siamensis	93
Acalypha wilkesiana	67


In [None]:
# Passes the images, labels and per-label counts to augment_data and keeps the
# two arrays it returns.
# NOTE(review): augment_data is not defined in this notebook; presumably it
# comes from ImportFunctions.ipynb and augments the under-represented labels
# (the recorded output lists three labels with tripled counts) — confirm.
augmented_images, augmented_labels = augment_data(images, labels, label_counts)

Label	Count
----------------
Abelmoschus moschatus	93
Abroma augustum	111
Acalypha wilkesiana	201


In [None]:
# Passes the original and augmented arrays to combine_augmented_data and keeps
# the combined images and labels it returns.
# NOTE(review): combine_augmented_data is not defined in this notebook;
# presumably it comes from ImportFunctions.ipynb — confirm how it selects the
# "remaining" original images reported in the output below.
augmented_images_combined, augmented_labels_combined = combine_augmented_data(images, labels, augmented_images, augmented_labels)

Original images shape: (941, 299, 299, 3)
Original labels shape: (941,)
Augmented images shape: (405, 299, 299, 3)
Augmented labels shape: (405,)
Remaining images shape: (806, 299, 299, 3)
Remaining labels shape: (806,)
Augmented images combined shape: (1211, 299, 299, 3)
Augmented labels combined shape: (1211,)


# [7] Save Augmented Dataset

## [7.1] Google Colab

In [None]:
# Colab only: write the combined dataset to the Drive dataset folder.
# The images saved are `augmented_images_combined`, the array returned by
# combine_augmented_data together with `augmented_labels_combined`, so both
# files stay aligned row for row. The previously used name
# `resized_augmented_images` is never assigned in this notebook and raised
# NameError on a fresh kernel.
np.save('/content/drive/My Drive/FYP/dataset/augmented_images.npy', augmented_images_combined)
np.save('/content/drive/My Drive/FYP/dataset/augmented_labels.npy', augmented_labels_combined)

## [7.2] Jupyter

In [None]:
# Local Jupyter only: write the combined dataset to the relative dataset/ folder.
# The images saved are `augmented_images_combined`, the array returned by
# combine_augmented_data together with `augmented_labels_combined`, so both
# files stay aligned row for row. The previously used name
# `resized_augmented_images` is never assigned in this notebook and raised
# NameError on a fresh kernel.
np.save('dataset/augmented_images.npy', augmented_images_combined)
np.save('dataset/augmented_labels.npy', augmented_labels_combined)