In [21]:
import os

import numpy as np
import tensorflow as tf

from tqdm import tqdm
from pprint import pprint
from sklearn.svm import SVC
from keras.applications import VGG16
from sklearn.model_selection import train_test_split
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import load_img, img_to_array

# Constants

In [22]:
data = []
labels = []

data_directory = 'D:\VS_CODE\DATASETS\Rice_Image_Dataset'

number_of_images_per_species = 500

img_size = 224

train_features = []
train_labels = []

# Creating the data generator

- We also perform some basic data augmentation on the loaded images using this generator

In [23]:
train_data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Extracting images from the folders

- We extract equal number of images from each folder so as to avoid class imbalance

In [24]:
for species in os.listdir(data_directory):
    species_dir = os.path.join(data_directory, species)
    print(species_dir)
    
    for i in range(1, number_of_images_per_species+1):
        img_path = os.path.join(species_dir, f'{species} ({i}).jpg')
        img = load_img(img_path, target_size=(224, 224))
        img_array = img_to_array(img)
        img_array = img_array.reshape((1,) + img_array.shape)
        img_array = train_data_generator.standardize(img_array)
        
        data.append(img_array)
        labels.append(species)

D:\VS_CODE\DATASETS\Rice_Image_Dataset\Arborio


D:\VS_CODE\DATASETS\Rice_Image_Dataset\Basmati
D:\VS_CODE\DATASETS\Rice_Image_Dataset\Ipsala
D:\VS_CODE\DATASETS\Rice_Image_Dataset\Jasmine
D:\VS_CODE\DATASETS\Rice_Image_Dataset\Karacadag


# Verify the number of images

In [25]:
len(data)

2500

# Load the VGG16 model

In [27]:
model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_size, img_size, 3)
)

In [28]:
model.summary()

# Freeze model layers

In [29]:
for layer in model.layers[:-4]:
    layer.trainable = False

# Extracting features

In [30]:
for i in tqdm(range(len(data)), desc='Extracting features'):
    image = data[i].reshape((1, 224, 224, 3))
    image = preprocess_input(image)

    feature = model.predict(image, verbose=0)

    train_features.append(feature.flatten())
    train_labels.append(labels[i])

features = np.array(train_features)
labels = np.array(train_labels)

Extracting features:   0%|          | 0/2500 [00:00<?, ?it/s]

Extracting features: 100%|██████████| 2500/2500 [17:05<00:00,  2.44it/s]


In [31]:
features.shape

(2500, 25088)

In [32]:
labels.shape

(2500,)

In [33]:
labels

array(['Arborio', 'Arborio', 'Arborio', ..., 'Karacadag', 'Karacadag',
       'Karacadag'], dtype='<U9')

# Shuffling the features and labels

In [34]:
shuffled_indexes = np.arange(len(features))
np.random.shuffle(shuffled_indexes)

features = features[shuffled_indexes]
labels = labels[shuffled_indexes]

In [35]:
labels

array(['Basmati', 'Arborio', 'Arborio', ..., 'Jasmine', 'Karacadag',
       'Ipsala'], dtype='<U9')

# Spiting features

In [36]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [37]:
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [38]:
X_train.shape

(2000, 25088)

In [39]:
y_train

array(['Arborio', 'Ipsala', 'Arborio', ..., 'Ipsala', 'Ipsala', 'Ipsala'],
      dtype='<U9')

# Training SVM model on extracted features

In [40]:
svm = SVC(kernel='linear', C=1.0)
svm.fit(X_train, y_train)

accuracy = svm.score(X_test, y_test)
print("SVM accuracy: {:.2f}%".format(accuracy * 100))

SVM accuracy: 97.20%
