In [None]:
from google.colab import drive
# Mount Google Drive so the paths under "/content/drive/My Drive" used by the
# later cells resolve inside the Colab runtime.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


HDF5 CLASS

In [None]:
import h5py
import os
class HDF5DatasetWriter:
    """Buffered writer that stores rows and integer labels in an HDF5 file.

    Rows and labels are accumulated in an in-memory buffer and written to the
    file in chunks once the buffer holds at least ``bufSize`` rows. The writer
    can be used as a context manager so the file is always closed::

        with HDF5DatasetWriter(dims, path) as writer:
            writer.add(rows, labels)

    Parameters
    ----------
    dims : tuple
        Shape of the data dataset; ``dims[0]`` is also the length of the
        "labels" dataset.
    outputPath : str
        Path of the HDF5 file to create. It must not exist yet.
    dataKey : str
        Name of the dataset that holds the rows.
    bufSize : int
        Number of buffered rows that triggers a write to disk.

    Raises
    ------
    ValueError
        If ``outputPath`` already exists.
    """

    def __init__(self, dims, outputPath, dataKey="images", bufSize=1000):
        # Never overwrite an existing file: the caller has to remove it first.
        if os.path.exists(outputPath):
            raise ValueError("Output path exist cannot be overwritten. Manually delete the file before continue", outputPath)
        self.db = h5py.File(outputPath, "w")
        try:
            self.data = self.db.create_dataset(dataKey, dims, dtype="float")
            self.labels = self.db.create_dataset("labels", (dims[0],), dtype="int")
        except Exception:
            # Do not leak the open handle when the datasets cannot be created.
            self.db.close()
            raise
        self.bufSize = bufSize
        self.buffer = {"data": [], "labels": []}
        # Index of the next unwritten row in the on-disk datasets.
        self.idx = 0

    def __enter__(self):
        """Return the writer itself so it can be bound with ``with ... as``."""
        return self

    def __exit__(self, excType, excValue, traceback):
        """Flush and close the file; exceptions from the body are not suppressed."""
        self.close()
        return False

    def add(self, rows, labels):
        """Buffer ``rows`` with their ``labels``; flush once the buffer is full.

        Raises
        ------
        ValueError
            If ``rows`` and ``labels`` do not contain the same number of items.
        """
        rows = list(rows)
        labels = list(labels)
        # Validate before touching the buffer so a bad call cannot misalign
        # data and labels that were buffered earlier.
        if len(rows) != len(labels):
            raise ValueError(
                "rows and labels must have the same length, got {} and {}".format(len(rows), len(labels))
            )
        self.buffer["data"].extend(rows)
        self.buffer["labels"].extend(labels)
        if len(self.buffer["data"]) >= self.bufSize:
            self.flush()

    def flush(self):
        """Write the buffered rows and labels to disk and reset the buffer."""
        pending = len(self.buffer["data"])
        if pending == 0:
            # Nothing buffered: skip the zero-length write.
            return
        end = self.idx + pending
        self.data[self.idx:end] = self.buffer["data"]
        self.labels[self.idx:end] = self.buffer["labels"]
        self.idx = end
        self.buffer = {"data": [], "labels": []}

    def storeClassLabels(self, classLabels):
        """Store the class names in a variable-length string dataset "label_names"."""
        dt = h5py.special_dtype(vlen=str)
        labelSet = self.db.create_dataset("label_names", (len(classLabels),), dtype=dt)
        labelSet[:] = classLabels

    def close(self):
        """Write any remaining buffered rows, then close the HDF5 file."""
        try:
            self.flush()
        finally:
            # Release the file handle even when the final flush fails.
            self.db.close()

EXTRACTING FEATURES USING VGG16

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
#from pyimagesearch.io.hdf5datasetwriter import HDF5DatasetWriter
from imutils import paths
import numpy as np
import progressbar
import argparse
import random
import pandas as pd
import os
'''
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", help= "path to input dataset")
ap.add_argument("-o", "--output", help= "path to output HDF5 dataset")
ap.add_argument("-c", "--csv", help = "path to csv file")
ap.add_argument("-b", "--batch_size", type = int, default = 32, help= "batch size of images to be passed through network")
ap.add_argument("-s", "--buffer_size", type= int, default = 1000, help= "size of feature extraction buffer")
args = vars(ap.parse_args())
bs = args["batch_size"]
'''
# Number of images pushed through the network per predict() call.
bs = 32
# Seed for the shuffle below. The next cell takes the first 75% of the rows as
# the training split, so the order must be reproducible across runs.
SEED = 42

# Alternative from the command-line version of this script: derive the labels
# from the parent directory name of each image instead of the CSV file.
# imagePaths = list(paths.list_images(args["dataset"]))
# random.shuffle(imagePaths)
# labels = [p.split(os.path.sep)[-2] for p in imagePaths]

# The paths below require Google Drive to be mounted at /content/drive.
csvPath = "/content/drive/My Drive/Colab_Notebooks/python code/flora.csv"
dataset = "/content/drive/My Drive/cc78ftcdf9-1/"
outPathHDF5 = "/content/drive/My Drive/Colab_Notebooks/features.hdf5"

dataframe = pd.read_csv(csvPath)

# os.listdir() returns names in arbitrary order; sort first so the seeded
# shuffle yields the same order on every run.
allImages = sorted(os.listdir(dataset))
random.Random(SEED).shuffle(allImages)

# Map each image ID to its species once instead of scanning the whole
# DataFrame per image. drop_duplicates keeps the first row for a repeated ID.
speciesById = dataframe.drop_duplicates(subset="ID").set_index("ID")["SPECIES"].to_dict()

labels = []
for eachImage in allImages:
    # The file name up to the first dot is the numeric image ID
    # (partition also handles names that contain no dot at all).
    imageId = int(eachImage.partition(".")[0])
    if imageId not in speciesById:
        raise KeyError("No row with ID {} in {} for image {!r}".format(imageId, csvPath, eachImage))
    labels.append(speciesById[imageId])
le = LabelEncoder()
labels = le.fit_transform(labels)

print("[INFO] loading network...")
model = VGG16(weights="imagenet", include_top=False)

# Length of the flattened feature vector each image is reshaped to below.
featureDim = 512 * 7 * 7

# The writer gets its own name: rebinding `dataset` here would break the
# image path construction inside the loop.
writer = HDF5DatasetWriter((len(allImages), featureDim), outPathHDF5, dataKey="features", bufSize=1000)
try:
    writer.storeClassLabels(le.classes_)

    widgets = ["Extracting Features: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
    pbar = progressbar.ProgressBar(maxval=len(allImages), widgets=widgets).start()
    for i in np.arange(0, len(allImages), bs):
        batchPaths = allImages[i: i + bs]
        batchLabels = labels[i: i + bs]
        batchImages = []

        for imageName in batchPaths:
            imagePath = os.path.join(dataset, imageName)
            image = load_img(imagePath, target_size=(224, 224))
            image = img_to_array(image)
            # Add a leading batch axis so the images can be stacked with vstack.
            image = np.expand_dims(image, axis=0)
            image = imagenet_utils.preprocess_input(image)
            batchImages.append(image)

        batchImages = np.vstack(batchImages)
        features = model.predict(batchImages, batch_size=bs)
        # Flatten the network output of each image into one feature vector.
        features = features.reshape((features.shape[0], featureDim))
        writer.add(features, batchLabels)
        pbar.update(i)
    pbar.finish()
finally:
    # Always release the HDF5 file handle, even when extraction fails midway.
    writer.close()

[INFO] loading network...


Extracting Features: 100% |####################################| Time:  0:26:14


USING GRIDSEARCHCV WITH THE EXTRACTED FEATURES.

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import h5py
import pickle
'''
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--db", help="PATH TO HDF5 DATASET")
ap.add_argument("-m", "--model", help="PATH TO OUTPUT MODEL")
ap.add_argument("-j", "--jobs", type=int, default=-1, help= "# of jobs to run when tuning hyperparameter")

args = vars(ap.parse_args())
'''
# Read the file produced by the feature-extraction cell. This must be the same
# path that cell writes to (its `outPathHDF5`).
featuresPath = "/content/drive/My Drive/Colab_Notebooks/features.hdf5"

# The context manager closes the file even if fitting or evaluation raises.
with h5py.File(featuresPath, "r") as db:
    # Rows were shuffled before extraction, so the first 75% serve as the
    # training split and the remaining 25% as the test split.
    splitIdx = int(db["labels"].shape[0] * 0.75)

    # h5py 3.x returns variable-length strings as bytes; decode them so the
    # report can format the class names.
    labelNames = [
        name.decode("utf-8") if isinstance(name, bytes) else str(name)
        for name in db["label_names"][:]
    ]

    print("[INFO] tuning hyperparameters...")
    params = {"C": [0.1, 1.0, 10.0]}
    model = GridSearchCV(LogisticRegression(solver="lbfgs", multi_class="auto", max_iter=10000), params, cv=3, n_jobs=-1)
    model.fit(db["features"][:splitIdx], db["labels"][:splitIdx])
    print("[INFO] best hyperparameter: {}".format(model.best_params_))

    print("[INFO] evaluating...")
    preds = model.predict(db["features"][splitIdx:])
    # Pass every class index explicitly: without `labels`, the report raises
    # when a class is missing from the test split because the number of
    # target names no longer matches the classes found in the data.
    print(classification_report(
        db["labels"][splitIdx:],
        preds,
        labels=list(range(len(labelNames))),
        target_names=labelNames,
    ))

# To persist the tuned model (from the command-line version of this script):
# print("[INFO] saving model...")
# f = open(args["model"], "wb")
# f.write(pickle.dumps(model.best_estimator_))
# f.close()

[INFO] tuning hyperparameters...
[INFO] best hyperparameter: {'C': 1.0}
[INFO] evaluating...
                             precision    recall  f1-score   support

   Acrocarpus fraxinifolius       1.00      1.00      1.00         5
     Araucaria angustifolia       1.00      1.00      1.00        17
    Aspidosperma polyneuron       1.00      1.00      1.00         5
            Aspidosperma sp       1.00      1.00      1.00        10
         Bagassa guianensis       1.00      1.00      1.00        16
Balfourodendron riedelianum       1.00      1.00      1.00        17
       Bertholletia excelsa       1.00      1.00      1.00         8
               Bowdichia sp       1.00      1.00      1.00        19
     Brosimum parinarioides       1.00      1.00      1.00         7
          Carapa guianensis       0.78      1.00      0.88         7
     Cariniana estrellensis       1.00      1.00      1.00        12
           Cedrela fissilis       1.00      1.00      1.00         9
    Cedre