## Converting the Image Dataset into an HDF5 File

In [None]:
import h5py
import numpy as np
import os

base_path = 'C://caltech101//101_ObjectCategories'  # dataset root: one sub-folder per category

save_path = './/tesp.hdf5'  # HDF5 file the images are written to

# Pack the dataset into one HDF5 file: one group per category folder and one
# dataset per image file, each holding the raw (still encoded) file bytes.
# The context manager closes the file even if a read or write fails part-way.
with h5py.File(save_path, 'a') as hf:  # append mode keeps any existing content
    for category in os.listdir(base_path):
        if category == "BACKGROUND_Google":  # skip the BACKGROUND_Google folder
            continue

        category_dir = os.path.join(base_path, category)
        if not os.path.isdir(category_dir):  # ignore stray files next to the category folders
            continue

        # require_group() reuses a group left by an earlier run, whereas
        # create_group() would raise because the file is opened in append mode.
        grp = hf.require_group(category)

        for image_name in os.listdir(category_dir):
            if image_name in grp:  # already stored by a previous run
                continue

            img_path = os.path.join(category_dir, image_name)
            with open(img_path, 'rb') as img_f:  # read the encoded image as bytes
                binary_data = img_f.read()

            # Store the bytes unchanged; decoding happens when the file is read back.
            grp.create_dataset(image_name, data=np.asarray(binary_data))

## Loading the Images from the HDF5 File

In [None]:
import h5py
import numpy as np
import os

import io
from PIL import Image
import cv2 


save_path = './/Caltech_101.hdf5'  # HDF5 file the images are read from

group = []  # name of every group found in the file
data = []   # full key of every dataset found in the file (e.g. 'group/image name')


def func(name, obj):
    """visititems() callback: record `name` in `data` or `group` by the type of `obj`."""
    for kind, bucket in ((h5py.Dataset, data), (h5py.Group, group)):
        if isinstance(obj, kind):
            bucket.append(name)
            return


# The handle is left open on purpose: the feature-extraction cell reads from `hf`.
hf = h5py.File(save_path, 'r')
hf.visititems(func)  # walks the whole file, calling func on every group and dataset
print("No. of total images : ", len(data))
print("No. of Groups : ", len(group))

In [None]:
import pywt
from skimage.feature import hog
from skimage.transform import resize
from scipy.fftpack import dct, idct
from PIL import Image, ImageOps
import matplotlib.pyplot as plt
def dct2(a):
    """Return the 2-D DCT of `a` with orthonormal scaling.

    The 1-D transform is applied twice; transposing before each pass makes
    the two passes act on different axes of a 2-D input.
    """
    first_pass = dct(a.T, norm='ortho')
    return dct(first_pass.T, norm='ortho')

def idct2(a):
    """Return the 2-D inverse DCT of `a` with orthonormal scaling.

    The 1-D inverse transform is applied twice; transposing before each pass
    makes the two passes act on different axes of a 2-D input.
    """
    first_pass = idct(a.T, norm='ortho')
    return idct(first_pass.T, norm='ortho')

In [None]:
labels = []    # one integer per image: position of its top-level group name in `group`
features = []  # one HOG descriptor per image

# Decode every stored image, normalise it to a fixed-size grayscale array and
# describe it with a HOG feature vector.
for j in data:
    kk = np.array(hf[j])                 # raw encoded bytes stored for this key
    img = Image.open(io.BytesIO(kk))     # decode the image from memory
    gray_image = ImageOps.grayscale(img)

    # Resize BEFORE converting to an array: HOG descriptor length depends on
    # the image size, so every image must share the same shape for the final
    # np.asarray(features) to yield a regular 2-D matrix.
    img = gray_image.resize((128, 64))
    img1 = np.asarray(img)

    imF = dct2(img1)   # forward 2-D DCT
    im1 = idct2(imF)   # inverse 2-D DCT of those coefficients

    # Single-level 2-D wavelet decomposition; the sub-bands are computed here
    # but are not used for the feature vector below.
    LL, (LH, HL, HH) = pywt.dwt2(im1, 'db2')

    # NOTE(review): the original passed an undefined name `im` here; `im1` is
    # assumed to be what was meant. If the wavelet approximation `LL` was
    # intended instead, swap it in.
    fdA, hog_image = hog(
        im1,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        visualize=True,
    )
    features.append(list(fdA))

    # The label is the index of the key's first path component in `group`.
    labels.append(group.index(j.partition('/')[0]))

feature1 = np.asarray(features)

In [None]:
from sklearn.model_selection import train_test_split

# Stratified split: 15% of the samples are held out as the test set and the
# remaining 85% form the training set.
X_train, X_test, y_train, y_test = train_test_split(
    feature1,
    labels,
    stratify=labels,
    test_size=0.15,
    random_state=0,
)

In [None]:
# Show the shape of the full feature matrix, then of the train and test splits.
for matrix in (feature1, X_train, X_test):
    print(matrix.shape)

## Stochastic Gradient Descent (SGD) Classifier

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score  # needed by the print below; not imported elsewhere in this file

# Train an SGD classifier (fixed seed) on the training split and report its
# accuracy on the held-out test split.
model = SGDClassifier(random_state=42)
model = model.fit(X_train, y_train)
prediction = model.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, prediction))

## RandomForest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score  # needed by the print below; not imported elsewhere in this file

# Train a random forest (fixed seed) on the training split and report its
# accuracy on the held-out test split.
model1 = RandomForestClassifier(random_state=1000)
model1 = model1.fit(X_train, y_train)
prediction1 = model1.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, prediction1))

## LogisticRegression Classifier

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score  # needed by the print below; not imported elsewhere in this file

# Regularization rate; the model receives its inverse as C.
reg = 1

# Train a logistic regression model on the training split and report its
# accuracy on the held-out test split.
model3 = LogisticRegression(C=1/reg, solver="liblinear").fit(X_train, y_train)
prediction3 = model3.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, prediction3))

##  DecisionTree Classifier

In [None]:
from sklearn.metrics import accuracy_score  # needed by the print below; not imported elsewhere in this file
from sklearn.tree import DecisionTreeClassifier

# Train a decision tree (fixed seed) on the training split and report its
# accuracy on the held-out test split.
model5 = DecisionTreeClassifier(random_state=1000)
model5 = model5.fit(X_train, y_train)
prediction5 = model5.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, prediction5))

## Saving the Models

In [None]:
import pickle

# Persist each trained classifier to its own file. Every file name is paired
# with the model it is named after (previously the SGD model was written to
# all four files), and each file is closed as soon as it has been written.
for trained_model, file_name in (
    (model, 'SGD_101.sav'),
    (model1, 'RandomFores_101.sav'),
    (model3, 'LogisticRegression_101.sav'),
    (model5, 'DecisionTree_101.sav'),
):
    with open(file_name, 'wb') as model_file:
        pickle.dump(trained_model, model_file)