In [14]:
# Imports
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier,AdaBoostClassifier
from sklearn import svm
from skimage.feature import greycomatrix, greycoprops
from sklearn import metrics
from skimage.feature import hog
from sklearn.decomposition import PCA

## Read Data

In [2]:

x_data, y_data = [], []
#load male images
for filename in sorted(glob.glob('CMP23-Dataset/Male/*.jpg')):
    try:
        img = cv2.imread(filename, 0)                           # 0 for gray-scale
        x_data.append(img)
        y_data.append(0)
    except:
        print("corrupted image detected.")
    

## load female images
for filename in sorted(glob.glob('CMP23-Dataset/Female/*.jpg')):
    try:
        img = cv2.imread(filename, 0)
        x_data.append(img)
        y_data.append(1)
    except:
        print("corrupted image detected.")
    

x_data = np.array(x_data)
y_data = np.array(y_data)

# Shuffle both
p = np.random.permutation(len(x_data))
x_data = x_data[p]
y_data = y_data[p]

train_size = int(0.8 * len(x_data))
x_train, y_train = x_data[:train_size], y_data[:train_size]
x_test, y_test = x_data[train_size:], y_data[train_size:]



  x_data = np.array(x_data)


## Extract Features

In [3]:
def get_hog_feature(imag):
    imag = cv2.resize(imag,(600,600))
    out = hog(imag, orientations=9, pixels_per_cell=(15, 15),
                        block_norm='L2-Hys',
                        cells_per_block=(1, 1), visualize=False, feature_vector=True)
                 
    return out.tolist()

In [4]:
# def get_glcm_feature(gray_scale_img, name):
#     GLCM = greycomatrix(gray_scale_img, distances = [1], angles = [0, np.pi/2, np.pi, np.pi * 1.5 ], levels=256, normed=True)
#     # P[i,j,d,theta] is the number of times that grey-level j occurs at a distance d and at an angle theta from grey-level i.
#     return np.sum(np.abs(greycoprops(GLCM, prop=name)))
    

In [5]:
# # converting the dataset into features (x1, x2)
# x_train = np.array([(get_glcm_feature(img, 'energy'), (get_glcm_feature(img, 'homogeneity'))) for img in x_train])
# x_test = np.array([(get_glcm_feature(img, 'energy'), (get_glcm_feature(img, 'homogeneity'))) for img in x_test])

In [4]:
data_train =[]
data_test =[]
for img in x_train:
   data_train .append(get_hog_feature(img))
for img in x_test:   
    data_test.append(get_hog_feature(img))

In [5]:
#print(data_train[0])
print(len(data_train[0]))
# Initialize SVM classifier
modelofHOG = svm.LinearSVC()                # Let's case into an infinte dimensional space.
modelofHOG = modelofHOG.fit(data_train,y_train)
predictions = modelofHOG.predict(data_test)

print(metrics.accuracy_score(y_test, predictions) * 100)


14400
71.05263157894737


In [6]:
modelofHOG = svm.SVC(kernel='rbf')                 # Let's case into an infinte dimensional space.
modelofHOG = modelofHOG.fit(data_train,y_train)
predictions = modelofHOG.predict(data_test)

print(metrics.accuracy_score(y_test, predictions) * 100)

68.42105263157895


In [9]:
# Boost 100 weak decision trees
model = RandomForestClassifier(n_estimators=100)

model = model.fit(data_train, y_train)
predictions = model.predict(data_test)
print(metrics.accuracy_score(y_test, predictions) * 100)

68.42105263157895


In [11]:
model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1)
model = model.fit(data_train, y_train)
predictions = model.predict(data_test)
print(metrics.accuracy_score(y_test, predictions) * 100)

60.526315789473685


In [13]:
model = AdaBoostClassifier(n_estimators=100, random_state=0)
model = model.fit(data_train, y_train)
predictions = model.predict(data_test)
print(metrics.accuracy_score(y_test, predictions) * 100)

63.1578947368421


In [27]:
pca = PCA(n_components=3)
X_train = pca.fit_transform(data_train)
X_test = pca.transform(data_test)

In [28]:
# Initialize SVM classifier
modelofHOG = svm.LinearSVC()                # Let's case into an infinte dimensional space.
modelofHOG = modelofHOG.fit(X_train,y_train)
predictions = modelofHOG.predict(X_test)

print(metrics.accuracy_score(y_test, predictions) * 100)

71.05263157894737




In [29]:
modelofHOG = svm.SVC(kernel='rbf')                 # Let's case into an infinte dimensional space.
modelofHOG = modelofHOG.fit(X_train,y_train)
predictions = modelofHOG.predict(X_test)

print(metrics.accuracy_score(y_test, predictions) * 100)

68.42105263157895


In [30]:
# Boost 100 weak decision trees
model = RandomForestClassifier(n_estimators=100)

model = model.fit(X_train, y_train)
predictions = model.predict(X_test)
print(metrics.accuracy_score(y_test, predictions) * 100)

76.31578947368422


In [31]:
model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1)
model = model.fit(X_train, y_train)
predictions = model.predict(X_test)
print(metrics.accuracy_score(y_test, predictions) * 100)

76.31578947368422


In [32]:
model = AdaBoostClassifier(n_estimators=100, random_state=0)
model = model.fit(X_train, y_train)
predictions = model.predict(X_test)
print(metrics.accuracy_score(y_test, predictions) * 100)

63.1578947368421


In [68]:
# Generate scatter plot for training data 
data_train = np.array(data_train)
x_train_male = data_train[y_train == 0]
print(len(x_train_male[0]))
x_train_female = data_train[y_train == 1]
plt.plot(x_train_female)
plt.plot(x_train_male)
plt.title('Linearly separable data')
plt.xlabel('feature 1')
plt.ylabel('feature 2')
plt.show()


86436


KeyboardInterrupt: 