In [1]:
import pandas as pd
import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Images:
from skimage.transform import resize
from skimage.io import imread

import os
import matplotlib.pyplot as plt

import pickle

In [3]:
datadir = 'Images/'
categories = ['ChickenHead', 'ElephantHead']

flat_data_arr = []
target_arr = []

for i in categories:
    print ('loading... category: ', i)
    path = os.path.join (datadir, i) #Images/ChickenHead   #Images/ElephantHead
    for img in os.listdir (path):  #listdir - return a list of all file names in the path
        img_array = imread (os.path.join (path, img))
        img_resized = resize (img_array, (150, 150, 3)) #matrix of 150,150,3
        flat_data_arr.append (img_resized.flatten()) #array: 1x(150x150x30) flat_data_array(img1.., img2,..)
        target_arr.append (categories.index(i))
    print ("loaded category ", i, "successfully")  

loading... category:  ChickenHead
loaded category  ChickenHead successfully
loading... category:  ElephantHead
loaded category  ElephantHead successfully


In [4]:
flat_data = np.array (flat_data_arr)
target = np.array (target_arr)

df = pd.DataFrame (flat_data)
df['Target'] = target

df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,67491,67492,67493,67494,67495,67496,67497,67498,67499,Target
0,0.764706,0.74902,0.6,0.764706,0.74902,0.6,0.768627,0.752941,0.603922,0.764706,...,0.698039,0.705882,0.54902,0.698039,0.705882,0.54902,0.698039,0.705882,0.54902,0
1,0.031373,0.031373,0.039216,0.031373,0.031373,0.039216,0.031373,0.031373,0.039216,0.031373,...,0.223529,0.215686,0.227451,0.188235,0.180392,0.192157,0.227451,0.219608,0.231373,0
2,0.239216,0.270588,0.219608,0.239216,0.270588,0.219608,0.243137,0.27451,0.223529,0.247059,...,0.439216,0.447059,0.352941,0.439216,0.447059,0.352941,0.439216,0.447059,0.352941,0
3,0.254902,0.25098,0.172549,0.254902,0.25098,0.172549,0.247059,0.243137,0.164706,0.243137,...,0.105882,0.113725,0.062745,0.105882,0.113725,0.062745,0.101961,0.109804,0.058824,0
4,0.058824,0.05098,0.070588,0.058824,0.05098,0.070588,0.058824,0.05098,0.070588,0.058824,...,0.192157,0.203922,0.223529,0.180392,0.192157,0.211765,0.172549,0.184314,0.203922,0


In [16]:
df['Target'].value_counts

<bound method IndexOpsMixin.value_counts of 0      0
1      0
2      0
3      0
4      0
      ..
195    1
196    1
197    1
198    1
199    1
Name: Target, Length: 200, dtype: int64>

In [8]:
X = df.values[:, :-1]
Y = df.values[:, -1]

xtrain, xtest, ytrain, ytest = train_test_split (X, Y, test_size=0.2, random_state=47)

## Support Vector Machine

In [9]:
param_grid = {'C': [10, 100], 'gamma': [0.0001, 0.001, 0.1, 1, 10], 'kernel': ['rbf', 'poly']}
svc = SVC()

model = GridSearchCV (svc, param_grid)
model.fit (xtrain, ytrain)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [10, 100], 'gamma': [0.0001, 0.001, 0.1, 1, 10],
                         'kernel': ['rbf', 'poly']})

In [10]:
print (model.best_params_)
ypred_train = model.predict (xtrain)
ypred_test = model.predict (xtest)

print ('Train Accuracy: ', accuracy_score (ypred_train, ytrain))
print ('Test Accuracy: ', accuracy_score (ypred_test, ytest))

{'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
Train Accuracy:  1.0
Test Accuracy:  0.925


In [12]:
pickle.dump (model, open('svcModel.pkl', 'wb'))

In [13]:
model = pickle.load (open('svcModel.pkl', 'rb'))

In [14]:
filename = 'testImages/chicken1.png'

img_array = imread (filename)
img_resized = resize (img_array, (150, 150, 3))
test_data = np.array (img_resized.flatten())
test_data = test_data.reshape(1, -1)
print (test_data.shape)

(1, 67500)


In [17]:
p = model.predict (test_data)
if p==0: print ('Chicken')
elif p==1: print('Elephant')

Chicken


## Random Forest

In [20]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

maxAccuracy = -1
best_estimators = 25
for n in range(25, 200, 1):
    rf = RandomForestClassifier (n_estimators =n)
    rf.fit (xtrain, ytrain)
    ypred3 = rf.predict (xtest)
    a3 = accuracy_score (ypred3, ytest)
    if a3 > maxAccuracy: 
        maxAccuracy = a3
        best_estimators = n
        
print ('Accuracy = ', a3, 'for n = ', n)

Accuracy =  0.925 for n =  199


## KNN

In [25]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

scores = []
for k in range(1, 5):
    knn = KNeighborsClassifier (n_neighbors = k)
    knn.fit (xtrain, ytrain)
    
    ypred = knn.predict (xtest)
    score = accuracy_score (ypred, ytest)
    
    print("k=%d, accuracy=%.2f%%" % (k, score * 100))
    scores.append (score)
     
m = max (scores)
print('max accuracy: ', m, '  for k=', scores.index(m)+1)

k=1, accuracy=92.50%
k=2, accuracy=95.00%
k=3, accuracy=85.00%
k=4, accuracy=95.00%
max accuracy:  0.95   for k= 2


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [None]:
from sklearn.ensemble import AdaBoostClassifier

maxAccuracy = -1
best_estimators = 25
for n in range(25, 200, 1):
    ab = AdaBoostClassifier (n_estimators = n)
    ab.fit(xtrain, ytrain)
    ypred4 = ab.predict (xtest)

    a4 = accuracy_score (ypred4, ytest)
    if a4 > maxAccuracy: 
        maxAccuracy = a4
        best_estimators = n
        
print ('Accuarcy = ', a4)

## Neural Networks

In [28]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(max_iter=1000)
param_grid = {'activation': ['relu', 'identity'],'hidden_layer_sizes': [1,2]}

model = GridSearchCV(mlp, param_grid, cv=3)
model.fit(xtrain, ytrain)

# Print the best hyperparameters
print("Best hyperparameters: ", model.best_params_)

Best hyperparameters:  {'activation': 'identity', 'hidden_layer_sizes': 1}


In [29]:
print (model.best_params_)
ypred_train = model.predict (xtrain)
ypred_test = model.predict (xtest)

print ('Train Accuracy: ', accuracy_score (ypred_train, ytrain))
print ('Test Accuracy: ', accuracy_score (ypred_test, ytest))

{'activation': 'identity', 'hidden_layer_sizes': 1}
Train Accuracy:  1.0
Test Accuracy:  0.95


In [30]:
pickle.dump (model, open('nnModel.pkl', 'wb'))

In [31]:
model = pickle.load (open('nnModel.pkl', 'rb'))

In [34]:
filename = 'testImages/elephent2.jpg'

img_array = imread (filename)
img_resized = resize (img_array, (150, 150, 3))
test_data = np.array (img_resized.flatten())
test_data = test_data.reshape(1, -1)
print (test_data.shape)

(1, 67500)


In [35]:
p = model.predict (test_data)
if p==0: print ('Chicken')
elif p==1: print('Elephant')

Elephant
