In [1]:
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [2]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 10256857478017240533, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 4511286886
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 9265358490561280710
 physical_device_desc: "device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1"]

In [15]:
import os
import numpy as np
np.random.seed(777)

import keras.backend as K
from keras.preprocessing.image import ImageDataGenerator

import sklearn

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from PIL import Image
from sklearn.model_selection import KFold, train_test_split
from sklearn.model_selection import cross_val_score

from sklearn.ensemble import RandomForestClassifier

from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
img_height, img_width = 224, 224

nb_train_samples = 386
nb_validation_samples = 199
nb_test_samples = 155

In [5]:
train_dir = 'data_reduced/train/'
validation_dir = 'data_reduced/validation'
test_dir = 'data_reduced/test'

In [6]:
random_seed = np.random.seed(777)

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    featurewise_center=True,
    featurewise_std_normalization=True)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size = 1,
    seed = random_seed,
    shuffle = True,
    class_mode='categorical')

validation_generator = train_datagen.flow_from_directory(
    validation_dir,
    target_size=(img_height, img_width),
    batch_size = 1,
    seed = random_seed,
    shuffle = True,
    class_mode='categorical')

test_datagen = ImageDataGenerator(rescale=1. / 255)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=1,
    seed = random_seed,
    shuffle = False,
    class_mode='categorical')

Found 386 images belonging to 2 classes.
Found 199 images belonging to 2 classes.
Found 155 images belonging to 2 classes.


In [7]:
X_train, y_train = [], []
for _ in tqdm(range(nb_train_samples)):
    x, y = train_generator.next()
    X_train.append(x[0])
    y_train.append(y[0])
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)
y_train = np.argmax(y_train, axis=1)
# np.save('data/npy/X_train.npy', X_train)
# np.save('data/npy/y_train.npy', y_train)

100%|███████████████████████████████████████████████████████████████████████████████| 386/386 [00:00<00:00, 512.46it/s]


In [8]:
X_train = np.array([x.flatten() for x in X_train])

In [9]:
X_validation, y_validation = [], []
for _ in tqdm(range(nb_validation_samples)):
    x_val, y_val = validation_generator.next()
    X_validation.append(x_val[0])
    y_validation.append(y_val[0])
X_validation = np.asarray(X_validation)
y_validation = np.asarray(y_validation)
y_validation = np.argmax(y_validation, axis=1)
# np.save('data/npy/X_validation.npy', X_validation)
# np.save('data/npy/y_validation.npy', y_validation)

100%|███████████████████████████████████████████████████████████████████████████████| 199/199 [00:00<00:00, 622.51it/s]


In [10]:
X_validation = np.array([x.flatten() for x in X_validation])

In [11]:
X_test, y_test = [], []
for _ in tqdm(range(nb_test_samples)):
    x_t, y_t = test_generator.next()
    X_test.append(x_t[0])
    y_test.append(y_t[0])
X_test = np.asarray(X_test)
y_test = np.asarray(y_test)
y_test = np.argmax(y_test, axis=1)
# np.save('data/npy/X_test.npy', X_test)
# np.save('data/npy/y_test.npy', y_test)

100%|███████████████████████████████████████████████████████████████████████████████| 155/155 [00:00<00:00, 442.05it/s]


In [12]:
X_test = np.array([x.flatten() for x in X_test])

In [13]:
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_validation shape:", X_validation.shape)
print("y_validation shape:", y_validation.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)
# plt.imshow(X_train[0])
# plt.show()

X_train shape: (386, 150528)
y_train shape: (386,)
X_validation shape: (199, 150528)
y_validation shape: (199,)
X_test shape: (155, 150528)
y_test shape: (155,)


____

## KNeighborsClassifier

In [17]:
clf  =  KNeighborsClassifier(n_neighbors = 5)
k_fold = KFold(n_splits=10, shuffle=True, random_state=5)
scoring = 'accuracy'
scores = cross_val_score(clf, X_train, y_train, cv=k_fold, n_jobs=1, scoring=scoring)

In [21]:
print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)

Scores Mean: 94.0418 and (STDEV 0.0261)
Best result for fold 0
Best accuracy is 0.9743589743589743
Scores of all folds: [0.97435897 0.92307692 0.92307692 0.92307692 0.97435897 0.92307692
 0.94736842 0.94736842 0.97368421 0.89473684]


___

## Decision Tree

In [28]:
clf = DecisionTreeClassifier()
scoring = 'accuracy'
scores = cross_val_score(clf, X_train, y_train, cv=k_fold, n_jobs=1, scoring=scoring)

In [29]:
print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)

Scores Mean: 91.1808 and (STDEV 0.0354)
Best result for fold 0
Best accuracy is 0.9487179487179487
Scores of all folds: [0.94871795 0.92307692 0.87179487 0.94871795 0.94871795 0.87179487
 0.86842105 0.86842105 0.94736842 0.92105263]


____

## RandomForest

In [30]:
clf = RandomForestClassifier(n_estimators=5)
scoring = 'accuracy'
scores = cross_val_score(clf, X_train, y_train, cv=k_fold, n_jobs=1, scoring=scoring)

In [31]:
print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)

Scores Mean: 95.0742 and (STDEV 0.0339)
Best result for fold 1
Best accuracy is 1.0
Scores of all folds: [0.97435897 1.         0.94871795 0.92307692 0.94871795 0.92307692
 0.89473684 1.         0.97368421 0.92105263]


___

## SVC

In [32]:
clf = SVC()
scoring = 'accuracy'
scores = cross_val_score(clf, X_train, y_train, cv=k_fold, n_jobs=1, scoring=scoring)

In [33]:
print ("Scores Mean: %.4f and (STDEV %.4f)" % (np.mean(scores)*100, np.std(scores)))
print ("Best result for fold %s" % np.argmax(scores))
print ("Best accuracy is", (scores[np.argmax(scores)]))
print ("Scores of all folds:", scores)

Scores Mean: 91.4710 and (STDEV 0.0362)
Best result for fold 8
Best accuracy is 0.9736842105263158
Scores of all folds: [0.94871795 0.87179487 0.84615385 0.92307692 0.8974359  0.92307692
 0.94736842 0.92105263 0.97368421 0.89473684]


____

## Prediction on Test set

In [36]:
clf = SVC()
clf.fit( X_train, y_train)
print(clf.score(X_train, y_train))
print(clf.score(X_validation, y_validation))
print(clf.score(X_test, y_test))

0.9404145077720207
0.6030150753768844
0.5225806451612903


In [37]:
clf = RandomForestClassifier()
clf.fit( X_train, y_train)
print(clf.score(X_train, y_train))
print(clf.score(X_validation, y_validation))
print(clf.score(X_test, y_test))

1.0
0.6532663316582915
0.6129032258064516


In [None]:
  # models.append(('LR', LogisticRegression()))
    # models.append(('LDA', LinearDiscriminantAnalysis()))
    # models.append(('KNN', KNeighborsClassifier()))
    # models.append(('CART', DecisionTreeClassifier()))
# models.append(('NB', GaussianNB()))