In [1]:
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [2]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 5576677488370614915, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 9214062756
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 15196934700989649275
 physical_device_desc: "device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1"]

In [4]:
import os
import numpy as np
np.random.seed(777)

import keras.backend as K
from keras.preprocessing.image import ImageDataGenerator

import sklearn
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
import xgboost

from sklearn.ensemble import RandomForestClassifier

from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
img_height, img_width = 224, 224

nb_train_samples = 386
nb_validation_samples = 199
nb_test_samples = 155

In [6]:
train_dir = 'data_reduced/train/'
validation_dir = 'data_reduced/validation'
test_dir = 'data_reduced/test'

In [7]:
random_seed = np.random.seed(777)

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    featurewise_center=True,
    featurewise_std_normalization=True)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size = 1,
    seed = random_seed,
    shuffle = True,
    class_mode='categorical')

validation_generator = train_datagen.flow_from_directory(
    validation_dir,
    target_size=(img_height, img_width),
    batch_size = 1,
    seed = random_seed,
    shuffle = True,
    class_mode='categorical')

test_datagen = ImageDataGenerator(rescale=1. / 255)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=1,
    seed = random_seed,
    shuffle = False,
    class_mode='categorical')

Found 386 images belonging to 2 classes.
Found 199 images belonging to 2 classes.
Found 155 images belonging to 2 classes.


In [8]:
X_train, y_train = [], []
for _ in tqdm(range(nb_train_samples)):
    x, y = train_generator.next()
    X_train.append(x[0])
    y_train.append(y[0])
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)
y_train = np.argmax(y_train, axis=1)
# np.save('data/npy/X_train.npy', X_train)
# np.save('data/npy/y_train.npy', y_train)

100%|███████████████████████████████████████████████████████████████████████████████| 386/386 [00:00<00:00, 496.65it/s]


In [9]:
X_train = np.array([x.flatten() for x in X_train])

In [10]:
X_validation, y_validation = [], []
for _ in tqdm(range(nb_validation_samples)):
    x_val, y_val = validation_generator.next()
    X_validation.append(x_val[0])
    y_validation.append(y_val[0])
X_validation = np.asarray(X_validation)
y_validation = np.asarray(y_validation)
y_validation = np.argmax(y_validation, axis=1)
# np.save('data/npy/X_validation.npy', X_validation)
# np.save('data/npy/y_validation.npy', y_validation)

100%|███████████████████████████████████████████████████████████████████████████████| 199/199 [00:00<00:00, 579.08it/s]


In [11]:
X_validation = np.array([x.flatten() for x in X_validation])

In [12]:
X_test, y_test = [], []
for _ in tqdm(range(nb_test_samples)):
    x_t, y_t = test_generator.next()
    X_test.append(x_t[0])
    y_test.append(y_t[0])
X_test = np.asarray(X_test)
y_test = np.asarray(y_test)
y_test = np.argmax(y_test, axis=1)
# np.save('data/npy/X_test.npy', X_test)
# np.save('data/npy/y_test.npy', y_test)

100%|███████████████████████████████████████████████████████████████████████████████| 155/155 [00:00<00:00, 513.77it/s]


In [13]:
X_test = np.array([x.flatten() for x in X_test])

In [14]:
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_validation shape:", X_validation.shape)
print("y_validation shape:", y_validation.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)
# plt.imshow(X_train[0])
# plt.show()

X_train shape: (386, 150528)
y_train shape: (386,)
X_validation shape: (199, 150528)
y_validation shape: (199,)
X_test shape: (155, 150528)
y_test shape: (155,)


____

In [15]:
np.savez('models/bottleneck_datasets.npz', X_train, y_train, X_validation, y_validation)

data = np.load('models/bottleneck_datasets.npz')

train_x = data['arr_0']
train_y = data['arr_1']
val_x = data['arr_2']
val_y = data['arr_3']

train_x.shape, train_y.shape, val_x.shape, val_y.shape

((386, 150528), (386,), (199, 150528), (199,))

In [20]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

channels = 3

clf = RandomForestClassifier(n_estimators=100, max_depth=12, random_state=0, bootstrap=True)
clf.fit(np.reshape(train_x, (nb_train_samples,img_height*img_width*channels)), train_y)

valid_prediction = clf.predict(np.reshape(val_x, (nb_validation_samples, img_height*img_width*channels)))
test_prediction = clf.predict(np.reshape(X_test, (nb_test_samples, img_height*img_width*channels))) 

valid_score = clf.score(np.reshape(val_x, (nb_validation_samples, img_height*img_width*channels)), val_y)
test_score = clf.score(np.reshape(X_test, (nb_test_samples, img_height*img_width*channels)), y_test)

print("valid accuracy:", valid_score)
print("test accuracy:", test_score)

valid accuracy: 0.6231155778894473
test accuracy: 0.5870967741935483


### Predict on Test Set

In [None]:
# val_x_lda = lda.transform(X_validation)


# svms = [SVC(C=1.0, gamma='auto', probability=True, tol=0.001, 
#             verbose=False, decision_function_shape='ovr') for _ in range(3)]

# t0 = time.time()
# svms[0].fit(train_x_lda, train_y) # 
# svms[1].fit(train_x_pca, train_y) # 
# svms[2].fit(train_x, train_y)     # 
# print('finished in %.1fs' % (time.time() - t0))

# test_x_lda = lda.transform(X_test)
# test_x_pca = pca.transform(X_test)

# lda_preds  = svms[0].predict(val_x_lda)
# pca_preds  = svms[1].predict(val_x_pca)
# nodr_preds = svms[2].predict(val_x)

# print(accuracy_score(val_y, lda_preds), f1_score(val_y, lda_preds, average='macro'))
# print(accuracy_score(val_y, pca_preds), f1_score(val_y, pca_preds, average='macro'))
# print(accuracy_score(val_y, nodr_preds), f1_score(val_y, nodr_preds, average='macro'))

In [None]:
# # RBF SVC variance estimated s^2 = (0.003)^2

# def inverse_variance_weighting(predictions, variances):
#     if len(predictions) != len(variances):
#         print('Precictions-variances mismatch.')
#         sys.exit(0)
    
#     aa = np.sum(np.divide(predictions, variances))
#     bb = 1 / np.sum(variances)
    
#     return aa / bb

In [None]:
# voting_clf = VotingClassifier([model, svm_bottleneck])  # (inception model (first top model))
# voting_clf 